16edf637bSAxel Dörfler/*
2add77fd6SAxel Dörfler * Copyright 2001-2017, Axel D��rfler, axeld@pinc-software.de.
3fb02804aSAxel Dörfler * This file may be used under the terms of the MIT License.
4fb02804aSAxel Dörfler */
5c42ee134SAxel Dörfler
6fd91cf4dSAxel Dörfler
76edf637bSAxel Dörfler//! Transaction and logging
86edf637bSAxel Dörfler
9c42ee134SAxel Dörfler
10c42ee134SAxel Dörfler#include "Journal.h"
112e3477e3SAxel Dörfler
12c42ee134SAxel Dörfler#include "Debug.h"
132e3477e3SAxel Dörfler#include "Inode.h"
14c42ee134SAxel Dörfler
15c42ee134SAxel Dörfler
16a59b9affSAxel Dörflerstruct run_array {
17a59b9affSAxel Dörfler	int32		count;
18ae16ab9dSAxel Dörfler	int32		max_runs;
19a59b9affSAxel Dörfler	block_run	runs[0];
20a59b9affSAxel Dörfler
21ae16ab9dSAxel Dörfler	void Init(int32 blockSize);
2202c8f6c8SAxel Dörfler	void Insert(block_run& run);
23ae16ab9dSAxel Dörfler
24a59b9affSAxel Dörfler	int32 CountRuns() const { return BFS_ENDIAN_TO_HOST_INT32(count); }
25bd0b075cSAxel Dörfler	int32 MaxRuns() const { return BFS_ENDIAN_TO_HOST_INT32(max_runs) - 1; }
26bd0b075cSAxel Dörfler		// that -1 accounts for an off-by-one error in Be's BFS implementation
2702c8f6c8SAxel Dörfler	const block_run& RunAt(int32 i) const { return runs[i]; }
28a59b9affSAxel Dörfler
29f4260712SAxel Dörfler	static int32 MaxRuns(int32 blockSize);
30ae16ab9dSAxel Dörfler
31ae16ab9dSAxel Dörflerprivate:
3202c8f6c8SAxel Dörfler	static int _Compare(block_run& a, block_run& b);
3302c8f6c8SAxel Dörfler	int32 _FindInsertionIndex(block_run& run);
34a59b9affSAxel Dörfler};
35a59b9affSAxel Dörfler
36a59b9affSAxel Dörflerclass RunArrays {
3702c8f6c8SAxel Dörflerpublic:
3802c8f6c8SAxel Dörfler							RunArrays(Journal* journal);
3902c8f6c8SAxel Dörfler							~RunArrays();
40a59b9affSAxel Dörfler
4102c8f6c8SAxel Dörfler			status_t		Insert(off_t blockNumber);
42a59b9affSAxel Dörfler
4302c8f6c8SAxel Dörfler			run_array*		ArrayAt(int32 i) { return fArrays.Array()[i]; }
4402c8f6c8SAxel Dörfler			int32			CountArrays() const { return fArrays.CountItems(); }
45a59b9affSAxel Dörfler
4602c8f6c8SAxel Dörfler			uint32			CountBlocks() const { return fBlockCount; }
4702c8f6c8SAxel Dörfler			uint32			LogEntryLength() const
4802c8f6c8SAxel Dörfler								{ return CountBlocks() + CountArrays(); }
49d89cda9fSAxel Dörfler
5002c8f6c8SAxel Dörfler			int32			MaxArrayLength();
51a59b9affSAxel Dörfler
5202c8f6c8SAxel Dörflerprivate:
5302c8f6c8SAxel Dörfler			status_t		_AddArray();
5402c8f6c8SAxel Dörfler			bool			_ContainsRun(block_run& run);
5502c8f6c8SAxel Dörfler			bool			_AddRun(block_run& run);
5602c8f6c8SAxel Dörfler
5702c8f6c8SAxel Dörfler			Journal*		fJournal;
5802c8f6c8SAxel Dörfler			uint32			fBlockCount;
5902c8f6c8SAxel Dörfler			Stack<run_array*> fArrays;
6002c8f6c8SAxel Dörfler			run_array*		fLastArray;
61c0614f32SAxel Dörfler};
62c0614f32SAxel Dörfler
63a59b9affSAxel Dörflerclass LogEntry : public DoublyLinkedListLinkImpl<LogEntry> {
6402c8f6c8SAxel Dörflerpublic:
6502c8f6c8SAxel Dörfler							LogEntry(Journal* journal, uint32 logStart,
6602c8f6c8SAxel Dörfler								uint32 length);
6702c8f6c8SAxel Dörfler							~LogEntry();
68a59b9affSAxel Dörfler
6902c8f6c8SAxel Dörfler			uint32			Start() const { return fStart; }
7002c8f6c8SAxel Dörfler			uint32			Length() const { return fLength; }
71a59b9affSAxel Dörfler
72797a92d8SAxel Dörfler#ifdef BFS_DEBUGGER_COMMANDS
7302c8f6c8SAxel Dörfler			void			SetTransactionID(int32 id) { fTransactionID = id; }
7402c8f6c8SAxel Dörfler			int32			TransactionID() const { return fTransactionID; }
75797a92d8SAxel Dörfler#endif
76797a92d8SAxel Dörfler
7702c8f6c8SAxel Dörfler			Journal*		GetJournal() { return fJournal; }
78a59b9affSAxel Dörfler
7902c8f6c8SAxel Dörflerprivate:
8002c8f6c8SAxel Dörfler			Journal*		fJournal;
8102c8f6c8SAxel Dörfler			uint32			fStart;
8202c8f6c8SAxel Dörfler			uint32			fLength;
83797a92d8SAxel Dörfler#ifdef BFS_DEBUGGER_COMMANDS
8402c8f6c8SAxel Dörfler			int32			fTransactionID;
85797a92d8SAxel Dörfler#endif
86797a92d8SAxel Dörfler};
87797a92d8SAxel Dörfler
88797a92d8SAxel Dörfler
89de9c0613SAxel Dörfler#if BFS_TRACING && !defined(FS_SHELL) && !defined(_BOOT_MODE)
90797a92d8SAxel Dörflernamespace BFSJournalTracing {
91797a92d8SAxel Dörfler
92797a92d8SAxel Dörflerclass LogEntry : public AbstractTraceEntry {
9302c8f6c8SAxel Dörflerpublic:
9402c8f6c8SAxel Dörfler	LogEntry(::LogEntry* entry, off_t logPosition, bool started)
9502c8f6c8SAxel Dörfler		:
9602c8f6c8SAxel Dörfler		fEntry(entry),
97797a92d8SAxel Dörfler#ifdef BFS_DEBUGGER_COMMANDS
9802c8f6c8SAxel Dörfler		fTransactionID(entry->TransactionID()),
99797a92d8SAxel Dörfler#endif
10002c8f6c8SAxel Dörfler		fStart(entry->Start()),
10102c8f6c8SAxel Dörfler		fLength(entry->Length()),
10202c8f6c8SAxel Dörfler		fLogPosition(logPosition),
10302c8f6c8SAxel Dörfler		fStarted(started)
10402c8f6c8SAxel Dörfler	{
10502c8f6c8SAxel Dörfler		Initialized();
10602c8f6c8SAxel Dörfler	}
107797a92d8SAxel Dörfler
10802c8f6c8SAxel Dörfler	virtual void AddDump(TraceOutput& out)
10902c8f6c8SAxel Dörfler	{
110797a92d8SAxel Dörfler#ifdef BFS_DEBUGGER_COMMANDS
11102c8f6c8SAxel Dörfler		out.Print("bfs:j:%s entry %p id %ld, start %lu, length %lu, log %s "
11202c8f6c8SAxel Dörfler			"%lu\n", fStarted ? "Started" : "Written", fEntry,
11302c8f6c8SAxel Dörfler			fTransactionID, fStart, fLength,
11402c8f6c8SAxel Dörfler			fStarted ? "end" : "start", fLogPosition);
115797a92d8SAxel Dörfler#else
11602c8f6c8SAxel Dörfler		out.Print("bfs:j:%s entry %p start %lu, length %lu, log %s %lu\n",
11702c8f6c8SAxel Dörfler			fStarted ? "Started" : "Written", fEntry, fStart, fLength,
11802c8f6c8SAxel Dörfler			fStarted ? "end" : "start", fLogPosition);
119797a92d8SAxel Dörfler#endif
12002c8f6c8SAxel Dörfler	}
121797a92d8SAxel Dörfler
12202c8f6c8SAxel Dörflerprivate:
12302c8f6c8SAxel Dörfler	::LogEntry*	fEntry;
124797a92d8SAxel Dörfler#ifdef BFS_DEBUGGER_COMMANDS
12502c8f6c8SAxel Dörfler	int32		fTransactionID;
126797a92d8SAxel Dörfler#endif
12702c8f6c8SAxel Dörfler	uint32		fStart;
12802c8f6c8SAxel Dörfler	uint32		fLength;
12902c8f6c8SAxel Dörfler	uint32		fLogPosition;
13002c8f6c8SAxel Dörfler	bool		fStarted;
131a59b9affSAxel Dörfler};
132a59b9affSAxel Dörfler
133797a92d8SAxel Dörfler}	// namespace BFSJournalTracing
134797a92d8SAxel Dörfler
135797a92d8SAxel Dörfler#	define T(x) new(std::nothrow) BFSJournalTracing::x;
136797a92d8SAxel Dörfler#else
137797a92d8SAxel Dörfler#	define T(x) ;
138797a92d8SAxel Dörfler#endif
139797a92d8SAxel Dörfler
140a59b9affSAxel Dörfler
141a59b9affSAxel Dörfler//	#pragma mark -
142a59b9affSAxel Dörfler
143a59b9affSAxel Dörfler
144a59b9affSAxel Dörflerstatic void
14502c8f6c8SAxel Dörfleradd_to_iovec(iovec* vecs, int32& index, int32 max, const void* address,
1466edf637bSAxel Dörfler	size_t size)
147a59b9affSAxel Dörfler{
1486edf637bSAxel Dörfler	if (index > 0 && (addr_t)vecs[index - 1].iov_base
1496edf637bSAxel Dörfler			+ vecs[index - 1].iov_len == (addr_t)address) {
150a59b9affSAxel Dörfler		// the iovec can be combined with the previous one
151a59b9affSAxel Dörfler		vecs[index - 1].iov_len += size;
152a59b9affSAxel Dörfler		return;
153a59b9affSAxel Dörfler	}
154a59b9affSAxel Dörfler
155a59b9affSAxel Dörfler	if (index == max)
156a59b9affSAxel Dörfler		panic("no more space for iovecs!");
157a59b9affSAxel Dörfler
158a59b9affSAxel Dörfler	// we need to start a new iovec
15902c8f6c8SAxel Dörfler	vecs[index].iov_base = const_cast<void*>(address);
160a59b9affSAxel Dörfler	vecs[index].iov_len = size;
161a59b9affSAxel Dörfler	index++;
162a59b9affSAxel Dörfler}
163a59b9affSAxel Dörfler
164a59b9affSAxel Dörfler
165ae16ab9dSAxel Dörfler//	#pragma mark - LogEntry
166a59b9affSAxel Dörfler
167a59b9affSAxel Dörfler
16802c8f6c8SAxel DörflerLogEntry::LogEntry(Journal* journal, uint32 start, uint32 length)
169a59b9affSAxel Dörfler	:
170a59b9affSAxel Dörfler	fJournal(journal),
171a59b9affSAxel Dörfler	fStart(start),
172a59b9affSAxel Dörfler	fLength(length)
173a59b9affSAxel Dörfler{
174a59b9affSAxel Dörfler}
175a59b9affSAxel Dörfler
176a59b9affSAxel Dörfler
177a59b9affSAxel DörflerLogEntry::~LogEntry()
178a59b9affSAxel Dörfler{
179a59b9affSAxel Dörfler}
180a59b9affSAxel Dörfler
181a59b9affSAxel Dörfler
182ae16ab9dSAxel Dörfler//	#pragma mark - run_array
183ae16ab9dSAxel Dörfler
184ae16ab9dSAxel Dörfler
185ae16ab9dSAxel Dörfler/*!	The run_array's size equals the block size of the BFS volume, so we
186ae16ab9dSAxel Dörfler	cannot use a (non-overridden) new.
187ae16ab9dSAxel Dörfler	This makes a freshly allocated run_array ready to run.
188ae16ab9dSAxel Dörfler*/
189ae16ab9dSAxel Dörflervoid
190ae16ab9dSAxel Dörflerrun_array::Init(int32 blockSize)
191ae16ab9dSAxel Dörfler{
192ae16ab9dSAxel Dörfler	memset(this, 0, blockSize);
193ae16ab9dSAxel Dörfler	count = 0;
194ae16ab9dSAxel Dörfler	max_runs = HOST_ENDIAN_TO_BFS_INT32(MaxRuns(blockSize));
195ae16ab9dSAxel Dörfler}
196ae16ab9dSAxel Dörfler
197ae16ab9dSAxel Dörfler
198ae16ab9dSAxel Dörfler/*!	Inserts the block_run into the array. You will have to make sure the
199ae16ab9dSAxel Dörfler	array is large enough to contain the entry before calling this function.
200ae16ab9dSAxel Dörfler*/
201ae16ab9dSAxel Dörflervoid
20202c8f6c8SAxel Dörflerrun_array::Insert(block_run& run)
203ae16ab9dSAxel Dörfler{
204ae16ab9dSAxel Dörfler	int32 index = _FindInsertionIndex(run);
205ae16ab9dSAxel Dörfler	if (index == -1) {
206ae16ab9dSAxel Dörfler		// add to the end
207ae16ab9dSAxel Dörfler		runs[CountRuns()] = run;
208ae16ab9dSAxel Dörfler	} else {
209ae16ab9dSAxel Dörfler		// insert at index
210ae16ab9dSAxel Dörfler		memmove(&runs[index + 1], &runs[index],
211ae16ab9dSAxel Dörfler			(CountRuns() - index) * sizeof(off_t));
212ae16ab9dSAxel Dörfler		runs[index] = run;
213ae16ab9dSAxel Dörfler	}
214ae16ab9dSAxel Dörfler
215ae16ab9dSAxel Dörfler	count = HOST_ENDIAN_TO_BFS_INT32(CountRuns() + 1);
216ae16ab9dSAxel Dörfler}
217ae16ab9dSAxel Dörfler
218ae16ab9dSAxel Dörfler
219f4260712SAxel Dörfler/*static*/ int32
220f4260712SAxel Dörflerrun_array::MaxRuns(int32 blockSize)
221f4260712SAxel Dörfler{
222f4260712SAxel Dörfler	// For whatever reason, BFS restricts the maximum array size
223f4260712SAxel Dörfler	uint32 maxCount = (blockSize - sizeof(run_array)) / sizeof(block_run);
224f4260712SAxel Dörfler	if (maxCount < 128)
225f4260712SAxel Dörfler		return maxCount;
226f4260712SAxel Dörfler
227f4260712SAxel Dörfler	return 127;
228f4260712SAxel Dörfler}
229f4260712SAxel Dörfler
230f4260712SAxel Dörfler
231ae16ab9dSAxel Dörfler/*static*/ int
23202c8f6c8SAxel Dörflerrun_array::_Compare(block_run& a, block_run& b)
233ae16ab9dSAxel Dörfler{
234ae16ab9dSAxel Dörfler	int cmp = a.AllocationGroup() - b.AllocationGroup();
235ae16ab9dSAxel Dörfler	if (cmp == 0)
236ae16ab9dSAxel Dörfler		return a.Start() - b.Start();
237ae16ab9dSAxel Dörfler
238ae16ab9dSAxel Dörfler	return cmp;
239ae16ab9dSAxel Dörfler}
240ae16ab9dSAxel Dörfler
241ae16ab9dSAxel Dörfler
242ae16ab9dSAxel Dörflerint32
24302c8f6c8SAxel Dörflerrun_array::_FindInsertionIndex(block_run& run)
244ae16ab9dSAxel Dörfler{
245ae16ab9dSAxel Dörfler	int32 min = 0, max = CountRuns() - 1;
246ae16ab9dSAxel Dörfler	int32 i = 0;
247ae16ab9dSAxel Dörfler	if (max >= 8) {
248ae16ab9dSAxel Dörfler		while (min <= max) {
249ae16ab9dSAxel Dörfler			i = (min + max) / 2;
250ae16ab9dSAxel Dörfler
251ae16ab9dSAxel Dörfler			int cmp = _Compare(runs[i], run);
252ae16ab9dSAxel Dörfler			if (cmp < 0)
253ae16ab9dSAxel Dörfler				min = i + 1;
254ae16ab9dSAxel Dörfler			else if (cmp > 0)
255ae16ab9dSAxel Dörfler				max = i - 1;
256ae16ab9dSAxel Dörfler			else
257ae16ab9dSAxel Dörfler				return -1;
258ae16ab9dSAxel Dörfler		}
259ae16ab9dSAxel Dörfler
260ae16ab9dSAxel Dörfler		if (_Compare(runs[i], run) < 0)
261ae16ab9dSAxel Dörfler			i++;
262ae16ab9dSAxel Dörfler	} else {
263ae16ab9dSAxel Dörfler		for (; i <= max; i++) {
264ae16ab9dSAxel Dörfler			if (_Compare(runs[i], run) > 0)
265ae16ab9dSAxel Dörfler				break;
266ae16ab9dSAxel Dörfler		}
267ae16ab9dSAxel Dörfler		if (i == count)
268ae16ab9dSAxel Dörfler			return -1;
269ae16ab9dSAxel Dörfler	}
270ae16ab9dSAxel Dörfler
271ae16ab9dSAxel Dörfler	return i;
272ae16ab9dSAxel Dörfler}
273ae16ab9dSAxel Dörfler
274ae16ab9dSAxel Dörfler
275ae16ab9dSAxel Dörfler//	#pragma mark - RunArrays
276a59b9affSAxel Dörfler
277a59b9affSAxel Dörfler
27802c8f6c8SAxel DörflerRunArrays::RunArrays(Journal* journal)
279a59b9affSAxel Dörfler	:
280a59b9affSAxel Dörfler	fJournal(journal),
281d89cda9fSAxel Dörfler	fBlockCount(0),
282a59b9affSAxel Dörfler	fArrays(),
283a59b9affSAxel Dörfler	fLastArray(NULL)
284a59b9affSAxel Dörfler{
285a59b9affSAxel Dörfler}
286a59b9affSAxel Dörfler
287a59b9affSAxel Dörfler
288a59b9affSAxel DörflerRunArrays::~RunArrays()
289a59b9affSAxel Dörfler{
29002c8f6c8SAxel Dörfler	run_array* array;
291a59b9affSAxel Dörfler	while (fArrays.Pop(&array))
292a59b9affSAxel Dörfler		free(array);
293a59b9affSAxel Dörfler}
294a59b9affSAxel Dörfler
295a59b9affSAxel Dörfler
296a59b9affSAxel Dörflerbool
29702c8f6c8SAxel DörflerRunArrays::_ContainsRun(block_run& run)
298a59b9affSAxel Dörfler{
299a59b9affSAxel Dörfler	for (int32 i = 0; i < CountArrays(); i++) {
30002c8f6c8SAxel Dörfler		run_array* array = ArrayAt(i);
301a59b9affSAxel Dörfler
302a59b9affSAxel Dörfler		for (int32 j = 0; j < array->CountRuns(); j++) {
30302c8f6c8SAxel Dörfler			block_run& arrayRun = array->runs[j];
304a59b9affSAxel Dörfler			if (run.AllocationGroup() != arrayRun.AllocationGroup())
305a59b9affSAxel Dörfler				continue;
306a59b9affSAxel Dörfler
307a59b9affSAxel Dörfler			if (run.Start() >= arrayRun.Start()
3086edf637bSAxel Dörfler				&& run.Start() + run.Length()
3096edf637bSAxel Dörfler					<= arrayRun.Start() + arrayRun.Length())
310a59b9affSAxel Dörfler				return true;
311a59b9affSAxel Dörfler		}
312a59b9affSAxel Dörfler	}
313a59b9affSAxel Dörfler
314a59b9affSAxel Dörfler	return false;
315a59b9affSAxel Dörfler}
316a59b9affSAxel Dörfler
317a59b9affSAxel Dörfler
3186edf637bSAxel Dörfler/*!	Adds the specified block_run into the array.
3196edf637bSAxel Dörfler	Note: it doesn't support overlapping - it must only be used
3206edf637bSAxel Dörfler	with block_runs of length 1!
3216edf637bSAxel Dörfler*/
322a59b9affSAxel Dörflerbool
32302c8f6c8SAxel DörflerRunArrays::_AddRun(block_run& run)
324a59b9affSAxel Dörfler{
325a59b9affSAxel Dörfler	ASSERT(run.length == 1);
326a59b9affSAxel Dörfler
327e620c932SAxel Dörfler	// Be's BFS log replay routine can only deal with block_runs of size 1
328e620c932SAxel Dörfler	// A pity, isn't it? Too sad we have to be compatible.
329a59b9affSAxel Dörfler
330a59b9affSAxel Dörfler	if (fLastArray == NULL || fLastArray->CountRuns() == fLastArray->MaxRuns())
331a59b9affSAxel Dörfler		return false;
332a59b9affSAxel Dörfler
333ae16ab9dSAxel Dörfler	fLastArray->Insert(run);
334d89cda9fSAxel Dörfler	fBlockCount++;
335a59b9affSAxel Dörfler	return true;
336a59b9affSAxel Dörfler}
337a59b9affSAxel Dörfler
338a59b9affSAxel Dörfler
339a59b9affSAxel Dörflerstatus_t
340a59b9affSAxel DörflerRunArrays::_AddArray()
341a59b9affSAxel Dörfler{
342a59b9affSAxel Dörfler	int32 blockSize = fJournal->GetVolume()->BlockSize();
343ae16ab9dSAxel Dörfler
34402c8f6c8SAxel Dörfler	run_array* array = (run_array*)malloc(blockSize);
345a59b9affSAxel Dörfler	if (array == NULL)
346a59b9affSAxel Dörfler		return B_NO_MEMORY;
347a59b9affSAxel Dörfler
348a59b9affSAxel Dörfler	if (fArrays.Push(array) != B_OK) {
349a59b9affSAxel Dörfler		free(array);
350a59b9affSAxel Dörfler		return B_NO_MEMORY;
351a59b9affSAxel Dörfler	}
352a59b9affSAxel Dörfler
353ae16ab9dSAxel Dörfler	array->Init(blockSize);
354a59b9affSAxel Dörfler	fLastArray = array;
355a59b9affSAxel Dörfler	return B_OK;
356a59b9affSAxel Dörfler}
357a59b9affSAxel Dörfler
358a59b9affSAxel Dörfler
359a59b9affSAxel Dörflerstatus_t
360a59b9affSAxel DörflerRunArrays::Insert(off_t blockNumber)
361a59b9affSAxel Dörfler{
36202c8f6c8SAxel Dörfler	Volume* volume = fJournal->GetVolume();
363a59b9affSAxel Dörfler	block_run run = volume->ToBlockRun(blockNumber);
364a59b9affSAxel Dörfler
365a59b9affSAxel Dörfler	if (fLastArray != NULL) {
366a59b9affSAxel Dörfler		// check if the block is already in the array
367a59b9affSAxel Dörfler		if (_ContainsRun(run))
368a59b9affSAxel Dörfler			return B_OK;
369a59b9affSAxel Dörfler	}
370a59b9affSAxel Dörfler
371a59b9affSAxel Dörfler	// insert block into array
372a59b9affSAxel Dörfler
373a59b9affSAxel Dörfler	if (!_AddRun(run)) {
374a59b9affSAxel Dörfler		// array is full
375ae16ab9dSAxel Dörfler		if (_AddArray() != B_OK || !_AddRun(run))
376a59b9affSAxel Dörfler			return B_NO_MEMORY;
377a59b9affSAxel Dörfler	}
378a59b9affSAxel Dörfler
379a59b9affSAxel Dörfler	return B_OK;
380a59b9affSAxel Dörfler}
381a59b9affSAxel Dörfler
382a59b9affSAxel Dörfler
383a59b9affSAxel Dörflerint32
384a59b9affSAxel DörflerRunArrays::MaxArrayLength()
385a59b9affSAxel Dörfler{
386a59b9affSAxel Dörfler	int32 max = 0;
387a59b9affSAxel Dörfler	for (int32 i = 0; i < CountArrays(); i++) {
388ae16ab9dSAxel Dörfler		if (ArrayAt(i)->CountRuns() > max)
389ae16ab9dSAxel Dörfler			max = ArrayAt(i)->CountRuns();
390a59b9affSAxel Dörfler	}
391a59b9affSAxel Dörfler
392a59b9affSAxel Dörfler	return max;
393a59b9affSAxel Dörfler}
394a59b9affSAxel Dörfler
395a59b9affSAxel Dörfler
396ae16ab9dSAxel Dörfler//	#pragma mark - Journal
397a59b9affSAxel Dörfler
398c0614f32SAxel Dörfler
39902c8f6c8SAxel DörflerJournal::Journal(Volume* volume)
400c42ee134SAxel Dörfler	:
401c42ee134SAxel Dörfler	fVolume(volume),
402c42ee134SAxel Dörfler	fOwner(NULL),
403eeb28bb7SAxel Dörfler	fLogSize(volume->Log().Length()),
4045d0afa4eSAxel Dörfler	fMaxTransactionSize(fLogSize / 2 - 5),
405c42ee134SAxel Dörfler	fUsed(0),
406923e3aafSAxel Dörfler	fUnwrittenTransactions(0),
4076c3348f1SAxel Dörfler	fHasSubtransaction(false),
4086c3348f1SAxel Dörfler	fSeparateSubTransactions(false)
409c42ee134SAxel Dörfler{
41003fa417bSAxel Dörfler	recursive_lock_init(&fLock, "bfs journal");
41103fa417bSAxel Dörfler	mutex_init(&fEntriesLock, "bfs journal entries");
41214b62ae4SAugustin Cavalier
41314b62ae4SAugustin Cavalier	fLogFlusherSem = create_sem(0, "bfs log flusher");
41414b62ae4SAugustin Cavalier	fLogFlusher = spawn_kernel_thread(&Journal::_LogFlusher, "bfs log flusher",
41514b62ae4SAugustin Cavalier		B_NORMAL_PRIORITY, this);
41614b62ae4SAugustin Cavalier	if (fLogFlusher > 0)
41714b62ae4SAugustin Cavalier		resume_thread(fLogFlusher);
418c42ee134SAxel Dörfler}
419c42ee134SAxel Dörfler
420c42ee134SAxel Dörfler
421c42ee134SAxel DörflerJournal::~Journal()
422c42ee134SAxel Dörfler{
423c42ee134SAxel Dörfler	FlushLogAndBlocks();
42403fa417bSAxel Dörfler
42503fa417bSAxel Dörfler	recursive_lock_destroy(&fLock);
42603fa417bSAxel Dörfler	mutex_destroy(&fEntriesLock);
42714b62ae4SAugustin Cavalier
42814b62ae4SAugustin Cavalier	sem_id logFlusher = fLogFlusherSem;
42914b62ae4SAugustin Cavalier	fLogFlusherSem = -1;
43014b62ae4SAugustin Cavalier	delete_sem(logFlusher);
43114b62ae4SAugustin Cavalier	wait_for_thread(fLogFlusher, NULL);
432c42ee134SAxel Dörfler}
433c42ee134SAxel Dörfler
434c42ee134SAxel Dörfler
435c42ee134SAxel Dörflerstatus_t
436c42ee134SAxel DörflerJournal::InitCheck()
437c42ee134SAxel Dörfler{
43803fa417bSAxel Dörfler	return B_OK;
439c42ee134SAxel Dörfler}
440c42ee134SAxel Dörfler
441c42ee134SAxel Dörfler
442772506f8SAxel Dörfler/*!	\brief Does a very basic consistency check of the run array.
443772506f8SAxel Dörfler	It will check the maximum run count as well as if all of the runs fall
444772506f8SAxel Dörfler	within a the volume.
445772506f8SAxel Dörfler*/
446c42ee134SAxel Dörflerstatus_t
44702c8f6c8SAxel DörflerJournal::_CheckRunArray(const run_array* array)
448c42ee134SAxel Dörfler{
449923e3aafSAxel Dörfler	int32 maxRuns = run_array::MaxRuns(fVolume->BlockSize()) - 1;
450923e3aafSAxel Dörfler		// the -1 works around an off-by-one bug in Be's BFS implementation,
451923e3aafSAxel Dörfler		// same as in run_array::MaxRuns()
452a59b9affSAxel Dörfler	if (array->MaxRuns() != maxRuns
453a59b9affSAxel Dörfler		|| array->CountRuns() > maxRuns
454a59b9affSAxel Dörfler		|| array->CountRuns() <= 0) {
455c391f84bSIngo Weinhold		dprintf("run count: %d, array max: %d, max runs: %d\n",
456c391f84bSIngo Weinhold			(int)array->CountRuns(), (int)array->MaxRuns(), (int)maxRuns);
457a59b9affSAxel Dörfler		FATAL(("Log entry has broken header!\n"));
458a59b9affSAxel Dörfler		return B_ERROR;
459a59b9affSAxel Dörfler	}
460a59b9affSAxel Dörfler
461a59b9affSAxel Dörfler	for (int32 i = 0; i < array->CountRuns(); i++) {
462a59b9affSAxel Dörfler		if (fVolume->ValidateBlockRun(array->RunAt(i)) != B_OK)
463a59b9affSAxel Dörfler			return B_ERROR;
464a59b9affSAxel Dörfler	}
465a59b9affSAxel Dörfler
46657808587SAxel Dörfler	PRINT(("Log entry has %ld entries\n", array->CountRuns()));
467c42ee134SAxel Dörfler	return B_OK;
468c42ee134SAxel Dörfler}
469c42ee134SAxel Dörfler
470c42ee134SAxel Dörfler
4716edf637bSAxel Dörfler/*!	Replays an entry in the log.
4726edf637bSAxel Dörfler	\a _start points to the entry in the log, and will be bumped to the next
4736edf637bSAxel Dörfler	one if replaying succeeded.
4746edf637bSAxel Dörfler*/
475c42ee134SAxel Dörflerstatus_t
47602c8f6c8SAxel DörflerJournal::_ReplayRunArray(int32* _start)
477c42ee134SAxel Dörfler{
478a59b9affSAxel Dörfler	PRINT(("ReplayRunArray(start = %ld)\n", *_start));
479c42ee134SAxel Dörfler
480c42ee134SAxel Dörfler	off_t logOffset = fVolume->ToBlock(fVolume->Log());
481772506f8SAxel Dörfler	off_t firstBlockNumber = *_start % fLogSize;
482c42ee134SAxel Dörfler
483a59b9affSAxel Dörfler	CachedBlock cachedArray(fVolume);
484fb02804aSAxel Dörfler
48502c8f6c8SAxel Dörfler	const run_array* array = (const run_array*)cachedArray.SetTo(logOffset
486772506f8SAxel Dörfler		+ firstBlockNumber);
487a59b9affSAxel Dörfler	if (array == NULL)
488a59b9affSAxel Dörfler		return B_IO_ERROR;
489c42ee134SAxel Dörfler
490a59b9affSAxel Dörfler	if (_CheckRunArray(array) < B_OK)
491a59b9affSAxel Dörfler		return B_BAD_DATA;
492c42ee134SAxel Dörfler
493772506f8SAxel Dörfler	// First pass: check integrity of the blocks in the run array
494c42ee134SAxel Dörfler
495a59b9affSAxel Dörfler	CachedBlock cached(fVolume);
496772506f8SAxel Dörfler
497772506f8SAxel Dörfler	firstBlockNumber = (firstBlockNumber + 1) % fLogSize;
498772506f8SAxel Dörfler	off_t blockNumber = firstBlockNumber;
499772506f8SAxel Dörfler	int32 blockSize = fVolume->BlockSize();
500772506f8SAxel Dörfler
501a59b9affSAxel Dörfler	for (int32 index = 0; index < array->CountRuns(); index++) {
50202c8f6c8SAxel Dörfler		const block_run& run = array->RunAt(index);
503a59b9affSAxel Dörfler
504a59b9affSAxel Dörfler		off_t offset = fVolume->ToOffset(run);
505a59b9affSAxel Dörfler		for (int32 i = 0; i < run.Length(); i++) {
50602c8f6c8SAxel Dörfler			const uint8* data = cached.SetTo(logOffset + blockNumber);
507a59b9affSAxel Dörfler			if (data == NULL)
508c42ee134SAxel Dörfler				RETURN_ERROR(B_IO_ERROR);
509c42ee134SAxel Dörfler
5100dc57dbeSAxel Dörfler			// TODO: eventually check other well known offsets, like the
5110dc57dbeSAxel Dörfler			// root and index dirs
5120dc57dbeSAxel Dörfler			if (offset == 0) {
51346cf7a5aSPrzemysław Buczkowski				// This log entry writes over the superblock - check if
5140dc57dbeSAxel Dörfler				// it's valid!
515f4260712SAxel Dörfler				if (Volume::CheckSuperBlock(data) != B_OK) {
51646cf7a5aSPrzemysław Buczkowski					FATAL(("Log contains invalid superblock!\n"));
5170dc57dbeSAxel Dörfler					RETURN_ERROR(B_BAD_DATA);
518f4260712SAxel Dörfler				}
5190dc57dbeSAxel Dörfler			}
5200dc57dbeSAxel Dörfler
521772506f8SAxel Dörfler			blockNumber = (blockNumber + 1) % fLogSize;
522772506f8SAxel Dörfler			offset += blockSize;
523772506f8SAxel Dörfler		}
524772506f8SAxel Dörfler	}
525772506f8SAxel Dörfler
526772506f8SAxel Dörfler	// Second pass: write back its blocks
527772506f8SAxel Dörfler
528772506f8SAxel Dörfler	blockNumber = firstBlockNumber;
529772506f8SAxel Dörfler	int32 count = 1;
530772506f8SAxel Dörfler
531772506f8SAxel Dörfler	for (int32 index = 0; index < array->CountRuns(); index++) {
53202c8f6c8SAxel Dörfler		const block_run& run = array->RunAt(index);
5338859eeabSIngo Weinhold		INFORM(("replay block run %u:%u:%u in log at %" B_PRIdOFF "!\n",
534772506f8SAxel Dörfler			(int)run.AllocationGroup(), run.Start(), run.Length(), blockNumber));
535772506f8SAxel Dörfler
536772506f8SAxel Dörfler		off_t offset = fVolume->ToOffset(run);
537772506f8SAxel Dörfler		for (int32 i = 0; i < run.Length(); i++) {
53802c8f6c8SAxel Dörfler			const uint8* data = cached.SetTo(logOffset + blockNumber);
539772506f8SAxel Dörfler			if (data == NULL)
540772506f8SAxel Dörfler				RETURN_ERROR(B_IO_ERROR);
541772506f8SAxel Dörfler
5420dc57dbeSAxel Dörfler			ssize_t written = write_pos(fVolume->Device(), offset, data,
5430dc57dbeSAxel Dörfler				blockSize);
544c42ee134SAxel Dörfler			if (written != blockSize)
545c42ee134SAxel Dörfler				RETURN_ERROR(B_IO_ERROR);
546c42ee134SAxel Dörfler
547c42ee134SAxel Dörfler			blockNumber = (blockNumber + 1) % fLogSize;
5480dc57dbeSAxel Dörfler			offset += blockSize;
549a59b9affSAxel Dörfler			count++;
550c42ee134SAxel Dörfler		}
551c42ee134SAxel Dörfler	}
552a59b9affSAxel Dörfler
553a59b9affSAxel Dörfler	*_start += count;
554c42ee134SAxel Dörfler	return B_OK;
555c42ee134SAxel Dörfler}
556c42ee134SAxel Dörfler
557c42ee134SAxel Dörfler
5586edf637bSAxel Dörfler/*!	Replays all log entries - this will put the disk into a
5596edf637bSAxel Dörfler	consistent and clean state, if it was not correctly unmounted
5606edf637bSAxel Dörfler	before.
5616edf637bSAxel Dörfler	This method is called by Journal::InitCheck() if the log start
5626edf637bSAxel Dörfler	and end pointer don't match.
5636edf637bSAxel Dörfler*/
564c42ee134SAxel Dörflerstatus_t
565c42ee134SAxel DörflerJournal::ReplayLog()
566c42ee134SAxel Dörfler{
56764b7ef1dSAxel Dörfler	// TODO: this logic won't work whenever the size of the pending transaction
56864b7ef1dSAxel Dörfler	//	equals the size of the log (happens with the original BFS only)
56964b7ef1dSAxel Dörfler	if (fVolume->LogStart() == fVolume->LogEnd())
57064b7ef1dSAxel Dörfler		return B_OK;
57164b7ef1dSAxel Dörfler
572c42ee134SAxel Dörfler	INFORM(("Replay log, disk was not correctly unmounted...\n"));
573c42ee134SAxel Dörfler
57466ae5b2dSAxel Dörfler	if (fVolume->SuperBlock().flags != SUPER_BLOCK_DISK_DIRTY) {
57566ae5b2dSAxel Dörfler		INFORM(("log_start and log_end differ, but disk is marked clean - "
57666ae5b2dSAxel Dörfler			"trying to replay log...\n"));
57766ae5b2dSAxel Dörfler	}
57866ae5b2dSAxel Dörfler
57966ae5b2dSAxel Dörfler	if (fVolume->IsReadOnly())
58066ae5b2dSAxel Dörfler		return B_READ_ONLY_DEVICE;
58164b7ef1dSAxel Dörfler
582c42ee134SAxel Dörfler	int32 start = fVolume->LogStart();
583c42ee134SAxel Dörfler	int32 lastStart = -1;
584c42ee134SAxel Dörfler	while (true) {
585c42ee134SAxel Dörfler		// stop if the log is completely flushed
586c42ee134SAxel Dörfler		if (start == fVolume->LogEnd())
587c42ee134SAxel Dörfler			break;
588c42ee134SAxel Dörfler
589c42ee134SAxel Dörfler		if (start == lastStart) {
590c42ee134SAxel Dörfler			// strange, flushing the log hasn't changed the log_start pointer
591c42ee134SAxel Dörfler			return B_ERROR;
592c42ee134SAxel Dörfler		}
593c42ee134SAxel Dörfler		lastStart = start;
594c42ee134SAxel Dörfler
595a59b9affSAxel Dörfler		status_t status = _ReplayRunArray(&start);
59666ae5b2dSAxel Dörfler		if (status != B_OK) {
59766ae5b2dSAxel Dörfler			FATAL(("replaying log entry from %d failed: %s\n", (int)start,
59866ae5b2dSAxel Dörfler				strerror(status)));
599c42ee134SAxel Dörfler			return B_ERROR;
600c42ee134SAxel Dörfler		}
601c42ee134SAxel Dörfler		start = start % fLogSize;
602c42ee134SAxel Dörfler	}
603f4260712SAxel Dörfler
604c42ee134SAxel Dörfler	PRINT(("replaying worked fine!\n"));
605797a92d8SAxel Dörfler	fVolume->SuperBlock().log_start = HOST_ENDIAN_TO_BFS_INT64(
606797a92d8SAxel Dörfler		fVolume->LogEnd());
607797a92d8SAxel Dörfler	fVolume->LogStart() = HOST_ENDIAN_TO_BFS_INT64(fVolume->LogEnd());
608add77fd6SAxel Dörfler	fVolume->SuperBlock().flags = HOST_ENDIAN_TO_BFS_INT32(
609add77fd6SAxel Dörfler		SUPER_BLOCK_DISK_CLEAN);
610c42ee134SAxel Dörfler
611c42ee134SAxel Dörfler	return fVolume->WriteSuperBlock();
612c42ee134SAxel Dörfler}
613c42ee134SAxel Dörfler
614c42ee134SAxel Dörfler
6154e643107SAxel Dörflersize_t
6164e643107SAxel DörflerJournal::CurrentTransactionSize() const
6174e643107SAxel Dörfler{
6184e643107SAxel Dörfler	if (_HasSubTransaction()) {
6194e643107SAxel Dörfler		return cache_blocks_in_sub_transaction(fVolume->BlockCache(),
6204e643107SAxel Dörfler			fTransactionID);
6214e643107SAxel Dörfler	}
6224e643107SAxel Dörfler
6234e643107SAxel Dörfler	return cache_blocks_in_main_transaction(fVolume->BlockCache(),
6244e643107SAxel Dörfler		fTransactionID);
6254e643107SAxel Dörfler}
6264e643107SAxel Dörfler
6274e643107SAxel Dörfler
6284e643107SAxel Dörflerbool
6294e643107SAxel DörflerJournal::CurrentTransactionTooLarge() const
6304e643107SAxel Dörfler{
6314e643107SAxel Dörfler	return CurrentTransactionSize() > fLogSize;
6324e643107SAxel Dörfler}
6334e643107SAxel Dörfler
6344e643107SAxel Dörfler
6356edf637bSAxel Dörfler/*!	This is a callback function that is called by the cache, whenever
63698e10fd4SAxel Dörfler	all blocks of a transaction have been flushed to disk.
63798e10fd4SAxel Dörfler	This lets us keep track of completed transactions, and update
63898e10fd4SAxel Dörfler	the log start pointer as needed. Note, the transactions may not be
63998e10fd4SAxel Dörfler	completed in the order they were written.
6406edf637bSAxel Dörfler*/
6413c78c045SAxel Dörfler/*static*/ void
64202c8f6c8SAxel DörflerJournal::_TransactionWritten(int32 transactionID, int32 event, void* _logEntry)
643c42ee134SAxel Dörfler{
64402c8f6c8SAxel Dörfler	LogEntry* logEntry = (LogEntry*)_logEntry;
645c42ee134SAxel Dörfler
64698e10fd4SAxel Dörfler	PRINT(("Log entry %p has been finished, transaction ID = %ld\n", logEntry,
64798e10fd4SAxel Dörfler		transactionID));
64892a076fcSAxel Dörfler
64902c8f6c8SAxel Dörfler	Journal* journal = logEntry->GetJournal();
65002c8f6c8SAxel Dörfler	disk_super_block& superBlock = journal->fVolume->SuperBlock();
651c42ee134SAxel Dörfler	bool update = false;
652c42ee134SAxel Dörfler
653c42ee134SAxel Dörfler	// Set log_start pointer if possible...
654c42ee134SAxel Dörfler
65503fa417bSAxel Dörfler	mutex_lock(&journal->fEntriesLock);
656a59b9affSAxel Dörfler
657c0614f32SAxel Dörfler	if (logEntry == journal->fEntries.First()) {
65802c8f6c8SAxel Dörfler		LogEntry* next = journal->fEntries.GetNext(logEntry);
659797a92d8SAxel Dörfler		if (next != NULL) {
660797a92d8SAxel Dörfler			superBlock.log_start = HOST_ENDIAN_TO_BFS_INT64(next->Start()
661797a92d8SAxel Dörfler				% journal->fLogSize);
662797a92d8SAxel Dörfler		} else {
663797a92d8SAxel Dörfler			superBlock.log_start = HOST_ENDIAN_TO_BFS_INT64(
664797a92d8SAxel Dörfler				journal->fVolume->LogEnd());
665797a92d8SAxel Dörfler		}
666c42ee134SAxel Dörfler
667c42ee134SAxel Dörfler		update = true;
668c42ee134SAxel Dörfler	}
669c42ee134SAxel Dörfler
670797a92d8SAxel Dörfler	T(LogEntry(logEntry, superBlock.LogStart(), false));