1/*
2 * Copyright 2001-2017, Axel D��rfler, axeld@pinc-software.de.
3 * This file may be used under the terms of the MIT License.
4 */
5
6
7//! Inode access functions
8
9
10#include "Debug.h"
11#include "Inode.h"
12#include "BPlusTree.h"
13#include "Index.h"
14
15
16#if BFS_TRACING && !defined(FS_SHELL) && !defined(_BOOT_MODE)
17namespace BFSInodeTracing {
18
19class Create : public AbstractTraceEntry {
20public:
21	Create(Inode* inode, Inode* parent, const char* name, int32 mode,
22			int openMode, uint32 type)
23		:
24		fInode(inode),
25		fID(inode->ID()),
26		fParent(parent),
27		fParentID(parent != NULL ? parent->ID() : 0),
28		fMode(mode),
29		fOpenMode(openMode),
30		fType(type)
31	{
32		if (name != NULL)
33			strlcpy(fName, name, sizeof(fName));
34		else
35			fName[0] = '\0';
36
37		Initialized();
38	}
39
40	virtual void AddDump(TraceOutput& out)
41	{
42		out.Print("bfs:Create %Ld (%p), parent %Ld (%p), \"%s\", "
43			"mode %lx, omode %x, type %lx", fID, fInode, fParentID,
44			fParent, fName, fMode, fOpenMode, fType);
45	}
46
47private:
48	Inode*	fInode;
49	ino_t	fID;
50	Inode*	fParent;
51	ino_t	fParentID;
52	char	fName[32];
53	int32	fMode;
54	int		fOpenMode;
55	uint32	fType;
56};
57
58class Remove : public AbstractTraceEntry {
59public:
60	Remove(Inode* inode, const char* name)
61		:
62		fInode(inode),
63		fID(inode->ID())
64	{
65		strlcpy(fName, name, sizeof(fName));
66		Initialized();
67	}
68
69	virtual void AddDump(TraceOutput& out)
70	{
71		out.Print("bfs:Remove %Ld (%p), \"%s\"", fID, fInode, fName);
72	}
73
74private:
75	Inode*	fInode;
76	ino_t	fID;
77	char	fName[32];
78};
79
80class Action : public AbstractTraceEntry {
81public:
82	Action(const char* action, Inode* inode)
83		:
84		fInode(inode),
85		fID(inode->ID())
86	{
87		strlcpy(fAction, action, sizeof(fAction));
88		Initialized();
89	}
90
91	virtual void AddDump(TraceOutput& out)
92	{
93		out.Print("bfs:%s %Ld (%p)\n", fAction, fID, fInode);
94	}
95
96private:
97	Inode*	fInode;
98	ino_t	fID;
99	char	fAction[16];
100};
101
102class Resize : public AbstractTraceEntry {
103public:
104	Resize(Inode* inode, off_t oldSize, off_t newSize, bool trim)
105		:
106		fInode(inode),
107		fID(inode->ID()),
108		fOldSize(oldSize),
109		fNewSize(newSize),
110		fTrim(trim)
111	{
112		Initialized();
113	}
114
115	virtual void AddDump(TraceOutput& out)
116	{
117		out.Print("bfs:%s %Ld (%p), %Ld -> %Ld", fTrim ? "Trim" : "Resize",
118			fID, fInode, fOldSize, fNewSize);
119	}
120
121private:
122	Inode*	fInode;
123	ino_t	fID;
124	off_t	fOldSize;
125	off_t	fNewSize;
126	bool	fTrim;
127};
128
129}	// namespace BFSInodeTracing
130
131#	define T(x) new(std::nothrow) BFSInodeTracing::x;
132#else
133#	define T(x) ;
134#endif
135
136
137/*!	A helper class used by Inode::Create() to keep track of the belongings
138	of an inode creation in progress.
139	This class will make sure everything is cleaned up properly.
140*/
141class InodeAllocator {
142public:
143							InodeAllocator(Transaction& transaction);
144							~InodeAllocator();
145
146			status_t		New(block_run* parentRun, mode_t mode, uint32 flags,
147								block_run& run, fs_vnode_ops* vnodeOps,
148								Inode** _inode);
149			status_t		CreateTree();
150			status_t		Keep(fs_vnode_ops* vnodeOps, uint32 publishFlags);
151
152private:
153	static	void			_TransactionListener(int32 id, int32 event,
154								void* _inode);
155
156			Transaction*	fTransaction;
157			block_run		fRun;
158			Inode*			fInode;
159};
160
161
162InodeAllocator::InodeAllocator(Transaction& transaction)
163	:
164	fTransaction(&transaction),
165	fInode(NULL)
166{
167}
168
169
170InodeAllocator::~InodeAllocator()
171{
172	if (fTransaction != NULL) {
173		Volume* volume = fTransaction->GetVolume();
174
175		if (fInode != NULL) {
176			fInode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);
177				// this unblocks any pending bfs_read_vnode() calls
178			fInode->Free(*fTransaction);
179
180			if (fInode->fTree != NULL)
181				fTransaction->RemoveListener(fInode->fTree);
182			fTransaction->RemoveListener(fInode);
183
184			remove_vnode(volume->FSVolume(), fInode->ID());
185		} else
186			volume->Free(*fTransaction, fRun);
187	}
188
189	delete fInode;
190}
191
192
193status_t
194InodeAllocator::New(block_run* parentRun, mode_t mode, uint32 publishFlags,
195	block_run& run, fs_vnode_ops* vnodeOps, Inode** _inode)
196{
197	Volume* volume = fTransaction->GetVolume();
198
199	status_t status = volume->AllocateForInode(*fTransaction, parentRun, mode,
200		fRun);
201	if (status < B_OK) {
202		// don't free the space in the destructor, because
203		// the allocation failed
204		fTransaction = NULL;
205		RETURN_ERROR(status);
206	}
207
208	run = fRun;
209	fInode = new(std::nothrow) Inode(volume, *fTransaction,
210		volume->ToVnode(run), mode, run);
211	if (fInode == NULL)
212		RETURN_ERROR(B_NO_MEMORY);
213
214	if (!volume->IsInitializing()
215		&& (publishFlags & BFS_DO_NOT_PUBLISH_VNODE) == 0) {
216		status = new_vnode(volume->FSVolume(), fInode->ID(), fInode,
217			vnodeOps != NULL ? vnodeOps : &gBFSVnodeOps);
218		if (status < B_OK) {
219			delete fInode;
220			fInode = NULL;
221			RETURN_ERROR(status);
222		}
223	}
224
225	fInode->WriteLockInTransaction(*fTransaction);
226	*_inode = fInode;
227	return B_OK;
228}
229
230
231status_t
232InodeAllocator::CreateTree()
233{
234	Volume* volume = fTransaction->GetVolume();
235
236	// force S_STR_INDEX to be set, if no type is set
237	if ((fInode->Mode() & S_INDEX_TYPES) == 0)
238		fInode->Node().mode |= HOST_ENDIAN_TO_BFS_INT32(S_STR_INDEX);
239
240	BPlusTree* tree = new(std::nothrow) BPlusTree(*fTransaction, fInode);
241	if (tree == NULL)
242		return B_ERROR;
243
244	status_t status = tree->InitCheck();
245	if (status != B_OK) {
246		delete tree;
247		return status;
248	}
249
250	fInode->fTree = tree;
251
252	if (fInode->IsRegularNode()) {
253		if (tree->Insert(*fTransaction, ".", fInode->ID()) < B_OK
254			|| tree->Insert(*fTransaction, "..",
255					volume->ToVnode(fInode->Parent())) < B_OK)
256			return B_ERROR;
257	}
258	return B_OK;
259}
260
261
262status_t
263InodeAllocator::Keep(fs_vnode_ops* vnodeOps, uint32 publishFlags)
264{
265	ASSERT(fInode != NULL && fTransaction != NULL);
266	Volume* volume = fTransaction->GetVolume();
267
268	status_t status = fInode->WriteBack(*fTransaction);
269	if (status < B_OK) {
270		FATAL(("writing new inode %" B_PRIdINO " failed!\n", fInode->ID()));
271		return status;
272	}
273
274	// Symbolic links are not published -- the caller needs to do this once
275	// the contents have been written.
276	if (!fInode->IsSymLink() && !volume->IsInitializing()
277		&& (publishFlags & BFS_DO_NOT_PUBLISH_VNODE) == 0) {
278		status = publish_vnode(volume->FSVolume(), fInode->ID(), fInode,
279			vnodeOps != NULL ? vnodeOps : &gBFSVnodeOps, fInode->Mode(),
280			publishFlags);
281	}
282
283	if (status == B_OK) {
284		cache_add_transaction_listener(volume->BlockCache(), fTransaction->ID(),
285			TRANSACTION_ABORTED, &_TransactionListener, fInode);
286	}
287
288	fTransaction = NULL;
289	fInode = NULL;
290
291	return status;
292}
293
294
295/*static*/ void
296InodeAllocator::_TransactionListener(int32 id, int32 event, void* _inode)
297{
298	Inode* inode = (Inode*)_inode;
299
300	if (event == TRANSACTION_ABORTED)
301		panic("transaction %d aborted, inode %p still around!\n", (int)id, inode);
302}
303
304
305//	#pragma mark -
306
307
308status_t
309bfs_inode::InitCheck(Volume* volume) const
310{
311	if (Magic1() != INODE_MAGIC1
312		|| !(Flags() & INODE_IN_USE)
313		|| inode_num.Length() != 1
314		// matches inode size?
315		|| (uint32)InodeSize() != volume->InodeSize()
316		// parent resides on disk?
317		|| parent.AllocationGroup() > int32(volume->AllocationGroups())
318		|| parent.AllocationGroup() < 0
319		|| parent.Start() > (1L << volume->AllocationGroupShift())
320		|| parent.Length() != 1
321		// attributes, too?
322		|| attributes.AllocationGroup() > int32(volume->AllocationGroups())
323		|| attributes.AllocationGroup() < 0
324		|| attributes.Start() > (1L << volume->AllocationGroupShift()))
325		RETURN_ERROR(B_BAD_DATA);
326
327	if (Flags() & INODE_DELETED)
328		return B_NOT_ALLOWED;
329
330	// TODO: Add some tests to check the integrity of the other stuff here,
331	// especially for the data_stream!
332
333	return B_OK;
334}
335
336
337//	#pragma mark - Inode
338
339
340Inode::Inode(Volume* volume, ino_t id)
341	:
342	fVolume(volume),
343	fID(id),
344	fTree(NULL),
345	fAttributes(NULL),
346	fCache(NULL),
347	fMap(NULL)
348{
349	PRINT(("Inode::Inode(volume = %p, id = %Ld) @ %p\n", volume, id, this));
350
351	rw_lock_init(&fLock, "bfs inode");
352	recursive_lock_init(&fSmallDataLock, "bfs inode small data");
353
354	if (UpdateNodeFromDisk() != B_OK) {
355		// TODO: the error code gets eaten
356		return;
357	}
358
359	// these two will help to maintain the indices
360	fOldSize = Size();
361	fOldLastModified = LastModified();
362
363	if (IsContainer())
364		fTree = new(std::nothrow) BPlusTree(this);
365	if (NeedsFileCache()) {
366		SetFileCache(file_cache_create(fVolume->ID(), ID(), Size()));
367		SetMap(file_map_create(volume->ID(), ID(), Size()));
368	}
369}
370
371
372Inode::Inode(Volume* volume, Transaction& transaction, ino_t id, mode_t mode,
373		block_run& run)
374	:
375	fVolume(volume),
376	fID(id),
377	fTree(NULL),
378	fAttributes(NULL),
379	fCache(NULL),
380	fMap(NULL)
381{
382	PRINT(("Inode::Inode(volume = %p, transaction = %p, id = %Ld) @ %p\n",
383		volume, &transaction, id, this));
384
385	rw_lock_init(&fLock, "bfs inode");
386	recursive_lock_init(&fSmallDataLock, "bfs inode small data");
387
388	NodeGetter node(volume, transaction, this, true);
389	if (node.Node() == NULL) {
390		FATAL(("Could not read inode block %" B_PRId64 "!\n", BlockNumber()));
391		return;
392	}
393
394	memset(&fNode, 0, sizeof(bfs_inode));
395
396	// Initialize the bfs_inode structure -- it's not written back to disk
397	// here, because it will be done so already when the inode could be
398	// created completely.
399
400	Node().magic1 = HOST_ENDIAN_TO_BFS_INT32(INODE_MAGIC1);
401	Node().inode_num = run;
402	Node().mode = HOST_ENDIAN_TO_BFS_INT32(mode);
403	Node().flags = HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);
404
405	Node().create_time = Node().last_modified_time = Node().status_change_time
406		= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));
407
408	Node().inode_size = HOST_ENDIAN_TO_BFS_INT32(volume->InodeSize());
409
410	// these two will help to maintain the indices
411	fOldSize = Size();
412	fOldLastModified = LastModified();
413}
414
415
416Inode::~Inode()
417{
418	PRINT(("Inode::~Inode() @ %p\n", this));
419
420	file_cache_delete(FileCache());
421	file_map_delete(Map());
422	delete fTree;
423
424	rw_lock_destroy(&fLock);
425	recursive_lock_destroy(&fSmallDataLock);
426}
427
428
429status_t
430Inode::InitCheck(bool checkNode) const
431{
432	// test inode magic and flags
433	if (checkNode) {
434		status_t status = Node().InitCheck(fVolume);
435		if (status == B_BUSY)
436			return B_BUSY;
437
438		if (status != B_OK) {
439			FATAL(("inode at block %" B_PRIdOFF " corrupt!\n", BlockNumber()));
440			RETURN_ERROR(B_BAD_DATA);
441		}
442	}
443
444	if (IsContainer()) {
445		// inodes that have a B+tree
446		if (fTree == NULL)
447			RETURN_ERROR(B_NO_MEMORY);
448
449		status_t status = fTree->InitCheck();
450		if (status != B_OK) {
451			FATAL(("inode tree at block %" B_PRIdOFF " corrupt!\n",
452				BlockNumber()));
453			RETURN_ERROR(B_BAD_DATA);
454		}
455	}
456
457	if (NeedsFileCache() && (fCache == NULL || fMap == NULL))
458		return B_NO_MEMORY;
459
460	return B_OK;
461}
462
463
464/*!	Adds this inode to the specified transaction. This means that the inode will
465	be write locked until the transaction ended.
466	To ensure that the inode will stay valid until that point, an extra reference
467	is acquired to it as long as this transaction stays active.
468*/
469void
470Inode::WriteLockInTransaction(Transaction& transaction)
471{
472	// These flags can only change while holding the transaction lock
473	if ((Flags() & INODE_IN_TRANSACTION) != 0)
474		return;
475
476	// We share the same list link with the removed list, so we have to remove
477	// the inode from that list here (and add it back when we no longer need it)
478	if ((Flags() & INODE_DELETED) != 0)
479		fVolume->RemovedInodes().Remove(this);
480
481	if (!fVolume->IsInitializing())
482		acquire_vnode(fVolume->FSVolume(), ID());
483
484	rw_lock_write_lock(&Lock());
485	Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_IN_TRANSACTION);
486
487	transaction.AddListener(this);
488}
489
490
491status_t
492Inode::WriteBack(Transaction& transaction)
493{
494	NodeGetter node(fVolume, transaction, this);
495	if (node.WritableNode() == NULL)
496		return B_IO_ERROR;
497
498	memcpy(node.WritableNode(), &Node(), sizeof(bfs_inode));
499	return B_OK;
500}
501
502
503status_t
504Inode::UpdateNodeFromDisk()
505{
506	NodeGetter node(fVolume, this);
507	if (node.Node() == NULL) {
508		FATAL(("Failed to read block %" B_PRId64 " from disk!\n",
509			BlockNumber()));
510		return B_IO_ERROR;
511	}
512
513	memcpy(&fNode, node.Node(), sizeof(bfs_inode));
514	fNode.flags &= HOST_ENDIAN_TO_BFS_INT32(INODE_PERMANENT_FLAGS);
515	return B_OK;
516}
517
518
519status_t
520Inode::CheckPermissions(int accessMode) const
521{
522	// you never have write access to a read-only volume
523	if ((accessMode & W_OK) != 0 && fVolume->IsReadOnly())
524		return B_READ_ONLY_DEVICE;
525
526	return check_access_permissions(accessMode, Mode(), (gid_t)fNode.GroupID(),
527		(uid_t)fNode.UserID());
528}
529
530
531//	#pragma mark - attributes
532
533
534void
535Inode::_AddIterator(AttributeIterator* iterator)
536{
537	RecursiveLocker _(fSmallDataLock);
538	fIterators.Add(iterator);
539}
540
541
542void
543Inode::_RemoveIterator(AttributeIterator* iterator)
544{
545	RecursiveLocker _(fSmallDataLock);
546	fIterators.Remove(iterator);
547}
548
549
550/*!	Tries to free up "bytes" space in the small_data section by moving
551	attributes to real files. Used for system attributes like the name.
552	You need to hold the fSmallDataLock when you call this method
553*/
554status_t
555Inode::_MakeSpaceForSmallData(Transaction& transaction, bfs_inode* node,
556	const char* name, int32 bytes)
557{
558	ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
559
560	while (bytes > 0) {
561		small_data* item = node->SmallDataStart();
562		small_data* max = NULL;
563		int32 index = 0, maxIndex = 0;
564		for (; !item->IsLast(node); item = item->Next(), index++) {
565			// should not remove those
566			if (*item->Name() == FILE_NAME_NAME || !strcmp(name, item->Name()))
567				continue;
568
569			if (max == NULL || max->Size() < item->Size()) {
570				maxIndex = index;
571				max = item;
572			}
573
574			// Remove the first one large enough to free the needed amount of
575			// bytes
576			if (bytes < (int32)item->Size())
577				break;
578		}
579
580		if (item->IsLast(node) || (int32)item->Size() < bytes)
581			return B_ERROR;
582
583		bytes -= max->Size();
584
585		// Move the attribute to a real attribute file
586		// Luckily, this doesn't cause any index updates
587
588		Inode* attribute;
589		status_t status = CreateAttribute(transaction, item->Name(),
590			item->Type(), &attribute);
591		if (status != B_OK)
592			RETURN_ERROR(status);
593
594		size_t length = item->DataSize();
595		status = attribute->WriteAt(transaction, 0, item->Data(), &length);
596
597		ReleaseAttribute(attribute);
598
599		if (status != B_OK) {
600			Vnode vnode(fVolume, Attributes());
601			Inode* attributes;
602			if (vnode.Get(&attributes) < B_OK
603				|| attributes->Remove(transaction, name) < B_OK) {
604				FATAL(("Could not remove newly created attribute!\n"));
605			}
606
607			RETURN_ERROR(status);
608		}
609
610		_RemoveSmallData(node, max, maxIndex);
611	}
612	return B_OK;
613}
614
615
616/*!	Private function which removes the given attribute from the small_data
617	section.
618	You need to hold the fSmallDataLock when you call this method
619*/
620status_t
621Inode::_RemoveSmallData(bfs_inode* node, small_data* item, int32 index)
622{
623	ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
624
625	small_data* next = item->Next();
626	if (!next->IsLast(node)) {
627		// find the last attribute
628		small_data* last = next;
629		while (!last->IsLast(node))
630			last = last->Next();
631
632		int32 size = (uint8*)last - (uint8*)next;
633		if (size < 0
634			|| size > (uint8*)node + fVolume->BlockSize() - (uint8*)next)
635			return B_BAD_DATA;
636
637		memmove(item, next, size);
638
639		// Move the "last" one to its new location and
640		// correctly terminate the small_data section
641		last = (small_data*)((uint8*)last - ((uint8*)next - (uint8*)item));
642		memset(last, 0, (uint8*)node + fVolume->BlockSize() - (uint8*)last);
643	} else
644		memset(item, 0, item->Size());
645
646	// update all current iterators
647	SinglyLinkedList<AttributeIterator>::Iterator iterator
648		= fIterators.GetIterator();
649	while (iterator.HasNext()) {
650		iterator.Next()->Update(index, -1);
651	}
652
653	return B_OK;
654}
655
656
657//!	Removes the given attribute from the small_data section.
658status_t
659Inode::_RemoveSmallData(Transaction& transaction, NodeGetter& nodeGetter,
660	const char* name)
661{
662	if (name == NULL)
663		return B_BAD_VALUE;
664
665	bfs_inode* node = nodeGetter.WritableNode();
666	RecursiveLocker locker(fSmallDataLock);
667
668	// search for the small_data item
669
670	small_data* item = node->SmallDataStart();
671	int32 index = 0;
672	while (!item->IsLast(node) && strcmp(item->Name(), name)) {
673		item = item->Next();
674		index++;
675	}
676
677	if (item->IsLast(node))
678		return B_ENTRY_NOT_FOUND;
679
680	nodeGetter.MakeWritable(transaction);
681
682	status_t status = _RemoveSmallData(node, item, index);
683	if (status == B_OK) {
684		Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
685			bfs_inode::ToInode(real_time_clock_usecs()));
686
687		status = WriteBack(transaction);
688	}
689
690	return status;
691}
692
693
694/*!	Try to place the given attribute in the small_data section - if the
695	new attribute is too big to fit in that section, it returns B_DEVICE_FULL.
696	In that case, the attribute should be written to a real attribute file;
697	it's the caller's responsibility to remove any existing attributes in the
698	small data section if that's the case.
699
700	Note that you need to write back the inode yourself after having called that
701	method - it's a bad API decision that it needs a transaction but enforces
702	you to write back the inode all by yourself, but it's just more efficient
703	in most cases...
704*/
705status_t
706Inode::_AddSmallData(Transaction& transaction, NodeGetter& nodeGetter,
707	const char* name, uint32 type, off_t pos, const uint8* data, size_t length,
708	bool force)
709{
710	bfs_inode* node = nodeGetter.WritableNode();
711
712	if (node == NULL || name == NULL || data == NULL)
713		return B_BAD_VALUE;
714
715	// reject any requests that can't fit into the small_data section
716	uint32 nameLength = strlen(name);
717	uint32 spaceNeeded = sizeof(small_data) + nameLength + 3 + pos + length + 1;
718	if (spaceNeeded > fVolume->InodeSize() - sizeof(bfs_inode))
719		return B_DEVICE_FULL;
720
721	nodeGetter.MakeWritable(transaction);
722	RecursiveLocker locker(fSmallDataLock);
723
724	// Find the last item or one with the same name we have to add
725	small_data* item = node->SmallDataStart();
726	int32 index = 0;
727	while (!item->IsLast(node) && strcmp(item->Name(), name)) {
728		item = item->Next();
729		index++;
730	}
731
732	// is the attribute already in the small_data section?
733	// then just replace the data part of that one
734	if (!item->IsLast(node)) {
735		// find last attribute
736		small_data* last = item;
737		while (!last->IsLast(node))
738			last = last->Next();
739
740		// try to change the attributes value
741		if (item->data_size > pos + length
742			|| force
743			|| ((uint8*)last + pos + length - item->DataSize())
744					<= ((uint8*)node + fVolume->InodeSize())) {
745			// Make room for the new attribute if needed (and we are forced
746			// to do so)
747			if (force && ((uint8*)last + pos + length - item->DataSize())
748					> ((uint8*)node + fVolume->InodeSize())) {
749				// We also take the free space at the end of the small_data
750				// section into account, and request only what's really needed
751				uint32 needed = pos + length - item->DataSize() -
752					(uint32)((uint8*)node + fVolume->InodeSize()
753						- (uint8*)last);
754
755				if (_MakeSpaceForSmallData(transaction, node, name, needed)
756						!= B_OK)
757					return B_ERROR;
758
759				// reset our pointers
760				item = node->SmallDataStart();
761				index = 0;
762				while (!item->IsLast(node) && strcmp(item->Name(), name)) {
763					item = item->Next();
764					index++;
765				}
766
767				last = item;
768				while (!last->IsLast(node))
769					last = last->Next();
770			}
771
772			size_t oldDataSize = item->DataSize();
773
774			// Normally, we can just overwrite the attribute data as the size
775			// is specified by the type and does not change that often
776			if (pos + length != item->DataSize()) {
777				// move the attributes after the current one
778				small_data* next = item->Next();
779				if (!next->IsLast(node)) {
780					memmove((uint8*)item + spaceNeeded, next,
781						(uint8*)last - (uint8*)next);
782				}
783
784				// Move the "last" one to its new location and
785				// correctly terminate the small_data section
786				last = (small_data*)((uint8*)last
787					- ((uint8*)next - ((uint8*)item + spaceNeeded)));
788				if ((uint8*)last < (uint8*)node + fVolume->BlockSize()) {
789					memset(last, 0, (uint8*)node + fVolume->BlockSize()
790						- (uint8*)last);
791				}
792
793				item->data_size = HOST_ENDIAN_TO_BFS_INT16(pos + length);
794			}
795
796			item->type = HOST_ENDIAN_TO_BFS_INT32(type);
797
798			if ((uint64)oldDataSize < (uint64)pos) {
799				// Fill gap with zeros
800				memset(item->Data() + oldDataSize, 0, pos - oldDataSize);
801			}
802			if (user_memcpy(item->Data() + pos, data, length) < B_OK)
803				return B_BAD_ADDRESS;
804			item->Data()[pos + length] = '\0';
805
806			return B_OK;
807		}
808
809		return B_DEVICE_FULL;
810	}
811
812	// try to add the new attribute!
813
814	if ((uint8*)item + spaceNeeded > (uint8*)node + fVolume->InodeSize()) {
815		// there is not enough space for it!
816		if (!force)
817			return B_DEVICE_FULL;
818
819		// make room for the new attribute
820		if (_MakeSpaceForSmallData(transaction, node, name, spaceNeeded) < B_OK)
821			return B_ERROR;
822
823		// get new last item!
824		item = node->SmallDataStart();
825		index = 0;
826		while (!item->IsLast(node)) {
827			item = item->Next();
828			index++;
829		}
830	}
831
832	memset(item, 0, spaceNeeded);
833	item->type = HOST_ENDIAN_TO_BFS_INT32(type);
834	item->name_size = HOST_ENDIAN_TO_BFS_INT16(nameLength);
835	item->data_size = HOST_ENDIAN_TO_BFS_INT16(length);
836	strcpy(item->Name(), name);
837	if (user_memcpy(item->Data() + pos, data, length) < B_OK)
838		return B_BAD_ADDRESS;
839
840	// correctly terminate the small_data section
841	item = item->Next();
842	if (!item->IsLast(node))
843		memset(item, 0, (uint8*)node + fVolume->InodeSize() - (uint8*)item);
844
845	// update all current iterators
846	SinglyLinkedList<AttributeIterator>::Iterator iterator
847		= fIterators.GetIterator();
848	while (iterator.HasNext()) {
849		iterator.Next()->Update(index, 1);
850	}
851
852	return B_OK;
853}
854
855
856/*!	Iterates through the small_data section of an inode.
857	To start at the beginning of this section, you let smallData
858	point to NULL, like:
859		small_data* data = NULL;
860		while (inode->GetNextSmallData(&data) { ... }
861
862	This function is reentrant and doesn't allocate any memory;
863	you can safely stop calling it at any point (you don't need
864	to iterate through the whole list).
865	You need to hold the fSmallDataLock when you call this method
866*/
867status_t
868Inode::_GetNextSmallData(bfs_inode* node, small_data** _smallData) const
869{
870	if (node == NULL)
871		RETURN_ERROR(B_BAD_VALUE);
872
873	ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
874
875	small_data* data = *_smallData;
876
877	// begin from the start?
878	if (data == NULL)
879		data = node->SmallDataStart();
880	else
881		data = data->Next();
882
883	// is already last item?
884	if (data->IsLast(node))
885		return B_ENTRY_NOT_FOUND;
886
887	*_smallData = data;
888
889	return B_OK;
890}
891
892
893/*!	Finds the attribute "name" in the small data section, and
894	returns a pointer to it (or NULL if it doesn't exist).
895	You need to hold the fSmallDataLock when you call this method
896*/
897small_data*
898Inode::FindSmallData(const bfs_inode* node, const char* name) const
899{
900	ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
901
902	small_data* smallData = NULL;
903	while (_GetNextSmallData(const_cast<bfs_inode*>(node), &smallData)
904			== B_OK) {
905		if (!strcmp(smallData->Name(), name))
906			return smallData;
907	}
908	return NULL;
909}
910
911
912/*!	Returns a pointer to the node's name if present in the small data
913	section, NULL otherwise.
914	You need to hold the fSmallDataLock when you call this method
915*/
916const char*
917Inode::Name(const bfs_inode* node) const
918{
919	ASSERT_LOCKED_RECURSIVE(&fSmallDataLock);
920
921	small_data* smallData = NULL;
922	while (_GetNextSmallData((bfs_inode*)node, &smallData) == B_OK) {
923		if (*smallData->Name() == FILE_NAME_NAME
924			&& smallData->NameSize() == FILE_NAME_NAME_LENGTH)
925			return (const char*)smallData->Data();
926	}
927	return NULL;
928}
929
930
931/*!	Copies the node's name into the provided buffer.
932	The buffer should be B_FILE_NAME_LENGTH bytes large.
933*/
934status_t
935Inode::GetName(char* buffer, size_t size) const
936{
937	NodeGetter node(fVolume, this);
938	if (node.Node() == NULL)
939		return B_IO_ERROR;
940
941	RecursiveLocker locker(fSmallDataLock);
942
943	const char* name = Name(node.Node());
944	if (name == NULL)
945		return B_ENTRY_NOT_FOUND;
946
947	strlcpy(buffer, name, size);
948	return B_OK;
949}
950
951
952/*!	Changes or set the name of a file: in the inode small_data section only, it
953	doesn't change it in the parent directory's b+tree.
954	Note that you need to write back the inode yourself after having called
955	that method. It suffers from the same API decision as AddSmallData() does
956	(and for the same reason).
957*/
958status_t
959Inode::SetName(Transaction& transaction, const char* name)
960{
961	if (name == NULL || *name == '\0')
962		return B_BAD_VALUE;
963
964	NodeGetter node(fVolume, transaction, this);
965	if (node.Node() == NULL)
966		return B_IO_ERROR;
967
968	const char nameTag[2] = {FILE_NAME_NAME, 0};
969
970	return _AddSmallData(transaction, node, nameTag, FILE_NAME_TYPE, 0,
971		(uint8*)name, strlen(name), true);
972}
973
974
975status_t
976Inode::_RemoveAttribute(Transaction& transaction, const char* name,
977	bool hasIndex, Index* index)
978{
979	// remove the attribute file if it exists
980	Vnode vnode(fVolume, Attributes());
981	Inode* attributes;
982	status_t status = vnode.Get(&attributes);
983	if (status < B_OK)
984		return status;
985
986	// update index
987	if (index != NULL) {
988		Inode* attribute;
989		if ((hasIndex || fVolume->CheckForLiveQuery(name))
990			&& GetAttribute(name, &attribute) == B_OK) {
991			uint8 data[MAX_INDEX_KEY_LENGTH];
992			size_t length = MAX_INDEX_KEY_LENGTH;
993			if (attribute->ReadAt(0, data, &length) == B_OK) {
994				index->Update(transaction, name, attribute->Type(), data,
995					length, NULL, 0, this);
996			}
997
998			ReleaseAttribute(attribute);
999		}
1000	}
1001
1002	if ((status = attributes->Remove(transaction, name)) < B_OK)
1003		return status;
1004
1005	if (attributes->IsEmpty()) {
1006		attributes->WriteLockInTransaction(transaction);
1007
1008		// remove attribute directory (don't fail if that can't be done)
1009		if (remove_vnode(fVolume->FSVolume(), attributes->ID()) == B_OK) {
1010			// update the inode, so that no one will ever doubt it's deleted :-)
1011			attributes->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
1012			if (attributes->WriteBack(transaction) == B_OK) {
1013				Attributes().SetTo(0, 0, 0);
1014				WriteBack(transaction);
1015			} else {
1016				unremove_vnode(fVolume->FSVolume(), attributes->ID());
1017				attributes->Node().flags
1018					&= ~HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
1019			}
1020		}
1021	}
1022
1023	return status;
1024}
1025
1026
1027/*!	Reads data from the specified attribute.
1028	This is a high-level attribute function that understands attributes
1029	in the small_data section as well as real attribute files.
1030*/
1031status_t
1032Inode::ReadAttribute(const char* name, int32 type, off_t pos, uint8* buffer,
1033	size_t* _length)
1034{
1035	if (pos < 0)
1036		pos = 0;
1037
1038	// search in the small_data section (which has to be locked first)
1039	{
1040		NodeGetter node(fVolume, this);
1041		if (node.Node() == NULL)
1042			return B_IO_ERROR;
1043
1044		RecursiveLocker locker(fSmallDataLock);
1045
1046		small_data* smallData = FindSmallData(node.Node(), name);
1047		if (smallData != NULL) {
1048			size_t length = *_length;
1049			if (pos >= smallData->data_size) {
1050				*_length = 0;
1051				return B_OK;
1052			}
1053			if (length + pos > smallData->DataSize())
1054				length = smallData->DataSize() - pos;
1055
1056			status_t error = user_memcpy(buffer, smallData->Data() + pos,
1057				length);
1058			*_length = length;
1059			return error;
1060		}
1061	}
1062
1063	// search in the attribute directory
1064	Inode* attribute;
1065	status_t status = GetAttribute(name, &attribute);
1066	if (status == B_OK) {
1067		status = attribute->ReadAt(pos, (uint8*)buffer, _length);
1068
1069		ReleaseAttribute(attribute);
1070	}
1071
1072	RETURN_ERROR(status);
1073}
1074
1075
1076/*!	Writes data to the specified attribute.
1077	This is a high-level attribute function that understands attributes
1078	in the small_data section as well as real attribute files.
1079*/
1080status_t
1081Inode::WriteAttribute(Transaction& transaction, const char* name, int32 type,
1082	off_t pos, const uint8* buffer, size_t* _length, bool* _created)
1083{
1084	if (pos < 0)
1085		return B_BAD_VALUE;
1086
1087	// needed to maintain the index
1088	uint8 oldBuffer[MAX_INDEX_KEY_LENGTH];
1089	uint8* oldData = NULL;
1090	size_t oldLength = 0;
1091	bool created = false;
1092
1093	// TODO: we actually depend on that the contents of "buffer" are constant.
1094	// If they get changed during the write (hey, user programs), we may mess
1095	// up our index trees!
1096	// TODO: for attribute files, we need to log the first
1097	// MAX_INDEX_KEY_LENGTH bytes of the data stream, or the same as above
1098	// might happen.
1099
1100	Index index(fVolume);
1101	bool hasIndex = index.SetTo(name) == B_OK;
1102
1103	Inode* attribute = NULL;
1104	status_t status = B_OK;
1105
1106	if (GetAttribute(name, &attribute) != B_OK) {
1107		// No attribute inode exists yet
1108
1109		// save the old attribute data
1110		NodeGetter node(fVolume, transaction, this);
1111		if (node.Node() == NULL)
1112			return B_IO_ERROR;
1113
1114		recursive_lock_lock(&fSmallDataLock);
1115
1116		small_data* smallData = FindSmallData(node.Node(), name);
1117		if (smallData != NULL) {
1118			oldLength = smallData->DataSize();
1119			if (oldLength > 0) {
1120				if (oldLength > MAX_INDEX_KEY_LENGTH)
1121					oldLength = MAX_INDEX_KEY_LENGTH;
1122				memcpy(oldData = oldBuffer, smallData->Data(), oldLength);
1123			}
1124		} else
1125			created = true;
1126
1127		recursive_lock_unlock(&fSmallDataLock);
1128
1129		// if the attribute doesn't exist yet (as a file), try to put it in the
1130		// small_data section first - if that fails (due to insufficent space),
1131		// create a real attribute file
1132		status = _AddSmallData(transaction, node, name, type, pos, buffer,
1133			*_length);
1134		if (status == B_DEVICE_FULL) {
1135			if (smallData != NULL) {
1136				// remove the old attribute from the small data section - there
1137				// is no space left for the new data
1138				status = _RemoveSmallData(transaction, node, name);
1139			} else
1140				status = B_OK;
1141
1142			if (status == B_OK)
1143				status = CreateAttribute(transaction, name, type, &attribute);
1144			if (status != B_OK)
1145				RETURN_ERROR(status);
1146
1147			created = true;
1148		} else if (status == B_OK) {
1149			// Update status time on attribute write
1150			Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
1151				bfs_inode::ToInode(real_time_clock_usecs()));
1152
1153			status = WriteBack(transaction);
1154		}
1155	}
1156
1157	if (attribute != NULL) {
1158		WriteLocker writeLocker(attribute->fLock);
1159
1160		if (hasIndex || fVolume->CheckForLiveQuery(name)) {
1161			// Save the old attribute data (if this fails, oldLength will
1162			// reflect it)
1163			while (attribute->Size() > 0) {
1164				bigtime_t oldModified = attribute->LastModified();
1165				writeLocker.Unlock();
1166
1167				oldLength = MAX_INDEX_KEY_LENGTH;
1168				if (attribute->ReadAt(0, oldBuffer, &oldLength) == B_OK)
1169					oldData = oldBuffer;
1170
1171				writeLocker.Lock();
1172
1173				// Read until the data hasn't changed in between
1174				if (oldModified == attribute->LastModified())
1175					break;
1176
1177				oldLength = 0;
1178			}
1179		}
1180
1181		// check if the data fits into the small_data section again
1182		NodeGetter node(fVolume, transaction, this);
1183		if (node.Node() == NULL)
1184			return B_IO_ERROR;
1185
1186		status = _AddSmallData(transaction, node, name, type, pos, buffer,
1187			*_length);
1188
1189		if (status == B_OK) {
1190			// it does - remove its file
1191			writeLocker.Unlock();
1192			status = _RemoveAttribute(transaction, name, false, NULL);
1193		} else {
1194			// The attribute type might have been changed - we need to
1195			// adopt the new one
1196			attribute->Node().type = HOST_ENDIAN_TO_BFS_INT32(type);
1197			status = attribute->WriteBack(transaction);
1198			writeLocker.Unlock();
1199
1200			if (status == B_OK) {
1201				status = attribute->WriteAt(transaction, pos, buffer,
1202					_length);
1203			}
1204		}
1205
1206		if (status == B_OK) {
1207			// Update status time on attribute write
1208			Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
1209				bfs_inode::ToInode(real_time_clock_usecs()));
1210
1211			status = WriteBack(transaction);
1212		}
1213
1214		attribute->WriteLockInTransaction(transaction);
1215		ReleaseAttribute(attribute);
1216	}
1217
1218	// TODO: find a better way than this "pos" thing (the begin of the old key
1219	//	must be copied to the start of the new one for a comparison)
1220	if (status == B_OK && pos == 0) {
1221		// Index only the first MAX_INDEX_KEY_LENGTH bytes
1222		uint16 length = *_length;
1223		if (length > MAX_INDEX_KEY_LENGTH)
1224			length = MAX_INDEX_KEY_LENGTH;
1225
1226		uint8 indexBuffer[MAX_INDEX_KEY_LENGTH];
1227		// _AddSmallData() already read the buffer
1228		user_memcpy(indexBuffer, buffer, length);
1229
1230		// Update index. Note, Index::Update() may be called even if
1231		// initializing the index failed - it will just update the live
1232		// queries in this case
1233		if (pos < length || (uint64)pos < (uint64)oldLength) {
1234			index.Update(transaction, name, type, oldData, oldLength,
1235				indexBuffer, length, this);
1236		}
1237	}
1238
1239	if (_created != NULL)
1240		*_created = created;
1241
1242	return status;
1243}
1244
1245
1246/*!	Removes the specified attribute from the inode.
1247	This is a high-level attribute function that understands attributes
1248	in the small_data section as well as real attribute files.
1249*/
1250status_t
1251Inode::RemoveAttribute(Transaction& transaction, const char* name)
1252{
1253	Index index(fVolume);
1254	bool hasIndex = index.SetTo(name) == B_OK;
1255	NodeGetter node(fVolume, this);
1256	if (node.Node() == NULL)
1257		return B_IO_ERROR;
1258
1259	// update index for attributes in the small_data section
1260	{
1261		RecursiveLocker _(fSmallDataLock);
1262
1263		small_data* smallData = FindSmallData(node.Node(), name);
1264		if (smallData != NULL) {
1265			uint32 length = smallData->DataSize();
1266			if (length > MAX_INDEX_KEY_LENGTH)
1267				length = MAX_INDEX_KEY_LENGTH;
1268			index.Update(transaction, name, smallData->Type(),
1269				smallData->Data(), length, NULL, 0, this);
1270		}
1271	}
1272
1273	status_t status = _RemoveSmallData(transaction, node, name);
1274	if (status == B_ENTRY_NOT_FOUND && !Attributes().IsZero()) {
1275		// remove the attribute file if it exists
1276		status = _RemoveAttribute(transaction, name, hasIndex, &index);
1277		if (status == B_OK) {
1278			Node().status_change_time = HOST_ENDIAN_TO_BFS_INT64(
1279				bfs_inode::ToInode(real_time_clock_usecs()));
1280			WriteBack(transaction);
1281		}
1282	}
1283
1284	return status;
1285}
1286
1287
1288/*!	Returns the attribute inode with the specified \a name, in case it exists.
1289	This method can only return real attribute files; the attributes in the
1290	small data section are ignored.
1291*/
1292status_t
1293Inode::GetAttribute(const char* name, Inode** _attribute)
1294{
1295	// does this inode even have attributes?
1296	if (Attributes().IsZero())
1297		return B_ENTRY_NOT_FOUND;
1298
1299	Vnode vnode(fVolume, Attributes());
1300	Inode* attributes;
1301	if (vnode.Get(&attributes) < B_OK) {
1302		FATAL(("get_vnode() failed in Inode::GetAttribute(name = \"%s\")\n",
1303			name));
1304		return B_ERROR;
1305	}
1306
1307	BPlusTree* tree = attributes->Tree();
1308	if (tree == NULL)
1309		return B_BAD_VALUE;
1310
1311	InodeReadLocker locker(attributes);
1312
1313	ino_t id;
1314	status_t status = tree->Find((uint8*)name, (uint16)strlen(name), &id);
1315	if (status == B_OK) {
1316		Vnode vnode(fVolume, id);
1317		Inode* inode;
1318		// Check if the attribute is really an attribute
1319		if (vnode.Get(&inode) != B_OK || !inode->IsAttribute())
1320			return B_ERROR;
1321
1322		*_attribute = inode;
1323		vnode.Keep();
1324		return B_OK;
1325	}
1326
1327	return status;
1328}
1329
1330
1331void
1332Inode::ReleaseAttribute(Inode* attribute)
1333{
1334	if (attribute == NULL)
1335		return;
1336
1337	put_vnode(fVolume->FSVolume(), attribute->ID());
1338}
1339
1340
1341status_t
1342Inode::CreateAttribute(Transaction& transaction, const char* name, uint32 type,
1343	Inode** attribute)
1344{
1345	// do we need to create the attribute directory first?
1346	if (Attributes().IsZero()) {
1347		status_t status = Inode::Create(transaction, this, NULL,
1348			S_ATTR_DIR | S_DIRECTORY | 0666, 0, 0, NULL);
1349		if (status < B_OK)
1350			RETURN_ERROR(status);
1351	}
1352	Vnode vnode(fVolume, Attributes());
1353	Inode* attributes;
1354	if (vnode.Get(&attributes) < B_OK)
1355		return B_ERROR;
1356
1357	// Inode::Create() locks the inode for us
1358	return Inode::Create(transaction, attributes, name,
1359		S_ATTR | S_FILE | 0666, 0, type, NULL, NULL, attribute);
1360}
1361
1362
1363//	#pragma mark - directory tree
1364
1365
1366bool
1367Inode::IsEmpty()
1368{
1369	TreeIterator iterator(fTree);
1370
1371	uint32 count = 0;
1372	char name[MAX_INDEX_KEY_LENGTH + 1];
1373	uint16 length;
1374	ino_t id;
1375	while (iterator.GetNextEntry(name, &length, MAX_INDEX_KEY_LENGTH + 1,
1376			&id) == B_OK) {
1377		if ((Mode() & (S_ATTR_DIR | S_INDEX_DIR)) != 0)
1378			return false;
1379
1380		// Unlike index and attribute directories, directories
1381		// for standard files always contain ".", and "..", so
1382		// we need to ignore those two
1383		if (++count > 2 || (strcmp(".", name) != 0 && strcmp("..", name) != 0))
1384			return false;
1385	}
1386	return true;
1387}
1388
1389
1390status_t
1391Inode::ContainerContentsChanged(Transaction& transaction)
1392{
1393	ASSERT(!InLastModifiedIndex());
1394
1395	Node().last_modified_time = Node().status_change_time
1396		= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));
1397
1398	return WriteBack(transaction);
1399}
1400
1401
1402//	#pragma mark - data stream
1403
1404
1405/*!	Computes the number of bytes this inode occupies in the file system.
1406	This includes the file meta blocks used for maintaining its data stream.
1407
1408	TODO: However, the attributes in extra files are not really accounted for;
1409	depending on the speed penalty, this should be changed, though (the value
1410	could be cached in the inode structure or Inode object, though).
1411*/
1412off_t
1413Inode::AllocatedSize() const
1414{
1415	if (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0) {
1416		// This symlink does not have a data stream
1417		return Node().InodeSize();
1418	}
1419
1420	const data_stream& data = Node().data;
1421	uint32 blockSize = fVolume->BlockSize();
1422	off_t size = blockSize;
1423
1424	if (data.MaxDoubleIndirectRange() != 0) {
1425		off_t doubleIndirectSize = data.MaxDoubleIndirectRange()
1426			- data.MaxIndirectRange();
1427		int32 indirectSize = double_indirect_max_indirect_size(
1428			data.double_indirect.Length(), fVolume->BlockSize());
1429
1430		size += (2 * data.double_indirect.Length()
1431				+ doubleIndirectSize / indirectSize)
1432			* blockSize + data.MaxDoubleIndirectRange();
1433	} else if (data.MaxIndirectRange() != 0)
1434		size += data.indirect.Length() + data.MaxIndirectRange();
1435	else
1436		size += data.MaxDirectRange();
1437
1438	if (!Node().attributes.IsZero()) {
1439		// TODO: to make this exact, we'd had to count all attributes
1440		size += 2 * blockSize;
1441			// 2 blocks, one for the attributes inode, one for its B+tree
1442	}
1443
1444	return size;
1445}
1446
1447
1448/*!	Finds the block_run where "pos" is located in the data_stream of
1449	the inode.
1450	If successful, "offset" will then be set to the file offset
1451	of the block_run returned; so "pos - offset" is for the block_run
1452	what "pos" is for the whole stream.
1453	The caller has to make sure that "pos" is inside the stream.
1454*/
1455status_t
1456Inode::FindBlockRun(off_t pos, block_run& run, off_t& offset)
1457{
1458	data_stream* data = &Node().data;
1459
1460	// find matching block run
1461
1462	if (data->MaxIndirectRange() > 0 && pos >= data->MaxDirectRange()) {
1463		if (data->MaxDoubleIndirectRange() > 0
1464			&& pos >= data->MaxIndirectRange()) {
1465			// access to double indirect blocks
1466
1467			CachedBlock cached(fVolume);
1468
1469			int32 runsPerBlock;
1470			int32 directSize;
1471			int32 indirectSize;
1472			get_double_indirect_sizes(data->double_indirect.Length(),
1473				fVolume->BlockSize(), runsPerBlock, directSize, indirectSize);
1474			if (directSize <= 0 || indirectSize <= 0)
1475				RETURN_ERROR(B_BAD_DATA);
1476
1477			off_t start = pos - data->MaxIndirectRange();
1478			int32 index = start / indirectSize;
1479
1480			block_run* indirect = (block_run*)cached.SetTo(
1481				fVolume->ToBlock(data->double_indirect) + index / runsPerBlock);
1482			if (indirect == NULL)
1483				RETURN_ERROR(B_ERROR);
1484
1485			int32 current = (start % indirectSize) / directSize;
1486
1487			indirect = (block_run*)cached.SetTo(
1488				fVolume->ToBlock(indirect[index % runsPerBlock])
1489				+ current / runsPerBlock);
1490			if (indirect == NULL)
1491				RETURN_ERROR(B_ERROR);
1492
1493			run = indirect[current % runsPerBlock];
1494			if (run.Length() != data->double_indirect.Length())
1495				RETURN_ERROR(B_BAD_DATA);
1496
1497			offset = data->MaxIndirectRange() + (index * indirectSize)
1498				+ (current * directSize);
1499		} else {
1500			// access to indirect blocks
1501
1502			int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
1503			off_t runBlockEnd = data->MaxDirectRange();
1504
1505			CachedBlock cached(fVolume);
1506			off_t block = fVolume->ToBlock(data->indirect);
1507
1508			for (int32 i = 0; i < data->indirect.Length(); i++) {
1509				block_run* indirect = (block_run*)cached.SetTo(block + i);
1510				if (indirect == NULL)
1511					RETURN_ERROR(B_IO_ERROR);
1512
1513				int32 current = -1;
1514				while (++current < runsPerBlock) {
1515					if (indirect[current].IsZero())
1516						break;
1517
1518					runBlockEnd += (uint32)indirect[current].Length()
1519						<< cached.BlockShift();
1520					if (runBlockEnd > pos) {
1521						run = indirect[current];
1522						offset = runBlockEnd - ((uint32)run.Length()
1523							<< cached.BlockShift());
1524						return fVolume->ValidateBlockRun(run);
1525					}
1526				}
1527			}
1528			RETURN_ERROR(B_ERROR);
1529		}
1530	} else {
1531		// access from direct blocks
1532
1533		off_t runBlockEnd = 0LL;
1534		int32 current = -1;
1535
1536		while (++current < NUM_DIRECT_BLOCKS) {
1537			if (data->direct[current].IsZero())
1538				break;
1539
1540			runBlockEnd += (uint32)data->direct[current].Length()
1541				<< fVolume->BlockShift();
1542			if (runBlockEnd > pos) {
1543				run = data->direct[current];
1544				offset = runBlockEnd
1545					- ((uint32)run.Length() << fVolume->BlockShift());
1546				return fVolume->ValidateBlockRun(run);
1547			}
1548		}
1549
1550		return B_ENTRY_NOT_FOUND;
1551	}
1552	return fVolume->ValidateBlockRun(run);
1553}
1554
1555
1556status_t
1557Inode::ReadAt(off_t pos, uint8* buffer, size_t* _length)
1558{
1559	return file_cache_read(FileCache(), NULL, pos, buffer, _length);
1560}
1561
1562
1563status_t
1564Inode::WriteAt(Transaction& transaction, off_t pos, const uint8* buffer,
1565	size_t* _length)
1566{
1567	InodeReadLocker locker(this);
1568
1569	// update the last modification time in memory, it will be written
1570	// back to the inode, and the index when the file is closed
1571	// TODO: should update the internal last modified time only at this point!
1572	Node().last_modified_time = Node().status_change_time
1573		= HOST_ENDIAN_TO_BFS_INT64(bfs_inode::ToInode(real_time_clock_usecs()));
1574
1575	// TODO: support INODE_LOGGED!
1576
1577	size_t length = *_length;
1578	bool changeSize = (uint64)pos + (uint64)length > (uint64)Size();
1579
1580	// set/check boundaries for pos/length
1581	if (pos < 0)
1582		return B_BAD_VALUE;
1583
1584	locker.Unlock();
1585
1586	// the transaction doesn't have to be started already
1587	if (changeSize && !transaction.IsStarted())
1588		transaction.Start(fVolume, BlockNumber());
1589
1590	WriteLocker writeLocker(fLock);
1591
1592	// Work around possible race condition: Someone might have shrunken the file
1593	// while we had no lock.
1594	if (!transaction.IsStarted()
1595		&& (uint64)pos + (uint64)length > (uint64)Size()) {
1596		writeLocker.Unlock();
1597		transaction.Start(fVolume, BlockNumber());
1598		writeLocker.Lock();
1599	}
1600
1601	off_t oldSize = Size();
1602
1603	if ((uint64)pos + (uint64)length > (uint64)oldSize) {
1604		// let's grow the data stream to the size needed
1605		status_t status = SetFileSize(transaction, pos + length);
1606		if (status != B_OK) {
1607			*_length = 0;
1608			WriteLockInTransaction(transaction);
1609			RETURN_ERROR(status);
1610		}
1611		// TODO: In theory we would need to update the file size
1612		// index here as part of the current transaction - this might just
1613		// be a bit too expensive, but worth a try.
1614
1615		// we need to write back the inode here because it has to
1616		// go into this transaction (we cannot wait until the file
1617		// is closed)
1618		status = WriteBack(transaction);
1619		if (status != B_OK) {
1620			WriteLockInTransaction(transaction);
1621			return status;
1622		}
1623	}
1624
1625	writeLocker.Unlock();
1626
1627	if (oldSize < pos)
1628		FillGapWithZeros(oldSize, pos);
1629
1630	// If we don't want to write anything, we can now return (we may
1631	// just have changed the file size using the position parameter)
1632	if (length == 0)
1633		return B_OK;
1634
1635	status_t status = file_cache_write(FileCache(), NULL, pos, buffer, _length);
1636
1637	if (transaction.IsStarted())
1638		WriteLockInTransaction(transaction);
1639
1640	return status;
1641}
1642
1643
1644/*!	Fills the gap between the old file size and the new file size
1645	with zeros.
1646	It's more or less a copy of Inode::WriteAt() but it can handle
1647	length differences of more than just 4 GB, and it never uses
1648	the log, even if the INODE_LOGGED flag is set.
1649*/
1650status_t
1651Inode::FillGapWithZeros(off_t pos, off_t newSize)
1652{
1653	while (pos < newSize) {
1654		size_t size;
1655		if (newSize > pos + 1024 * 1024 * 1024)
1656			size = 1024 * 1024 * 1024;
1657		else
1658			size = newSize - pos;
1659
1660		status_t status = file_cache_write(FileCache(), NULL, pos, NULL, &size);
1661		if (status < B_OK)
1662			return status;
1663
1664		pos += size;
1665	}
1666
1667	return B_OK;
1668}
1669
1670
1671/*!	Allocates \a length blocks, and clears their contents. Growing
1672	the indirect and double indirect range uses this method.
1673	The allocated block_run is saved in "run"
1674*/
1675status_t
1676Inode::_AllocateBlockArray(Transaction& transaction, block_run& run,
1677	size_t length, bool variableSize)
1678{
1679	if (!run.IsZero())
1680		return B_BAD_VALUE;
1681
1682	status_t status = fVolume->Allocate(transaction, this, length, run,
1683		variableSize ? 1 : length);
1684	if (status != B_OK)
1685		return status;
1686
1687	// make sure those blocks are empty
1688	CachedBlock cached(fVolume);
1689	off_t block = fVolume->ToBlock(run);
1690
1691	for (int32 i = 0; i < run.Length(); i++) {
1692		block_run* runs = (block_run*)cached.SetToWritable(transaction,
1693			block + i, true);
1694		if (runs == NULL)
1695			return B_IO_ERROR;
1696	}
1697	return B_OK;
1698}
1699
1700
1701/*!	Grows the stream to \a size, and fills the direct/indirect/double indirect
1702	ranges with the runs.
1703	This method will also determine the size of the preallocation, if any.
1704*/
1705status_t
1706Inode::_GrowStream(Transaction& transaction, off_t size)
1707{
1708	data_stream* data = &Node().data;
1709
1710	// is the data stream already large enough to hold the new size?
1711	// (can be the case with preallocated blocks)
1712	if (size < data->MaxDirectRange()
1713		|| size < data->MaxIndirectRange()
1714		|| size < data->MaxDoubleIndirectRange()) {
1715		data->size = HOST_ENDIAN_TO_BFS_INT64(size);
1716		return B_OK;
1717	}
1718
1719	// how many bytes are still needed? (unused ranges are always zero)
1720	uint16 minimum = 1;
1721	off_t bytes;
1722	if (data->Size() < data->MaxDoubleIndirectRange()) {
1723		bytes = size - data->MaxDoubleIndirectRange();
1724		// The double indirect range can only handle multiples of
1725		// its base length
1726		minimum = data->double_indirect.Length();
1727	} else if (data->Size() < data->MaxIndirectRange())
1728		bytes = size - data->MaxIndirectRange();
1729	else if (data->Size() < data->MaxDirectRange())
1730		bytes = size - data->MaxDirectRange();
1731	else {
1732		// no preallocation left to be used
1733		bytes = size - data->Size();
1734		if (data->MaxDoubleIndirectRange() > 0)
1735			minimum = data->double_indirect.Length();
1736	}
1737
1738	// do we have enough free blocks on the disk?
1739	off_t blocksNeeded = (bytes + fVolume->BlockSize() - 1)
1740		>> fVolume->BlockShift();
1741	if (blocksNeeded > fVolume->FreeBlocks())
1742		return B_DEVICE_FULL;
1743
1744	off_t blocksRequested = blocksNeeded;
1745		// because of preallocations and partial allocations, the number of
1746		// blocks we need to allocate may be different from the one we request
1747		// from the block allocator
1748
1749	// Should we preallocate some blocks?
1750	// Attributes, attribute directories, and long symlinks usually won't get
1751	// that big, and should stay close to the inode - preallocating could be
1752	// counterproductive.
1753	// Also, if free disk space is tight, don't preallocate.
1754	if (!IsAttribute() && !IsAttributeDirectory() && !IsSymLink()
1755		&& fVolume->FreeBlocks() > 128) {
1756		off_t roundTo = 0;
1757		if (IsFile()) {
1758			// Request preallocated blocks depending on the file size and growth
1759			if (size < 1 * 1024 * 1024 && bytes < 512 * 1024) {
1760				// Preallocate 64 KB for file sizes <1 MB and grow rates <512 KB
1761				roundTo = 65536 >> fVolume->BlockShift();
1762			} else if (size < 32 * 1024 * 1024 && bytes <= 1 * 1024 * 1024) {
1763				// Preallocate 512 KB for file sizes between 1 MB and 32 MB, and
1764				// grow rates smaller than 1 MB
1765				roundTo = (512 * 1024) >> fVolume->BlockShift();
1766			} else {
1767				// Preallocate 1/16 of the file size (ie. 4 MB for 64 MB,
1768				// 64 MB for 1 GB)
1769				roundTo = size >> (fVolume->BlockShift() + 4);
1770			}
1771		} else if (IsIndex()) {
1772			// Always preallocate 64 KB for index directories
1773			roundTo = 65536 >> fVolume->BlockShift();
1774		} else {
1775			// Preallocate only 4 KB - directories only get trimmed when their
1776			// vnode is flushed, which might not happen very often.
1777			roundTo = 4096 >> fVolume->BlockShift();
1778		}
1779		if (roundTo > 1) {
1780			// Round to next "roundTo" block count
1781			blocksRequested = ((blocksNeeded + roundTo) / roundTo) * roundTo;
1782		}
1783	}
1784
1785	while (blocksNeeded > 0) {
1786		// the requested blocks do not need to be returned with a
1787		// single allocation, so we need to iterate until we have
1788		// enough blocks allocated
1789		if (minimum > 1) {
1790			// make sure that "blocks" is a multiple of minimum
1791			blocksRequested = round_up(blocksRequested, minimum);
1792		}
1793
1794		block_run run;
1795		status_t status = fVolume->Allocate(transaction, this, blocksRequested,
1796			run, minimum);
1797		if (status != B_OK)
1798			return status;
1799
1800		// okay, we have the needed blocks, so just distribute them to the
1801		// different ranges of the stream (direct, indirect & double indirect)
1802
1803		blocksNeeded -= run.Length();
1804		// don't preallocate if the first allocation was already too small
1805		blocksRequested = blocksNeeded;
1806
1807		// Direct block range
1808
1809		if (data->Size() <= data->MaxDirectRange()) {
1810			// let's try to put them into the direct block range
1811			int32 free = 0;
1812			for (; free < NUM_DIRECT_BLOCKS; free++) {
1813				if (data->direct[free].IsZero())
1814					break;
1815			}
1816
1817			if (free < NUM_DIRECT_BLOCKS) {
1818				// can we merge the last allocated run with the new one?
1819				int32 last = free - 1;
1820				if (free > 0 && data->direct[last].MergeableWith(run)) {
1821					data->direct[last].length = HOST_ENDIAN_TO_BFS_INT16(
1822						data->direct[last].Length() + run.Length());
1823				} else
1824					data->direct[free] = run;
1825
1826				data->max_direct_range = HOST_ENDIAN_TO_BFS_INT64(
1827					data->MaxDirectRange()
1828					+ run.Length() * fVolume->BlockSize());
1829				data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0
1830					? data->max_direct_range : size);
1831				continue;
1832			}
1833		}
1834
1835		// Indirect block range
1836
1837		if (data->Size() <= data->MaxIndirectRange()
1838			|| !data->MaxIndirectRange()) {
1839			CachedBlock cached(fVolume);
1840			block_run* runs = NULL;
1841			uint32 free = 0;
1842			off_t block;
1843
1844			// if there is no indirect block yet, create one
1845			if (data->indirect.IsZero()) {
1846				status = _AllocateBlockArray(transaction, data->indirect,
1847					NUM_ARRAY_BLOCKS, true);
1848				if (status != B_OK)
1849					return status;
1850
1851				data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64(
1852					data->MaxDirectRange());
1853				// insert the block_run in the first block
1854				runs = (block_run*)cached.SetTo(data->indirect);
1855			} else {
1856				uint32 numberOfRuns = fVolume->BlockSize() / sizeof(block_run);
1857				block = fVolume->ToBlock(data->indirect);
1858
1859				// search first empty entry
1860				int32 i = 0;
1861				for (; i < data->indirect.Length(); i++) {
1862					if ((runs = (block_run*)cached.SetTo(block + i)) == NULL)
1863						return B_IO_ERROR;
1864
1865					for (free = 0; free < numberOfRuns; free++)
1866						if (runs[free].IsZero())
1867							break;
1868
1869					if (free < numberOfRuns)
1870						break;
1871				}
1872				if (i == data->indirect.Length())
1873					runs = NULL;
1874			}
1875
1876			if (runs != NULL) {
1877				// try to insert the run to the last one - note that this
1878				// doesn't take block borders into account, so it could be
1879				// further optimized
1880				cached.MakeWritable(transaction);
1881
1882				int32 last = free - 1;
1883				if (free > 0 && runs[last].MergeableWith(run)) {
1884					runs[last].length = HOST_ENDIAN_TO_BFS_INT16(
1885						runs[last].Length() + run.Length());
1886				} else
1887					runs[free] = run;
1888
1889				data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64(
1890					data->MaxIndirectRange()
1891					+ ((uint32)run.Length() << fVolume->BlockShift()));
1892				data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0
1893					? data->MaxIndirectRange() : size);
1894				continue;
1895			}
1896		}
1897
1898		// Double indirect block range
1899
1900		if (data->Size() <= data->MaxDoubleIndirectRange()
1901			|| !data->max_double_indirect_range) {
1902			// We make sure here that we have this minimum allocated, so if
1903			// the allocation succeeds, we don't run into an endless loop.
1904			if (!data->max_double_indirect_range)
1905				minimum = _DoubleIndirectBlockLength();
1906			else
1907				minimum = data->double_indirect.Length();
1908
1909			if ((run.Length() % minimum) != 0) {
1910				// The number of allocated blocks isn't a multiple of 'minimum',
1911				// so we have to change this. This can happen the first time the
1912				// stream grows into the double indirect range.
1913				// First, free the remaining blocks that don't fit into this
1914				// multiple.
1915				int32 rest = run.Length() % minimum;
1916				run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - rest);
1917
1918				status = fVolume->Free(transaction,
1919					block_run::Run(run.AllocationGroup(),
1920					run.Start() + run.Length(), rest));
1921				if (status != B_OK)
1922					return status;
1923
1924				blocksNeeded += rest;
1925				blocksRequested = round_up(blocksNeeded, minimum);
1926
1927				// Are there any blocks left in the run? If not, allocate
1928				// a new one
1929				if (run.length == 0)
1930					continue;
1931			}
1932
1933			// if there is no double indirect block yet, create one
1934			if (data->double_indirect.IsZero()) {
1935				status = _AllocateBlockArray(transaction,
1936					data->double_indirect, _DoubleIndirectBlockLength());
1937				if (status != B_OK)
1938					return status;
1939
1940				data->max_double_indirect_range = data->max_indirect_range;
1941			}
1942
1943			// calculate the index where to insert the new blocks
1944
1945			int32 runsPerBlock;
1946			int32 directSize;
1947			int32 indirectSize;
1948			get_double_indirect_sizes(data->double_indirect.Length(),
1949				fVolume->BlockSize(), runsPerBlock, directSize, indirectSize);
1950			if (directSize <= 0 || indirectSize <= 0)
1951				return B_BAD_DATA;
1952
1953			off_t start = data->MaxDoubleIndirectRange()
1954				- data->MaxIndirectRange();
1955			int32 indirectIndex = start / indirectSize;
1956			int32 index = (start % indirectSize) / directSize;
1957			int32 runsPerArray = runsPerBlock * minimum;
1958
1959			// distribute the blocks to the array and allocate
1960			// new array blocks when needed
1961
1962			CachedBlock cached(fVolume);
1963			CachedBlock cachedDirect(fVolume);
1964			block_run* array = NULL;
1965			uint32 runLength = run.Length();
1966
1967			while (run.length != 0) {
1968				// get the indirect array block
1969				if (array == NULL) {
1970					uint32 block = indirectIndex / runsPerBlock;
1971					if (block >= minimum)
1972						return EFBIG;
1973
1974					array = (block_run*)cached.SetTo(fVolume->ToBlock(
1975						data->double_indirect) + block);
1976					if (array == NULL)
1977						return B_IO_ERROR;
1978				}
1979
1980				do {
1981					// do we need a new array block?
1982					if (array[indirectIndex % runsPerBlock].IsZero()) {
1983						cached.MakeWritable(transaction);
1984
1985						status = _AllocateBlockArray(transaction,
1986							array[indirectIndex % runsPerBlock],
1987							data->double_indirect.Length());
1988						if (status != B_OK)
1989							return status;
1990					}
1991
1992					block_run* runs = (block_run*)cachedDirect.SetToWritable(
1993						transaction, fVolume->ToBlock(array[indirectIndex
1994							% runsPerBlock]) + index / runsPerBlock);
1995					if (runs == NULL)
1996						return B_IO_ERROR;
1997
1998					do {
1999						// insert the block_run into the array
2000						runs[index % runsPerBlock] = run;
2001						runs[index % runsPerBlock].length
2002							= HOST_ENDIAN_TO_BFS_INT16(minimum);
2003
2004						// alter the remaining block_run
2005						run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start()
2006							+ minimum);
2007						run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length()
2008							- minimum);
2009					} while ((++index % runsPerBlock) != 0 && run.length);
2010				} while ((index % runsPerArray) != 0 && run.length);
2011
2012				if (index == runsPerArray)
2013					index = 0;
2014				if (++indirectIndex % runsPerBlock == 0) {
2015					array = NULL;
2016					index = 0;
2017				}
2018			}
2019
2020			data->max_double_indirect_range = HOST_ENDIAN_TO_BFS_INT64(
2021				data->MaxDoubleIndirectRange()
2022				+ (runLength << fVolume->BlockShift()));
2023			data->size = blocksNeeded > 0 ? HOST_ENDIAN_TO_BFS_INT64(
2024				data->max_double_indirect_range) : size;
2025
2026			continue;
2027		}
2028
2029		RETURN_ERROR(EFBIG);
2030	}
2031	// update the size of the data stream
2032	data->size = HOST_ENDIAN_TO_BFS_INT64(size);
2033
2034	return B_OK;
2035}
2036
2037
2038size_t
2039Inode::_DoubleIndirectBlockLength() const
2040{
2041	if (fVolume->BlockSize() > DOUBLE_INDIRECT_ARRAY_SIZE)
2042		return 1;
2043
2044	return DOUBLE_INDIRECT_ARRAY_SIZE / fVolume->BlockSize();
2045}
2046
2047
2048/*!	Frees the statically sized array of the double indirect part of a data
2049	stream.
2050*/
2051status_t
2052Inode::_FreeStaticStreamArray(Transaction& transaction, int32 level,
2053	block_run run, off_t size, off_t offset, off_t& max)
2054{
2055	int32 indirectSize;
2056	if (level == 0) {
2057		indirectSize = double_indirect_max_indirect_size(run.Length(),
2058			fVolume->BlockSize());
2059	} else {
2060		indirectSize = double_indirect_max_direct_size(run.Length(),
2061			fVolume->BlockSize());
2062	}
2063	if (indirectSize <= 0)
2064		return B_BAD_DATA;
2065
2066	off_t start;
2067	if (size > offset)
2068		start = size - offset;
2069	else
2070		start = 0;
2071
2072	int32 index = start / indirectSize;
2073	int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run);
2074
2075	CachedBlock cached(fVolume);
2076	off_t blockNumber = fVolume->ToBlock(run);
2077
2078	// set the file offset to the current block run
2079	offset += (off_t)index * indirectSize;
2080
2081	for (int32 i = index / runsPerBlock; i < run.Length(); i++) {
2082		block_run* array = (block_run*)cached.SetToWritable(transaction,
2083			blockNumber + i);
2084		if (array == NULL)
2085			RETURN_ERROR(B_ERROR);
2086
2087		for (index = index % runsPerBlock; index < runsPerBlock; index++) {
2088			if (array[index].IsZero()) {
2089				// we also want to break out of the outer loop
2090				i = run.Length();
2091				break;
2092			}
2093
2094			status_t status = B_OK;
2095			if (level == 0) {
2096				status = _FreeStaticStreamArray(transaction, 1, array[index],
2097					size, offset, max);
2098			} else if (offset >= size)
2099				status = fVolume->Free(transaction, array[index]);
2100			else
2101				max = HOST_ENDIAN_TO_BFS_INT64(offset + indirectSize);
2102
2103			if (status < B_OK)
2104				RETURN_ERROR(status);
2105
2106			if (offset >= size)
2107				array[index].SetTo(0, 0, 0);
2108
2109			offset += indirectSize;
2110		}
2111		index = 0;
2112	}
2113	return B_OK;
2114}
2115
2116
2117/*!	Frees all block_runs in the array which come after the specified size.
2118	It also trims the last block_run that contain the size.
2119	"offset" and "max" are maintained until the last block_run that doesn't
2120	have to be freed - after this, the values won't be correct anymore, but
2121	will still assure correct function for all subsequent calls.
2122	"max" is considered to be in file system byte order.
2123*/
2124status_t
2125Inode::_FreeStreamArray(Transaction& transaction, block_run* array,
2126	uint32 arrayLength, off_t size, off_t& offset, off_t& max)
2127{
2128	PRINT(("FreeStreamArray: arrayLength %lu, size %Ld, offset %Ld, max %Ld\n",
2129		arrayLength, size, offset, max));
2130
2131	off_t newOffset = offset;
2132	uint32 i = 0;
2133	for (; i < arrayLength; i++, offset = newOffset) {
2134		if (array[i].IsZero())
2135			break;
2136
2137		newOffset += (off_t)array[i].Length() << fVolume->BlockShift();
2138		if (newOffset <= size)
2139			continue;
2140
2141		block_run run = array[i];
2142
2143		// determine the block_run to be freed
2144		if (newOffset > size && offset < size) {
2145			// free partial block_run (and update the original block_run)
2146			run.start = HOST_ENDIAN_TO_BFS_INT16(array[i].Start()
2147				+ ((size + fVolume->BlockSize() - 1 - offset)
2148					>> fVolume->BlockShift()));
2149				// round to next block
2150			array[i].length = HOST_ENDIAN_TO_BFS_INT16(run.Start()
2151				- array[i].Start());
2152			run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length()
2153				- array[i].Length());
2154
2155			if (run.length == 0)
2156				continue;
2157
2158			// update maximum range
2159			max = HOST_ENDIAN_TO_BFS_INT64(offset + ((off_t)array[i].Length()
2160				<< fVolume->BlockShift()));
2161		} else {
2162			// free the whole block_run
2163			array[i].SetTo(0, 0, 0);
2164
2165			if ((off_t)BFS_ENDIAN_TO_HOST_INT64(max) > offset)
2166				max = HOST_ENDIAN_TO_BFS_INT64(offset);
2167		}
2168
2169		if (fVolume->Free(transaction, run) < B_OK)
2170			return B_IO_ERROR;
2171	}
2172	return B_OK;
2173}
2174
2175
2176status_t
2177Inode::_ShrinkStream(Transaction& transaction, off_t size)
2178{
2179	data_stream* data = &Node().data;
2180	status_t status;
2181
2182	if (data->MaxDoubleIndirectRange() > size) {
2183		off_t* maxDoubleIndirect = &data->max_double_indirect_range;
2184			// gcc 4 work-around: "error: cannot bind packed field
2185			// 'data->data_stream::max_double_indirect_range' to 'off_t&'"
2186		status = _FreeStaticStreamArray(transaction, 0, data->double_indirect,
2187			size, data->MaxIndirectRange(), *maxDoubleIndirect);
2188		if (status != B_OK)
2189			return status;
2190
2191		if (size <= data->MaxIndirectRange()) {
2192			fVolume->Free(transaction, data->double_indirect);
2193			data->double_indirect.SetTo(0, 0, 0);
2194			data->max_double_indirect_range = 0;
2195		}
2196	}
2197
2198	if (data->MaxIndirectRange() > size) {
2199		CachedBlock cached(fVolume);
2200		off_t block = fVolume->ToBlock(data->indirect);
2201		off_t offset = data->MaxDirectRange();
2202
2203		for (int32 i = 0; i < data->indirect.Length(); i++) {
2204			block_run* array = (block_run*)cached.SetToWritable(transaction,
2205				block + i);
2206			if (array == NULL)
2207				break;
2208
2209			off_t* maxIndirect = &data->max_indirect_range;
2210				// gcc 4 work-around: "error: cannot bind packed field
2211				// 'data->data_stream::max_indirect_range' to 'off_t&'"
2212			if (_FreeStreamArray(transaction, array, fVolume->BlockSize()
2213					/ sizeof(block_run), size, offset, *maxIndirect) != B_OK)
2214				return B_IO_ERROR;
2215		}
2216		if (data->max_direct_range == data->max_indirect_range) {
2217			fVolume->Free(transaction, data->indirect);
2218			data->indirect.SetTo(0, 0, 0);
2219			data->max_indirect_range = 0;
2220		}
2221	}
2222
2223	if (data->MaxDirectRange() > size) {
2224		off_t offset = 0;
2225		off_t *maxDirect = &data->max_direct_range;
2226			// gcc 4 work-around: "error: cannot bind packed field
2227			// 'data->data_stream::max_direct_range' to 'off_t&'"
2228		status = _FreeStreamArray(transaction, data->direct, NUM_DIRECT_BLOCKS,
2229			size, offset, *maxDirect);
2230		if (status < B_OK)
2231			return status;
2232	}
2233
2234	data->size = HOST_ENDIAN_TO_BFS_INT64(size);
2235	return B_OK;
2236}
2237
2238
2239status_t
2240Inode::SetFileSize(Transaction& transaction, off_t size)
2241{
2242	if (size < 0)
2243		return B_BAD_VALUE;
2244
2245	off_t oldSize = Size();
2246
2247	if (size == oldSize)
2248		return B_OK;
2249
2250	T(Resize(this, oldSize, size, false));
2251
2252	// should the data stream grow or shrink?
2253	status_t status;
2254	if (size > oldSize) {
2255		status = _GrowStream(transaction, size);
2256		if (status < B_OK) {
2257			// if the growing of the stream fails, the whole operation
2258			// fails, so we should shrink the stream to its former size
2259			_ShrinkStream(transaction, oldSize);
2260		}
2261	} else
2262		status = _ShrinkStream(transaction, size);
2263
2264	if (status < B_OK)
2265		return status;
2266
2267	file_cache_set_size(FileCache(), size);
2268	file_map_set_size(Map(), size);
2269
2270	return WriteBack(transaction);
2271}
2272
2273
2274status_t
2275Inode::Append(Transaction& transaction, off_t bytes)
2276{
2277	return SetFileSize(transaction, Size() + bytes);
2278}
2279
2280
2281/*!	Checks whether or not this inode's data stream needs to be trimmed
2282	because of an earlier preallocation.
2283	Returns true if there are any blocks to be trimmed.
2284*/
2285bool
2286Inode::NeedsTrimming() const
2287{
2288	// We never trim preallocated index blocks to make them grow as smooth as
2289	// possible. There are only few indices anyway, so this doesn't hurt.
2290	// Also, if an inode is already in deleted state, we don't bother trimming
2291	// it.
2292	if (IsIndex() || IsDeleted()
2293		|| (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0))
2294		return false;
2295
2296	off_t roundedSize = round_up(Size(), fVolume->BlockSize());
2297
2298	return Node().data.MaxDirectRange() > roundedSize
2299		|| Node().data.MaxIndirectRange() > roundedSize
2300		|| Node().data.MaxDoubleIndirectRange() > roundedSize;
2301}
2302
2303
2304status_t
2305Inode::TrimPreallocation(Transaction& transaction)
2306{
2307	T(Resize(this, max_c(Node().data.MaxDirectRange(),
2308		Node().data.MaxIndirectRange()), Size(), true));
2309
2310	status_t status = _ShrinkStream(transaction, Size());
2311	if (status < B_OK)
2312		return status;
2313
2314	return WriteBack(transaction);
2315}
2316
2317
2318//!	Frees the file's data stream and removes all attributes
2319status_t
2320Inode::Free(Transaction& transaction)
2321{
2322	FUNCTION();
2323
2324	// Perhaps there should be an implementation of Inode::ShrinkStream() that
2325	// just frees the data_stream, but doesn't change the inode (since it is
2326	// freed anyway) - that would make an undelete command possible
2327	if (!IsSymLink() || (Flags() & INODE_LONG_SYMLINK) != 0) {
2328		status_t status = SetFileSize(transaction, 0);
2329		if (status < B_OK)
2330			return status;
2331	}
2332
2333	// Free all attributes, and remove their indices
2334	{
2335		// We have to limit the scope of AttributeIterator, so that its
2336		// destructor is not called after the inode is deleted
2337		AttributeIterator iterator(this);
2338
2339		char name[B_FILE_NAME_LENGTH];
2340		uint32 type;
2341		size_t length;
2342		ino_t id;
2343		while (iterator.GetNext(name, &length, &type, &id) == B_OK) {
2344			RemoveAttribute(transaction, name);
2345		}
2346	}
2347
2348	if (WriteBack(transaction) < B_OK)
2349		return B_IO_ERROR;
2350
2351	return fVolume->Free(transaction, BlockRun());
2352}
2353
2354
2355//!	Synchronizes (writes back to disk) the file stream of the inode.
2356status_t
2357Inode::Sync()
2358{
2359	if (FileCache())
2360		return file_cache_sync(FileCache());
2361
2362	// We may also want to flush the attribute's data stream to
2363	// disk here... (do we?)
2364
2365	if (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0)
2366		return B_OK;
2367
2368	InodeReadLocker locker(this);
2369
2370	data_stream* data = &Node().data;
2371	status_t status = B_OK;
2372
2373	// flush direct range
2374
2375	for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) {
2376		if (data->direct[i].IsZero())
2377			return B_OK;
2378
2379		status = block_cache_sync_etc(fVolume->BlockCache(),
2380			fVolume->ToBlock(data->direct[i]), data->direct[i].Length());
2381		if (status != B_OK)
2382			return status;
2383	}
2384
2385	// flush indirect range
2386
2387	if (data->max_indirect_range == 0)
2388		return B_OK;
2389
2390	CachedBlock cached(fVolume);
2391	off_t block = fVolume->ToBlock(data->indirect);
2392	int32 count = fVolume->BlockSize() / sizeof(block_run);
2393
2394	for (int32 j = 0; j < data->indirect.Length(); j++) {
2395		block_run* runs = (block_run*)cached.SetTo(block + j);
2396		if (runs == NULL)
2397			break;
2398
2399		for (int32 i = 0; i < count; i++) {
2400			if (runs[i].IsZero())
2401				return B_OK;
2402
2403			status = block_cache_sync_etc(fVolume->BlockCache(),
2404				fVolume->ToBlock(runs[i]), runs[i].Length());
2405			if (status != B_OK)
2406				return status;
2407		}
2408	}
2409
2410	// flush double indirect range
2411
2412	if (data->max_double_indirect_range == 0)
2413		return B_OK;
2414
2415	off_t indirectBlock = fVolume->ToBlock(data->double_indirect);
2416
2417	for (int32 l = 0; l < data->double_indirect.Length(); l++) {
2418		block_run* indirectRuns = (block_run*)cached.SetTo(indirectBlock + l);
2419		if (indirectRuns == NULL)
2420			return B_FILE_ERROR;
2421
2422		CachedBlock directCached(fVolume);
2423
2424		for (int32 k = 0; k < count; k++) {
2425			if (indirectRuns[k].IsZero())
2426				return B_OK;
2427
2428			block = fVolume->ToBlock(indirectRuns[k]);
2429			for (int32 j = 0; j < indirectRuns[k].Length(); j++) {
2430				block_run* runs = (block_run*)directCached.SetTo(block + j);
2431				if (runs == NULL)
2432					return B_FILE_ERROR;
2433
2434				for (int32 i = 0; i < count; i++) {
2435					if (runs[i].IsZero())
2436						return B_OK;
2437
2438					// TODO: combine single block_runs to bigger ones when
2439					// they are adjacent
2440					status = block_cache_sync_etc(fVolume->BlockCache(),
2441						fVolume->ToBlock(runs[i]), runs[i].Length());
2442					if (status != B_OK)
2443						return status;
2444				}
2445			}
2446		}
2447	}
2448	return B_OK;
2449}
2450
2451
2452//	#pragma mark - TransactionListener implementation
2453
2454
2455void
2456Inode::TransactionDone(bool success)
2457{
2458	if (!success) {
2459		// Revert any changes made to the cached bfs_inode
2460		// TODO: return code gets eaten
2461		UpdateNodeFromDisk();
2462	}
2463}
2464
2465
2466void
2467Inode::RemovedFromTransaction()
2468{
2469	Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_TRANSACTION);
2470
2471	// See AddInode() why we do this here
2472	if ((Flags() & INODE_DELETED) != 0)
2473		fVolume->RemovedInodes().Add(this);
2474
2475	rw_lock_write_unlock(&Lock());
2476
2477	if (!fVolume->IsInitializing())
2478		put_vnode(fVolume->FSVolume(), ID());
2479}
2480
2481
2482//	#pragma mark - creation/deletion
2483
2484
2485status_t
2486Inode::Remove(Transaction& transaction, const char* name, ino_t* _id,
2487	bool isDirectory, bool force)
2488{
2489	if (fTree == NULL)
2490		RETURN_ERROR(B_BAD_VALUE);
2491
2492	WriteLockInTransaction(transaction);
2493
2494	// does the file even exist?
2495	off_t id;
2496	if (fTree->Find((uint8*)name, (uint16)strlen(name), &id) < B_OK)
2497		return B_ENTRY_NOT_FOUND;
2498
2499	if (_id)
2500		*_id = id;
2501
2502	Vnode vnode(fVolume, id);
2503	Inode* inode;
2504	status_t status = vnode.Get(&inode);
2505	if (status < B_OK) {
2506		REPORT_ERROR(status);
2507		return B_ENTRY_NOT_FOUND;
2508	}
2509
2510	T(Remove(inode, name));
2511	inode->WriteLockInTransaction(transaction);
2512
2513	// Inode::IsContainer() is true also for indices (furthermore, the S_IFDIR
2514	// bit is set for indices in BFS, not for attribute directories) - but you
2515	// should really be able to do whatever you want with your indices
2516	// without having to remove all files first :)
2517	if (!inode->IsIndex() && !force) {
2518		// if it's not of the correct type, don't delete it!
2519		if (inode->IsContainer() != isDirectory)
2520			return isDirectory ? B_NOT_A_DIRECTORY : B_IS_A_DIRECTORY;
2521
2522		// only delete empty directories
2523		if (isDirectory && !inode->IsEmpty())
2524			return B_DIRECTORY_NOT_EMPTY;
2525	}
2526
2527	// remove_vnode() allows the inode to be accessed until the last put_vnode()
2528	status = remove_vnode(fVolume->FSVolume(), id);
2529	if (status != B_OK)
2530		return status;
2531
2532	if (fTree->Remove(transaction, name, id) != B_OK && !force) {
2533		unremove_vnode(fVolume->FSVolume(), id);
2534		RETURN_ERROR(B_ERROR);
2535	}
2536
2537#ifdef DEBUG
2538	if (fTree->Find((uint8*)name, (uint16)strlen(name), &id) == B_OK) {
2539		DIE(("deleted entry still there"));
2540	}
2541#endif
2542
2543	ContainerContentsChanged(transaction);
2544
2545	// update the inode, so that no one will ever doubt it's deleted :-)
2546	inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED);
2547	inode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE);
2548
2549	// In balance to the Inode::Create() method, the main indices
2550	// are updated here (name, size, & last_modified)
2551
2552	Index index(fVolume);
2553	if (inode->InNameIndex()) {
2554		index.RemoveName(transaction, name, inode);
2555			// If removing from the index fails, it is not regarded as a
2556			// fatal error and will not be reported back!
2557			// Deleted inodes won't be visible in queries anyway.
2558	}
2559
2560	if (inode->InSizeIndex())
2561		index.RemoveSize(transaction, inode);
2562	if (inode->InLastModifiedIndex())
2563		index.RemoveLastModified(transaction, inode);
2564
2565	return inode->WriteBack(transaction);
2566}
2567
2568
2569/*!	Creates the inode with the specified \a parent directory, and automatically
2570	adds the created inode to that parent directory. If an attribute directory
2571	is created, it will also automatically  be added to the \a parent inode as
2572	such. However, the indices root node, and the regular root node won't be
2573	added to the superblock.
2574	It will also create the initial B+tree for the inode if it's a directory
2575	of any kind.
2576	\a name may be \c NULL, but only if no \a parent is given.
2577	If the "_id" or "_inode" variable is given and non-NULL to store the
2578	inode's ID, the inode stays locked - you have to call put_vnode() if you
2579	don't use it anymore.
2580	If the node already exists, this method will fail if \c O_EXCL is set, or
2581	it's a directory or a symlink. Otherwise, it will just be returned.
2582	If \c O_TRUNC has been specified, the file will also be truncated.
2583*/
2584status_t
2585Inode::Create(Transaction& transaction, Inode* parent, const char* name,
2586	int32 mode, int openMode, uint32 type, bool* _created, ino_t* _id,
2587	Inode** _inode, fs_vnode_ops* vnodeOps, uint32 publishFlags)
2588{
2589	FUNCTION_START(("name = %s, mode = %ld\n", name, mode));
2590
2591	block_run parentRun = parent ? parent->BlockRun() : block_run::Run(0, 0, 0);
2592	Volume* volume = transaction.GetVolume();
2593	BPlusTree* tree = NULL;
2594
2595	if (parent != NULL && (mode & S_ATTR_DIR) == 0 && parent->IsContainer()) {
2596		// check if the file already exists in the directory
2597		tree = parent->Tree();
2598	}
2599
2600	if (parent != NULL) {
2601		// the parent directory is locked during the whole inode creation
2602		parent->WriteLockInTransaction(transaction);
2603	}
2604
2605	if (parent != NULL && !volume->IsInitializing() && parent->IsContainer()) {
2606		// don't create anything in removed directories
2607		bool removed;
2608		if (get_vnode_removed(volume->FSVolume(), parent->ID(), &removed)
2609				== B_OK && removed) {
2610			RETURN_ERROR(B_ENTRY_NOT_FOUND);
2611		}
2612	}
2613
2614	if (tree != NULL) {
2615		// Does the file already exist?
2616		off_t offset;
2617		if (tree->Find((uint8*)name, (uint16)strlen(name), &offset) == B_OK) {
2618			// Return if the file should be a directory/link or opened in
2619			// exclusive mode
2620			if (S_ISDIR(mode) || S_ISLNK(mode) || (openMode & O_EXCL) != 0)
2621				return B_FILE_EXISTS;
2622
2623			Vnode vnode(volume, offset);
2624			Inode* inode;
2625			status_t status = vnode.Get(&inode);
2626			if (status != B_OK) {
2627				REPORT_ERROR(status);
2628				return B_ENTRY_NOT_FOUND;
2629			}
2630
2631			if (inode->IsDirectory() && (openMode & O_RWMASK) != O_RDONLY)
2632				return B_IS_A_DIRECTORY;
2633			if ((openMode & O_DIRECTORY) != 0 && !inode->IsDirectory())
2634				return B_NOT_A_DIRECTORY;
2635
2636			// we want to open the file, so we should have the rights to do so
2637			if (inode->CheckPermissions(open_mode_to_access(openMode)
2638					| ((openMode & O_TRUNC) != 0 ? W_OK : 0)) != B_OK)
2639				return B_NOT_ALLOWED;
2640
2641			if ((openMode & O_TRUNC) != 0) {
2642				// truncate the existing file
2643				inode->WriteLockInTransaction(transaction);
2644
2645				status_t status = inode->SetFileSize(transaction, 0);
2646				if (status == B_OK)
2647					status = inode->WriteBack(transaction);
2648
2649				if (status != B_OK)
2650					return status;
2651			}
2652
2653			if (_created)
2654				*_created = false;
2655			if (_id)
2656				*_id = inode->ID();
2657			if (_inode)
2658				*_inode = inode;
2659
2660			// Only keep the vnode in memory if the _id or _inode pointer is
2661			// provided
2662			if (_id != NULL || _inode != NULL)
2663				vnode.Keep();
2664
2665			return B_OK;
2666		}
2667	} else if (parent != NULL && (mode & S_ATTR_DIR) == 0) {
2668		return B_BAD_VALUE;
2669	} else if ((openMode & O_DIRECTORY) != 0) {
2670		// TODO: we might need to return B_NOT_A_DIRECTORY here
2671		return B_ENTRY_NOT_FOUND;
2672	}
2673
2674	status_t status;
2675
2676	// do we have the power to create new files at all?
2677	if (parent != NULL && (status = parent->CheckPermissions(W_OK)) != B_OK)
2678		RETURN_ERROR(status);
2679
2680	// allocate space for the new inode
2681	InodeAllocator allocator(transaction);
2682	block_run run;
2683	Inode* inode;
2684	status = allocator.New(&parentRun, mode, publishFlags, run, vnodeOps,
2685		&inode);
2686	if (status < B_OK)
2687		return status;
2688
2689	T(Create(inode, parent, name, mode, openMode, type));
2690
2691	// Initialize the parts of the bfs_inode structure that
2692	// InodeAllocator::New() hasn't touched yet
2693
2694	bfs_inode* node = &inode->Node();
2695
2696	if (parent == NULL) {
2697		// we set the parent to itself in this case
2698		// (only happens for the root and indices node)
2699		node->parent = run;
2700	} else
2701		node->parent = parentRun;
2702
2703	node->uid = HOST_ENDIAN_TO_BFS_INT32(geteuid());
2704	node->gid = HOST_ENDIAN_TO_BFS_INT32(parent
2705		? parent->Node().GroupID() : getegid());
2706		// the group ID is inherited from the parent, if available
2707
2708	node->type = HOST_ENDIAN_TO_BFS_INT32(type);
2709
2710	inode->WriteBack(transaction);
2711		// make sure the initialized node is available to others
2712
2713	// only add the name to regular files, directories, or symlinks
2714	// don't add it to attributes, or indices
2715	if (tree && inode->IsRegularNode()
2716		&& inode->SetName(transaction, name) != B_OK)
2717		return B_ERROR;
2718
2719	// Initialize b+tree if it's a directory (and add "." & ".." if it's
2720	// a standard directory for files - not for attributes or indices)
2721	if (inode->IsContainer()) {
2722		status = allocator.CreateTree();
2723		if (status != B_OK)
2724			return status;
2725	}
2726
2727	// Add a link to the inode from the parent, depending on its type
2728	// (the vnode is not published yet, so it is safe to make the inode
2729	// accessable to the file system here)
2730	if (tree != NULL) {
2731		status = tree->Insert(transaction, name, inode->ID());
2732	} else if (parent != NULL && (mode & S_ATTR_DIR) != 0) {
2733		parent->Attributes() = run;
2734		status = parent->WriteBack(transaction);
2735	}
2736
2737	// Note, we only care if the inode could be made accessable for the
2738	// two cases above; the root node or the indices root node must
2739	// handle this case on their own (or other cases where "parent" is
2740	// NULL)
2741	if (status != B_OK)
2742		RETURN_ERROR(status);
2743
2744	// Update the main indices (name, size & last_modified)
2745	// (live queries might want to access us after this)
2746
2747	Index index(volume);
2748	if (inode->InNameIndex() && name != NULL) {
2749		// the name index only contains regular files
2750		// (but not the root node where name == NULL)
2751		status = index.InsertName(transaction, name, inode);
2752		if (status != B_OK && status != B_BAD_INDEX) {
2753			// We have to remove the node from the parent at this point,
2754			// because the InodeAllocator destructor can't handle this
2755			// case (and if it fails, we can't do anything about it...)
2756			if (tree)
2757				tree->Remove(transaction, name, inode->ID());
2758			else if (parent != NULL && (mode & S_ATTR_DIR) != 0)
2759				parent->Node().attributes.SetTo(0, 0, 0);
2760
2761			RETURN_ERROR(status);
2762		}
2763	}
2764
2765	if (parent != NULL && parent->IsContainer())
2766		parent->ContainerContentsChanged(transaction);
2767
2768	inode->UpdateOldLastModified();
2769
2770	// The "size" & "last_modified" indices don't contain directories.
2771	// If adding to these indices fails, the inode creation will not be
2772	// harmed; they are considered less important than the "name" index.
2773	if (inode->InSizeIndex())
2774		index.InsertSize(transaction, inode);
2775	if (inode->InLastModifiedIndex())
2776		index.InsertLastModified(transaction, inode);
2777
2778	if (inode->NeedsFileCache()) {
2779		inode->SetFileCache(file_cache_create(volume->ID(), inode->ID(),
2780			inode->Size()));
2781		inode->SetMap(file_map_create(volume->ID(), inode->ID(),
2782			inode->Size()));
2783
2784		if (inode->FileCache() == NULL || inode->Map() == NULL)
2785			return B_NO_MEMORY;
2786	}
2787
2788	// Everything worked well until this point, we have a fully
2789	// initialized inode, and we want to keep it
2790	allocator.Keep(vnodeOps, publishFlags);
2791
2792	if (_created)
2793		*_created = true;
2794	if (_id != NULL)
2795		*_id = inode->ID();
2796	if (_inode != NULL)
2797		*_inode = inode;
2798
2799	// if either _id or _inode is passed, we will keep the inode locked
2800	if (_id == NULL && _inode == NULL)
2801		put_vnode(volume->FSVolume(), inode->ID());
2802
2803	return B_OK;
2804}
2805
2806
2807//	#pragma mark - AttributeIterator
2808
2809
2810AttributeIterator::AttributeIterator(Inode* inode)
2811	:
2812	fCurrentSmallData(0),
2813	fInode(inode),
2814	fAttributes(NULL),
2815	fIterator(NULL),
2816	fBuffer(NULL)
2817{
2818	inode->_AddIterator(this);
2819}
2820
2821
2822AttributeIterator::~AttributeIterator()
2823{
2824	if (fAttributes)
2825		put_vnode(fAttributes->GetVolume()->FSVolume(), fAttributes->ID());
2826
2827	delete fIterator;
2828	fInode->_RemoveIterator(this);
2829}
2830
2831
2832status_t
2833AttributeIterator::Rewind()
2834{
2835	fCurrentSmallData = 0;
2836
2837	if (fIterator != NULL)
2838		fIterator->Rewind();
2839
2840	return B_OK;
2841}
2842
2843
2844status_t
2845AttributeIterator::GetNext(char* name, size_t* _length, uint32* _type,
2846	ino_t* _id)
2847{
2848	// read attributes out of the small data section
2849
2850	if (fCurrentSmallData >= 0) {
2851		NodeGetter nodeGetter(fInode->GetVolume(), fInode);
2852		if (nodeGetter.Node() == NULL)
2853			return B_IO_ERROR;
2854
2855		const bfs_inode* node = nodeGetter.Node();
2856		const small_data* item = ((bfs_inode*)node)->SmallDataStart();
2857
2858		RecursiveLocker _(&fInode->SmallDataLock());
2859
2860		int32 index = 0;
2861		for (; !item->IsLast(node); item = item->Next(), index++) {
2862			if (item->NameSize() == FILE_NAME_NAME_LENGTH
2863				&& *item->Name() == FILE_NAME_NAME)
2864				continue;
2865
2866			if (index >= fCurrentSmallData)
2867				break;
2868		}
2869
2870		if (!item->IsLast(node)) {
2871			strncpy(name, item->Name(), B_FILE_NAME_LENGTH);
2872			*_type = item->Type();
2873			*_length = item->NameSize();
2874			*_id = (ino_t)index;
2875
2876			fCurrentSmallData = index + 1;
2877			return B_OK;
2878		}
2879
2880		// stop traversing the small_data section
2881		fCurrentSmallData = -1;
2882	}
2883
2884	// read attributes out of the attribute directory
2885
2886	if (fInode->Attributes().IsZero())
2887		return B_ENTRY_NOT_FOUND;
2888
2889	Volume* volume = fInode->GetVolume();
2890
2891	// if you haven't yet access to the attributes directory, get it
2892	if (fAttributes == NULL) {
2893		if (get_vnode(volume->FSVolume(), volume->ToVnode(fInode->Attributes()),
2894				(void**)&fAttributes) != B_OK) {
2895			FATAL(("get_vnode() failed in AttributeIterator::GetNext(ino_t"
2896				" = %" B_PRIdINO ",name = \"%s\")\n", fInode->ID(), name));
2897			return B_ENTRY_NOT_FOUND;
2898		}
2899
2900		BPlusTree* tree = fAttributes->Tree();
2901		if (tree == NULL
2902			|| (fIterator = new(std::nothrow) TreeIterator(tree)) == NULL) {
2903			FATAL(("could not get tree in AttributeIterator::GetNext(ino_t"
2904				" = %" B_PRIdINO ",name = \"%s\")\n", fInode->ID(), name));
2905			return B_ENTRY_NOT_FOUND;
2906		}
2907	}
2908
2909	uint16 length;
2910	ino_t id;
2911	status_t status = fIterator->GetNextEntry(name, &length,
2912		B_FILE_NAME_LENGTH, &id);
2913	if (status != B_OK)
2914		return status;
2915
2916	Vnode vnode(volume, id);
2917	Inode* attribute;
2918	if ((status = vnode.Get(&attribute)) == B_OK) {
2919		*_type = attribute->Type();
2920		*_length = length;
2921		*_id = id;
2922	}
2923
2924	return status;
2925}
2926
2927
2928void
2929AttributeIterator::Update(uint16 index, int8 change)
2930{
2931	// fCurrentSmallData points already to the next item
2932	if (index < fCurrentSmallData)
2933		fCurrentSmallData += change;
2934}
2935
2936