1/*
2 * Copyright 2001-2015, Axel D��rfler, axeld@pinc-software.de.
3 * This file may be used under the terms of the MIT License.
4 */
5#ifndef B_PLUS_TREE_H
6#define B_PLUS_TREE_H
7
8
9#include "bfs.h"
10
11#include "Utility.h"
12
13#if !_BOOT_MODE
14#include "Journal.h"
15class Inode;
16#else
17#define Inode BFS::Stream
18
19namespace BFS {
20
21class Stream;
22class Transaction;
23class TransactionListener {};
24#endif // _BOOT_MODE
25
26//	#pragma mark - on-disk structures
27
28struct bplustree_node;
29
30#define BPLUSTREE_NULL			-1LL
31#define BPLUSTREE_FREE			-2LL
32
33struct bplustree_header {
34	uint32		magic;
35	uint32		node_size;
36	uint32		max_number_of_levels;
37	uint32		data_type;
38	int64		root_node_pointer;
39	int64		free_node_pointer;
40	int64		maximum_size;
41
42	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
43	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
44	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
45	off_t RootNode() const
46		{ return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
47	off_t FreeNode() const
48		{ return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
49	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
50	uint32 MaxNumberOfLevels() const
51		{ return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
52
53	inline bool CheckNode(const bplustree_node* node) const;
54	inline bool IsValidLink(off_t link) const;
55	bool IsValid() const;
56} _PACKED;
57
58#define BPLUSTREE_MAGIC 			0x69f6c2e8
59#define BPLUSTREE_NODE_SIZE 		1024
60#define BPLUSTREE_MAX_KEY_LENGTH	256
61#define BPLUSTREE_MIN_KEY_LENGTH	1
62
63enum bplustree_types {
64	BPLUSTREE_STRING_TYPE	= 0,
65	BPLUSTREE_INT32_TYPE	= 1,
66	BPLUSTREE_UINT32_TYPE	= 2,
67	BPLUSTREE_INT64_TYPE	= 3,
68	BPLUSTREE_UINT64_TYPE	= 4,
69	BPLUSTREE_FLOAT_TYPE	= 5,
70	BPLUSTREE_DOUBLE_TYPE	= 6
71};
72
73
74struct duplicate_array;
75
76
77struct bplustree_node {
78			int64				left_link;
79			int64				right_link;
80			int64				overflow_link;
81			uint16				all_key_count;
82			uint16				all_key_length;
83
84			off_t				LeftLink() const
85									{ return BFS_ENDIAN_TO_HOST_INT64(
86										left_link); }
87			off_t				RightLink() const
88									{ return BFS_ENDIAN_TO_HOST_INT64(
89										right_link); }
90			off_t				OverflowLink() const
91									{ return BFS_ENDIAN_TO_HOST_INT64(
92										overflow_link); }
93			uint16				NumKeys() const
94									{ return BFS_ENDIAN_TO_HOST_INT16(
95										all_key_count); }
96			uint16				AllKeyLength() const
97									{ return BFS_ENDIAN_TO_HOST_INT16(
98										all_key_length); }
99
100	inline	uint16*				KeyLengths() const;
101	inline	off_t*				Values() const;
102	inline	uint8*				Keys() const;
103	inline	int32				Used() const;
104			uint8*				KeyAt(int32 index, uint16* keyLength) const;
105
106	inline	bool				IsLeaf() const;
107
108			void				Initialize();
109			uint8				CountDuplicates(off_t offset,
110									bool isFragment) const;
111			off_t				DuplicateAt(off_t offset, bool isFragment,
112									int8 index) const;
113			uint32				FragmentsUsed(uint32 nodeSize) const;
114	inline	duplicate_array*	FragmentAt(int8 index) const;
115	inline	duplicate_array*	DuplicateArray() const;
116
117	static inline uint8			LinkType(off_t link);
118	static inline off_t			MakeLink(uint8 type, off_t link,
119									uint32 fragmentIndex = 0);
120	static inline bool			IsDuplicate(off_t link);
121	static inline off_t			FragmentOffset(off_t link);
122	static inline uint32		FragmentIndex(off_t link);
123	static inline uint32		MaxFragments(uint32 nodeSize);
124
125			status_t			CheckIntegrity(uint32 nodeSize) const;
126} _PACKED;
127
128//#define BPLUSTREE_NODE 0
129#define BPLUSTREE_DUPLICATE_NODE 2
130#define BPLUSTREE_DUPLICATE_FRAGMENT 3
131
132#define NUM_FRAGMENT_VALUES 7
133#define NUM_DUPLICATE_VALUES 125
134
135//**************************************
136
137enum bplustree_traversing {
138	BPLUSTREE_FORWARD = 1,
139	BPLUSTREE_BACKWARD = -1,
140
141	BPLUSTREE_BEGIN = 0,
142	BPLUSTREE_END = 1
143};
144
145
146//	#pragma mark - in-memory structures
147
148
149class BPlusTree;
150struct TreeCheck;
151class TreeIterator;
152
153
154#if !_BOOT_MODE
155// needed for searching (utilizing a stack)
156struct node_and_key {
157	off_t	nodeOffset;
158	uint16	keyIndex;
159};
160#endif // !_BOOT_MODE
161
162
163class CachedNode {
164public:
165	CachedNode(BPlusTree* tree)
166		:
167		fTree(tree),
168		fNode(NULL),
169		fOffset(0),
170		fBlockNumber(0),
171		fWritable(false)
172	{
173#if _BOOT_MODE
174		fBlock = NULL;
175#endif
176	}
177
178	CachedNode(BPlusTree* tree, off_t offset, bool check = true)
179		:
180		fTree(tree),
181		fNode(NULL),
182		fOffset(0),
183		fBlockNumber(0),
184		fWritable(false)
185	{
186#if _BOOT_MODE
187		fBlock = NULL;
188#endif
189		SetTo(offset, check);
190	}
191
192	~CachedNode()
193	{
194		Unset();
195#if _BOOT_MODE
196		free(fBlock);
197#endif
198	}
199
200			const bplustree_node* SetTo(off_t offset, bool check = true);
201			status_t			SetTo(off_t offset,
202									const bplustree_node** _node,
203									bool check = true);
204
205			const bplustree_header* SetToHeader();
206			void				Unset();
207
208#if !_BOOT_MODE
209			bplustree_node*		SetToWritable(Transaction& transaction,
210									off_t offset, bool check = true);
211			bplustree_node*		MakeWritable(Transaction& transaction);
212			bplustree_header*	SetToWritableHeader(Transaction& transaction);
213
214			void				UnsetUnchanged(Transaction& transaction);
215
216			status_t			Free(Transaction& transaction, off_t offset);
217			status_t			Allocate(Transaction& transaction,
218									bplustree_node** _node, off_t* _offset);
219#endif // !_BOOT_MODE
220
221			bool				IsWritable() const { return fWritable; }
222			bplustree_node*		Node() const { return fNode; }
223
224protected:
225			bplustree_node*		InternalSetTo(Transaction* transaction,
226									off_t offset);
227
228			BPlusTree*			fTree;
229			bplustree_node*		fNode;
230			off_t				fOffset;
231			off_t				fBlockNumber;
232			bool				fWritable;
233#if _BOOT_MODE
234			uint8*				fBlock;
235#endif
236};
237
238
239class BPlusTree : public TransactionListener {
240public:
241#if !_BOOT_MODE
242								BPlusTree(Transaction& transaction,
243									Inode* stream,
244									int32 nodeSize = BPLUSTREE_NODE_SIZE);
245#endif
246								BPlusTree(Inode* stream);
247								BPlusTree();
248								~BPlusTree();
249
250#if !_BOOT_MODE
251			status_t			SetTo(Transaction& transaction, Inode* stream,
252									int32 nodeSize = BPLUSTREE_NODE_SIZE);
253#endif
254			status_t			SetTo(Inode* stream);
255			status_t			SetStream(Inode* stream);
256
257			status_t			InitCheck();
258
259			size_t				NodeSize() const { return fNodeSize; }
260			Inode*				Stream() const { return fStream; }
261
262#if !_BOOT_MODE
263			status_t			Validate(bool repair, bool& _errorsFound);
264			status_t			MakeEmpty();
265
266			status_t			Remove(Transaction& transaction,
267									const uint8* key, uint16 keyLength,
268									off_t value);
269			status_t			Insert(Transaction& transaction,
270									const uint8* key, uint16 keyLength,
271									off_t value);
272
273			status_t			Remove(Transaction& transaction, const char* key,
274									off_t value);
275			status_t			Insert(Transaction& transaction, const char* key,
276									off_t value);
277			status_t			Insert(Transaction& transaction, int32 key,
278									off_t value);
279			status_t			Insert(Transaction& transaction, uint32 key,
280									off_t value);
281			status_t			Insert(Transaction& transaction, int64 key,
282									off_t value);
283			status_t			Insert(Transaction& transaction, uint64 key,
284									off_t value);
285			status_t			Insert(Transaction& transaction, float key,
286									off_t value);
287			status_t			Insert(Transaction& transaction, double key,
288									off_t value);
289
290			status_t			Replace(Transaction& transaction,
291									const uint8* key, uint16 keyLength,
292									off_t value);
293#endif // !_BOOT_MODE
294
295			status_t			Find(const uint8* key, uint16 keyLength,
296									off_t* value);
297
298#if !_BOOT_MODE
299	static	int32				TypeCodeToKeyType(type_code code);
300	static	int32				ModeToKeyType(mode_t mode);
301
302protected:
303	virtual void				TransactionDone(bool success);
304	virtual void				RemovedFromTransaction();
305#endif // !_BOOT_MODE
306
307private:
308								BPlusTree(const BPlusTree& other);
309								BPlusTree& operator=(const BPlusTree& other);
310									// no implementation
311
312			int32				_CompareKeys(const void* key1, int keylength1,
313									const void* key2, int keylength2);
314			status_t			_FindKey(const bplustree_node* node,
315									const uint8* key, uint16 keyLength,
316									uint16* index = NULL, off_t* next = NULL);
317#if !_BOOT_MODE
318			status_t			_SeekDown(Stack<node_and_key>& stack,
319									const uint8* key, uint16 keyLength);
320
321			status_t			_FindFreeDuplicateFragment(
322									Transaction& transaction,
323									const bplustree_node* node,
324									CachedNode& cached, off_t* _offset,
325									bplustree_node** _fragment, uint32* _index);
326			status_t			_InsertDuplicate(Transaction& transaction,
327									CachedNode& cached,
328									const bplustree_node* node, uint16 index,
329									off_t value);
330			void				_InsertKey(bplustree_node* node, uint16 index,
331									uint8* key, uint16 keyLength, off_t value);
332			status_t			_SplitNode(bplustree_node* node,
333									off_t nodeOffset, bplustree_node* other,
334									off_t otherOffset, uint16* _keyIndex,
335									uint8* key, uint16* _keyLength,
336									off_t* _value);
337
338			status_t			_RemoveDuplicate(Transaction& transaction,
339									const bplustree_node* node,
340									CachedNode& cached, uint16 keyIndex,
341									off_t value);
342			void				_RemoveKey(bplustree_node* node, uint16 index);
343
344			void				_UpdateIterators(off_t offset, off_t nextOffset,
345									uint16 keyIndex, uint16 splitAt,
346									int8 change);
347			void				_AddIterator(TreeIterator* iterator);
348			void				_RemoveIterator(TreeIterator* iterator);
349
350			status_t			_ValidateChildren(TreeCheck& check,
351									uint32 level, off_t offset,
352									const uint8* largestKey, uint16 keyLength,
353									const bplustree_node* parent);
354			status_t			_ValidateChild(TreeCheck& check,
355									CachedNode& cached, uint32 level,
356									off_t offset, off_t lastOffset,
357									off_t nextOffset, const uint8* key,
358									uint16 keyLength);
359#endif // !_BOOT_MODE
360
361private:
362			friend class TreeIterator;
363			friend class CachedNode;
364			friend struct TreeCheck;
365
366			Inode*				fStream;
367			bplustree_header	fHeader;
368			int32				fNodeSize;
369			bool				fAllowDuplicates;
370			bool				fInTransaction;
371			status_t			fStatus;
372
373#if !_BOOT_MODE
374			mutex				fIteratorLock;
375			SinglyLinkedList<TreeIterator> fIterators;
376#endif
377};
378
379
380//	#pragma mark - helper classes/functions
381
382
383class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> {
384public:
385								TreeIterator(BPlusTree* tree);
386								~TreeIterator();
387
388			status_t			Goto(int8 to);
389			status_t			Traverse(int8 direction, void* key,
390									uint16* keyLength, uint16 maxLength,
391									off_t* value, uint16* duplicate = NULL);
392			status_t			Find(const uint8* key, uint16 keyLength);
393
394			status_t			Rewind();
395			status_t			GetNextEntry(void* key, uint16* keyLength,
396									uint16 maxLength, off_t* value,
397									uint16* duplicate = NULL);
398			status_t			GetPreviousEntry(void* key, uint16* keyLength,
399									uint16 maxLength, off_t* value,
400									uint16* duplicate = NULL);
401			void				SkipDuplicates();
402
403			BPlusTree*			Tree() const { return fTree; }
404
405#ifdef DEBUG
406			void				Dump();
407#endif
408
409private:
410			friend class BPlusTree;
411
412			// called by BPlusTree
413			void				Update(off_t offset, off_t nextOffset,
414									uint16 keyIndex, uint16 splitAt,
415									int8 change);
416			void				Stop();
417
418private:
419			BPlusTree*			fTree;
420			off_t				fCurrentNodeOffset;
421									// traverse position
422			int32				fCurrentKey;
423			off_t				fDuplicateNode;
424			uint16				fDuplicate;
425			uint16				fNumDuplicates;
426			bool				fIsFragment;
427};
428
429
430//	#pragma mark - BPlusTree's inline functions
431//	(most of them may not be needed)
432
433
434#if !_BOOT_MODE
435inline status_t
436BPlusTree::Remove(Transaction& transaction, const char* key, off_t value)
437{
438	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
439		return B_BAD_TYPE;
440	return Remove(transaction, (uint8*)key, strlen(key), value);
441}
442
443
444inline status_t
445BPlusTree::Insert(Transaction& transaction, const char* key, off_t value)
446{
447	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
448		return B_BAD_TYPE;
449	return Insert(transaction, (uint8*)key, strlen(key), value);
450}
451
452
453inline status_t
454BPlusTree::Insert(Transaction& transaction, int32 key, off_t value)
455{
456	if (fHeader.DataType() != BPLUSTREE_INT32_TYPE)
457		return B_BAD_TYPE;
458	return Insert(transaction, (uint8*)&key, sizeof(key), value);
459}
460
461
462inline status_t
463BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value)
464{
465	if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE)
466		return B_BAD_TYPE;
467	return Insert(transaction, (uint8*)&key, sizeof(key), value);
468}
469
470
471inline status_t
472BPlusTree::Insert(Transaction& transaction, int64 key, off_t value)
473{
474	if (fHeader.DataType() != BPLUSTREE_INT64_TYPE)
475		return B_BAD_TYPE;
476	return Insert(transaction, (uint8*)&key, sizeof(key), value);
477}
478
479
480inline status_t
481BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value)
482{
483	if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE)
484		return B_BAD_TYPE;
485	return Insert(transaction, (uint8*)&key, sizeof(key), value);
486}
487
488
489inline status_t
490BPlusTree::Insert(Transaction& transaction, float key, off_t value)
491{
492	if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE)
493		return B_BAD_TYPE;
494	return Insert(transaction, (uint8*)&key, sizeof(key), value);
495}
496
497
498inline status_t
499BPlusTree::Insert(Transaction& transaction, double key, off_t value)
500{
501	if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE)
502		return B_BAD_TYPE;
503	return Insert(transaction, (uint8*)&key, sizeof(key), value);
504}
505#endif // !_BOOT_MODE
506
507
508//	#pragma mark - TreeIterator inline functions
509
510
511inline status_t
512TreeIterator::Rewind()
513{
514	return Goto(BPLUSTREE_BEGIN);
515}
516
517
518inline status_t
519TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength,
520	off_t* value, uint16* duplicate)
521{
522	return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value,
523		duplicate);
524}
525
526
527inline status_t
528TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength,
529	off_t* value, uint16* duplicate)
530{
531	return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value,
532		duplicate);
533}
534
535
536//	#pragma mark - bplustree_header inline functions
537
538
539inline bool
540bplustree_header::CheckNode(const bplustree_node* node) const
541{
542	// sanity checks (links, all_key_count)
543	return IsValidLink(node->LeftLink())
544		&& IsValidLink(node->RightLink())
545		&& IsValidLink(node->OverflowLink())
546		&& (int8*)node->Values() + node->NumKeys() * sizeof(off_t)
547				<= (int8*)node + NodeSize();
548}
549
550
551inline bool
552bplustree_header::IsValidLink(off_t link) const
553{
554	return link == BPLUSTREE_NULL
555		|| (link > 0 && link <= MaximumSize() - NodeSize());
556}
557
558
559//	#pragma mark - bplustree_node inline functions
560
561
562inline uint16*
563bplustree_node::KeyLengths() const
564{
565	return (uint16*)(((char*)this) + key_align(sizeof(bplustree_node)
566		+ AllKeyLength()));
567}
568
569
570inline off_t*
571bplustree_node::Values() const
572{
573	return (off_t*)((char*)KeyLengths() + NumKeys() * sizeof(uint16));
574}
575
576
577inline uint8*
578bplustree_node::Keys() const
579{
580	return (uint8*)this + sizeof(bplustree_node);
581}
582
583
584inline int32
585bplustree_node::Used() const
586{
587	return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys()
588		* (sizeof(uint16) + sizeof(off_t));
589}
590
591
592inline bool
593bplustree_node::IsLeaf() const
594{
595	return OverflowLink() == BPLUSTREE_NULL;
596}
597
598
599inline duplicate_array*
600bplustree_node::FragmentAt(int8 index) const
601{
602	return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1));
603}
604
605
606inline duplicate_array*
607bplustree_node::DuplicateArray() const
608{
609	return (duplicate_array*)&overflow_link;
610}
611
612
613inline uint8
614bplustree_node::LinkType(off_t link)
615{
616	return *(uint64*)&link >> 62;
617}
618
619
620inline off_t
621bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
622{
623	return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL)
624		| (fragmentIndex & 0x3ff);
625}
626
627
628inline bool
629bplustree_node::IsDuplicate(off_t link)
630{
631	return (LinkType(link)
632		& (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
633}
634
635
636inline off_t
637bplustree_node::FragmentOffset(off_t link)
638{
639	return link & 0x3ffffffffffffc00LL;
640}
641
642
643inline uint32
644bplustree_node::FragmentIndex(off_t link)
645{
646	return (uint32)(link & 0x3ff);
647}
648
649
650inline uint32
651bplustree_node::MaxFragments(uint32 nodeSize)
652{
653	return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t));
654}
655
656
657#if _BOOT_MODE
658} // namespace BFS
659
660#undef Inode
661#endif
662
663#endif	// B_PLUS_TREE_H
664