OpenHashTable.h revision 5d4501aa
1/*
2Open Tracker License
3
4Terms and Conditions
5
6Copyright (c) 1991-2000, Be Incorporated. All rights reserved.
7
8Permission is hereby granted, free of charge, to any person obtaining a copy of
9this software and associated documentation files (the "Software"), to deal in
10the Software without restriction, including without limitation the rights to
11use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12of the Software, and to permit persons to whom the Software is furnished to do
13so, subject to the following conditions:
14
15The above copyright notice and this permission notice applies to all licensees
16and shall be included in all copies or substantial portions of the Software.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
25Except as contained in this notice, the name of Be Incorporated shall not be
26used in advertising or otherwise to promote the sale, use or other dealings in
27this Software without prior written authorization from Be Incorporated.
28
29Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30of Be Incorporated in the United States and other countries. Other brand product
31names are registered trademarks or trademarks of their respective holders.
32All rights reserved.
33*/
34
35// bonefish:
36// * removed need for exceptions
37// * fixed warnings
38// * implemented rehashing
39// * added RemoveAll()
40// TODO:
41// * shrinking of element vectors
42
43// Hash table with open addresssing
44
45#ifndef __OPEN_HASH_TABLE__
46#define __OPEN_HASH_TABLE__
47
48#include <stdlib.h>
49#include <new>
50
51// don't include <Debug.h>
52#ifndef _OPEN_HASH_TABLE_ASSERT
53#	define _OPEN_HASH_TABLE_ASSERT(E)	(void)0
54#endif
55#ifndef _OPEN_HASH_TABLE_TRESPASS
56#	define _OPEN_HASH_TABLE_TRESPASS()	(void)0
57#endif
58
59namespace BPrivate {
60
61template <class Element>
62class ElementVector {
63	// element vector for OpenHashTable needs to implement this
64	// interface
65public:
66	Element &At(int32 index);
67	Element *Add();
68	int32 IndexOf(const Element &) const;
69	void Remove(int32 index);
70};
71
72class OpenHashElement {
73public:
74	uint32 Hash() const;
75	bool operator==(const OpenHashElement &) const;
76	void Adopt(OpenHashElement &);
77		// low overhead copy, original element is in undefined state
78		// after call (calls Adopt on BString members, etc.)
79	int32 fNext;
80};
81
82const uint32 kPrimes [] = {
83	509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071, 262139,
84	524287, 1048573, 2097143, 4194301, 8388593, 16777213, 33554393, 67108859,
85	134217689, 268435399, 536870909, 1073741789, 2147483647, 0
86};
87
88template <class Element, class ElementVec = ElementVector<Element> >
89class OpenHashTable {
90public:
91	OpenHashTable(int32 minSize, ElementVec *elementVector = 0,
92				  float maxLoadFactor = 0.8);
93		// it is up to the subclass of OpenHashTable to supply
94		// elementVector
95	~OpenHashTable();
96
97	bool InitCheck() const;
98
99	void SetElementVector(ElementVec *elementVector);
100
101	Element *FindFirst(uint32 elementHash) const;
102	Element *Add(uint32 elementHash);
103
104	void Remove(Element *element, bool dontRehash = false);
105	void RemoveAll();
106
107	// when calling Add, any outstanding element pointer may become
108	// invalid; to deal with this, get the element index and restore
109	// it after the add
110	int32 ElementIndex(const Element *) const;
111	Element *ElementAt(int32 index) const;
112
113	int32 ArraySize() const;
114	int32 VectorSize() const;
115	int32 CountElements() const;
116
117protected:
118	static int32 OptimalSize(int32 minSize);
119
120private:
121	bool _RehashIfNeeded();
122	bool _Rehash();
123
124	int32 fArraySize;
125	int32 fInitialSize;
126	int32 fElementCount;
127	int32 *fHashArray;
128	ElementVec *fElementVector;
129	float fMaxLoadFactor;
130};
131
132template <class Element>
133class OpenHashElementArray : public ElementVector<Element> {
134	// this is a straightforward implementation of an element vector
135	// deleting is handled by linking deleted elements into a free list
136	// the vector never shrinks
137public:
138	OpenHashElementArray(int32 initialSize);
139	~OpenHashElementArray();
140
141	bool InitCheck() const;
142
143	Element &At(int32 index);
144	const Element &At(int32 index) const;
145	Element *Add(const Element &);
146	Element *Add();
147	void Remove(int32 index);
148	int32 IndexOf(const Element &) const;
149	int32 Size() const;
150
151private:
152	Element *fData;
153	int32 fSize;
154	int32 fNextFree;
155	int32 fNextDeleted;
156};
157
158
159//-----------------------------------
160
161template<class Element, class ElementVec>
162OpenHashTable<Element, ElementVec>::OpenHashTable(int32 minSize,
163	ElementVec *elementVector, float maxLoadFactor)
164	:	fArraySize(OptimalSize(minSize)),
165		fInitialSize(fArraySize),
166		fElementCount(0),
167		fElementVector(elementVector),
168		fMaxLoadFactor(maxLoadFactor)
169{
170	// sanity check the maximal load factor
171	if (fMaxLoadFactor < 0.5)
172		fMaxLoadFactor = 0.5;
173	// allocate and init the array
174	fHashArray = (int32*)calloc(fArraySize, sizeof(int32));
175	if (fHashArray) {
176		for (int32 index = 0; index < fArraySize; index++)
177			fHashArray[index] = -1;
178	}
179}
180
181template<class Element, class ElementVec>
182OpenHashTable<Element, ElementVec>::~OpenHashTable()
183{
184	RemoveAll();
185	free(fHashArray);
186}
187
188template<class Element, class ElementVec>
189bool
190OpenHashTable<Element, ElementVec>::InitCheck() const
191{
192	return (fHashArray && fElementVector);
193}
194
195template<class Element, class ElementVec>
196int32
197OpenHashTable<Element, ElementVec>::OptimalSize(int32 minSize)
198{
199	for (int32 index = 0; ; index++)
200		if (!kPrimes[index] || kPrimes[index] >= (uint32)minSize)
201			return (int32)kPrimes[index];
202
203	return 0;
204}
205
206template<class Element, class ElementVec>
207Element *
208OpenHashTable<Element, ElementVec>::FindFirst(uint32 hash) const
209{
210	_OPEN_HASH_TABLE_ASSERT(fElementVector);
211	hash %= fArraySize;
212	if (fHashArray[hash] < 0)
213		return 0;
214
215	return &fElementVector->At(fHashArray[hash]);
216}
217
218template<class Element, class ElementVec>
219int32
220OpenHashTable<Element, ElementVec>::ElementIndex(const Element *element) const
221{
222	return fElementVector->IndexOf(*element);
223}
224
225template<class Element, class ElementVec>
226Element *
227OpenHashTable<Element, ElementVec>::ElementAt(int32 index) const
228{
229	return &fElementVector->At(index);
230}
231
232template<class Element, class ElementVec>
233int32
234OpenHashTable<Element, ElementVec>::ArraySize() const
235{
236	return fArraySize;
237}
238
239template<class Element, class ElementVec>
240int32
241OpenHashTable<Element, ElementVec>::VectorSize() const
242{
243	return fElementVector->Size();
244}
245
246template<class Element, class ElementVec>
247int32
248OpenHashTable<Element, ElementVec>::CountElements() const
249{
250	return fElementCount;
251}
252
253
254template<class Element, class ElementVec>
255Element *
256OpenHashTable<Element, ElementVec>::Add(uint32 hash)
257{
258	_OPEN_HASH_TABLE_ASSERT(fElementVector);
259	_RehashIfNeeded();
260	hash %= fArraySize;
261	Element *result = fElementVector->Add();
262	if (result) {
263		result->fNext = fHashArray[hash];
264		fHashArray[hash] = fElementVector->IndexOf(*result);
265		fElementCount++;
266	}
267	return result;
268}
269
270template<class Element, class ElementVec>
271void
272OpenHashTable<Element, ElementVec>::Remove(Element *element, bool dontRehash)
273{
274	if (!dontRehash)
275		_RehashIfNeeded();
276	uint32 hash = element->Hash() % fArraySize;
277	int32 next = fHashArray[hash];
278	_OPEN_HASH_TABLE_ASSERT(next >= 0);
279
280	if (&fElementVector->At(next) == element) {
281		fHashArray[hash] = element->fNext;
282		fElementVector->Remove(next);
283		fElementCount--;
284		return;
285	}
286
287	for (int32 index = next; index >= 0; ) {
288		// look for an existing match in table
289		next = fElementVector->At(index).fNext;
290		if (next < 0) {
291			_OPEN_HASH_TABLE_TRESPASS();
292			return;
293		}
294
295		if (&fElementVector->At(next) == element) {
296			fElementVector->At(index).fNext = element->fNext;
297			fElementVector->Remove(next);
298			fElementCount--;
299			return;
300		}
301		index = next;
302	}
303}
304
305template<class Element, class ElementVec>
306void
307OpenHashTable<Element, ElementVec>::RemoveAll()
308{
309	for (int32 i = 0; fElementCount > 0 && i < fArraySize; i++) {
310		int32 index = fHashArray[i];
311		while (index >= 0) {
312			Element* element = &fElementVector->At(index);
313			int32 next = element->fNext;
314			fElementVector->Remove(index);
315			fElementCount--;
316			index = next;
317		}
318		fHashArray[i] = -1;
319	}
320	_RehashIfNeeded();
321}
322
323template<class Element, class ElementVec>
324void
325OpenHashTable<Element, ElementVec>::SetElementVector(ElementVec *elementVector)
326{
327	fElementVector = elementVector;
328}
329
330// _RehashIfNeeded
331template<class Element, class ElementVec>
332bool
333OpenHashTable<Element, ElementVec>::_RehashIfNeeded()
334{
335	// The load factor range [fMaxLoadFactor / 3, fMaxLoadFactor] is fine,
336	// I think. After rehashing the load factor will be about
337	// fMaxLoadFactor * 2 / 3, respectively fMaxLoadFactor / 2.
338	float loadFactor = (float)fElementCount / (float)fArraySize;
339	if (loadFactor > fMaxLoadFactor
340		|| (fArraySize > fInitialSize && loadFactor < fMaxLoadFactor / 3)) {
341		return _Rehash();
342	}
343	return true;
344}
345
346// _Rehash
347template<class Element, class ElementVec>
348bool
349OpenHashTable<Element, ElementVec>::_Rehash()
350{
351	bool result = true;
352	int32 newSize = int32(fElementCount * 1.73 * fMaxLoadFactor);
353	newSize = (fInitialSize > newSize ? fInitialSize : newSize);
354	if (newSize != fArraySize) {
355		// allocate a new array
356		int32 *newHashArray = (int32*)calloc(newSize, sizeof(int32));
357		if (newHashArray) {
358			// init the new hash array
359			for (int32 index = 0; index < newSize; index++)
360				newHashArray[index] = -1;
361			// iterate through all elements and put them into the new
362			// hash array
363			for (int i = 0; i < fArraySize; i++) {
364				int32 index = fHashArray[i];
365				while (index >= 0) {
366					// insert the element in the new array
367					Element &element = fElementVector->At(index);
368					int32 next = element.fNext;
369					uint32 hash = (element.Hash() % newSize);
370					element.fNext = newHashArray[hash];
371					newHashArray[hash] = index;
372					// next element in old list
373					index = next;
374				}
375			}
376			// delete the old array and set the new one
377			free(fHashArray);
378			fHashArray = newHashArray;
379			fArraySize = newSize;
380		} else
381			result = false;
382	}
383	return result;
384}
385
386
387template<class Element>
388OpenHashElementArray<Element>::OpenHashElementArray(int32 initialSize)
389	:	fSize(initialSize),
390		fNextFree(0),
391		fNextDeleted(-1)
392{
393	fData = (Element*)calloc((size_t)initialSize, sizeof(Element));
394}
395
396template<class Element>
397OpenHashElementArray<Element>::~OpenHashElementArray()
398{
399	free(fData);
400}
401
402template<class Element>
403bool
404OpenHashElementArray<Element>::InitCheck() const
405{
406	return fData;
407}
408
409template<class Element>
410Element &
411OpenHashElementArray<Element>::At(int32 index)
412{
413	_OPEN_HASH_TABLE_ASSERT(index < fSize);
414	return fData[index];
415}
416
417template<class Element>
418const Element &
419OpenHashElementArray<Element>::At(int32 index) const
420{
421	_OPEN_HASH_TABLE_ASSERT(index < fSize);
422	return fData[index];
423}
424
425template<class Element>
426int32
427OpenHashElementArray<Element>::IndexOf(const Element &element) const
428{
429	int32 result = &element - fData;
430	if (result < 0 || result > fSize)
431		return -1;
432
433	return result;
434}
435
436template<class Element>
437int32
438OpenHashElementArray<Element>::Size() const
439{
440	return fSize;
441}
442
443
444template<class Element>
445Element *
446OpenHashElementArray<Element>::Add(const Element &newElement)
447{
448	Element *element = Add();
449	if (element)
450		element->Adopt(newElement);
451	return element;
452}
453
454#if DEBUG
455const int32 kGrowChunk = 10;
456#else
457const int32 kGrowChunk = 1024;
458#endif
459
460template<class Element>
461Element *
462OpenHashElementArray<Element>::Add()
463{
464	int32 index = fNextFree;
465	if (fNextDeleted >= 0) {
466		index = fNextDeleted;
467		fNextDeleted = At(index).fNext;
468	} else if (fNextFree >= fSize - 1) {
469		int32 newSize = fSize + kGrowChunk;
470/*
471		Element *newData = (Element *)calloc((size_t)newSize , sizeof(Element));
472		if (!newData)
473			return NULL;
474		memcpy(newData, fData, fSize * sizeof(Element));
475		free(fData);
476*/
477		Element *newData = (Element*)realloc(fData,
478			(size_t)newSize * sizeof(Element));
479		if (!newData)
480			return NULL;
481
482		fData = newData;
483		fSize = newSize;
484		index = fNextFree;
485		fNextFree++;
486	} else
487		fNextFree++;
488
489	new (&At(index)) Element;
490		// call placement new to initialize the element properly
491	_OPEN_HASH_TABLE_ASSERT(At(index).fNext == -1);
492
493	return &At(index);
494}
495
496template<class Element>
497void
498OpenHashElementArray<Element>::Remove(int32 index)
499{
500	// delete by chaining empty elements in a single linked
501	// list, reusing the next field
502	_OPEN_HASH_TABLE_ASSERT(index < fSize);
503	At(index).~Element();
504		// call the destructor explicitly to destroy the element
505		// properly
506	At(index).fNext = fNextDeleted;
507	fNextDeleted = index;
508}
509
510} // namespace BPrivate
511
512using BPrivate::OpenHashTable;
513
514#endif // __OPEN_HASH_TABLE__
515