Ticket #8915: hashmap-py5.patch
File hashmap-py5.patch, 21.6 KB (added 16 years ago) |
---|
-
common/memorypool.h
32 32 namespace Common { 33 33 34 34 class MemoryPool { 35 pr ivate:35 protected: 36 36 MemoryPool(const MemoryPool&); 37 37 MemoryPool& operator=(const MemoryPool&); 38 39 struct Page { 40 void *start; 41 size_t numChunks; 42 }; 38 43 39 44 size_t _chunkSize; 40 Array<void*> _pages; 41 void* _next; 45 Array<Page> _pages; 46 void *_next; 47 size_t _chunksPerPage; 42 48 43 void* allocPage(); 44 bool isPointerInPage(void* ptr, void* page); 49 void allocPage(); 50 void addPageToPool(const Page &page); 51 bool isPointerInPage(void *ptr, const Page &page); 52 45 53 public: 46 54 MemoryPool(size_t chunkSize); 47 55 ~MemoryPool(); 48 56 49 void *malloc();50 void free(void *ptr);57 void *malloc(); 58 void free(void *ptr); 51 59 52 60 void freeUnusedPages(); 53 61 }; 54 62 63 template<size_t CHUNK_SIZE, size_t NUM_INTERNAL_CHUNKS = 32> 64 class FixedSizeMemoryPool : public MemoryPool { 65 private: 66 enum { 67 REAL_CHUNK_SIZE = (CHUNK_SIZE + sizeof(void*) - 1) & (~(sizeof(void*) - 1)) 68 }; 69 70 byte _storage[NUM_INTERNAL_CHUNKS * REAL_CHUNK_SIZE]; 71 public: 72 FixedSizeMemoryPool() : MemoryPool(CHUNK_SIZE) { 73 assert(REAL_CHUNK_SIZE == _chunkSize); 74 // Insert some static storage 75 Page internalPage = { _storage, NUM_INTERNAL_CHUNKS }; 76 addPageToPool(internalPage); 77 } 78 }; 79 80 template<size_t CHUNK_SIZE> 81 class FixedSizeMemoryPool<CHUNK_SIZE,0> : public MemoryPool { 82 public: 83 FixedSizeMemoryPool() : MemoryPool(CHUNK_SIZE) {} 84 }; 85 55 86 } // End of namespace Common 56 87 57 88 #endif -
common/hashmap.cpp
24 24 */ 25 25 26 26 // The hash map (associative array) implementation in this file is 27 // based on code by Andrew Y. Ng, 1996: 27 // based on the PyDict implementation of CPython. The erase() method 28 // is based on example code in the Wikipedia article on Hash tables. 28 29 29 /*30 * Copyright (c) 1998-2003 Massachusetts Institute of Technology.31 * This code was developed as part of the Haystack research project32 * (http://haystack.lcs.mit.edu/). Permission is hereby granted,33 * free of charge, to any person obtaining a copy of this software34 * and associated documentation files (the "Software"), to deal in35 * the Software without restriction, including without limitation36 * the rights to use, copy, modify, merge, publish, distribute,37 * sublicense, and/or sell copies of the Software, and to permit38 * persons to whom the Software is furnished to do so, subject to39 * the following conditions:40 *41 * The above copyright notice and this permission notice shall be42 * included in all copies or substantial portions of the Software.43 *44 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,45 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES46 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND47 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT48 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,49 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING50 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR51 * OTHER DEALINGS IN THE SOFTWARE.52 */53 54 30 #include "common/hashmap.h" 55 31 56 32 namespace Common { 57 33 58 // const char *:34 // Hash function for strings, taken from CPython. 
59 35 uint hashit(const char *p) { 60 uint hash = 0;36 uint hash = *p << 7; 61 37 byte c; 62 while ((c = *p++)) 63 hash = (hash * 31 + c); 64 return hash; 38 int size = 0; 39 while ((c = *p++)) { 40 hash = (1000003 * hash) ^ c; 41 size++; 42 } 43 return hash ^ size; 65 44 } 66 45 46 // Like hashit, but converts every char to lowercase before hashing. 67 47 uint hashit_lower(const char *p) { 68 uint hash = 0;48 uint hash = tolower(*p) << 7; 69 49 byte c; 70 while ((c = *p++)) 71 hash = (hash * 31 + tolower(c)); 72 return hash; 50 int size = 0; 51 while ((c = *p++)) { 52 hash = (1000003 * hash) ^ tolower(c); 53 size++; 54 } 55 return hash ^ size; 73 56 } 74 57 75 // The following table is taken from the GNU ISO C++ Library's hashtable.h file.76 static const uint primes[] = {77 53ul, 97ul, 193ul, 389ul, 769ul,78 1543ul, 3079ul, 6151ul, 12289ul, 24593ul,79 49157ul, 98317ul, 196613ul, 393241ul, 786433ul,80 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,81 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,82 1610612741ul, 3221225473ul, 4294967291ul83 };84 85 uint nextTableSize(uint x) {86 int i = 0;87 while (x >= primes[i])88 i++;89 return primes[i];90 }91 92 58 #ifdef DEBUG_HASH_COLLISIONS 93 59 static double 94 60 g_collisions = 0, … … 98 64 g_size = 0; 99 65 static int g_max_capacity = 0, g_max_size = 0; 100 66 static int g_totalHashmaps = 0; 67 static int g_stats[4] = {0,0,0,0}; 101 68 102 69 void updateHashCollisionStats(int collisions, int lookups, int arrsize, int nele) { 103 70 g_collisions += collisions; … … 108 75 g_size += nele; 109 76 g_totalHashmaps++; 110 77 78 if (3*nele <= 2*8) 79 g_stats[0]++; 80 if (3*nele <= 2*16) 81 g_stats[1]++; 82 if (3*nele <= 2*32) 83 g_stats[2]++; 84 if (3*nele <= 2*64) 85 g_stats[3]++; 86 111 87 g_max_capacity = MAX(g_max_capacity, arrsize); 112 88 g_max_size = MAX(g_max_size, nele); 113 89 … … 118 94 100 * g_collPerLook / g_totalHashmaps, 119 95 g_size / g_totalHashmaps, g_max_size, 120 96 g_capacity / 
g_totalHashmaps, g_max_capacity); 97 fprintf(stdout, " %d less than %d; %d less than %d; %d less than %d; %d less than %d\n", 98 g_stats[0], 2*8/3, 99 g_stats[1],2*16/3, 100 g_stats[2],2*32/3, 101 g_stats[3],2*64/3); 102 103 // TODO: 104 // * Should record the maximal size of the map during its lifetime, not that at its death 105 // * Should do some statistics: how many maps are less than 2/3*8, 2/3*16, 2/3*32, ... 121 106 } 122 107 #endif 123 108 -
common/memorypool.cpp
28 28 29 29 namespace Common { 30 30 31 static const size_t CHUNK_PAGE_SIZE = 32;32 33 void* MemoryPool::allocPage() {34 void* result = ::malloc(CHUNK_PAGE_SIZE * _chunkSize);35 _pages.push_back(result);36 void* current = result;37 for (size_t i = 1; i < CHUNK_PAGE_SIZE; ++i) {38 void* next = ((char*)current + _chunkSize);39 *(void**)current = next;40 41 current = next;42 }43 *(void**)current = NULL;44 return result;45 }46 47 31 MemoryPool::MemoryPool(size_t chunkSize) { 48 32 // You must at least fit the pointer in the node (technically unneeded considering the next rounding statement) 49 33 _chunkSize = MAX(chunkSize, sizeof(void*)); … … 52 36 _chunkSize = (_chunkSize + sizeof(void*) - 1) & (~(sizeof(void*) - 1)); 53 37 54 38 _next = NULL; 39 40 _chunksPerPage = 32; 55 41 } 56 42 57 43 MemoryPool::~MemoryPool() { 58 for (size_t i = 0; i <_pages.size(); ++i)59 ::free(_pages[i] );44 for (size_t i = 0; i < _pages.size(); ++i) 45 ::free(_pages[i].start); 60 46 } 61 47 62 void* MemoryPool::malloc() { 63 #if 1 64 if (!_next) 65 _next = allocPage(); 48 void MemoryPool::allocPage() { 49 Page page; 50 51 // Allocate a new page 52 page.numChunks = _chunksPerPage; 53 page.start = ::malloc(page.numChunks * _chunkSize); 54 assert(page.start); 55 _pages.push_back(page); 56 57 // Next time, we'll alocate a page twice as big as this one. 
58 _chunksPerPage *= 2; 59 60 // Add the page to the pool of free chunk 61 addPageToPool(page); 62 } 66 63 67 void* result = _next; 64 void MemoryPool::addPageToPool(const Page &page) { 65 66 // Add all chunks of the new page to the linked list (pool) of free chunks 67 void *current = page.start; 68 for (size_t i = 1; i < page.numChunks; ++i) { 69 void *next = ((char*)current + _chunkSize); 70 *(void **)current = next; 71 72 current = next; 73 } 74 75 // Last chunk points to the old _next 76 *(void**)current = _next; 77 78 // From now on, the first free chunk is the first chunk of the new page 79 _next = page.start; 80 } 81 82 void *MemoryPool::malloc() { 83 if (!_next) // No free chunks left? Allocate a new page 84 allocPage(); 85 86 assert(_next); 87 void *result = _next; 68 88 _next = *(void**)result; 69 89 return result; 70 #else71 return ::malloc(_chunkSize);72 #endif73 90 } 74 91 75 92 void MemoryPool::free(void* ptr) { 76 #if 1 93 // Add the chunk back to (the start of) the list of free chunks 77 94 *(void**)ptr = _next; 78 95 _next = ptr; 79 #else80 ::free(ptr);81 #endif82 96 } 83 97 84 98 // Technically not compliant C++ to compare unrelated pointers. In practice... 85 bool MemoryPool::isPointerInPage(void * ptr, void*page) {86 return (ptr >= page ) && (ptr < (char*)page + CHUNK_PAGE_SIZE* _chunkSize);99 bool MemoryPool::isPointerInPage(void *ptr, const Page &page) { 100 return (ptr >= page.start) && (ptr < (char*)page.start + page.numChunks * _chunkSize); 87 101 } 88 102 89 103 void MemoryPool::freeUnusedPages() { … … 94 108 numberOfFreeChunksPerPage[i] = 0; 95 109 } 96 110 97 void* iterator = _next; 111 // Compute for each page how many chunks in it are still in use. 
112 void *iterator = _next; 98 113 while (iterator) { 99 // T his should be a binary search114 // TODO: This should be a binary search (requiring us to keep _pages sorted) 100 115 for (size_t i = 0; i < _pages.size(); ++i) { 101 116 if (isPointerInPage(iterator, _pages[i])) { 102 117 ++numberOfFreeChunksPerPage[i]; … … 106 121 iterator = *(void**)iterator; 107 122 } 108 123 124 // Free all pages which are not in use. 125 // TODO: Might want to reset _chunksPerPage here (e.g. to the largest 126 // _pages[i].numChunks value still in use). 109 127 size_t freedPagesCount = 0; 110 for (size_t i = 0; i < _pages.size(); ++i) { 111 if (numberOfFreeChunksPerPage[i] == CHUNK_PAGE_SIZE) { 112 ::free(_pages[i]); 113 _pages[i] = NULL; // TODO : Remove NULL values 128 for (size_t i = 0; i < _pages.size(); ++i) { 129 if (numberOfFreeChunksPerPage[i] == _pages[i].numChunks) { 130 // Remove all chunks of this page from the list of free chunks 131 void **iter2 = &_next; 132 while (*iter2) { 133 if (isPointerInPage(*iter2, _pages[i])) 134 *iter2 = **(void***)iter2; 135 else 136 iter2 = *(void***)iter2; 137 } 138 ::free(_pages[i].start); 114 139 ++freedPagesCount; 140 _pages[i].start = NULL; 115 141 } 116 142 } 117 143 144 for (size_t i = 0; i < _pages.size(); ) { 145 if (_pages[i].start == NULL) { 146 _pages.remove_at(i); 147 // We just removed an entry, so we do not advance "i" 148 } else { 149 ++i; 150 } 151 } 152 118 153 //printf("%d freed pages\n", freedPagesCount); 119 154 } 120 155 -
common/hashmap.h
24 24 */ 25 25 26 26 // The hash map (associative array) implementation in this file is 27 // based on code by Andrew Y. Ng, 1996: 27 // based on the PyDict implementation of CPython. The erase() method 28 // is based on example code in the Wikipedia article on Hash tables. 28 29 29 /*30 * Copyright (c) 1998-2003 Massachusetts Institute of Technology.31 * This code was developed as part of the Haystack research project32 * (http://haystack.lcs.mit.edu/). Permission is hereby granted,33 * free of charge, to any person obtaining a copy of this software34 * and associated documentation files (the "Software"), to deal in35 * the Software without restriction, including without limitation36 * the rights to use, copy, modify, merge, publish, distribute,37 * sublicense, and/or sell copies of the Software, and to permit38 * persons to whom the Software is furnished to do so, subject to39 * the following conditions:40 *41 * The above copyright notice and this permission notice shall be42 * included in all copies or substantial portions of the Software.43 *44 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,45 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES46 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND47 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT48 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,49 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING50 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR51 * OTHER DEALINGS IN THE SOFTWARE.52 */53 54 30 #ifndef COMMON_HASHMAP_H 55 31 #define COMMON_HASHMAP_H 56 32 … … 74 50 75 51 namespace Common { 76 52 77 // The table sizes ideally are primes. 
We use a helper function to find78 // suitable table sizes.79 uint nextTableSize(uint x);80 81 82 53 // Enable the following #define if you want to check how many collisions the 83 54 // code produces (many collisions indicate either a bad hash function, or a 84 55 // hash table that is too small). … … 113 84 Node(const Key &key) : _key(key), _value() {} 114 85 }; 115 86 87 enum { 88 HASHMAP_PERTURB_SHIFT = 5, 89 HASHMAP_MIN_CAPACITY = 16, 90 91 // The quotient of the next two constants controls how much the 92 // internal storage of the hashmap may fill up before being 93 // increased automatically. 94 // Note: the quotient of these two must be between and different 95 // from 0 and 1. 96 HASHMAP_LOADFACTOR_NUMERATOR = 2, 97 HASHMAP_LOADFACTOR_DENOMINATOR = 3, 98 99 HASHMAP_MEMORYPOOL_SIZE = HASHMAP_MIN_CAPACITY * HASHMAP_LOADFACTOR_NUMERATOR / HASHMAP_LOADFACTOR_DENOMINATOR 100 }; 116 101 102 117 103 #ifdef USE_HASHMAP_MEMORY_POOL 118 MemoryPool_nodePool;104 FixedSizeMemoryPool<sizeof(Node), HASHMAP_MEMORYPOOL_SIZE> _nodePool; 119 105 120 106 Node *allocNode(const Key &key) { 121 107 void* mem = _nodePool.malloc(); … … 137 123 #endif 138 124 139 125 Node **_storage; // hashtable of size arrsize. 
140 uint _ capacity;126 uint _mask; /**< Capacity of the HashMap minus one; must be a power of two of minus one */ 141 127 uint _size; 142 128 143 129 HashFunc _hash; … … 153 139 void assign(const HM_t &map); 154 140 int lookup(const Key &key) const; 155 141 int lookupAndCreateIfMissing(const Key &key); 156 void expand _array(uint newsize);142 void expandStorage(uint newCapacity); 157 143 158 144 template<class T> friend class IteratorImpl; 159 145 … … 175 161 176 162 NodeType *deref() const { 177 163 assert(_hashmap != 0); 178 assert(_idx < _hashmap->_capacity);164 assert(_idx <= _hashmap->_mask); 179 165 Node *node = _hashmap->_storage[_idx]; 180 166 assert(node != 0); 181 167 return node; … … 196 182 assert(_hashmap); 197 183 do { 198 184 _idx++; 199 } while (_idx < _hashmap->_capacity&& _hashmap->_storage[_idx] == 0);200 if (_idx > = _hashmap->_capacity)185 } while (_idx <= _hashmap->_mask && _hashmap->_storage[_idx] == 0); 186 if (_idx > _hashmap->_mask) 201 187 _idx = (uint)-1; 202 188 203 189 return *this; … … 247 233 248 234 iterator begin() { 249 235 // Find and return the _key non-empty entry 250 for (uint ctr = 0; ctr < _capacity; ++ctr) {236 for (uint ctr = 0; ctr <= _mask; ++ctr) { 251 237 if (_storage[ctr]) 252 238 return iterator(ctr, this); 253 239 } … … 259 245 260 246 const_iterator begin() const { 261 247 // Find and return the first non-empty entry 262 for (uint ctr = 0; ctr < _capacity; ++ctr) {248 for (uint ctr = 0; ctr <= _mask; ++ctr) { 263 249 if (_storage[ctr]) 264 250 return const_iterator(ctr, this); 265 251 } … … 298 284 */ 299 285 template<class Key, class Val, class HashFunc, class EqualFunc> 300 286 HashMap<Key, Val, HashFunc, EqualFunc>::HashMap() : 301 #ifdef USE_HASHMAP_MEMORY_POOL302 _nodePool(sizeof(Node)),303 #endif304 287 _defaultVal() { 305 _ capacity = nextTableSize(0);306 _storage = new Node *[ _capacity];288 _mask = HASHMAP_MIN_CAPACITY - 1; 289 _storage = new Node *[HASHMAP_MIN_CAPACITY]; 307 290 assert(_storage != NULL); 
308 memset(_storage, 0, _capacity* sizeof(Node *));291 memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *)); 309 292 310 293 _size = 0; 311 294 … … 322 305 */ 323 306 template<class Key, class Val, class HashFunc, class EqualFunc> 324 307 HashMap<Key, Val, HashFunc, EqualFunc>::HashMap(const HM_t &map) : 325 #ifdef USE_HASHMAP_MEMORY_POOL326 _nodePool(sizeof(Node)),327 #endif328 308 _defaultVal() { 329 309 assign(map); 330 310 } … … 334 314 */ 335 315 template<class Key, class Val, class HashFunc, class EqualFunc> 336 316 HashMap<Key, Val, HashFunc, EqualFunc>::~HashMap() { 337 for (uint ctr = 0; ctr < _capacity; ++ctr)317 for (uint ctr = 0; ctr <= _mask; ++ctr) 338 318 if (_storage[ctr] != NULL) 339 319 freeNode(_storage[ctr]); 340 320 341 321 delete[] _storage; 342 322 #ifdef DEBUG_HASH_COLLISIONS 343 323 extern void updateHashCollisionStats(int, int, int, int); 344 updateHashCollisionStats(_collisions, _lookups, _ capacity, _size);324 updateHashCollisionStats(_collisions, _lookups, _mask+1, _size); 345 325 #endif 346 326 } 347 327 … … 354 334 */ 355 335 template<class Key, class Val, class HashFunc, class EqualFunc> 356 336 void HashMap<Key, Val, HashFunc, EqualFunc>::assign(const HM_t &map) { 357 _ capacity = map._capacity;358 _storage = new Node *[_ capacity];337 _mask = map._mask; 338 _storage = new Node *[_mask+1]; 359 339 assert(_storage != NULL); 360 memset(_storage, 0, _capacity* sizeof(Node *));340 memset(_storage, 0, (_mask+1) * sizeof(Node *)); 361 341 362 342 // Simply clone the map given to us, one by one. 
363 343 _size = 0; 364 for (uint ctr = 0; ctr < _capacity; ++ctr) {344 for (uint ctr = 0; ctr <= _mask; ++ctr) { 365 345 if (map._storage[ctr] != NULL) { 366 346 _storage[ctr] = allocNode(map._storage[ctr]->_key); 367 347 _storage[ctr]->_value = map._storage[ctr]->_value; … … 375 355 376 356 template<class Key, class Val, class HashFunc, class EqualFunc> 377 357 void HashMap<Key, Val, HashFunc, EqualFunc>::clear(bool shrinkArray) { 378 for (uint ctr = 0; ctr < _capacity; ++ctr) {358 for (uint ctr = 0; ctr <= _mask; ++ctr) { 379 359 if (_storage[ctr] != NULL) { 380 360 freeNode(_storage[ctr]); 381 361 _storage[ctr] = NULL; 382 362 } 383 363 } 384 364 385 if (shrinkArray && _capacity > nextTableSize(0)) { 365 #ifdef USE_HASHMAP_MEMORY_POOL 366 _nodePool.freeUnusedPages(); 367 #endif 368 369 if (shrinkArray && _mask >= HASHMAP_MIN_CAPACITY) { 386 370 delete[] _storage; 387 371 388 _ capacity = nextTableSize(0);389 _storage = new Node *[ _capacity];372 _mask = HASHMAP_MIN_CAPACITY - 1; /* _mask holds capacity minus one (as in the constructor and expandStorage); storing the capacity itself would make every "ctr <= _mask" loop touch slot 16 of a 16-slot array */ 373 _storage = new Node *[HASHMAP_MIN_CAPACITY]; 390 374 assert(_storage != NULL); 391 memset(_storage, 0, _capacity* sizeof(Node *));375 memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *)); 392 376 } 393 377 394 378 _size = 0; 395 379 } 396 380 397 381 template<class Key, class Val, class HashFunc, class EqualFunc> 398 void HashMap<Key, Val, HashFunc, EqualFunc>::expand_array(uint newsize) { 399 assert(newsize > _capacity); 400 uint ctr, dex; 382 void HashMap<Key, Val, HashFunc, EqualFunc>::expandStorage(uint newCapacity) { 383 assert(newCapacity > _mask+1); 401 384 402 385 const uint old_size = _size; 403 const uint old_ capacity = _capacity;386 const uint old_mask = _mask; 404 387 Node **old_storage = _storage; 405 388 406 389 // allocate a new array 407 390 _size = 0; 408 _ capacity = newsize;409 _storage = new Node *[ _capacity];391 _mask = newCapacity - 1; 392 _storage = new Node *[newCapacity]; 410 393 assert(_storage != NULL); 411 memset(_storage, 0, _capacity * sizeof(Node 
*));394 memset(_storage, 0, newCapacity * sizeof(Node *)); 412 395 413 396 // rehash all the old elements 414 for ( ctr = 0; ctr < old_capacity; ++ctr) {397 for (uint ctr = 0; ctr <= old_mask; ++ctr) { 415 398 if (old_storage[ctr] == NULL) 416 399 continue; 417 400 … … 419 402 // Since we know that no key exists twice in the old table, we 420 403 // can do this slightly better than by calling lookup, since we 421 404 // don't have to call _equal(). 422 dex = _hash(old_storage[ctr]->_key) % _capacity; 423 while (_storage[dex] != NULL) { 424 dex = (dex + 1) % _capacity; 405 const uint hash = _hash(old_storage[ctr]->_key); 406 uint idx = hash & _mask; 407 for (uint perturb = hash; _storage[idx] != NULL; perturb >>= HASHMAP_PERTURB_SHIFT) { 408 idx = (5 * idx + perturb + 1) & _mask; 425 409 } 426 410 427 _storage[ dex] = old_storage[ctr];411 _storage[idx] = old_storage[ctr]; 428 412 _size++; 429 413 } 430 414 … … 439 423 440 424 template<class Key, class Val, class HashFunc, class EqualFunc> 441 425 int HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const { 442 uint ctr = _hash(key) % _capacity; 426 const uint hash = _hash(key); 427 uint ctr = hash & _mask; 428 for (uint perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) { 429 if (_storage[ctr] == NULL || _equal(_storage[ctr]->_key, key)) 430 break; 443 431 444 while (_storage[ctr] != NULL && !_equal(_storage[ctr]->_key, key)) { 445 ctr = (ctr + 1) % _capacity; 432 ctr = (5 * ctr + perturb + 1) & _mask; 446 433 447 434 #ifdef DEBUG_HASH_COLLISIONS 448 435 _collisions++; … … 453 440 _lookups++; 454 441 fprintf(stderr, "collisions %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d\n", 455 442 _collisions, _lookups, ((double) _collisions / (double)_lookups), 456 (const void *)this, _ capacity, _size);443 (const void *)this, _mask+1, _size); 457 444 #endif 458 445 459 446 return ctr; … … 467 454 _storage[ctr] = allocNode(key); 468 455 _size++; 469 456 470 // Keep the load factor below 75%. 
471 if (_size > _capacity * 75 / 100) { 472 expand_array(nextTableSize(_capacity)); 457 // Keep the load factor below a certain threshold. 458 uint capacity = _mask + 1; 459 if (_size * HASHMAP_LOADFACTOR_DENOMINATOR > capacity * HASHMAP_LOADFACTOR_NUMERATOR) { 460 capacity = capacity < 500 ? (capacity * 4) : (capacity * 2); 461 expandStorage(capacity); 473 462 ctr = lookup(key); 474 463 } 475 464 } … … 520 509 template<class Key, class Val, class HashFunc, class EqualFunc> 521 510 void HashMap<Key, Val, HashFunc, EqualFunc>::erase(const Key &key) { 522 511 // This is based on code in the Wikipedia article on Hash tables. 523 uint i = lookup(key); 512 513 const uint hash = _hash(key); 514 uint i = hash & _mask; 515 uint perturb; 516 517 for (perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) { 518 if (_storage[i] == NULL || _equal(_storage[i]->_key, key)) 519 break; 520 521 i = (5 * i + perturb + 1) & _mask; 522 } 523 524 524 if (_storage[i] == NULL) 525 525 return; // key wasn't present, so no work has to be done 526 526 527 // If we remove a key, we must check all subsequent keys and possibly 527 528 // reinsert them. 528 529 uint j = i; 529 530 freeNode(_storage[i]); 530 531 _storage[i] = NULL; 531 while (true) {532 for (perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) { 532 533 // Look at the next table slot 533 j = ( j + 1) % _capacity;534 j = (5 * j + perturb + 1) & _mask; 534 535 // If the next slot is empty, we are done 535 536 if (_storage[j] == NULL) 536 537 break; 537 538 // Compute the slot where the content of the next slot should normally be, 538 539 // assuming an empty table, and check whether we have to move it. 539 uint k = _hash(_storage[j]->_key) % _capacity;540 uint k = _hash(_storage[j]->_key) & _mask; 540 541 if ((j > i && (k <= i || k > j)) || 541 542 (j < i && (k <= i && k > j)) ) { 542 543 _storage[i] = _storage[j];