/* Copyright (c) 2022 Alex Diener This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. Alex Diener alex@ludobloom.com */ #include "utilities/HashTable.h" #include "utilities/IOUtilities.h" #include "utilities/lookup3.h" #include "utilities/StrideArray.h" #include #include struct HashTable_bucketEntry { HashTable_key key; char data[1]; }; #define ALIGNMENT_SIZE 8 #define alignedEntryStride(entryDataSize) ((entryDataSize + sizeof(struct HashTable_bucketEntry) - sizeof(char) + ALIGNMENT_SIZE - 1) & ~(ALIGNMENT_SIZE - 1)) #define bucketEntryAtIndex(bucketEntries, entryStride, entryIndex) (bucketEntries + (entryIndex) * (entryStride)); HashTable * HashTable_create(size_t entryDataSize) { HashTable * hashTable = malloc(sizeof(*hashTable)); hashTable->entryDataSize = entryDataSize; hashTable->densityMax = HASH_TABLE_DENSITY_DEFAULT; hashTable->count = 0; hashTable->bucketCount = 8; hashTable->buckets = calloc(hashTable->bucketCount, sizeof(*hashTable->buckets)); return hashTable; } HashTable * HashTable_copy(HashTable * hashTable) { HashTable * copy = HashTable_create(hashTable->entryDataSize); copy->densityMax = hashTable->densityMax; size_t entryStride = alignedEntryStride(hashTable->entryDataSize); for (size_t bucketIndex = 0; bucketIndex < hashTable->bucketCount; bucketIndex++) { for (size_t entryIndex = 0; entryIndex < hashTable->buckets[bucketIndex].count; entryIndex++) { struct HashTable_bucketEntry * entry = bucketEntryAtIndex(hashTable->buckets[bucketIndex].entries, entryStride, entryIndex); HashTable_set(copy, entry->key, entry->data); } } return copy; } static void disposeHashTableKey(HashTable_key key) { switch (key.type) { case HASH_KEY_TYPE_STRING: case HASH_KEY_TYPE_BYTES: free((void *) key.data.bytes.data); break; case HASH_KEY_TYPE_OBJECT: call_virtual(dispose, key.data.object); break; case HASH_KEY_TYPE_UINT32: case HASH_KEY_TYPE_UINT32_PAIR: case HASH_KEY_TYPE_POINTER: break; } } void HashTable_dispose(HashTable * hashTable) { size_t entryStride = alignedEntryStride(hashTable->entryDataSize); for (size_t bucketIndex = 0; bucketIndex < hashTable->bucketCount; bucketIndex++) { for (size_t entryIndex = 0; entryIndex < hashTable->buckets[bucketIndex].count; entryIndex++) { struct HashTable_bucketEntry * entry = bucketEntryAtIndex(hashTable->buckets[bucketIndex].entries, entryStride, entryIndex); disposeHashTableKey(entry->key); } free(hashTable->buckets[bucketIndex].entries); } free(hashTable->buckets); free(hashTable); } static size_t hashKeyToBucketIndex(HashTable_key key) { switch (key.type) { case HASH_KEY_TYPE_STRING: return hashlittle(key.data.string.characters, key.data.string.length, 0); case HASH_KEY_TYPE_BYTES: return hashlittle(key.data.bytes.data, key.data.bytes.length, 0); case HASH_KEY_TYPE_UINT32: return hashword(&key.data.uint32, 1, 0); case HASH_KEY_TYPE_UINT32_PAIR: return hashword(&key.data.uint32Pair.second, 1, hashword(&key.data.uint32Pair.first, 1, 0)); case HASH_KEY_TYPE_POINTER: return hashword((const uint32_t *) &key.data.pointer, sizeof(key.data.pointer) / 4, 0); case HASH_KEY_TYPE_OBJECT: return call_virtual(hash, key.data.object); } return 0; } static struct HashTable_bucketEntry * getBucketEntryMatchingKey(HashTable * hashTable, size_t bucketIndex, HashTable_key key) { size_t entryStride = alignedEntryStride(hashTable->entryDataSize); size_t entryCount = hashTable->buckets[bucketIndex].count; for (size_t entryIndex = 0; entryIndex < entryCount; entryIndex++) { struct HashTable_bucketEntry * entry = bucketEntryAtIndex(hashTable->buckets[bucketIndex].entries, entryStride, entryIndex); if (entry->key.type == key.type) { switch (key.type) { case HASH_KEY_TYPE_STRING: case HASH_KEY_TYPE_BYTES: if (entry->key.data.bytes.length == key.data.bytes.length && !memcmp(entry->key.data.bytes.data, key.data.bytes.data, key.data.bytes.length)) { return entry; } break; case HASH_KEY_TYPE_UINT32: if (entry->key.data.uint32 == key.data.uint32) { return entry; } break; case HASH_KEY_TYPE_UINT32_PAIR: if (entry->key.data.uint32Pair.first == key.data.uint32Pair.first && entry->key.data.uint32Pair.second == key.data.uint32Pair.second) { return entry; } break; case HASH_KEY_TYPE_POINTER: if (entry->key.data.pointer == key.data.pointer) { return entry; } break; case HASH_KEY_TYPE_OBJECT: if (call_virtual(isEqual, entry->key.data.object, key.data.object)) { return entry; } break; } } } return NULL; } void * HashTable_get(HashTable * hashTable, HashTable_key key) { size_t bucketIndex = hashKeyToBucketIndex(key) % hashTable->bucketCount; struct HashTable_bucketEntry * entry = getBucketEntryMatchingKey(hashTable, bucketIndex, key); if (entry == NULL) { return NULL; } return entry->data; } void * HashTable_set(HashTable * hashTable, HashTable_key key, void * entryData) { size_t bucketIndexRaw = hashKeyToBucketIndex(key); size_t bucketIndex = bucketIndexRaw % hashTable->bucketCount; struct HashTable_bucketEntry * entry = getBucketEntryMatchingKey(hashTable, bucketIndex, key); if (entry == NULL) { size_t entryStride = alignedEntryStride(hashTable->entryDataSize); if (hashTable->count / hashTable->bucketCount >= hashTable->densityMax) { size_t newBucketCount = hashTable->bucketCount * 2; struct HashTable_bucket * newBuckets = calloc(newBucketCount, sizeof(*newBuckets)); for (size_t oldBucketIndex = 0; oldBucketIndex < hashTable->bucketCount; oldBucketIndex++) { for (size_t entryIndex = 0; entryIndex < hashTable->buckets[oldBucketIndex].count; entryIndex++) { struct HashTable_bucketEntry * oldEntry = bucketEntryAtIndex(hashTable->buckets[oldBucketIndex].entries, entryStride, entryIndex); size_t newBucketIndex = hashKeyToBucketIndex(oldEntry->key) % newBucketCount; if (newBuckets[newBucketIndex].allocatedCount < newBuckets[newBucketIndex].count + 1) { newBuckets[newBucketIndex].allocatedCount = newBuckets[newBucketIndex].allocatedCount * 2 + !newBuckets[newBucketIndex].allocatedCount; newBuckets[newBucketIndex].entries = realloc(newBuckets[newBucketIndex].entries, newBuckets[newBucketIndex].allocatedCount * entryStride); } struct HashTable_bucketEntry * newEntry = bucketEntryAtIndex(newBuckets[newBucketIndex].entries, entryStride, newBuckets[newBucketIndex].count); memcpy(newEntry, oldEntry, entryStride); newBuckets[newBucketIndex].count += 1; } } free(hashTable->buckets); hashTable->buckets = newBuckets; hashTable->bucketCount = newBucketCount; bucketIndex = bucketIndexRaw % newBucketCount; } if (hashTable->buckets[bucketIndex].allocatedCount < hashTable->buckets[bucketIndex].count + 1) { hashTable->buckets[bucketIndex].allocatedCount = hashTable->buckets[bucketIndex].allocatedCount * 2 + !hashTable->buckets[bucketIndex].allocatedCount; hashTable->buckets[bucketIndex].entries = realloc(hashTable->buckets[bucketIndex].entries, hashTable->buckets[bucketIndex].allocatedCount * entryStride); } entry = bucketEntryAtIndex(hashTable->buckets[bucketIndex].entries, entryStride, hashTable->buckets[bucketIndex].count); hashTable->buckets[bucketIndex].count++; entry->key = key; switch (key.type) { case HASH_KEY_TYPE_STRING: if (key.data.string.characters != NULL) { entry->key.data.string.characters = malloc(key.data.string.length + 1); memcpy((char *) entry->key.data.string.characters, key.data.string.characters, key.data.string.length); ((char *) entry->key.data.string.characters)[key.data.string.length] = 0; } break; case HASH_KEY_TYPE_BYTES: entry->key.data.bytes.data = memdup(key.data.bytes.data, key.data.bytes.length); break; case HASH_KEY_TYPE_OBJECT: entry->key.data.object = call_virtual(copy, key.data.object); break; case HASH_KEY_TYPE_UINT32: case HASH_KEY_TYPE_UINT32_PAIR: case HASH_KEY_TYPE_POINTER: break; } hashTable->count++; } if (entryData != NULL) { memcpy(entry->data, entryData, hashTable->entryDataSize); } return entry->data; } static void deleteEntryInternal(HashTable * hashTable, struct HashTable_bucketEntry * entry, size_t bucketIndex) { disposeHashTableKey(entry->key); hashTable->buckets[bucketIndex].count--; hashTable->count--; size_t entryStride = alignedEntryStride(hashTable->entryDataSize); size_t entryIndex = ((void *) entry - hashTable->buckets[bucketIndex].entries) / entryStride; if (hashTable->buckets[bucketIndex].count > entryIndex) { memmove(hashTable->buckets[bucketIndex].entries + entryIndex * entryStride, hashTable->buckets[bucketIndex].entries + (entryIndex + 1) * entryStride, (hashTable->buckets[bucketIndex].count - entryIndex) * entryStride); } } bool HashTable_delete(HashTable * hashTable, HashTable_key key) { size_t bucketIndex = hashKeyToBucketIndex(key) % hashTable->bucketCount; struct HashTable_bucketEntry * entry = getBucketEntryMatchingKey(hashTable, bucketIndex, key); if (entry == NULL) { return false; } deleteEntryInternal(hashTable, entry, bucketIndex); return true; } void HashTable_deleteAll(HashTable * hashTable) { size_t entryStride = alignedEntryStride(hashTable->entryDataSize); for (unsigned int bucketIndex = 0; bucketIndex < hashTable->bucketCount; bucketIndex++) { for (unsigned int entryIndex = 0; entryIndex < hashTable->buckets[bucketIndex].count; entryIndex++) { struct HashTable_bucketEntry * entry = bucketEntryAtIndex(hashTable->buckets[bucketIndex].entries, entryStride, entryIndex); disposeHashTableKey(entry->key); } hashTable->buckets[bucketIndex].count = 0; } hashTable->count = 0; } HashTable_key * HashTable_listKeys(HashTable * hashTable, size_t * outCount) { HashTable_key * keys = malloc(hashTable->count * sizeof(*keys)); size_t keyCount = 0; size_t entryStride = alignedEntryStride(hashTable->entryDataSize); for (size_t bucketIndex = 0; bucketIndex < hashTable->bucketCount; bucketIndex++) { for (size_t entryIndex = 0; entryIndex < hashTable->buckets[bucketIndex].count; entryIndex++) { struct HashTable_bucketEntry * entry = bucketEntryAtIndex(hashTable->buckets[bucketIndex].entries, entryStride, entryIndex); keys[keyCount++] = entry->key; } } if (outCount != NULL) { *outCount = keyCount; } return keys; } HashTable_key * HashTable_keyAtIndex(HashTable * hashTable, size_t index) { size_t keyCount = 0; for (size_t bucketIndex = 0; bucketIndex < hashTable->bucketCount; bucketIndex++) { keyCount += hashTable->buckets[bucketIndex].count; if (keyCount > index) { size_t entryStride = alignedEntryStride(hashTable->entryDataSize); struct HashTable_bucketEntry * entry = bucketEntryAtIndex(hashTable->buckets[bucketIndex].entries, entryStride, index - (keyCount - hashTable->buckets[bucketIndex].count)); return &entry->key; } } return NULL; } bool HashTable_hasNonStringKeys(HashTable * hashTable) { size_t entryStride = alignedEntryStride(hashTable->entryDataSize); for (size_t bucketIndex = 0; bucketIndex < hashTable->bucketCount; bucketIndex++) { for (size_t entryIndex = 0; entryIndex < hashTable->buckets[bucketIndex].count; entryIndex++) { struct HashTable_bucketEntry * entry = bucketEntryAtIndex(hashTable->buckets[bucketIndex].entries, entryStride, entryIndex); if (entry->key.type != HASH_KEY_TYPE_STRING) { return true; } } } return false; } void HashTable_merge(HashTable * toHashTable, HashTable * fromHashTable) { assert(toHashTable->entryDataSize == fromHashTable->entryDataSize); size_t entryStride = alignedEntryStride(toHashTable->entryDataSize); for (size_t bucketIndex = 0; bucketIndex < fromHashTable->bucketCount; bucketIndex++) { for (size_t entryIndex = 0; entryIndex < fromHashTable->buckets[bucketIndex].count; entryIndex++) { struct HashTable_bucketEntry * entry = bucketEntryAtIndex(fromHashTable->buckets[bucketIndex].entries, entryStride, entryIndex); HashTable_set(toHashTable, entry->key, entry->data); } } } bool HashTable_foreach(HashTable * hashTable, HashTable_foreachCallback callback, void * callbackContext) { size_t entryStride = alignedEntryStride(hashTable->entryDataSize); for (size_t bucketIndex = 0; bucketIndex < hashTable->bucketCount; bucketIndex++) { for (size_t entryIndex = 0; entryIndex < hashTable->buckets[bucketIndex].count; entryIndex++) { struct HashTable_bucketEntry * entry = bucketEntryAtIndex(hashTable->buckets[bucketIndex].entries, entryStride, entryIndex); if (!callback(hashTable, entry->key, entry->data, callbackContext)) { return false; } } } return true; } bool HashTable_deleteMatching(HashTable * hashTable, HashTable_foreachCallback callback, void * callbackContext) { bool anyEntriesDeleted = false; size_t entryStride = alignedEntryStride(hashTable->entryDataSize); for (size_t bucketIndex = 0; bucketIndex < hashTable->bucketCount; bucketIndex++) { for (size_t entryIndex = 0; entryIndex < hashTable->buckets[bucketIndex].count; entryIndex++) { struct HashTable_bucketEntry * entry = bucketEntryAtIndex(hashTable->buckets[bucketIndex].entries, entryStride, entryIndex); if (callback(hashTable, entry->key, entry->data, callbackContext)) { deleteEntryInternal(hashTable, entry, bucketIndex); entryIndex--; anyEntriesDeleted = true; } } } return anyEntriesDeleted; }