From: bernie Date: Wed, 13 Aug 2008 10:07:24 +0000 (+0000) Subject: hashtable: Move from mware/ to struct/ X-Git-Tag: 2.0.0~305 X-Git-Url: https://codewiz.org/gitweb?a=commitdiff_plain;h=56bc36208843a05a8879f31a1b1a6bcb0f1de176;p=bertos.git hashtable: Move from mware/ to struct/ git-svn-id: https://src.develer.com/svnoss/bertos/trunk@1629 38d2e660-2303-0410-9eaa-f027e97ec537 --- diff --git a/bertos/mware/hashtable.c b/bertos/mware/hashtable.c deleted file mode 100644 index d3797339..00000000 --- a/bertos/mware/hashtable.c +++ /dev/null @@ -1,391 +0,0 @@ -/** - * \file - * - * - * \brief Portable hash table implementation - * - * Some rationales of our choices in implementation: - * - * \li For embedded systems, it is vital to allocate the table in static memory. To do - * so, it is necessary to expose the \c HashNode and \c HashTable structures in the header file. - * Nevertheless, they should be used as opaque types (that is, the users should not - * access the structure fields directly). - * - * \li To statically allocate the structures, a macro is provided. With this macro, we - * are hiding completely \c HashNode to the user (who only manipulates \c HashTable). Without - * the macro, the user would have had to define both the \c HashNode and the \c HashTable - * manually, and pass both of them to \c ht_init() (which would have created the link between - * the two). Instead, the link is created with a literal initialization. - * - * \li The hash table is created as power of two to remove the divisions from the code. - * Of course, hash functions work at their best when the table size is a prime number. - * When calculating the modulus to convert the hash value to an index, the actual operation - * becomes a bitwise AND: this is fast, but truncates the value losing bits. Thus, the higher - * bits are first "merged" with the lower bits through some XOR operations (see the last line of - * \c calc_hash()). 
- * - * \li To minimize the memory occupation, there is no flag to set for the empty node. An - * empty node is recognized by its data pointer set to NULL. It is then invalid to store - * NULL as data pointer in the table. - * - * \li The visiting interface through iterators is implemented with pass-by-value semantic. - * While this is overkill for medium-to-stupid compilers, it is the best designed from an - * user point of view. Moreover, being totally inlined (defined completely in the header), - * even a stupid compiler should be able to perform basic optimizations on it. - * We thought about using a pass-by-pointer semantic but it was much more awful to use, and - * the compiler is then forced to spill everything to the stack (unless it is *very* smart). - * - * \li The current implementation allows to either store the key internally (that is, copy - * the key within the hash table) or keep it external (that is, a hook is used to extract - * the key from the data in the node). The former is more memory-hungry of course, as it - * allocated static space to store the key copies. The overhead to keep both methods at - * the same time is minimal: - * - * - * \version $Id$ - * - * \author Giovanni Bajo - */ - -/*#* - *#* $Log$ - *#* Revision 1.8 2007/02/06 16:05:01 asterix - *#* Replaced ROTATE_* with ROT* defined in macros.h - *#* - *#* Revision 1.7 2006/07/19 12:56:27 bernie - *#* Convert to new Doxygen style. - *#* - *#* Revision 1.6 2006/06/01 12:27:39 marco - *#* Added utilities for protocols - *#* - *#*/ - -#include "hashtable.h" -#include -#include -#include //ROTL(), ROTR(); - -#include - - - -typedef const void** HashNodePtr; -#define NODE_EMPTY(node) (!*(node)) -#define HT_HAS_INTERNAL_KEY(ht) (CONFIG_HT_OPTIONAL_INTERNAL_KEY && ht->flags.key_internal) - -/** For hash tables with internal keys, compute the pointer to the internal key for a given \a node. 
*/ -INLINE uint8_t *key_internal_get_ptr(struct HashTable *ht, HashNodePtr node) -{ - uint8_t* key_buf = ht->key_data.mem; - size_t index; - - // Compute the index of the node and use it to move within the whole key buffer - index = node - &ht->mem[0]; - ASSERT(index < (size_t)(1 << ht->max_elts_log2)); - key_buf += index * (INTERNAL_KEY_MAX_LENGTH + 1); - - return key_buf; -} - - -INLINE void node_get_key(struct HashTable* ht, HashNodePtr node, const void** key, uint8_t* key_length) -{ - if (HT_HAS_INTERNAL_KEY(ht)) - { - uint8_t* k = key_internal_get_ptr(ht, node); - - // Key has its length stored in the first byte - *key_length = *k++; - *key = k; - } - else - *key = ht->key_data.hook(*node, key_length); -} - - -INLINE bool node_key_match(struct HashTable* ht, HashNodePtr node, const void* key, uint8_t key_length) -{ - const void* key2; - uint8_t key2_length; - - node_get_key(ht, node, &key2, &key2_length); - - return (key_length == key2_length && memcmp(key, key2, key_length) == 0); -} - - -static uint16_t calc_hash(const void* _key, uint8_t key_length) -{ - const char* key = (const char*)_key; - uint16_t hash = key_length; - int i; - int len = (int)key_length; - - for (i = 0; i < len; ++i) - hash = ROTL(hash, 4) ^ key[i]; - - return hash ^ (hash >> 6) ^ (hash >> 13); -} - - -static HashNodePtr perform_lookup(struct HashTable* ht, - const void* key, uint8_t key_length) -{ - uint16_t hash = calc_hash(key, key_length); - uint16_t mask = ((1 << ht->max_elts_log2) - 1); - uint16_t index = hash & mask; - uint16_t first_index = index; - uint16_t step; - HashNodePtr node; - - // Fast-path optimization: we check immediately if the current node - // is the one we were looking for, so we save the computation of the - // increment step in the common case. - node = &ht->mem[index]; - if (NODE_EMPTY(node) - || node_key_match(ht, node, key, key_length)) - return node; - - // Increment while going through the hash table in case of collision. 
- // This implements the double-hash technique: we use the higher part - // of the hash as a step increment instead of just going to the next - // element, to minimize the collisions. - // Notice that the number must be odd to be sure that the whole table - // is traversed. Actually MCD(table_size, step) must be 1, but - // table_size is always a power of 2, so we just ensure that step is - // never a multiple of 2. - step = (ROTR(hash, ht->max_elts_log2) & mask) | 1; - - do - { - index += step; - index &= mask; - - node = &ht->mem[index]; - if (NODE_EMPTY(node) - || node_key_match(ht, node, key, key_length)) - return node; - - // The check is done after the key compare. This actually causes - // one more compare in the case the table is full (since the first - // element was compared at the very start, and then at the end), - // but it makes faster the common path where we enter this loop - // for the first time, and index will not match first_index for - // sure. - } while (index != first_index); - - return NULL; -} - - -void ht_init(struct HashTable* ht) -{ - memset(ht->mem, 0, sizeof(ht->mem[0]) * (1 << ht->max_elts_log2)); -} - - -static bool insert(struct HashTable* ht, const void* key, uint8_t key_length, const void* data) -{ - HashNodePtr node; - - if (!data) - return false; - - if (HT_HAS_INTERNAL_KEY(ht)) - key_length = MIN(key_length, (uint8_t)INTERNAL_KEY_MAX_LENGTH); - - node = perform_lookup(ht, key, key_length); - if (!node) - return false; - - if (HT_HAS_INTERNAL_KEY(ht)) - { - uint8_t* k = key_internal_get_ptr(ht, node); - *k++ = key_length; - memcpy(k, key, key_length); - } - - *node = data; - return true; -} - - -bool ht_insert_with_key(struct HashTable* ht, const void* key, uint8_t key_length, const void* data) -{ -#ifdef _DEBUG - if (!HT_HAS_INTERNAL_KEY(ht)) - { - // Construct a fake node and use it to match the key - HashNodePtr node = &data; - if (!node_key_match(ht, node, key, key_length)) - { - ASSERT2(0, "parameter key is different from 
the external key"); - return false; - } - } -#endif - - return insert(ht, key, key_length, data); -} - - -bool ht_insert(struct HashTable* ht, const void* data) -{ - const void* key; - uint8_t key_length; - -#ifdef _DEBUG - if (HT_HAS_INTERNAL_KEY(ht)) - { - ASSERT("parameter cannot be a hash table with internal keys - use ht_insert_with_key()" - && 0); - return false; - } -#endif - - key = ht->key_data.hook(data, &key_length); - - return insert(ht, key, key_length, data); -} - - -const void* ht_find(struct HashTable* ht, const void* key, uint8_t key_length) -{ - HashNodePtr node; - - if (HT_HAS_INTERNAL_KEY(ht)) - key_length = MIN(key_length, (uint8_t)INTERNAL_KEY_MAX_LENGTH); - - node = perform_lookup(ht, key, key_length); - - if (!node || NODE_EMPTY(node)) - return NULL; - - return *node; -} - - -#if 0 - -#include - -bool ht_test(void); - -static const void* test_get_key(const void* ptr, uint8_t* length) -{ - const char* s = ptr; - *length = strlen(s); - return s; -} - -#define NUM_ELEMENTS 256 -DECLARE_HASHTABLE_STATIC(test1, 256, test_get_key); -DECLARE_HASHTABLE_INTERNALKEY_STATIC(test2, 256); - -static char data[NUM_ELEMENTS][10]; -static char keydomain[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - -static bool single_test(void) -{ - int i; - - ht_init(&test1); - ht_init(&test2); - - for (i=0;i - * - * \brief Portable hash table - * - * This file implements a portable hash table, with the following features: - * - * \li Open double-hashing. The maximum number of elements is fixed. The double hashing - * function improves recovery in case of collisions. - * \li Configurable size (which is clamped to a power of two) - * \li Visiting interface through iterator (returns the element in random order). - * \li The key is stored within the data and a hook is used to extract it. Optionally, it - * is possible to store a copy of the key within the hash table. 
- * - * Since the hashing is open, there is no way to remove elements from the table. Instead, a - * function is provided to clear the table completely. - * - * The data stored within the table must be a pointer. The NULL pointer is used as - * a marker for a free node, so it is invalid to store a NULL pointer in the table - * with \c ht_insert(). - * - * \version $Id$ - * - * \author Giovanni Bajo - */ - -/*#* - *#* $Log$ - *#* Revision 1.8 2006/07/19 12:56:27 bernie - *#* Convert to new Doxygen style. - *#* - *#* Revision 1.7 2006/06/01 12:27:39 marco - *#* Added utilities for protocols - *#* - *#*/ - -#ifndef MWARE_HASHTABLE_H -#define MWARE_HASHTABLE_H - -#include -#include -#include - -/** - * Enable/disable support to declare special hash tables which maintain a copy of - * the key internally instead of relying on the hook to extract it from the data. - */ -#define CONFIG_HT_OPTIONAL_INTERNAL_KEY 1 - -/// Maximum length of the internal key (use (2^n)-1 for slight speedup) -#define INTERNAL_KEY_MAX_LENGTH 15 - -/** - * Hook to get the key from \a data, which is an element of the hash table. The - * key must be returned together with \a key_length (in words). - */ -typedef const void *(*hook_get_key)(const void *data, uint8_t *key_length); - - -/** - * Hash table description - * - * \note This structures MUST NOT be accessed directly. Its definition is - * provided in the header file only for optimization purposes (see the rationale - * in hashtable.c). - * - * \note If new elements must be added to this list, please double check - * \c DECLARE_HASHTABLE, which requires the existing elements to be at the top. 
- */ -struct HashTable -{ - const void **mem; ///< Buckets of data - uint16_t max_elts_log2; ///< Log2 of the size of the table - struct { - bool key_internal : 1; ///< true if the key is copied internally - } flags; - union { - hook_get_key hook; ///< Hook to get the key - uint8_t *mem; ///< Pointer to the key memory - } key_data; -}; - - -/// Iterator to walk the hash table -typedef struct -{ - const void** pos; - const void** end; -} HashIterator; - - -/** - * Declare a hash table in the current scope - * - * \param name Variable name - * \param size Number of elements - * \param hook_gk Hook to be used to extract the key from the node - * - * \note The number of elements will be rounded down to the nearest - * power of two. - * - */ -#define DECLARE_HASHTABLE(name, size, hook_gk) \ - static const void* name##_nodes[1 << UINT32_LOG2(size)]; \ - struct HashTable name = { name##_nodes, UINT32_LOG2(size), { false }, hook_gk } - -/** Exactly like \c DECLARE_HASHTABLE, but the variable will be declared as static. */ -#define DECLARE_HASHTABLE_STATIC(name, size, hook_gk) \ - static const void* name##_nodes[1 << UINT32_LOG2(size)]; \ - static struct HashTable name = { name##_nodes, UINT32_LOG2(size), { false }, { hook_gk } } - -#if CONFIG_HT_OPTIONAL_INTERNAL_KEY - /** Declare a hash table with internal copies of the keys. This version does not - * require a hook, nor it requires the user to allocate static memory for the keys. - * It is mostly suggested for tables whose keys are computed on the fly and need - * to be stored somewhere. - */ - #define DECLARE_HASHTABLE_INTERNALKEY(name, size) \ - static uint8_t name##_keys[(1 << UINT32_LOG2(size)) * (INTERNAL_KEY_MAX_LENGTH + 1)]; \ - static const void* name##_nodes[1 << UINT32_LOG2(size)]; \ - struct HashTable name = { name##_nodes, UINT32_LOG2(size), { true }, name##_keys } - - /** Exactly like \c DECLARE_HASHTABLE_INTERNALKEY, but the variable will be declared as static. 
*/ - #define DECLARE_HASHTABLE_INTERNALKEY_STATIC(name, size) \ - static uint8_t name##_keys[(1 << UINT32_LOG2(size)) * (INTERNAL_KEY_MAX_LENGTH + 1)]; \ - static const void* name##_nodes[1 << UINT32_LOG2(size)]; \ - static struct HashTable name = { name##_nodes, UINT32_LOG2(size), { true }, name##_keys } -#endif - -/** - * Initialize (and clear) a hash table in a memory buffer. - * - * \param ht Hash table declared with \c DECLARE_HASHTABLE - * - * \note This function must be called before using the hash table. Optionally, - * it can be called later in the program to clear the hash table, - * removing all its elements. - */ -void ht_init(struct HashTable* ht); - -/** - * Insert an element into the hash table - * - * \param ht Handle of the hash table - * \param data Data to be inserted into the table - * \return true if insertion was successful, false otherwise (table is full) - * - * \note The key for the element to insert is extract from the data with - * the hook. This means that this function cannot be called for hashtables - * with internal keys. - * - * \note If an element with the same key already exists in the table, - * it will be overwritten. - * - * \note It is not allowed to store NULL in the table. If you pass NULL as data, - * the function call will fail. - */ -bool ht_insert(struct HashTable* ht, const void* data); - -/** - * Insert an element into the hash table - * - * \param ht Handle of the hash table - * \param key Key of the element - * \param key_length Length of the key in characters - * \param data Data to be inserted into the table - * \return true if insertion was successful, false otherwise (table is full) - * - * \note If this function is called for hash table with external keys, - * the key provided must be match the key that would be extracted with the - * hook, otherwise the function will fail. - * - * \note If an element with the same key already exists in the table, - * it will be overwritten. 
- * - * \note It is not allowed to store NULL in the table. If you pass NULL as data, - * the function call will fail. - */ -bool ht_insert_with_key(struct HashTable* ht, const void* key, uint8_t key_length, const void* data); - -/** - * Find an element in the hash table - * - * \param ht Handle of the hash table - * \param key Key of the element - * \param key_length Length of the key in characters - * \return Data of the element, or NULL if no element was found for the given key. - */ -const void* ht_find(struct HashTable* ht, const void* key, uint8_t key_length); - -/** Similar to \c ht_insert_with_key() but \a key is an ASCIIZ string */ -#define ht_insert_str(ht, key, data) ht_insert_with_key(ht, key, strlen(key), data) - -/** Similar to \c ht_find() but \a key is an ASCIIZ string */ -#define ht_find_str(ht, key) ht_find(ht, key, strlen(key)) - -/// Get an iterator to the begin of the hash table \a ht -INLINE HashIterator ht_iter_begin(struct HashTable* ht) -{ - HashIterator h; - - h.pos = &ht->mem[0]; - h.end = &ht->mem[1 << ht->max_elts_log2]; - - while (h.pos != h.end && !*h.pos) - ++h.pos; - - return h; -} - -/** - * Get an iterator to the (exclusive) end of the hash table \a ht - * - * \note Like in STL, the end iterator is not a valid iterator (you - * cannot call \c ht_iter_get() on it), and it must be used only to - * detect if we reached the end of the iteration (through \c ht_iter_cmp()). 
- */ -INLINE HashIterator ht_iter_end(struct HashTable* ht) -{ - HashIterator h; - - h.pos = h.end = &ht->mem[1 << ht->max_elts_log2]; - - return h; -} - -/// Compare \a it1 and \a it2 for equality -INLINE bool ht_iter_cmp(HashIterator it1, HashIterator it2) -{ - ASSERT(it1.end == it2.end); - return it1.pos == it2.pos; -} - -/// Get the element within the hash table \a ht pointed by the iterator \a iter -INLINE const void* ht_iter_get(HashIterator iter) -{ return *iter.pos; } - -/** Return an iterator pointing to the element following \a h - * - * \note The order of the elements visited during the iteration is casual, - * and depends on the implementation. - * - */ -INLINE HashIterator ht_iter_next(HashIterator h) -{ - ++h.pos; - while (h.pos != h.end && !(*h.pos)) - ++h.pos; - - return h; -} - -#endif /* MWARE_HASHTABLE_H */ +#warning This header is OBSOLETE +#include diff --git a/bertos/mware/heap.c b/bertos/mware/heap.c deleted file mode 100644 index 3bed8993..00000000 --- a/bertos/mware/heap.c +++ /dev/null @@ -1,238 +0,0 @@ -/** - * \file - * - * - * \brief Heap subsystem (public interface). - * - * \version $Id$ - * - * \author Bernie Innocenti - */ - -#include "heap.h" - -#include // IS_POW2() -#include // ASSERT() - -#include // memset() - -/* NOTE: struct size must be a 2's power! 
*/ -typedef struct _MemChunk -{ - struct _MemChunk *next; - size_t size; -} MemChunk; - -STATIC_ASSERT(IS_POW2(sizeof(MemChunk))); - -#define FREE_FILL_CODE 0xDEAD -#define ALLOC_FILL_CODE 0xBEEF - -void heap_init(struct Heap* h, void* memory, size_t size) -{ -#ifdef _DEBUG - memset(memory, FREE_FILL_CODE, size); -#endif - - /* Initialize heap with a single big chunk */ - h->FreeList = (MemChunk *)memory; - h->FreeList->next = NULL; - h->FreeList->size = size; -} - - -void *heap_allocmem(struct Heap* h, size_t size) -{ - MemChunk *chunk, *prev; - - /* Round size up to the allocation granularity */ - size = ROUND2(size, sizeof(MemChunk)); - - /* Handle allocations of 0 bytes */ - if (!size) - size = sizeof(MemChunk); - - /* Walk on the free list looking for any chunk big enough to - * fit the requested block size. - */ - for (prev = (MemChunk *)&h->FreeList, chunk = h->FreeList; - chunk; - prev = chunk, chunk = chunk->next) - { - if (chunk->size >= size) - { - if (chunk->size == size) - { - /* Just remove this chunk from the free list */ - prev->next = chunk->next; - #ifdef _DEBUG - memset(chunk, ALLOC_FILL_CODE, size); - #endif - return (void *)chunk; - } - else - { - /* Allocate from the END of an existing chunk */ - chunk->size -= size; - #ifdef _DEBUG - memset((uint8_t *)chunk + chunk->size, ALLOC_FILL_CODE, size); - #endif - return (void *)((uint8_t *)chunk + chunk->size); - } - } - } - - return NULL; /* fail */ -} - - -void heap_freemem(struct Heap* h, void *mem, size_t size) -{ - MemChunk *prev; - ASSERT(mem); - -#ifdef _DEBUG - memset(mem, FREE_FILL_CODE, size); -#endif - - /* Round size up to the allocation granularity */ - size = ROUND2(size, sizeof(MemChunk)); - - /* Handle allocations of 0 bytes */ - if (!size) - size = sizeof(MemChunk); - - /* Special case: first chunk in the free list */ - ASSERT((uint8_t*)mem != (uint8_t*)h->FreeList); - if (((uint8_t *)mem) < ((uint8_t *)h->FreeList)) - { - /* Insert memory block before the current free list head */ 
- prev = (MemChunk *)mem; - prev->next = h->FreeList; - prev->size = size; - h->FreeList = prev; - } - else /* Normal case: not the first chunk in the free list */ - { - /* - * Walk on the free list. Stop at the insertion point (when mem - * is between prev and prev->next) - */ - prev = h->FreeList; - while (prev->next < (MemChunk *)mem && prev->next) - prev = prev->next; - - /* Make sure mem is not *within* prev */ - ASSERT((uint8_t*)mem >= (uint8_t*)prev + prev->size); - - /* Should it be merged with previous block? */ - if (((uint8_t *)prev) + prev->size == ((uint8_t *)mem)) - { - /* Yes */ - prev->size += size; - } - else /* not merged with previous chunk */ - { - MemChunk *curr = (MemChunk*)mem; - - /* insert it after the previous node - * and move the 'prev' pointer forward - * for the following operations - */ - curr->next = prev->next; - curr->size = size; - prev->next = curr; - - /* Adjust for the following test */ - prev = curr; - } - } - - /* Also merge with next chunk? */ - if (((uint8_t *)prev) + prev->size == ((uint8_t *)prev->next)) - { - prev->size += prev->next->size; - prev->next = prev->next->next; - - /* There should be only one merge opportunity, becuase we always merge on free */ - ASSERT((uint8_t*)prev + prev->size != (uint8_t*)prev->next); - } -} - -#if CONFIG_HEAP_MALLOC - -void *heap_malloc(struct Heap* h, size_t size) -{ - size_t *mem; - - size += sizeof(size_t); - if ((mem = (size_t*)heap_allocmem(h, size))) - *mem++ = size; - - return mem; -} - -void *heap_calloc(struct Heap* h, size_t size) -{ - void *mem; - - if ((mem = heap_malloc(h, size))) - memset(mem, 0, size); - - return mem; -} - -/** - * Free a block of memory, determining its size automatically. - * - * \param h Heap from which the block was allocated. - * \param mem Pointer to a block of memory previously allocated with - * either heap_malloc() or heap_calloc(). - * - * \note If \a mem is a NULL pointer, no operation is performed. 
- * - * \note Freeing the same memory block twice has undefined behavior. - * - * \note This function works like the ANSI C free(). - */ -void heap_free(struct Heap *h, void *mem) -{ - size_t *_mem = (size_t *)mem; - - if (_mem) - { - --_mem; - heap_freemem(h, _mem, *_mem); - } -} - -#endif /* CONFIG_HEAP_MALLOC */ diff --git a/bertos/struct/hashtable.c b/bertos/struct/hashtable.c new file mode 100644 index 00000000..6f5471dd --- /dev/null +++ b/bertos/struct/hashtable.c @@ -0,0 +1,375 @@ +/** + * \file + * + * + * \brief Portable hash table implementation + * + * Some rationales of our choices in implementation: + * + * \li For embedded systems, it is vital to allocate the table in static memory. To do + * so, it is necessary to expose the \c HashNode and \c HashTable structures in the header file. + * Nevertheless, they should be used as opaque types (that is, the users should not + * access the structure fields directly). + * + * \li To statically allocate the structures, a macro is provided. With this macro, we + * are hiding completely \c HashNode to the user (who only manipulates \c HashTable). Without + * the macro, the user would have had to define both the \c HashNode and the \c HashTable + * manually, and pass both of them to \c ht_init() (which would have created the link between + * the two). Instead, the link is created with a literal initialization. + * + * \li The hash table is created as power of two to remove the divisions from the code. + * Of course, hash functions work at their best when the table size is a prime number. + * When calculating the modulus to convert the hash value to an index, the actual operation + * becomes a bitwise AND: this is fast, but truncates the value losing bits. Thus, the higher + * bits are first "merged" with the lower bits through some XOR operations (see the last line of + * \c calc_hash()). + * + * \li To minimize the memory occupation, there is no flag to set for the empty node. 
An + * empty node is recognized by its data pointer set to NULL. It is then invalid to store + * NULL as data pointer in the table. + * + * \li The visiting interface through iterators is implemented with pass-by-value semantic. + * While this is overkill for medium-to-stupid compilers, it is the best designed from an + * user point of view. Moreover, being totally inlined (defined completely in the header), + * even a stupid compiler should be able to perform basic optimizations on it. + * We thought about using a pass-by-pointer semantic but it was much more awful to use, and + * the compiler is then forced to spill everything to the stack (unless it is *very* smart). + * + * \li The current implementation allows to either store the key internally (that is, copy + * the key within the hash table) or keep it external (that is, a hook is used to extract + * the key from the data in the node). The former is more memory-hungry of course, as it + * allocated static space to store the key copies. The overhead to keep both methods at + * the same time is minimal: + *
    + *
  • There is a run-time check in node_get_key which is executed for each node visited.
  • + *
  • Theoretically, there is no memory overhead. In practice, there were no + * flags in \c struct HashTable till now, so we had to add a first bit flag, but the + * overhead will disappear if a second flag is added for a different reason later.
  • + *
  • There is a little interface overhead, since we have two different versions of + * \c ht_insert(), one with the key passed as parameter and one without, but in + * the common case (external keys) both can be used.
  • + *
+ * + * \version $Id$ + * \author Giovanni Bajo + */ + +#include "hashtable.h" +#include +#include +#include //ROTL(), ROTR(); + +#include + + +typedef const void** HashNodePtr; +#define NODE_EMPTY(node) (!*(node)) +#define HT_HAS_INTERNAL_KEY(ht) (CONFIG_HT_OPTIONAL_INTERNAL_KEY && ht->flags.key_internal) + +/** For hash tables with internal keys, compute the pointer to the internal key for a given \a node. */ +INLINE uint8_t *key_internal_get_ptr(struct HashTable *ht, HashNodePtr node) +{ + uint8_t* key_buf = ht->key_data.mem; + size_t index; + + // Compute the index of the node and use it to move within the whole key buffer + index = node - &ht->mem[0]; + ASSERT(index < (size_t)(1 << ht->max_elts_log2)); + key_buf += index * (INTERNAL_KEY_MAX_LENGTH + 1); + + return key_buf; +} + + +INLINE void node_get_key(struct HashTable* ht, HashNodePtr node, const void** key, uint8_t* key_length) +{ + if (HT_HAS_INTERNAL_KEY(ht)) + { + uint8_t* k = key_internal_get_ptr(ht, node); + + // Key has its length stored in the first byte + *key_length = *k++; + *key = k; + } + else + *key = ht->key_data.hook(*node, key_length); +} + + +INLINE bool node_key_match(struct HashTable* ht, HashNodePtr node, const void* key, uint8_t key_length) +{ + const void* key2; + uint8_t key2_length; + + node_get_key(ht, node, &key2, &key2_length); + + return (key_length == key2_length && memcmp(key, key2, key_length) == 0); +} + + +static uint16_t calc_hash(const void* _key, uint8_t key_length) +{ + const char* key = (const char*)_key; + uint16_t hash = key_length; + int i; + int len = (int)key_length; + + for (i = 0; i < len; ++i) + hash = ROTL(hash, 4) ^ key[i]; + + return hash ^ (hash >> 6) ^ (hash >> 13); +} + + +static HashNodePtr perform_lookup(struct HashTable* ht, + const void* key, uint8_t key_length) +{ + uint16_t hash = calc_hash(key, key_length); + uint16_t mask = ((1 << ht->max_elts_log2) - 1); + uint16_t index = hash & mask; + uint16_t first_index = index; + uint16_t step; + 
HashNodePtr node; + + // Fast-path optimization: we check immediately if the current node + // is the one we were looking for, so we save the computation of the + // increment step in the common case. + node = &ht->mem[index]; + if (NODE_EMPTY(node) + || node_key_match(ht, node, key, key_length)) + return node; + + // Increment while going through the hash table in case of collision. + // This implements the double-hash technique: we use the higher part + // of the hash as a step increment instead of just going to the next + // element, to minimize the collisions. + // Notice that the number must be odd to be sure that the whole table + // is traversed. Actually MCD(table_size, step) must be 1, but + // table_size is always a power of 2, so we just ensure that step is + // never a multiple of 2. + step = (ROTR(hash, ht->max_elts_log2) & mask) | 1; + + do + { + index += step; + index &= mask; + + node = &ht->mem[index]; + if (NODE_EMPTY(node) + || node_key_match(ht, node, key, key_length)) + return node; + + // The check is done after the key compare. This actually causes + // one more compare in the case the table is full (since the first + // element was compared at the very start, and then at the end), + // but it makes faster the common path where we enter this loop + // for the first time, and index will not match first_index for + // sure. 
+ } while (index != first_index); + + return NULL; +} + + +void ht_init(struct HashTable* ht) +{ + memset(ht->mem, 0, sizeof(ht->mem[0]) * (1 << ht->max_elts_log2)); +} + + +static bool insert(struct HashTable* ht, const void* key, uint8_t key_length, const void* data) +{ + HashNodePtr node; + + if (!data) + return false; + + if (HT_HAS_INTERNAL_KEY(ht)) + key_length = MIN(key_length, (uint8_t)INTERNAL_KEY_MAX_LENGTH); + + node = perform_lookup(ht, key, key_length); + if (!node) + return false; + + if (HT_HAS_INTERNAL_KEY(ht)) + { + uint8_t* k = key_internal_get_ptr(ht, node); + *k++ = key_length; + memcpy(k, key, key_length); + } + + *node = data; + return true; +} + + +bool ht_insert_with_key(struct HashTable* ht, const void* key, uint8_t key_length, const void* data) +{ +#ifdef _DEBUG + if (!HT_HAS_INTERNAL_KEY(ht)) + { + // Construct a fake node and use it to match the key + HashNodePtr node = &data; + if (!node_key_match(ht, node, key, key_length)) + { + ASSERT2(0, "parameter key is different from the external key"); + return false; + } + } +#endif + + return insert(ht, key, key_length, data); +} + + +bool ht_insert(struct HashTable* ht, const void* data) +{ + const void* key; + uint8_t key_length; + +#ifdef _DEBUG + if (HT_HAS_INTERNAL_KEY(ht)) + { + ASSERT("parameter cannot be a hash table with internal keys - use ht_insert_with_key()" + && 0); + return false; + } +#endif + + key = ht->key_data.hook(data, &key_length); + + return insert(ht, key, key_length, data); +} + + +const void* ht_find(struct HashTable* ht, const void* key, uint8_t key_length) +{ + HashNodePtr node; + + if (HT_HAS_INTERNAL_KEY(ht)) + key_length = MIN(key_length, (uint8_t)INTERNAL_KEY_MAX_LENGTH); + + node = perform_lookup(ht, key, key_length); + + if (!node || NODE_EMPTY(node)) + return NULL; + + return *node; +} + + +#if 0 + +#include + +bool ht_test(void); + +static const void* test_get_key(const void* ptr, uint8_t* length) +{ + const char* s = ptr; + *length = strlen(s); + return s; 
+} + +#define NUM_ELEMENTS 256 +DECLARE_HASHTABLE_STATIC(test1, 256, test_get_key); +DECLARE_HASHTABLE_INTERNALKEY_STATIC(test2, 256); + +static char data[NUM_ELEMENTS][10]; +static char keydomain[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + +static bool single_test(void) +{ + int i; + + ht_init(&test1); + ht_init(&test2); + + for (i=0;i + * + * \brief Portable hash table + * + * This file implements a portable hash table, with the following features: + * + * \li Open double-hashing. The maximum number of elements is fixed. The double hashing + * function improves recovery in case of collisions. + * \li Configurable size (which is clamped to a power of two) + * \li Visiting interface through iterator (returns the element in random order). + * \li The key is stored within the data and a hook is used to extract it. Optionally, it + * is possible to store a copy of the key within the hash table. + * + * Since the hashing is open, there is no way to remove elements from the table. Instead, a + * function is provided to clear the table completely. + * + * The data stored within the table must be a pointer. The NULL pointer is used as + * a marker for a free node, so it is invalid to store a NULL pointer in the table + * with \c ht_insert(). + * + * \version $Id$ + * \author Giovanni Bajo + */ + +#ifndef MWARE_HASHTABLE_H +#define MWARE_HASHTABLE_H + +#include +#include +#include + +/** + * Enable/disable support to declare special hash tables which maintain a copy of + * the key internally instead of relying on the hook to extract it from the data. + */ +#define CONFIG_HT_OPTIONAL_INTERNAL_KEY 1 + +/// Maximum length of the internal key (use (2^n)-1 for slight speedup) +#define INTERNAL_KEY_MAX_LENGTH 15 + +/** + * Hook to get the key from \a data, which is an element of the hash table. The + * key must be returned together with \a key_length (in words). 
+ */ +typedef const void *(*hook_get_key)(const void *data, uint8_t *key_length); + + +/** + * Hash table description + * + * \note This structure MUST NOT be accessed directly. Its definition is + * provided in the header file only for optimization purposes (see the rationale + * in hashtable.c). + * + * \note If new members must be added to this structure, please double check the + * \c DECLARE_HASHTABLE* macros: their positional initializers require the existing members to stay first, in this order. + */ +struct HashTable +{ + const void **mem; ///< Buckets of data (a NULL bucket is an empty node) + uint16_t max_elts_log2; ///< Log2 of the number of buckets in the table + struct { + bool key_internal : 1; ///< true if the key is copied internally + } flags; + union { + hook_get_key hook; ///< Hook to get the key (tables with external keys) + uint8_t *mem; ///< Pointer to the key memory (tables with internal keys) + } key_data; +}; + + +/// Iterator to walk the hash table +typedef struct +{ + const void** pos; /**< Bucket the iterator currently points to */ + const void** end; /**< One past the last bucket of the table */ +} HashIterator; + + +/** + * Declare a hash table in the current scope + * + * \param name Variable name + * \param size Number of elements + * \param hook_gk Hook to be used to extract the key from the node + * + * \note The number of elements will be rounded down to the nearest + * power of two. + * + */ +#define DECLARE_HASHTABLE(name, size, hook_gk) \ + static const void* name##_nodes[1 << UINT32_LOG2(size)]; \ + struct HashTable name = { name##_nodes, UINT32_LOG2(size), { false }, hook_gk } + +/** Exactly like \c DECLARE_HASHTABLE, but the variable will be declared as static. */ +#define DECLARE_HASHTABLE_STATIC(name, size, hook_gk) \ + static const void* name##_nodes[1 << UINT32_LOG2(size)]; \ + static struct HashTable name = { name##_nodes, UINT32_LOG2(size), { false }, { hook_gk } } + +#if CONFIG_HT_OPTIONAL_INTERNAL_KEY + /** Declare a hash table with internal copies of the keys. This version does not + * require a hook, nor does it require the user to allocate static memory for the keys. 
+ * It is mostly suggested for tables whose keys are computed on the fly and need + * to be stored somewhere. + */ + #define DECLARE_HASHTABLE_INTERNALKEY(name, size) \ + static uint8_t name##_keys[(1 << UINT32_LOG2(size)) * (INTERNAL_KEY_MAX_LENGTH + 1)]; \ + static const void* name##_nodes[1 << UINT32_LOG2(size)]; \ + struct HashTable name = { name##_nodes, UINT32_LOG2(size), { true }, name##_keys } + + /** Exactly like \c DECLARE_HASHTABLE_INTERNALKEY, but the variable will be declared as static. NOTE(review): in both macros the last initializer (\c name##_keys, a \c uint8_t*) carries no designator, so it initializes the first member of the \c key_data union (\c hook) rather than \c mem, relying on an implicit pointer conversion - confirm, or consider a designated initializer. */ + #define DECLARE_HASHTABLE_INTERNALKEY_STATIC(name, size) \ + static uint8_t name##_keys[(1 << UINT32_LOG2(size)) * (INTERNAL_KEY_MAX_LENGTH + 1)]; \ + static const void* name##_nodes[1 << UINT32_LOG2(size)]; \ + static struct HashTable name = { name##_nodes, UINT32_LOG2(size), { true }, name##_keys } +#endif + +/** + * Initialize (and clear) a hash table in a memory buffer. + * + * \param ht Hash table declared with one of the \c DECLARE_HASHTABLE* macros + * + * \note This function must be called before using the hash table. Optionally, + * it can be called later in the program to clear the hash table, + * removing all its elements. + */ +void ht_init(struct HashTable* ht); + +/** + * Insert an element into the hash table + * + * \param ht Handle of the hash table + * \param data Data to be inserted into the table + * \return true if insertion was successful, false otherwise (table is full, or \a data is NULL) + * + * \note The key for the element to insert is extracted from the data with + * the hook. This means that this function cannot be called for hash tables + * with internal keys. + * + * \note If an element with the same key already exists in the table, + * it will be overwritten. + * + * \note It is not allowed to store NULL in the table. If you pass NULL as data, + * the function call will fail. 
+ */ +bool ht_insert(struct HashTable* ht, const void* data); + +/** + * Insert an element into the hash table + * + * \param ht Handle of the hash table + * \param key Key of the element + * \param key_length Length of the key in bytes (for tables with internal keys it is truncated to \c INTERNAL_KEY_MAX_LENGTH) + * \param data Data to be inserted into the table + * \return true if insertion was successful, false otherwise (table is full, or \a data is NULL) + * + * \note If this function is called for a hash table with external keys, + * the key provided must match the key that would be extracted with the + * hook, otherwise the function will fail (this check is compiled in only when \c _DEBUG is defined). + * + * \note If an element with the same key already exists in the table, + * it will be overwritten. + * + * \note It is not allowed to store NULL in the table. If you pass NULL as data, + * the function call will fail. + */ +bool ht_insert_with_key(struct HashTable* ht, const void* key, uint8_t key_length, const void* data); + +/** + * Find an element in the hash table + * + * \param ht Handle of the hash table + * \param key Key of the element + * \param key_length Length of the key in bytes (for tables with internal keys it is truncated to \c INTERNAL_KEY_MAX_LENGTH) + * \return Data of the element, or NULL if no element was found for the given key. 
+ */ +const void* ht_find(struct HashTable* ht, const void* key, uint8_t key_length); + +/** Similar to \c ht_insert_with_key() but \a key is an ASCIIZ string. \a key is expanded twice, so it must not have side effects; the \c strlen() result is narrowed to the \c uint8_t key length. */ +#define ht_insert_str(ht, key, data) ht_insert_with_key(ht, key, strlen(key), data) + +/** Similar to \c ht_find() but \a key is an ASCIIZ string. \a key is expanded twice, so it must not have side effects; the \c strlen() result is narrowed to the \c uint8_t key length. */ +#define ht_find_str(ht, key) ht_find(ht, key, strlen(key)) + +/// Get an iterator to the first non-empty node of the hash table \a ht (equal to the end iterator if the table is empty) +INLINE HashIterator ht_iter_begin(struct HashTable* ht) +{ + HashIterator h; + + h.pos = &ht->mem[0]; + h.end = &ht->mem[1 << ht->max_elts_log2]; + + /* Skip leading empty (NULL) nodes. */ while (h.pos != h.end && !*h.pos) + ++h.pos; + + return h; +} + +/** + * Get an iterator to the (exclusive) end of the hash table \a ht + * + * \note Like in STL, the end iterator is not a valid iterator (you + * cannot call \c ht_iter_get() on it), and it must be used only to + * detect if we reached the end of the iteration (through \c ht_iter_cmp()). + */ +INLINE HashIterator ht_iter_end(struct HashTable* ht) +{ + HashIterator h; + + h.pos = h.end = &ht->mem[1 << ht->max_elts_log2]; + + return h; +} + +/// Compare \a it1 and \a it2 for equality (true when both point to the same node); both are asserted to share the same end pointer +INLINE bool ht_iter_cmp(HashIterator it1, HashIterator it2) +{ + ASSERT(it1.end == it2.end); + return it1.pos == it2.pos; +} + +/// Get the element pointed to by the iterator \a iter (which must not be the end iterator) +INLINE const void* ht_iter_get(HashIterator iter) +{ return *iter.pos; } + +/** Return an iterator pointing to the element following \a h, or the end iterator if there are no more elements + * + * \note The order of the elements visited during the iteration is arbitrary, + * and depends on the implementation. + * \note \a h must not be the end iterator: it is advanced before being compared with the end. + */ +INLINE HashIterator ht_iter_next(HashIterator h) +{ + ++h.pos; + /* Skip empty (NULL) nodes. */ while (h.pos != h.end && !(*h.pos)) + ++h.pos; + + return h; +} + +#endif /* MWARE_HASHTABLE_H */