From: rasky Date: Wed, 14 Jul 2004 14:08:16 +0000 (+0000) Subject: Implementazione di una tabella hash X-Git-Tag: 1.0.0~1194 X-Git-Url: https://codewiz.org/gitweb?a=commitdiff_plain;h=5f7d3d48d08f112c1b601e1a37060beb665c3bd7;p=bertos.git Implementazione di una tabella hash git-svn-id: https://src.develer.com/svnoss/bertos/trunk@47 38d2e660-2303-0410-9eaa-f027e97ec537 --- diff --git a/mware/hashtable.c b/mware/hashtable.c new file mode 100755 index 00000000..603f30eb --- /dev/null +++ b/mware/hashtable.c @@ -0,0 +1,381 @@ +/*! + * \file + * + * + * \brief Portable hash table implementation + * + * Some rationales of our choices in implementation: + * + * \li For embedded systems, it is vital to allocate the table in static memory. To do + * so, it is necessary to expose the \c HashNode and \c HashTable structures in the header file. + * Nevertheless, they should be used as opaque types (that is, the users should not + * access the structure fields directly). + * + * \li To statically allocate the structures, a macro is provided. With this macro, we + * are hiding completely \c HashNode to the user (who only manipulates \c HashTable). Without + * the macro, the user would have had to define both the \c HashNode and the \c HashTable + * manually, and pass both of them to \c ht_init() (which would have created the link between + * the two). Instead, the link is created with a literal initialization. + * + * \li The hash table is created as power of two to remove the divisions from the code. + * Of course, hash functions work at their best when the table size is a prime number. + * When calculating the modulus to convert the hash value to an index, the actual operation + * becomes a bitwise AND: this is fast, but truncates the value losing bits. Thus, the higher + * bits are first "merged" with the lower bits through some XOR operations (see the last line of + * \c calc_hash()). + * + * \li To minimize the memory occupation, there is no flag to set for the empty node. 
An + * empty node is recognized by its data pointer set to NULL. It is therefore invalid to store + * NULL as data pointer in the table. + * + * \li The visiting interface through iterators is implemented with pass-by-value semantics. + * While this is overkill for medium-to-stupid compilers, it is the best design from a + * user's point of view. Moreover, being totally inlined (defined completely in the header), + * even a stupid compiler should be able to perform basic optimizations on it. + * We thought about using pass-by-pointer semantics but it was much more awkward to use, and + * the compiler is then forced to spill everything to the stack (unless it is *very* smart). + * + * \li The current implementation allows the key to be either stored internally (that is, copied + * within the hash table) or kept external (that is, a hook is used to extract + * the key from the data in the node). The former is more memory-hungry of course, as it + * allocates static space to store the key copies. 
The overhead to keep both methods at + * the same time is minimal: + * + * + * \version $Id$ + * + * \author Giovanni Bajo + */ + +/* + * $Log$ + * Revision 1.1 2004/07/14 14:08:16 rasky + * Implementazione di una tabella hash + * + * Revision 1.13 2004/07/12 16:33:36 rasky + * Aggiunta nuova ASSERT2, con stringa di descrizione del problema (disabilitabile tramite una macro di configurazione) + * Modificato il codice del firmware per utilizzare ASSERT2 + * Modificato il progetto in modo da disabilitare le stringhe di errore nel target xROM-xRAM + * + * Revision 1.12 2004/06/14 15:15:24 rasky + * Cambiato key_data in un union invece di castare + * Aggiunto un ASSERT sull'indice calcolata nella key_internal_get_ptr + * + * Revision 1.11 2004/06/14 15:09:04 rasky + * Cambiati i messaggi di assert (è inutile citare il nome della funzione) + * + * Revision 1.10 2004/06/14 15:07:38 rasky + * Convertito il loop di calc_hash a interi (per farlo ottimizzare maggiormente) + * + * Revision 1.9 2004/06/14 14:59:40 rasky + * Rinominanta la macro di configurazione per rispettare il namespace, e aggiunta in un punto in cui mancava + * + * Revision 1.8 2004/06/12 15:18:05 rasky + * Nuova hashtable con chiave esterna o interna a scelta, come discusso + * + * Revision 1.7 2004/06/04 17:16:31 rasky + * Fixato un bug nel caso in cui la chiave ecceda la dimensione massima: il clamp non può essere fatto dentro la perform_lookup perché anche la ht_insert deve avere il valore clampato a disposizione per fare la memcpy + * + * Revision 1.6 2004/05/26 16:36:50 rasky + * Aggiunto il rationale per l'interfaccia degli iteratori + * + * Revision 1.5 2004/05/24 15:28:20 rasky + * Sistemata la documentazione, rimossa keycmp in favore della memcmp + * + */ +#include "hashtable.h" +#include +#include +#include + + +#define ROTATE_LEFT_16(num, count) (((num) << (count)) | ((num) >> (16-(count)))) +#define ROTATE_RIGHT_16(num, count) ROTATE_LEFT_16(num, 16-(count)) + +typedef const void** 
HashNodePtr; +#define NODE_EMPTY(node) (!*(node)) +#define HT_HAS_INTERNAL_KEY(ht) (CONFIG_HT_OPTIONAL_INTERNAL_KEY && ht->flags.key_internal) + +/*! For hash tables with internal keys, compute the pointer to the internal key for a given \a node. */ +INLINE uint8_t* key_internal_get_ptr(struct HashTable* ht, HashNodePtr node) +{ + uint8_t* key_buf = ht->key_data.mem; + size_t index; + + // Compute the index of the node and use it to move within the whole key buffer + index = node - &ht->mem[0]; + ASSERT(index < (1 << ht->max_elts_log2)); + key_buf += index * (INTERNAL_KEY_MAX_LENGTH + 1); + + return key_buf; +} + + +INLINE void node_get_key(struct HashTable* ht, HashNodePtr node, const void** key, uint8_t* key_length) +{ + if (HT_HAS_INTERNAL_KEY(ht)) + { + uint8_t* k = key_internal_get_ptr(ht, node); + + // Key has its length stored in the first byte + *key_length = *k++; + *key = k; + } + else + *key = ht->key_data.hook(*node, key_length); +} + +INLINE bool node_key_match(struct HashTable* ht, HashNodePtr node, const void* key, uint8_t key_length) +{ + const void* key2; + uint8_t key2_length; + + node_get_key(ht, node, &key2, &key2_length); + + return (key_length == key2_length && memcmp(key, key2, key_length) == 0); +} + +static uint16_t calc_hash(const void* _key, uint8_t key_length) +{ + const char* key = (const char*)_key; + uint16_t hash = key_length; + int i; + int len = (int)key_length; + + for (i = 0; i < len; ++i) + hash = ROTATE_LEFT_16(hash, 4) ^ key[i]; + + return hash ^ (hash >> 6) ^ (hash >> 13); +} + +static HashNodePtr perform_lookup(struct HashTable* ht, + const void* key, uint8_t key_length) +{ + uint16_t hash = calc_hash(key, key_length); + uint16_t mask = ((1 << ht->max_elts_log2) - 1); + uint16_t index = hash & mask; + uint16_t first_index = index; + uint16_t step; + HashNodePtr node; + + // Fast-path optimization: we check immediately if the current node + // is the one we were looking for, so we save the computation of the + // increment 
step in the common case. + node = &ht->mem[index]; + if (NODE_EMPTY(node) + || node_key_match(ht, node, key, key_length)) + return node; + + // Increment while going through the hash table in case of collision. + // This implements the double-hash technique: we use the higher part + // of the hash as a step increment instead of just going to the next + // element, to minimize the collisions. + // Notice that the number must be odd to be sure that the whole table + // is traversed. Actually MCD(table_size, step) must be 1, but + // table_size is always a power of 2, so we just ensure that step is + // never a multiple of 2. + step = (ROTATE_RIGHT_16(hash, ht->max_elts_log2) & mask) | 1; + + do + { + index += step; + index &= mask; + + node = &ht->mem[index]; + if (NODE_EMPTY(node) + || node_key_match(ht, node, key, key_length)) + return node; + + // The check is done after the key compare. This actually causes + // one more compare in the case the table is full (since the first + // element was compared at the very start, and then at the end), + // but it makes faster the common path where we enter this loop + // for the first time, and index will not match first_index for + // sure. 
+ } while (index != first_index); + + return NULL; +} + +void ht_init(struct HashTable* ht) +{ + memset(ht->mem, 0, sizeof(ht->mem[0]) * (1 << ht->max_elts_log2)); +} + +static bool insert(struct HashTable* ht, const void* key, uint8_t key_length, const void* data) +{ + HashNodePtr node; + + if (!data) + return false; + + if (HT_HAS_INTERNAL_KEY(ht)) + key_length = MIN(key_length, INTERNAL_KEY_MAX_LENGTH); + + node = perform_lookup(ht, key, key_length); + if (!node) + return false; + + if (HT_HAS_INTERNAL_KEY(ht)) + { + uint8_t* k = key_internal_get_ptr(ht, node); + *k++ = key_length; + memcpy(k, key, key_length); + } + + *node = data; + return true; +} + +bool ht_insert_with_key(struct HashTable* ht, const void* key, uint8_t key_length, const void* data) +{ +#ifdef _DEBUG + if (!HT_HAS_INTERNAL_KEY(ht)) + { + // Construct a fake node and use it to match the key + HashNodePtr node = &data; + if (!node_key_match(ht, node, key, key_length)) + { + ASSERT2(0, "parameter key is different from the external key"); + return false; + } + } +#endif + + return insert(ht, key, key_length, data); +} + +bool ht_insert(struct HashTable* ht, const void* data) +{ + const void* key; + uint8_t key_length; + +#ifdef _DEBUG + if (HT_HAS_INTERNAL_KEY(ht)) + { + ASSERT("parameter cannot be a hash table with internal keys - use ht_insert_with_key()" + && 0); + return false; + } +#endif + + key = ht->key_data.hook(data, &key_length); + + return insert(ht, key, key_length, data); +} + +const void* ht_find(struct HashTable* ht, const void* key, uint8_t key_length) +{ + HashNodePtr node; + + if (HT_HAS_INTERNAL_KEY(ht)) + key_length = MIN(key_length, INTERNAL_KEY_MAX_LENGTH); + + node = perform_lookup(ht, key, key_length); + + if (!node || NODE_EMPTY(node)) + return NULL; + + return *node; +} + + +#if 0 + +#include + +bool ht_test(void); + +static const void* test_get_key(const void* ptr, uint8_t* length) +{ + const char* s = ptr; + *length = strlen(s); + return s; +} + +#define NUM_ELEMENTS 
256 +DECLARE_HASHTABLE_STATIC(test1, 256, test_get_key); +DECLARE_HASHTABLE_INTERNALKEY_STATIC(test2, 256); + +static char data[NUM_ELEMENTS][10]; +static char keydomain[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + +static bool single_test(void) +{ + int i; + + ht_init(&test1); + ht_init(&test2); + + for (i=0;i + * + * \brief Portable hash table + * + * This file implements a portable hash table, with the following features: + * + * \li Open double-hashing. The maximum number of elements is fixed. The double hashing + * function improves recovery in case of collisions. + * \li Configurable size (which is clamped to a power of two) + * \li Visiting interface through iterator (returns the element in random order). + * \li The key is stored within the data and a hook is used to extract it. Optionally, it + * is possible to store a copy of the key within the hash table. + * + * Since the hashing is open, there is no way to remove elements from the table. Instead, a + * function is provided to clear the table completely. + * + * The data stored within the table must be a pointer. The NULL pointer is used as + * a marker for a free node, so it is invalid to store a NULL pointer in the table + * with \c ht_insert(). 
+ * + * \version $Id$ + * + * \author Giovanni Bajo + */ + +/* + * $Log$ + * Revision 1.1 2004/07/14 14:08:16 rasky + * Implementazione di una tabella hash + * + * Revision 1.10 2004/06/14 15:17:15 rasky + * Qualche fix alla documentazione Doxygen + * + * Revision 1.9 2004/06/14 15:15:24 rasky + * Cambiato key_data in un union invece di castare + * Aggiunto un ASSERT sull'indice calcolata nella key_internal_get_ptr + * + * Revision 1.8 2004/06/14 14:59:40 rasky + * Rinominanta la macro di configurazione per rispettare il namespace, e aggiunta in un punto in cui mancava + * + * Revision 1.7 2004/06/12 15:18:05 rasky + * Nuova hashtable con chiave esterna o interna a scelta, come discusso + * + * Revision 1.6 2004/05/26 16:33:31 rasky + * Aggiunta interfaccia per visita della hashtable tramite iteratori + * + * Revision 1.5 2004/05/24 18:42:23 rasky + * Fixato un commento doxygen + * + * Revision 1.4 2004/05/24 15:28:20 rasky + * Sistemata la documentazione, rimossa keycmp in favore della memcmp + * + */ + + +#ifndef HASHTABLE_H +#define HASHTABLE_H + +#include +#include + +/*! Enable/disable support to declare special hash tables which maintain a copy of + * the key internally instead of relying on the hook to extract it from the data. + */ +#define CONFIG_HT_OPTIONAL_INTERNAL_KEY 1 + +//! Maximum length of the internal key (use (2^n)-1 for slight speedup) +#define INTERNAL_KEY_MAX_LENGTH 15 + +/*! Hook to get the key from \a data, which is an element of the hash table. The + * key must be returned together with \a key_length (in bytes). + */ +typedef const void* (*hook_get_key)(const void* data, uint8_t* key_length); + +/*! Hash table description + * + * \note This structure MUST NOT be accessed directly. Its definition is + * provided in the header file only for optimization purposes (see the rationale + * in hashtable.c). 
+ * + * \note If new elements must be added to this list, please double check + * \c DECLARE_HASHTABLE, which requires the existing elements to be at the top. + */ +struct HashTable +{ + const void** mem; //!< Buckets of data + uint16_t max_elts_log2; //!< Log2 of the size of the table + struct { + bool key_internal : 1; //!< true if the key is copied internally + } flags; + union { + hook_get_key hook; //!< Hook to get the key + uint8_t* mem; //!< Pointer to the key memory + } key_data; +}; + +//! Iterator to walk the hash table +typedef struct +{ + const void** pos; + const void** end; +} HashIterator; + +/*! Declare a hash table in the current scope + * + * \param name Variable name + * \param size Number of elements + * \param hook_gk Hook to be used to extract the key from the node + * + * \note The number of elements will be rounded down to the nearest + * power of two. + * + */ +#define DECLARE_HASHTABLE(name, size, hook_gk) \ + static const void* name##_nodes[1 << UINT32_LOG2(size)]; \ + struct HashTable name = { name##_nodes, UINT32_LOG2(size), { false }, hook_gk } + +/*! Exactly like \c DECLARE_HASHTABLE, but the variable will be declared as static. */ +#define DECLARE_HASHTABLE_STATIC(name, size, hook_gk) \ + static const void* name##_nodes[1 << UINT32_LOG2(size)]; \ + static struct HashTable name = { name##_nodes, UINT32_LOG2(size), { false }, hook_gk } + +#if CONFIG_HT_OPTIONAL_INTERNAL_KEY + /*! Declare a hash table with internal copies of the keys. This version does not + * require a hook, nor it requires the user to allocate static memory for the keys. + * It is mostly suggested for tables whose keys are computed on the fly and need + * to be stored somewhere. 
+ */ + #define DECLARE_HASHTABLE_INTERNALKEY(name, size) \ + static uint8_t name##_keys[(1 << UINT32_LOG2(size)) * (INTERNAL_KEY_MAX_LENGTH + 1)]; \ + static const void* name##_nodes[1 << UINT32_LOG2(size)]; \ + struct HashTable name = { name##_nodes, UINT32_LOG2(size), { true }, name##_keys } + + /*! Exactly like \c DECLARE_HASHTABLE_INTERNALKEY, but the variable will be declared as static. */ + #define DECLARE_HASHTABLE_INTERNALKEY_STATIC(name, size) \ + static uint8_t name##_keys[(1 << UINT32_LOG2(size)) * (INTERNAL_KEY_MAX_LENGTH + 1)]; \ + static const void* name##_nodes[1 << UINT32_LOG2(size)]; \ + static struct HashTable name = { name##_nodes, UINT32_LOG2(size), { true }, name##_keys } +#endif + +/*! Initialize (and clear) a hash table in a memory buffer. + * + * \param ht Hash table declared with \c DECLARE_HASHTABLE + * + * \note This function must be called before using the hash table. Optionally, + * it can be called later in the program to clear the hash table, + * removing all its elements. + */ +void ht_init(struct HashTable* ht); + +/*! Insert an element into the hash table + * + * \param ht Handle of the hash table + * \param data Data to be inserted into the table + * \return true if insertion was successful, false otherwise (table is full) + * + * \note The key for the element to insert is extract from the data with + * the hook. This means that this function cannot be called for hashtables + * with internal keys. + * + * \note If an element with the same key already exists in the table, + * it will be overwritten. + * + * \note It is not allowed to store NULL in the table. If you pass NULL as data, + * the function call will fail. + */ +bool ht_insert(struct HashTable* ht, const void* data); + +/*! 
Insert an element into the hash table + * + * \param ht Handle of the hash table + * \param key Key of the element + * \param key_length Length of the key in characters + * \param data Data to be inserted into the table + * \return true if insertion was successful, false otherwise (table is full) + * + * \note If this function is called for hash table with external keys, + * the key provided must be match the key that would be extracted with the + * hook, otherwise the function will fail. + * + * \note If an element with the same key already exists in the table, + * it will be overwritten. + * + * \note It is not allowed to store NULL in the table. If you pass NULL as data, + * the function call will fail. + */ +bool ht_insert_with_key(struct HashTable* ht, const void* key, uint8_t key_length, const void* data); + +/*! Find an element in the hash table + * + * \param ht Handle of the hash table + * \param key Key of the element + * \param key_length Length of the key in characters + * \return Data of the element, or NULL if no element was found for the given key. + */ +const void* ht_find(struct HashTable* ht, const void* key, uint8_t key_length); + +/*! Similar to \c ht_insert_with_key() but \a key is an ASCIIZ string */ +#define ht_insert_str(ht, key, data) ht_insert_with_key(ht, key, strlen(key), data) + +/*! Similar to \c ht_find() but \a key is an ASCIIZ string */ +#define ht_find_str(ht, key) ht_find(ht, key, strlen(key)) + +//! Get an iterator to the begin of the hash table \a ht +INLINE HashIterator ht_iter_begin(struct HashTable* ht) +{ + HashIterator h; + + h.pos = &ht->mem[0]; + h.end = &ht->mem[1 << ht->max_elts_log2]; + + while (h.pos != h.end && !*h.pos) + ++h.pos; + + return h; +} + +/*! 
Get an iterator to the (exclusive) end of the hash table \a ht + * + * \note Like in STL, the end iterator is not a valid iterator (you + * cannot call \c ht_iter_get() on it), and it must be used only to + * detect if we reached the end of the iteration (through \c ht_iter_cmp()). + */ +INLINE HashIterator ht_iter_end(struct HashTable* ht) +{ + HashIterator h; + + h.pos = h.end = &ht->mem[1 << ht->max_elts_log2]; + + return h; +} + +//! Compare \a it1 and \a it2 for equality +INLINE bool ht_iter_cmp(HashIterator it1, HashIterator it2) +{ + ASSERT(it1.end == it2.end); + return it1.pos == it2.pos; +} + +//! Get the element within the hash table \a ht pointed by the iterator \a iter +INLINE const void* ht_iter_get(HashIterator iter) +{ return *iter.pos; } + +/*! Return an iterator pointing to the element following \a h + * + * \note The order of the elements visited during the iteration is casual, + * and depends on the implementation. + * + */ +INLINE HashIterator ht_iter_next(HashIterator h) +{ + ++h.pos; + while (h.pos != h.end && !(*h.pos)) + ++h.pos; + + return h; +} + +#endif /* HASHTABLE_H */