4 * Copyright (C) 2004 Giovanni Bajo
5 * Copyright (C) 2004 Develer S.r.l. (http://www.develer.com/)
9 * \brief Portable hash table implementation
11 * Some rationales of our choices in implementation:
13 * \li For embedded systems, it is vital to allocate the table in static memory. To do
14 * so, it is necessary to expose the \c HashNode and \c HashTable structures in the header file.
15 * Nevertheless, they should be used as opaque types (that is, the users should not
16 * access the structure fields directly).
18 * \li To statically allocate the structures, a macro is provided. With this macro, we
19 * are hiding completely \c HashNode to the user (who only manipulates \c HashTable). Without
20 * the macro, the user would have had to define both the \c HashNode and the \c HashTable
21 * manually, and pass both of them to \c ht_init() (which would have created the link between
22 * the two). Instead, the link is created with a literal initialization.
24 * \li The hash table is created as power of two to remove the divisions from the code.
25 * Of course, hash functions work at their best when the table size is a prime number.
26 * When calculating the modulus to convert the hash value to an index, the actual operation
27 * becomes a bitwise AND: this is fast, but truncates the value losing bits. Thus, the higher
28 * bits are first "merged" with the lower bits through some XOR operations (see the last line of
31 * \li To minimize the memory occupation, there is no flag to set for the empty node. An
32 * empty node is recognized by its data pointer set to NULL. It is then invalid to store
33 * NULL as data pointer in the table.
35 * \li The visiting interface through iterators is implemented with pass-by-value semantic.
36 * While this is overkill for medium-to-stupid compilers, it is the best designed from an
37 * user point of view. Moreover, being totally inlined (defined completely in the header),
38 * even a stupid compiler should be able to perform basic optimizations on it.
39 * We thought about using a pass-by-pointer semantic but it was much more awful to use, and
40 * the compiler is then forced to spill everything to the stack (unless it is *very* smart).
42 * \li The current implementation allows to either store the key internally (that is, copy
43 * the key within the hash table) or keep it external (that is, a hook is used to extract
44 * the key from the data in the node). The former is more memory-hungry of course, as it
45 * allocated static space to store the key copies. The overhead to keep both methods at
46 * the same time is minimal:
48 * <li>There is a run-time check in node_get_key which is execute per each node visited.</li>
49 * <li>Theoretically, there is no memory overhead. In practice, there were no
50 * flags in \c struct HashTable till now, so we had to add a first bit flag, but the
51 * overhead will disappear if a second flag is added for a different reason later.</li>
52 * <li>There is a little interface overhead, since we have two different versions of
53 * \c ht_insert(), one with the key passed as parameter and one without, but in
54 * the common case (external keys) both can be used.</li>
59 * \author Giovanni Bajo <rasky@develer.com>
64 * Revision 1.1 2004/07/14 14:08:16 rasky
65 * Implementazione di una tabella hash
67 * Revision 1.13 2004/07/12 16:33:36 rasky
68 * Aggiunta nuova ASSERT2, con stringa di descrizione del problema (disabilitabile tramite una macro di configurazione)
69 * Modificato il codice del firmware per utilizzare ASSERT2
70 * Modificato il progetto in modo da disabilitare le stringhe di errore nel target xROM-xRAM
72 * Revision 1.12 2004/06/14 15:15:24 rasky
73 * Cambiato key_data in un union invece di castare
74 * Aggiunto un ASSERT sull'indice calcolata nella key_internal_get_ptr
76 * Revision 1.11 2004/06/14 15:09:04 rasky
77 * Cambiati i messaggi di assert (è inutile citare il nome della funzione)
79 * Revision 1.10 2004/06/14 15:07:38 rasky
80 * Convertito il loop di calc_hash a interi (per farlo ottimizzare maggiormente)
82 * Revision 1.9 2004/06/14 14:59:40 rasky
83 * Rinominanta la macro di configurazione per rispettare il namespace, e aggiunta in un punto in cui mancava
85 * Revision 1.8 2004/06/12 15:18:05 rasky
86 * Nuova hashtable con chiave esterna o interna a scelta, come discusso
88 * Revision 1.7 2004/06/04 17:16:31 rasky
89 * Fixato un bug nel caso in cui la chiave ecceda la dimensione massima: il clamp non può essere fatto dentro la perform_lookup perché anche la ht_insert deve avere il valore clampato a disposizione per fare la memcpy
91 * Revision 1.6 2004/05/26 16:36:50 rasky
92 * Aggiunto il rationale per l'interfaccia degli iteratori
94 * Revision 1.5 2004/05/24 15:28:20 rasky
95 * Sistemata la documentazione, rimossa keycmp in favore della memcmp
98 #include "hashtable.h"
99 #include <drv/kdebug.h>
100 #include <compiler.h>
104 #define ROTATE_LEFT_16(num, count) (((num) << (count)) | ((num) >> (16-(count))))
105 #define ROTATE_RIGHT_16(num, count) ROTATE_LEFT_16(num, 16-(count))
107 typedef const void** HashNodePtr;
108 #define NODE_EMPTY(node) (!*(node))
109 #define HT_HAS_INTERNAL_KEY(ht) (CONFIG_HT_OPTIONAL_INTERNAL_KEY && ht->flags.key_internal)
111 /*! For hash tables with internal keys, compute the pointer to the internal key for a given \a node. */
112 INLINE uint8_t* key_internal_get_ptr(struct HashTable* ht, HashNodePtr node)
114 uint8_t* key_buf = ht->key_data.mem;
117 // Compute the index of the node and use it to move within the whole key buffer
118 index = node - &ht->mem[0];
119 ASSERT(index < (1 << ht->max_elts_log2));
120 key_buf += index * (INTERNAL_KEY_MAX_LENGTH + 1);
126 INLINE void node_get_key(struct HashTable* ht, HashNodePtr node, const void** key, uint8_t* key_length)
128 if (HT_HAS_INTERNAL_KEY(ht))
130 uint8_t* k = key_internal_get_ptr(ht, node);
132 // Key has its length stored in the first byte
137 *key = ht->key_data.hook(*node, key_length);
140 INLINE bool node_key_match(struct HashTable* ht, HashNodePtr node, const void* key, uint8_t key_length)
145 node_get_key(ht, node, &key2, &key2_length);
147 return (key_length == key2_length && memcmp(key, key2, key_length) == 0);
150 static uint16_t calc_hash(const void* _key, uint8_t key_length)
152 const char* key = (const char*)_key;
153 uint16_t hash = key_length;
155 int len = (int)key_length;
157 for (i = 0; i < len; ++i)
158 hash = ROTATE_LEFT_16(hash, 4) ^ key[i];
160 return hash ^ (hash >> 6) ^ (hash >> 13);
163 static HashNodePtr perform_lookup(struct HashTable* ht,
164 const void* key, uint8_t key_length)
166 uint16_t hash = calc_hash(key, key_length);
167 uint16_t mask = ((1 << ht->max_elts_log2) - 1);
168 uint16_t index = hash & mask;
169 uint16_t first_index = index;
173 // Fast-path optimization: we check immediately if the current node
174 // is the one we were looking for, so we save the computation of the
175 // increment step in the common case.
176 node = &ht->mem[index];
178 || node_key_match(ht, node, key, key_length))
181 // Increment while going through the hash table in case of collision.
182 // This implements the double-hash technique: we use the higher part
183 // of the hash as a step increment instead of just going to the next
184 // element, to minimize the collisions.
185 // Notice that the number must be odd to be sure that the whole table
186 // is traversed. Actually MCD(table_size, step) must be 1, but
187 // table_size is always a power of 2, so we just ensure that step is
188 // never a multiple of 2.
189 step = (ROTATE_RIGHT_16(hash, ht->max_elts_log2) & mask) | 1;
196 node = &ht->mem[index];
198 || node_key_match(ht, node, key, key_length))
201 // The check is done after the key compare. This actually causes
202 // one more compare in the case the table is full (since the first
203 // element was compared at the very start, and then at the end),
204 // but it makes faster the common path where we enter this loop
205 // for the first time, and index will not match first_index for
207 } while (index != first_index);
212 void ht_init(struct HashTable* ht)
214 memset(ht->mem, 0, sizeof(ht->mem[0]) * (1 << ht->max_elts_log2));
217 static bool insert(struct HashTable* ht, const void* key, uint8_t key_length, const void* data)
224 if (HT_HAS_INTERNAL_KEY(ht))
225 key_length = MIN(key_length, INTERNAL_KEY_MAX_LENGTH);
227 node = perform_lookup(ht, key, key_length);
231 if (HT_HAS_INTERNAL_KEY(ht))
233 uint8_t* k = key_internal_get_ptr(ht, node);
235 memcpy(k, key, key_length);
242 bool ht_insert_with_key(struct HashTable* ht, const void* key, uint8_t key_length, const void* data)
245 if (!HT_HAS_INTERNAL_KEY(ht))
247 // Construct a fake node and use it to match the key
248 HashNodePtr node = &data;
249 if (!node_key_match(ht, node, key, key_length))
251 ASSERT2(0, "parameter key is different from the external key");
257 return insert(ht, key, key_length, data);
260 bool ht_insert(struct HashTable* ht, const void* data)
266 if (HT_HAS_INTERNAL_KEY(ht))
268 ASSERT("parameter cannot be a hash table with internal keys - use ht_insert_with_key()"
274 key = ht->key_data.hook(data, &key_length);
276 return insert(ht, key, key_length, data);
279 const void* ht_find(struct HashTable* ht, const void* key, uint8_t key_length)
283 if (HT_HAS_INTERNAL_KEY(ht))
284 key_length = MIN(key_length, INTERNAL_KEY_MAX_LENGTH);
286 node = perform_lookup(ht, key, key_length);
288 if (!node || NODE_EMPTY(node))
301 static const void* test_get_key(const void* ptr, uint8_t* length)
308 #define NUM_ELEMENTS 256
309 DECLARE_HASHTABLE_STATIC(test1, 256, test_get_key);
310 DECLARE_HASHTABLE_INTERNALKEY_STATIC(test2, 256);
312 static char data[NUM_ELEMENTS][10];
313 static char keydomain[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
315 static bool single_test(void)
322 for (i=0;i<NUM_ELEMENTS;i++)
329 klen = (rand() % 8) + 1;
331 data[i][k] = keydomain[rand() % (sizeof(keydomain)-1)];
333 } while (ht_find_str(&test1, data[i]) != NULL);
335 ASSERT(ht_insert(&test1, data[i]));
336 ASSERT(ht_insert_str(&test2, data[i], data[i]));
339 for (i=0;i<NUM_ELEMENTS;i++)
341 char *found1, *found2;
343 found1 = (char*)ht_find_str(&test1, data[i]);
344 if (strcmp(found1, data[i]) != 0)
346 ASSERT(strcmp(found1,data[i]) == 0);
350 found2 = (char*)ht_find_str(&test2, data[i]);
351 if (strcmp(found2, data[i]) != 0)
353 ASSERT(strcmp(found2,data[i]) == 0);
361 static uint16_t rand_seeds[] = { 1, 42, 666, 0xDEAD, 0xBEEF, 0x1337, 0xB00B };
367 for (i=0;i<countof(rand_seeds);++i)
369 srand(rand_seeds[i]);
372 kprintf("ht_test failed\n");
377 kprintf("ht_test successful\n");