mware/hashtable.c

   1 /**
   2  * \file
   3  * <!--
   4  * This file is part of BeRTOS.
   5  *
   6  * Bertos is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  19  *
  20  * As a special exception, you may use this file as part of a free software
  21  * library without restriction.  Specifically, if other files instantiate
  22  * templates or use macros or inline functions from this file, or you compile
  23  * this file and link it with other files to produce an executable, this
  24  * file does not by itself cause the resulting executable to be covered by
  25  * the GNU General Public License.  This exception does not however
  26  * invalidate any other reasons why the executable file might be covered by
  27  * the GNU General Public License.
  28  *
  29  * Copyright 2004 Develer S.r.l. (http://www.develer.com/)
  30  * Copyright 2004 Giovanni Bajo
  31  * All Rights Reserved.
  32  * -->
  33  *
  34  * \brief Portable hash table implementation
  35  *
  36  * Some rationales of our choices in implementation:
  37  *
  38  * \li For embedded systems, it is vital to allocate the table in static memory. To do
  39  * so, it is necessary to expose the \c HashNode and \c HashTable structures in the header file.
  40  * Nevertheless, they should be used as opaque types (that is, the users should not
  41  * access the structure fields directly).
  42  *
  43  * \li To statically allocate the structures, a macro is provided. With this macro, we
  44  * are hiding completely \c HashNode to the user (who only manipulates \c HashTable). Without
  45  * the macro, the user would have had to define both the \c HashNode and the \c HashTable
  46  * manually, and pass both of them to \c ht_init() (which would have created the link between
  47  * the two). Instead, the link is created with a literal initialization.
  48  *
  49  * \li The hash table is created as power of two to remove the divisions from the code.
  50  * Of course, hash functions work at their best when the table size is a prime number.
  51  * When calculating the modulus to convert the hash value to an index, the actual operation
  52  * becomes a bitwise AND: this is fast, but truncates the value losing bits. Thus, the higher
  53  * bits are first "merged" with the lower bits through some XOR operations (see the last line of
  54  * \c calc_hash()).
  55  *
  56  * \li To minimize the memory occupation, there is no flag to set for the empty node. An
  57  * empty node is recognized by its data pointer set to NULL. It is then invalid to store
  58  * NULL as data pointer in the table.
  59  *
  60  * \li The visiting interface through iterators is implemented with pass-by-value semantic.
  61  * While this is overkill for medium-to-stupid compilers, it is the best designed from an
  62  * user point of view. Moreover, being totally inlined (defined completely in the header),
  63  * even a stupid compiler should be able to perform basic optimizations on it.
  64  * We thought about using a pass-by-pointer semantic but it was much more awful to use, and
  65  * the compiler is then forced to spill everything to the stack (unless it is *very* smart).
  66  *
  67  * \li The current implementation allows to either store the key internally (that is, copy
  68  * the key within the hash table) or keep it external (that is, a hook is used to extract
  69  * the key from the data in the node). The former is more memory-hungry of course, as it
  70  * allocated static space to store the key copies. The overhead to keep both methods at
  71  * the same time is minimal:
  72  *    <ul>
  73  *    <li>There is a run-time check in node_get_key which is execute per each node visited.</li>
  74  *    <li>Theoretically, there is no memory overhead. In practice, there were no
  75  *        flags in \c struct HashTable till now, so we had to add a first bit flag, but the
  76  *        overhead will disappear if a second flag is added for a different reason later.</li>
  77  *    <li>There is a little interface overhead, since we have two different versions of
  78  *        \c ht_insert(), one with the key passed as parameter and one without, but in
  79  *        the common case (external keys) both can be used.</li>
  80  *    </ul>
  81  *
  82  * \version $Id$
  83  *
  84  * \author Giovanni Bajo <rasky@develer.com>
  85  */
  86
  87 /*#*
  88  *#* $Log$
  89  *#* Revision 1.8  2007/02/06 16:05:01  asterix
  90  *#* Replaced ROTATE_* with ROT* defined in macros.h
  91  *#*
  92  *#* Revision 1.7  2006/07/19 12:56:27  bernie
  93  *#* Convert to new Doxygen style.
  94  *#*
  95  *#* Revision 1.6  2006/06/01 12:27:39  marco
  96  *#* Added utilities for protocols
  97  *#*
  98  *#*/
  99
 100 #include "hashtable.h"
 101 #include <cfg/debug.h>
 102 #include <cfg/compiler.h>
 103 #include <cfg/macros.h> //ROTL(), ROTR();
 104
 105 #include <string.h>
 106
 107
 108
 109 typedef const void** HashNodePtr;
 110 #define NODE_EMPTY(node)               (!*(node))
 111 #define HT_HAS_INTERNAL_KEY(ht)        (CONFIG_HT_OPTIONAL_INTERNAL_KEY && ht->flags.key_internal)
 112
 113 /** For hash tables with internal keys, compute the pointer to the internal key for a given \a node. */
 114 INLINE uint8_t *key_internal_get_ptr(struct HashTable *ht, HashNodePtr node)
 115 {
 116         uint8_t* key_buf = ht->key_data.mem;
 117         size_t index;
 118
 119         // Compute the index of the node and use it to move within the whole key buffer
 120         index = node - &ht->mem[0];
 121         ASSERT(index < (size_t)(1 << ht->max_elts_log2));
 122         key_buf += index * (INTERNAL_KEY_MAX_LENGTH + 1);
 123
 124         return key_buf;
 125 }
 126
 127
 128 INLINE void node_get_key(struct HashTable* ht, HashNodePtr node, const void** key, uint8_t* key_length)
 129 {
 130         if (HT_HAS_INTERNAL_KEY(ht))
 131         {
 132                 uint8_t* k = key_internal_get_ptr(ht, node);
 133
 134                 // Key has its length stored in the first byte
 135                 *key_length = *k++;
 136                 *key = k;
 137         }
 138         else
 139                 *key = ht->key_data.hook(*node, key_length);
 140 }
 141
 142
 143 INLINE bool node_key_match(struct HashTable* ht, HashNodePtr node, const void* key, uint8_t key_length)
 144 {
 145         const void* key2;
 146         uint8_t key2_length;
 147
 148         node_get_key(ht, node, &key2, &key2_length);
 149
 150         return (key_length == key2_length && memcmp(key, key2, key_length) == 0);
 151 }
 152
 153
 154 static uint16_t calc_hash(const void* _key, uint8_t key_length)
 155 {
 156         const char* key = (const char*)_key;
 157         uint16_t hash = key_length;
 158         int i;
 159         int len = (int)key_length;
 160
 161         for (i = 0; i < len; ++i)
 162                 hash = ROTL(hash, 4) ^ key[i];
 163
 164         return hash ^ (hash >> 6) ^ (hash >> 13);
 165 }
 166
 167
 168 static HashNodePtr perform_lookup(struct HashTable* ht,
 169                                   const void* key, uint8_t key_length)
 170 {
 171         uint16_t hash = calc_hash(key, key_length);
 172         uint16_t mask = ((1 << ht->max_elts_log2) - 1);
 173         uint16_t index = hash & mask;
 174         uint16_t first_index = index;
 175         uint16_t step;
 176         HashNodePtr node;
 177
 178         // Fast-path optimization: we check immediately if the current node
 179         //  is the one we were looking for, so we save the computation of the
 180         //  increment step in the common case.
 181         node = &ht->mem[index];
 182         if (NODE_EMPTY(node)
 183                 || node_key_match(ht, node, key, key_length))
 184                 return node;
 185
 186         // Increment while going through the hash table in case of collision.
 187         //  This implements the double-hash technique: we use the higher part
 188         //  of the hash as a step increment instead of just going to the next
 189         //  element, to minimize the collisions.
 190         // Notice that the number must be odd to be sure that the whole table
 191         //  is traversed. Actually MCD(table_size, step) must be 1, but
 192         //  table_size is always a power of 2, so we just ensure that step is
 193         //  never a multiple of 2.
 194         step = (ROTR(hash, ht->max_elts_log2) & mask) | 1;
 195
 196         do
 197         {
 198                 index += step;
 199                 index &= mask;
 200
 201                 node = &ht->mem[index];
 202                 if (NODE_EMPTY(node)
 203                         || node_key_match(ht, node, key, key_length))
 204                         return node;
 205
 206                 // The check is done after the key compare. This actually causes
 207                 //  one more compare in the case the table is full (since the first
 208                 //  element was compared at the very start, and then at the end),
 209                 //  but it makes faster the common path where we enter this loop
 210                 //  for the first time, and index will not match first_index for
 211                 //  sure.
 212         } while (index != first_index);
 213
 214         return NULL;
 215 }
 216
 217
 218 void ht_init(struct HashTable* ht)
 219 {
 220         memset(ht->mem, 0, sizeof(ht->mem[0]) * (1 << ht->max_elts_log2));
 221 }
 222
 223
 224 static bool insert(struct HashTable* ht, const void* key, uint8_t key_length, const void* data)
 225 {
 226         HashNodePtr node;
 227
 228         if (!data)
 229                 return false;
 230
 231         if (HT_HAS_INTERNAL_KEY(ht))
 232                 key_length = MIN(key_length, (uint8_t)INTERNAL_KEY_MAX_LENGTH);
 233
 234         node = perform_lookup(ht, key, key_length);
 235         if (!node)
 236                 return false;
 237
 238         if (HT_HAS_INTERNAL_KEY(ht))
 239         {
 240                 uint8_t* k = key_internal_get_ptr(ht, node);
 241                 *k++ = key_length;
 242                 memcpy(k, key, key_length);
 243         }
 244
 245         *node = data;
 246         return true;
 247 }
 248
 249
 250 bool ht_insert_with_key(struct HashTable* ht, const void* key, uint8_t key_length, const void* data)
 251 {
 252 #ifdef _DEBUG
 253         if (!HT_HAS_INTERNAL_KEY(ht))
 254         {
 255                 // Construct a fake node and use it to match the key
 256                 HashNodePtr node = &data;
 257                 if (!node_key_match(ht, node, key, key_length))
 258                 {
 259                         ASSERT2(0, "parameter key is different from the external key");
 260                         return false;
 261                 }
 262         }
 263 #endif
 264
 265         return insert(ht, key, key_length, data);
 266 }
 267
 268
 269 bool ht_insert(struct HashTable* ht, const void* data)
 270 {
 271         const void* key;
 272         uint8_t key_length;
 273
 274 #ifdef _DEBUG
 275         if (HT_HAS_INTERNAL_KEY(ht))
 276         {
 277                 ASSERT("parameter cannot be a hash table with internal keys - use ht_insert_with_key()"
 278                        && 0);
 279                 return false;
 280         }
 281 #endif
 282
 283         key = ht->key_data.hook(data, &key_length);
 284
 285         return insert(ht, key, key_length, data);
 286 }
 287
 288
 289 const void* ht_find(struct HashTable* ht, const void* key, uint8_t key_length)
 290 {
 291         HashNodePtr node;
 292
 293         if (HT_HAS_INTERNAL_KEY(ht))
 294                 key_length = MIN(key_length, (uint8_t)INTERNAL_KEY_MAX_LENGTH);
 295
 296         node = perform_lookup(ht, key, key_length);
 297
 298         if (!node || NODE_EMPTY(node))
 299                 return NULL;
 300
 301         return *node;
 302 }
 303
 304
 305 #if 0
 306
 307 #include <stdlib.h>
 308
 309 bool ht_test(void);
 310
 311 static const void* test_get_key(const void* ptr, uint8_t* length)
 312 {
 313         const char* s = ptr;
 314         *length = strlen(s);
 315         return s;
 316 }
 317
 318 #define NUM_ELEMENTS   256
 319 DECLARE_HASHTABLE_STATIC(test1, 256, test_get_key);
 320 DECLARE_HASHTABLE_INTERNALKEY_STATIC(test2, 256);
 321
 322 static char data[NUM_ELEMENTS][10];
 323 static char keydomain[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
 324
 325 static bool single_test(void)
 326 {
 327         int i;
 328
 329         ht_init(&test1);
 330         ht_init(&test2);
 331
 332         for (i=0;i<NUM_ELEMENTS;i++)
 333         {
 334                 int k;
 335                 int klen;
 336
 337                 do
 338                 {
 339                         klen = (rand() % 8) + 1;
 340                         for (k=0;k<klen;k++)
 341                                 data[i][k] = keydomain[rand() % (sizeof(keydomain)-1)];
 342                         data[i][k]=0;
 343                 } while (ht_find_str(&test1, data[i]) != NULL);
 344
 345                 ASSERT(ht_insert(&test1, data[i]));
 346                 ASSERT(ht_insert_str(&test2, data[i], data[i]));
 347         }
 348
 349         for (i=0;i<NUM_ELEMENTS;i++)
 350         {
 351                 char *found1, *found2;
 352
 353                 found1 = (char*)ht_find_str(&test1, data[i]);
 354                 if (strcmp(found1, data[i]) != 0)
 355                 {
 356                         ASSERT(strcmp(found1,data[i]) == 0);
 357                         return false;
 358                 }
 359
 360                 found2 = (char*)ht_find_str(&test2, data[i]);
 361                 if (strcmp(found2, data[i]) != 0)
 362                 {
 363                         ASSERT(strcmp(found2,data[i]) == 0);
 364                         return false;
 365                 }
 366         }
 367
 368         return true;
 369 }
 370
 371 static uint16_t rand_seeds[] = { 1, 42, 666, 0xDEAD, 0xBEEF, 0x1337, 0xB00B };
 372
 373 bool ht_test(void)
 374 {
 375         int i;
 376
 377         for (i=0;i<countof(rand_seeds);++i)
 378         {
 379                 srand(rand_seeds[i]);
 380                 if (!single_test())
 381                 {
 382                         kprintf("ht_test failed\n");
 383                         return false;
 384                 }
 385         }
 386
 387         kprintf("ht_test successful\n");
 388         return true;
 389 }
 390
 391 #endif