bertos/struct/hashtable.c

   1 /**
   2  * \file
   3  * <!--
   4  * This file is part of BeRTOS.
   5  *
   6  * Bertos is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  19  *
  20  * As a special exception, you may use this file as part of a free software
  21  * library without restriction.  Specifically, if other files instantiate
  22  * templates or use macros or inline functions from this file, or you compile
  23  * this file and link it with other files to produce an executable, this
  24  * file does not by itself cause the resulting executable to be covered by
  25  * the GNU General Public License.  This exception does not however
  26  * invalidate any other reasons why the executable file might be covered by
  27  * the GNU General Public License.
  28  *
  29  * Copyright 2004, 2008 Develer S.r.l. (http://www.develer.com/)
  30  * Copyright 2004 Giovanni Bajo
  31  * -->
  32  *
  33  * \brief Portable hash table implementation
  34  *
  35  * Some rationales of our choices in implementation:
  36  *
  37  * \li For embedded systems, it is vital to allocate the table in static memory. To do
  38  * so, it is necessary to expose the \c HashNode and \c HashTable structures in the header file.
  39  * Nevertheless, they should be used as opaque types (that is, the users should not
  40  * access the structure fields directly).
  41  *
  42  * \li To statically allocate the structures, a macro is provided. With this macro, we
  43  * are hiding completely \c HashNode to the user (who only manipulates \c HashTable). Without
  44  * the macro, the user would have had to define both the \c HashNode and the \c HashTable
  45  * manually, and pass both of them to \c ht_init() (which would have created the link between
  46  * the two). Instead, the link is created with a literal initialization.
  47  *
  48  * \li The hash table is created as power of two to remove the divisions from the code.
  49  * Of course, hash functions work at their best when the table size is a prime number.
  50  * When calculating the modulus to convert the hash value to an index, the actual operation
  51  * becomes a bitwise AND: this is fast, but truncates the value losing bits. Thus, the higher
  52  * bits are first "merged" with the lower bits through some XOR operations (see the last line of
  53  * \c calc_hash()).
  54  *
  55  * \li To minimize the memory occupation, there is no flag to set for the empty node. An
  56  * empty node is recognized by its data pointer set to NULL. It is then invalid to store
  57  * NULL as data pointer in the table.
  58  *
  59  * \li The visiting interface through iterators is implemented with pass-by-value semantic.
  60  * While this is overkill for medium-to-stupid compilers, it is the best designed from an
  61  * user point of view. Moreover, being totally inlined (defined completely in the header),
  62  * even a stupid compiler should be able to perform basic optimizations on it.
  63  * We thought about using a pass-by-pointer semantic but it was much more awful to use, and
  64  * the compiler is then forced to spill everything to the stack (unless it is *very* smart).
  65  *
  66  * \li The current implementation allows to either store the key internally (that is, copy
  67  * the key within the hash table) or keep it external (that is, a hook is used to extract
  68  * the key from the data in the node). The former is more memory-hungry of course, as it
  69  * allocated static space to store the key copies. The overhead to keep both methods at
  70  * the same time is minimal:
  71  *    <ul>
  72  *    <li>There is a run-time check in node_get_key which is execute per each node visited.</li>
  73  *    <li>Theoretically, there is no memory overhead. In practice, there were no
  74  *        flags in \c struct HashTable till now, so we had to add a first bit flag, but the
  75  *        overhead will disappear if a second flag is added for a different reason later.</li>
  76  *    <li>There is a little interface overhead, since we have two different versions of
  77  *        \c ht_insert(), one with the key passed as parameter and one without, but in
  78  *        the common case (external keys) both can be used.</li>
  79  *    </ul>
  80  *
  81  * \version $Id$
  82  * \author Giovanni Bajo <rasky@develer.com>
  83  */
  84
  85 #include "hashtable.h"
  86 #include <cfg/debug.h>
  87 #include <cfg/compiler.h>
  88 #include <cfg/macros.h> //ROTL(), ROTR();
  89
  90 #include <string.h>
  91
  92
  93 typedef const void** HashNodePtr;
  94 #define NODE_EMPTY(node)               (!*(node))
  95 #define HT_HAS_INTERNAL_KEY(ht)        (CONFIG_HT_OPTIONAL_INTERNAL_KEY && ht->flags.key_internal)
  96
  97 /** For hash tables with internal keys, compute the pointer to the internal key for a given \a node. */
  98 INLINE uint8_t *key_internal_get_ptr(struct HashTable *ht, HashNodePtr node)
  99 {
 100         uint8_t* key_buf = ht->key_data.mem;
 101         size_t index;
 102
 103         // Compute the index of the node and use it to move within the whole key buffer
 104         index = node - &ht->mem[0];
 105         ASSERT(index < (size_t)(1 << ht->max_elts_log2));
 106         key_buf += index * (INTERNAL_KEY_MAX_LENGTH + 1);
 107
 108         return key_buf;
 109 }
 110
 111
 112 INLINE void node_get_key(struct HashTable* ht, HashNodePtr node, const void** key, uint8_t* key_length)
 113 {
 114         if (HT_HAS_INTERNAL_KEY(ht))
 115         {
 116                 uint8_t* k = key_internal_get_ptr(ht, node);
 117
 118                 // Key has its length stored in the first byte
 119                 *key_length = *k++;
 120                 *key = k;
 121         }
 122         else
 123                 *key = ht->key_data.hook(*node, key_length);
 124 }
 125
 126
 127 INLINE bool node_key_match(struct HashTable* ht, HashNodePtr node, const void* key, uint8_t key_length)
 128 {
 129         const void* key2;
 130         uint8_t key2_length;
 131
 132         node_get_key(ht, node, &key2, &key2_length);
 133
 134         return (key_length == key2_length && memcmp(key, key2, key_length) == 0);
 135 }
 136
 137
 138 static uint16_t calc_hash(const void* _key, uint8_t key_length)
 139 {
 140         const char* key = (const char*)_key;
 141         uint16_t hash = key_length;
 142         int i;
 143         int len = (int)key_length;
 144
 145         for (i = 0; i < len; ++i)
 146                 hash = ROTL(hash, 4) ^ key[i];
 147
 148         return hash ^ (hash >> 6) ^ (hash >> 13);
 149 }
 150
 151
 152 static HashNodePtr perform_lookup(struct HashTable* ht,
 153                                   const void* key, uint8_t key_length)
 154 {
 155         uint16_t hash = calc_hash(key, key_length);
 156         uint16_t mask = ((1 << ht->max_elts_log2) - 1);
 157         uint16_t index = hash & mask;
 158         uint16_t first_index = index;
 159         uint16_t step;
 160         HashNodePtr node;
 161
 162         // Fast-path optimization: we check immediately if the current node
 163         //  is the one we were looking for, so we save the computation of the
 164         //  increment step in the common case.
 165         node = &ht->mem[index];
 166         if (NODE_EMPTY(node)
 167                 || node_key_match(ht, node, key, key_length))
 168                 return node;
 169
 170         // Increment while going through the hash table in case of collision.
 171         //  This implements the double-hash technique: we use the higher part
 172         //  of the hash as a step increment instead of just going to the next
 173         //  element, to minimize the collisions.
 174         // Notice that the number must be odd to be sure that the whole table
 175         //  is traversed. Actually MCD(table_size, step) must be 1, but
 176         //  table_size is always a power of 2, so we just ensure that step is
 177         //  never a multiple of 2.
 178         step = (ROTR(hash, ht->max_elts_log2) & mask) | 1;
 179
 180         do
 181         {
 182                 index += step;
 183                 index &= mask;
 184
 185                 node = &ht->mem[index];
 186                 if (NODE_EMPTY(node)
 187                         || node_key_match(ht, node, key, key_length))
 188                         return node;
 189
 190                 // The check is done after the key compare. This actually causes
 191                 //  one more compare in the case the table is full (since the first
 192                 //  element was compared at the very start, and then at the end),
 193                 //  but it makes faster the common path where we enter this loop
 194                 //  for the first time, and index will not match first_index for
 195                 //  sure.
 196         } while (index != first_index);
 197
 198         return NULL;
 199 }
 200
 201
 202 void ht_init(struct HashTable* ht)
 203 {
 204         memset(ht->mem, 0, sizeof(ht->mem[0]) * (1 << ht->max_elts_log2));
 205 }
 206
 207
 208 static bool insert(struct HashTable* ht, const void* key, uint8_t key_length, const void* data)
 209 {
 210         HashNodePtr node;
 211
 212         if (!data)
 213                 return false;
 214
 215         if (HT_HAS_INTERNAL_KEY(ht))
 216                 key_length = MIN(key_length, (uint8_t)INTERNAL_KEY_MAX_LENGTH);
 217
 218         node = perform_lookup(ht, key, key_length);
 219         if (!node)
 220                 return false;
 221
 222         if (HT_HAS_INTERNAL_KEY(ht))
 223         {
 224                 uint8_t* k = key_internal_get_ptr(ht, node);
 225                 *k++ = key_length;
 226                 memcpy(k, key, key_length);
 227         }
 228
 229         *node = data;
 230         return true;
 231 }
 232
 233
 234 bool ht_insert_with_key(struct HashTable* ht, const void* key, uint8_t key_length, const void* data)
 235 {
 236 #ifdef _DEBUG
 237         if (!HT_HAS_INTERNAL_KEY(ht))
 238         {
 239                 // Construct a fake node and use it to match the key
 240                 HashNodePtr node = &data;
 241                 if (!node_key_match(ht, node, key, key_length))
 242                 {
 243                         ASSERT2(0, "parameter key is different from the external key");
 244                         return false;
 245                 }
 246         }
 247 #endif
 248
 249         return insert(ht, key, key_length, data);
 250 }
 251
 252
 253 bool ht_insert(struct HashTable* ht, const void* data)
 254 {
 255         const void* key;
 256         uint8_t key_length;
 257
 258 #ifdef _DEBUG
 259         if (HT_HAS_INTERNAL_KEY(ht))
 260         {
 261                 ASSERT("parameter cannot be a hash table with internal keys - use ht_insert_with_key()"
 262                        && 0);
 263                 return false;
 264         }
 265 #endif
 266
 267         key = ht->key_data.hook(data, &key_length);
 268
 269         return insert(ht, key, key_length, data);
 270 }
 271
 272
 273 const void* ht_find(struct HashTable* ht, const void* key, uint8_t key_length)
 274 {
 275         HashNodePtr node;
 276
 277         if (HT_HAS_INTERNAL_KEY(ht))
 278                 key_length = MIN(key_length, (uint8_t)INTERNAL_KEY_MAX_LENGTH);
 279
 280         node = perform_lookup(ht, key, key_length);
 281
 282         if (!node || NODE_EMPTY(node))
 283                 return NULL;
 284
 285         return *node;
 286 }
 287
 288
 289 #if 0
 290
 291 #include <stdlib.h>
 292
 293 bool ht_test(void);
 294
 295 static const void* test_get_key(const void* ptr, uint8_t* length)
 296 {
 297         const char* s = ptr;
 298         *length = strlen(s);
 299         return s;
 300 }
 301
 302 #define NUM_ELEMENTS   256
 303 DECLARE_HASHTABLE_STATIC(test1, 256, test_get_key);
 304 DECLARE_HASHTABLE_INTERNALKEY_STATIC(test2, 256);
 305
 306 static char data[NUM_ELEMENTS][10];
 307 static char keydomain[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
 308
 309 static bool single_test(void)
 310 {
 311         int i;
 312
 313         ht_init(&test1);
 314         ht_init(&test2);
 315
 316         for (i=0;i<NUM_ELEMENTS;i++)
 317         {
 318                 int k;
 319                 int klen;
 320
 321                 do
 322                 {
 323                         klen = (rand() % 8) + 1;
 324                         for (k=0;k<klen;k++)
 325                                 data[i][k] = keydomain[rand() % (sizeof(keydomain)-1)];
 326                         data[i][k]=0;
 327                 } while (ht_find_str(&test1, data[i]) != NULL);
 328
 329                 ASSERT(ht_insert(&test1, data[i]));
 330                 ASSERT(ht_insert_str(&test2, data[i], data[i]));
 331         }
 332
 333         for (i=0;i<NUM_ELEMENTS;i++)
 334         {
 335                 char *found1, *found2;
 336
 337                 found1 = (char*)ht_find_str(&test1, data[i]);
 338                 if (strcmp(found1, data[i]) != 0)
 339                 {
 340                         ASSERT(strcmp(found1,data[i]) == 0);
 341                         return false;
 342                 }
 343
 344                 found2 = (char*)ht_find_str(&test2, data[i]);
 345                 if (strcmp(found2, data[i]) != 0)
 346                 {
 347                         ASSERT(strcmp(found2,data[i]) == 0);
 348                         return false;
 349                 }
 350         }
 351
 352         return true;
 353 }
 354
 355 static uint16_t rand_seeds[] = { 1, 42, 666, 0xDEAD, 0xBEEF, 0x1337, 0xB00B };
 356
 357 bool ht_test(void)
 358 {
 359         int i;
 360
 361         for (i=0;i<countof(rand_seeds);++i)
 362         {
 363                 srand(rand_seeds[i]);
 364                 if (!single_test())
 365                 {
 366                         kprintf("ht_test failed\n");
 367                         return false;
 368                 }
 369         }
 370
 371         kprintf("ht_test successful\n");
 372         return true;
 373 }
 374
 375 #endif