// utp_hash.cpp
  1. /*
  2. * Copyright (c) 2010-2013 BitTorrent, Inc.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a copy
  5. * of this software and associated documentation files (the "Software"), to deal
  6. * in the Software without restriction, including without limitation the rights
  7. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. * copies of the Software, and to permit persons to whom the Software is
  9. * furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20. * THE SOFTWARE.
  21. */
  22. #include "utp_hash.h"
  23. #include "utp_types.h"
// Sentinel link value (all bits set in utp_link_t). In this file it marks an
// empty bucket head, the end of a bucket chain, an empty free list, and an
// iterator that has no pending element in its current chain.
#define LIBUTP_HASH_UNUSED ((utp_link_t)-1)
  25. #ifdef STRICT_ALIGN
  26. inline uint32 Read32(const void *p)
  27. {
  28. uint32 tmp;
  29. memcpy(&tmp, p, sizeof tmp);
  30. return tmp;
  31. }
  32. #else
  33. inline uint32 Read32(const void *p) { return *(uint32*)p; }
  34. #endif
  35. // Get the amount of memory required for the hash parameters and the bucket set
  36. // Waste a space for an unused bucket in order to ensure the following managed memory have 32-bit aligned addresses
  37. // TODO: make this 64-bit clean
  38. #define BASE_SIZE(bc) (sizeof(utp_hash_t) + sizeof(utp_link_t) * ((bc) + 1))
  39. // Get a pointer to the base of the structure array managed by the hash table
  40. #define get_bep(h) ((byte*)(h)) + BASE_SIZE((h)->N)
  41. // Get the address of the information associated with a specific structure in the array,
  42. // given the address of the base of the structure.
  43. // This assumes a utp_link_t link member is at the end of the structure.
  44. // Given compilers filling out the memory to a 32-bit clean value, this may mean that
  45. // the location named in the structure may not be the location actually used by the hash table,
  46. // since the compiler may have padded the end of the structure with 2 bytes after the utp_link_t member.
  47. // TODO: this macro should not require that the variable pointing at the hash table be named 'hash'
  48. #define ptr_to_link(p) (utp_link_t *) (((byte *) (p)) + hash->E - sizeof(utp_link_t))
  49. // Calculate how much to allocate for a hash table with bucket count, total size, and structure count
  50. // TODO: make this 64-bit clean
  51. #define ALLOCATION_SIZE(bc, ts, sc) (BASE_SIZE((bc)) + (ts) * (sc))
  52. utp_hash_t *utp_hash_create(int N, int key_size, int total_size, int initial, utp_hash_compute_t hashfun, utp_hash_equal_t compfun)
  53. {
  54. // Must have odd number of hash buckets (prime number is best)
  55. assert(N % 2);
  56. // Ensure structures will be at aligned memory addresses
  57. // TODO: make this 64-bit clean
  58. assert(0 == (total_size % 4));
  59. int size = ALLOCATION_SIZE(N, total_size, initial);
  60. utp_hash_t *hash = (utp_hash_t *) malloc( size );
  61. memset( hash, 0, size );
  62. for (int i = 0; i < N + 1; ++i)
  63. hash->inits[i] = LIBUTP_HASH_UNUSED;
  64. hash->N = N;
  65. hash->K = key_size;
  66. hash->E = total_size;
  67. hash->hash_compute = hashfun;
  68. hash->hash_equal = compfun;
  69. hash->allocated = initial;
  70. hash->count = 0;
  71. hash->used = 0;
  72. hash->free = LIBUTP_HASH_UNUSED;
  73. return hash;
  74. }
  75. uint utp_hash_mem(const void *keyp, size_t keysize)
  76. {
  77. uint hash = 0;
  78. uint n = keysize;
  79. while (n >= 4) {
  80. hash ^= Read32(keyp);
  81. keyp = (byte*)keyp + sizeof(uint32);
  82. hash = (hash << 13) | (hash >> 19);
  83. n -= 4;
  84. }
  85. while (n != 0) {
  86. hash ^= *(byte*)keyp;
  87. keyp = (byte*)keyp + sizeof(byte);
  88. hash = (hash << 8) | (hash >> 24);
  89. n--;
  90. }
  91. return hash;
  92. }
  93. uint utp_hash_mkidx(utp_hash_t *hash, const void *keyp)
  94. {
  95. // Generate a key from the hash
  96. return hash->hash_compute(keyp, hash->K) % hash->N;
  97. }
  98. static inline bool compare(byte *a, byte *b,int n)
  99. {
  100. assert(n >= 4);
  101. if (Read32(a) != Read32(b)) return false;
  102. return memcmp(a+4, b+4, n-4) == 0;
  103. }
  104. #define COMPARE(h,k1,k2,ks) (((h)->hash_equal) ? (h)->hash_equal((void*)k1,(void*)k2,ks) : compare(k1,k2,ks))
  105. // Look-up a key in the hash table.
  106. // Returns NULL if not found
  107. void *utp_hash_lookup(utp_hash_t *hash, const void *key)
  108. {
  109. utp_link_t idx = utp_hash_mkidx(hash, key);
  110. // base pointer
  111. byte *bep = get_bep(hash);
  112. utp_link_t cur = hash->inits[idx];
  113. while (cur != LIBUTP_HASH_UNUSED) {
  114. byte *key2 = bep + (cur * hash->E);
  115. if (COMPARE(hash, (byte*)key, key2, hash->K))
  116. return key2;
  117. cur = *ptr_to_link(key2);
  118. }
  119. return NULL;
  120. }
  121. // Add a new element to the hash table.
  122. // Returns a pointer to the new element.
  123. // This assumes the element is not already present!
  124. void *utp_hash_add(utp_hash_t **hashp, const void *key)
  125. {
  126. //Allocate a new entry
  127. byte *elemp;
  128. utp_link_t elem;
  129. utp_hash_t *hash = *hashp;
  130. utp_link_t idx = utp_hash_mkidx(hash, key);
  131. if ((elem=hash->free) == LIBUTP_HASH_UNUSED) {
  132. utp_link_t all = hash->allocated;
  133. if (hash->used == all) {
  134. utp_hash_t *nhash;
  135. if (all <= (LIBUTP_HASH_UNUSED/2)) {
  136. all *= 2;
  137. } else if (all != LIBUTP_HASH_UNUSED) {
  138. all = LIBUTP_HASH_UNUSED;
  139. } else {
  140. // too many items! can't grow!
  141. assert(0);
  142. return NULL;
  143. }
  144. // otherwise need to allocate.
  145. nhash = (utp_hash_t*)realloc(hash, ALLOCATION_SIZE(hash->N, hash->E, all));
  146. if (!nhash) {
  147. // out of memory (or too big to allocate)
  148. assert(nhash);
  149. return NULL;
  150. }
  151. hash = *hashp = nhash;
  152. hash->allocated = all;
  153. }
  154. elem = hash->used++;
  155. elemp = get_bep(hash) + elem * hash->E;
  156. } else {
  157. elemp = get_bep(hash) + elem * hash->E;
  158. hash->free = *ptr_to_link(elemp);
  159. }
  160. *ptr_to_link(elemp) = hash->inits[idx];
  161. hash->inits[idx] = elem;
  162. hash->count++;
  163. // copy key into it
  164. memcpy(elemp, key, hash->K);
  165. return elemp;
  166. }
  167. // Delete an element from the utp_hash_t
  168. // Returns a pointer to the already deleted element.
  169. void *utp_hash_del(utp_hash_t *hash, const void *key)
  170. {
  171. utp_link_t idx = utp_hash_mkidx(hash, key);
  172. // base pointer
  173. byte *bep = get_bep(hash);
  174. utp_link_t *curp = &hash->inits[idx];
  175. utp_link_t cur;
  176. while ((cur=*curp) != LIBUTP_HASH_UNUSED) {
  177. byte *key2 = bep + (cur * hash->E);
  178. if (COMPARE(hash,(byte*)key,(byte*)key2, hash->K )) {
  179. // found an item that matched. unlink it
  180. *curp = *ptr_to_link(key2);
  181. // Insert into freelist
  182. *ptr_to_link(key2) = hash->free;
  183. hash->free = cur;
  184. hash->count--;
  185. return key2;
  186. }
  187. curp = ptr_to_link(key2);
  188. }
  189. return NULL;
  190. }
  191. void *utp_hash_iterate(utp_hash_t *hash, utp_hash_iterator_t *iter)
  192. {
  193. utp_link_t elem;
  194. if ((elem=iter->elem) == LIBUTP_HASH_UNUSED) {
  195. // Find a bucket with an element
  196. utp_link_t buck = iter->bucket + 1;
  197. for(;;) {
  198. if (buck >= hash->N)
  199. return NULL;
  200. if ((elem = hash->inits[buck]) != LIBUTP_HASH_UNUSED)
  201. break;
  202. buck++;
  203. }
  204. iter->bucket = buck;
  205. }
  206. byte *elemp = get_bep(hash) + (elem * hash->E);
  207. iter->elem = *ptr_to_link(elemp);
  208. return elemp;
  209. }
  210. void utp_hash_free_mem(utp_hash_t* hash)
  211. {
  212. free(hash);
  213. }