hash.c 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. // Copyright (C) 2004-2021 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include <string.h>
  24. #include <assert.h>
  25. /*
  26. Simple hashtable with open addressing linear probe.
  27. Unlike text book examples, removing entries works
  28. correctly in this implementation, so it won't start
  29. exhibiting bad behaviour if entries are inserted
  30. and removed frequently.
  31. */
  32. typedef struct
  33. {
  34. unsigned char key[FZ_HASH_TABLE_KEY_LENGTH];
  35. void *val;
  36. } fz_hash_entry;
  37. struct fz_hash_table
  38. {
  39. int keylen;
  40. int size;
  41. int load;
  42. int lock; /* -1 or the lock used to protect this hash table */
  43. fz_hash_table_drop_fn *drop_val;
  44. fz_hash_entry *ents;
  45. };
  46. static unsigned hash(const unsigned char *s, int len)
  47. {
  48. unsigned val = 0;
  49. int i;
  50. for (i = 0; i < len; i++)
  51. {
  52. val += s[i];
  53. val += (val << 10);
  54. val ^= (val >> 6);
  55. }
  56. val += (val << 3);
  57. val ^= (val >> 11);
  58. val += (val << 15);
  59. return val;
  60. }
  61. fz_hash_table *
  62. fz_new_hash_table(fz_context *ctx, int initialsize, int keylen, int lock, fz_hash_table_drop_fn *drop_val)
  63. {
  64. fz_hash_table *table;
  65. if (keylen > FZ_HASH_TABLE_KEY_LENGTH)
  66. fz_throw(ctx, FZ_ERROR_ARGUMENT, "hash table key length too large");
  67. table = fz_malloc_struct(ctx, fz_hash_table);
  68. table->keylen = keylen;
  69. table->size = initialsize;
  70. table->load = 0;
  71. table->lock = lock;
  72. table->drop_val = drop_val;
  73. fz_try(ctx)
  74. {
  75. table->ents = Memento_label(fz_malloc_array(ctx, table->size, fz_hash_entry), "hash_entries");
  76. memset(table->ents, 0, sizeof(fz_hash_entry) * table->size);
  77. }
  78. fz_catch(ctx)
  79. {
  80. fz_free(ctx, table);
  81. fz_rethrow(ctx);
  82. }
  83. return table;
  84. }
  85. void
  86. fz_drop_hash_table(fz_context *ctx, fz_hash_table *table)
  87. {
  88. if (!table)
  89. return;
  90. if (table->drop_val)
  91. {
  92. int i, n = table->size;
  93. for (i = 0; i < n; ++i)
  94. {
  95. void *v = table->ents[i].val;
  96. if (v)
  97. table->drop_val(ctx, v);
  98. }
  99. }
  100. fz_free(ctx, table->ents);
  101. fz_free(ctx, table);
  102. }
  103. static void *
  104. do_hash_insert(fz_context *ctx, fz_hash_table *table, const void *key, void *val)
  105. {
  106. fz_hash_entry *ents;
  107. unsigned size;
  108. unsigned pos;
  109. ents = table->ents;
  110. size = table->size;
  111. pos = hash(key, table->keylen) % size;
  112. if (table->lock >= 0)
  113. fz_assert_lock_held(ctx, table->lock);
  114. while (1)
  115. {
  116. if (!ents[pos].val)
  117. {
  118. memcpy(ents[pos].key, key, table->keylen);
  119. ents[pos].val = val;
  120. table->load ++;
  121. return NULL;
  122. }
  123. if (memcmp(key, ents[pos].key, table->keylen) == 0)
  124. {
  125. /* This is legal, but should rarely happen. */
  126. return ents[pos].val;
  127. }
  128. pos = (pos + 1) % size;
  129. }
  130. }
  131. /* Entered with the lock taken, held throughout and at exit, UNLESS the lock
  132. * is the alloc lock in which case it may be momentarily dropped. */
  133. static void
  134. fz_resize_hash(fz_context *ctx, fz_hash_table *table, int newsize)
  135. {
  136. fz_hash_entry *oldents = table->ents;
  137. fz_hash_entry *newents;
  138. int oldsize = table->size;
  139. int oldload = table->load;
  140. int i;
  141. if (newsize < oldload * 8 / 10)
  142. {
  143. fz_warn(ctx, "assert: resize hash too small");
  144. return;
  145. }
  146. if (table->lock == FZ_LOCK_ALLOC)
  147. fz_unlock(ctx, table->lock);
  148. newents = Memento_label(fz_malloc_no_throw(ctx, newsize * sizeof (fz_hash_entry)), "hash_entries");
  149. if (table->lock == FZ_LOCK_ALLOC)
  150. fz_lock(ctx, table->lock);
  151. if (table->lock >= 0)
  152. {
  153. if (table->size >= newsize)
  154. {
  155. /* Someone else fixed it before we could lock! */
  156. if (table->lock == FZ_LOCK_ALLOC)
  157. fz_unlock(ctx, table->lock);
  158. fz_free(ctx, newents);
  159. if (table->lock == FZ_LOCK_ALLOC)
  160. fz_lock(ctx, table->lock);
  161. return;
  162. }
  163. }
  164. if (newents == NULL)
  165. fz_throw(ctx, FZ_ERROR_SYSTEM, "hash table resize failed; out of memory (%d entries)", newsize);
  166. table->ents = newents;
  167. memset(table->ents, 0, sizeof(fz_hash_entry) * newsize);
  168. table->size = newsize;
  169. table->load = 0;
  170. for (i = 0; i < oldsize; i++)
  171. {
  172. if (oldents[i].val)
  173. {
  174. do_hash_insert(ctx, table, oldents[i].key, oldents[i].val);
  175. }
  176. }
  177. if (table->lock == FZ_LOCK_ALLOC)
  178. fz_unlock(ctx, table->lock);
  179. fz_free(ctx, oldents);
  180. if (table->lock == FZ_LOCK_ALLOC)
  181. fz_lock(ctx, table->lock);
  182. }
  183. void *
  184. fz_hash_find(fz_context *ctx, fz_hash_table *table, const void *key)
  185. {
  186. fz_hash_entry *ents = table->ents;
  187. unsigned size = table->size;
  188. unsigned pos = hash(key, table->keylen) % size;
  189. if (table->lock >= 0)
  190. fz_assert_lock_held(ctx, table->lock);
  191. while (1)
  192. {
  193. if (!ents[pos].val)
  194. return NULL;
  195. if (memcmp(key, ents[pos].key, table->keylen) == 0)
  196. return ents[pos].val;
  197. pos = (pos + 1) % size;
  198. }
  199. }
  200. void *
  201. fz_hash_insert(fz_context *ctx, fz_hash_table *table, const void *key, void *val)
  202. {
  203. if (table->load > table->size * 8 / 10)
  204. fz_resize_hash(ctx, table, table->size * 2);
  205. return do_hash_insert(ctx, table, key, val);
  206. }
  207. static void
  208. do_removal(fz_context *ctx, fz_hash_table *table, unsigned hole)
  209. {
  210. fz_hash_entry *ents = table->ents;
  211. unsigned size = table->size;
  212. unsigned look, code;
  213. if (table->lock >= 0)
  214. fz_assert_lock_held(ctx, table->lock);
  215. ents[hole].val = NULL;
  216. look = hole + 1;
  217. if (look == size)
  218. look = 0;
  219. while (ents[look].val)
  220. {
  221. code = hash(ents[look].key, table->keylen) % size;
  222. if ((code <= hole && hole < look) ||
  223. (look < code && code <= hole) ||
  224. (hole < look && look < code))
  225. {
  226. ents[hole] = ents[look];
  227. ents[look].val = NULL;
  228. hole = look;
  229. }
  230. look++;
  231. if (look == size)
  232. look = 0;
  233. }
  234. table->load --;
  235. }
  236. void
  237. fz_hash_remove(fz_context *ctx, fz_hash_table *table, const void *key)
  238. {
  239. fz_hash_entry *ents = table->ents;
  240. unsigned size = table->size;
  241. unsigned pos = hash(key, table->keylen) % size;
  242. if (table->lock >= 0)
  243. fz_assert_lock_held(ctx, table->lock);
  244. while (1)
  245. {
  246. if (!ents[pos].val)
  247. {
  248. fz_warn(ctx, "assert: remove non-existent hash entry");
  249. return;
  250. }
  251. if (memcmp(key, ents[pos].key, table->keylen) == 0)
  252. {
  253. do_removal(ctx, table, pos);
  254. return;
  255. }
  256. pos++;
  257. if (pos == size)
  258. pos = 0;
  259. }
  260. }
  261. void
  262. fz_hash_for_each(fz_context *ctx, fz_hash_table *table, void *state, fz_hash_table_for_each_fn *callback)
  263. {
  264. int i;
  265. for (i = 0; i < table->size; ++i)
  266. if (table->ents[i].val)
  267. callback(ctx, state, table->ents[i].key, table->keylen, table->ents[i].val);
  268. }
  269. void
  270. fz_hash_filter(fz_context *ctx, fz_hash_table *table, void *state, fz_hash_table_filter_fn *callback)
  271. {
  272. int i;
  273. restart:
  274. for (i = 0; i < table->size; ++i)
  275. {
  276. if (table->ents[i].val)
  277. {
  278. if (callback(ctx, state, table->ents[i].key, table->keylen, table->ents[i].val))
  279. {
  280. do_removal(ctx, table, i);
  281. goto restart; /* we may have moved some slots around, so just restart the scan */
  282. }
  283. }
  284. }
  285. }