compound_dictionary.c 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. /* Copyright 2017 Google Inc. All Rights Reserved.
  2. Distributed under MIT license.
  3. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
  4. */
  5. #include "compound_dictionary.h"
  6. #include <brotli/types.h>
  7. #include "../common/platform.h"
  8. #include "memory.h"
  9. #include "quality.h"
  10. static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m,
  11. const uint8_t* source, size_t source_size, uint32_t bucket_bits,
  12. uint32_t slot_bits, uint32_t hash_bits, uint16_t bucket_limit) {
  13. /* Step 1: create "bloated" hasher. */
  14. uint32_t num_slots = 1u << slot_bits;
  15. uint32_t num_buckets = 1u << bucket_bits;
  16. uint32_t hash_shift = 64u - bucket_bits;
  17. uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
  18. uint32_t slot_mask = num_slots - 1;
  19. size_t alloc_size = (sizeof(uint32_t) << slot_bits) +
  20. (sizeof(uint32_t) << slot_bits) +
  21. (sizeof(uint16_t) << bucket_bits) +
  22. (sizeof(uint32_t) << bucket_bits) +
  23. (sizeof(uint32_t) * source_size);
  24. uint8_t* flat = NULL;
  25. PreparedDictionary* result = NULL;
  26. uint16_t* num = NULL;
  27. uint32_t* bucket_heads = NULL;
  28. uint32_t* next_bucket = NULL;
  29. uint32_t* slot_offsets = NULL;
  30. uint16_t* heads = NULL;
  31. uint32_t* items = NULL;
  32. uint8_t** source_ref = NULL;
  33. uint32_t i;
  34. uint32_t* slot_size = NULL;
  35. uint32_t* slot_limit = NULL;
  36. uint32_t total_items = 0;
  37. if (slot_bits > 16) return NULL;
  38. if (slot_bits > bucket_bits) return NULL;
  39. if (bucket_bits - slot_bits >= 16) return NULL;
  40. flat = BROTLI_ALLOC(m, uint8_t, alloc_size);
  41. if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(flat)) return NULL;
  42. slot_size = (uint32_t*)flat;
  43. slot_limit = (uint32_t*)(&slot_size[num_slots]);
  44. num = (uint16_t*)(&slot_limit[num_slots]);
  45. bucket_heads = (uint32_t*)(&num[num_buckets]);
  46. next_bucket = (uint32_t*)(&bucket_heads[num_buckets]);
  47. memset(num, 0, num_buckets * sizeof(num[0]));
  48. /* TODO(eustas): apply custom "store" order. */
  49. for (i = 0; i + 7 < source_size; ++i) {
  50. const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(&source[i]) & hash_mask) *
  51. kPreparedDictionaryHashMul64Long;
  52. const uint32_t key = (uint32_t)(h >> hash_shift);
  53. uint16_t count = num[key];
  54. next_bucket[i] = (count == 0) ? ((uint32_t)(-1)) : bucket_heads[key];
  55. bucket_heads[key] = i;
  56. count++;
  57. if (count > bucket_limit) count = bucket_limit;
  58. num[key] = count;
  59. }
  60. /* Step 2: find slot limits. */
  61. for (i = 0; i < num_slots; ++i) {
  62. BROTLI_BOOL overflow = BROTLI_FALSE;
  63. slot_limit[i] = bucket_limit;
  64. while (BROTLI_TRUE) {
  65. uint32_t limit = slot_limit[i];
  66. size_t j;
  67. uint32_t count = 0;
  68. overflow = BROTLI_FALSE;
  69. for (j = i; j < num_buckets; j += num_slots) {
  70. uint32_t size = num[j];
  71. /* Last chain may span behind 64K limit; overflow happens only if
  72. we are about to use 0xFFFF+ as item offset. */
  73. if (count >= 0xFFFF) {
  74. overflow = BROTLI_TRUE;
  75. break;
  76. }
  77. if (size > limit) size = limit;
  78. count += size;
  79. }
  80. if (!overflow) {
  81. slot_size[i] = count;
  82. total_items += count;
  83. break;
  84. }
  85. slot_limit[i]--;
  86. }
  87. }
  88. /* Step 3: transfer data to "slim" hasher. */
  89. alloc_size = sizeof(PreparedDictionary) + (sizeof(uint32_t) << slot_bits) +
  90. (sizeof(uint16_t) << bucket_bits) + (sizeof(uint32_t) * total_items) +
  91. sizeof(uint8_t*);
  92. result = (PreparedDictionary*)BROTLI_ALLOC(m, uint8_t, alloc_size);
  93. if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(result)) {
  94. BROTLI_FREE(m, flat);
  95. return NULL;
  96. }
  97. slot_offsets = (uint32_t*)(&result[1]);
  98. heads = (uint16_t*)(&slot_offsets[num_slots]);
  99. items = (uint32_t*)(&heads[num_buckets]);
  100. source_ref = (uint8_t**)(&items[total_items]);
  101. result->magic = kLeanPreparedDictionaryMagic;
  102. result->num_items = total_items;
  103. result->source_size = (uint32_t)source_size;
  104. result->hash_bits = hash_bits;
  105. result->bucket_bits = bucket_bits;
  106. result->slot_bits = slot_bits;
  107. BROTLI_UNALIGNED_STORE_PTR(source_ref, source);
  108. total_items = 0;
  109. for (i = 0; i < num_slots; ++i) {
  110. slot_offsets[i] = total_items;
  111. total_items += slot_size[i];
  112. slot_size[i] = 0;
  113. }
  114. for (i = 0; i < num_buckets; ++i) {
  115. uint32_t slot = i & slot_mask;
  116. uint32_t count = num[i];
  117. uint32_t pos;
  118. size_t j;
  119. size_t cursor = slot_size[slot];
  120. if (count > slot_limit[slot]) count = slot_limit[slot];
  121. if (count == 0) {
  122. heads[i] = 0xFFFF;
  123. continue;
  124. }
  125. heads[i] = (uint16_t)cursor;
  126. cursor += slot_offsets[slot];
  127. slot_size[slot] += count;
  128. pos = bucket_heads[i];
  129. for (j = 0; j < count; j++) {
  130. items[cursor++] = pos;
  131. pos = next_bucket[pos];
  132. }
  133. items[cursor - 1] |= 0x80000000;
  134. }
  135. BROTLI_FREE(m, flat);
  136. return result;
  137. }
  138. PreparedDictionary* CreatePreparedDictionary(MemoryManager* m,
  139. const uint8_t* source, size_t source_size) {
  140. uint32_t bucket_bits = 17;
  141. uint32_t slot_bits = 7;
  142. uint32_t hash_bits = 40;
  143. uint16_t bucket_limit = 32;
  144. size_t volume = 16u << bucket_bits;
  145. /* Tune parameters to fit dictionary size. */
  146. while (volume < source_size && bucket_bits < 22) {
  147. bucket_bits++;
  148. slot_bits++;
  149. volume <<= 1;
  150. }
  151. return CreatePreparedDictionaryWithParams(m,
  152. source, source_size, bucket_bits, slot_bits, hash_bits, bucket_limit);
  153. }
  154. void DestroyPreparedDictionary(MemoryManager* m,
  155. PreparedDictionary* dictionary) {
  156. if (!dictionary) return;
  157. BROTLI_FREE(m, dictionary);
  158. }
  159. BROTLI_BOOL AttachPreparedDictionary(
  160. CompoundDictionary* compound, const PreparedDictionary* dictionary) {
  161. size_t length = 0;
  162. size_t index = 0;
  163. if (compound->num_chunks == SHARED_BROTLI_MAX_COMPOUND_DICTS) {
  164. return BROTLI_FALSE;
  165. }
  166. if (!dictionary) return BROTLI_FALSE;
  167. length = dictionary->source_size;
  168. index = compound->num_chunks;
  169. compound->total_size += length;
  170. compound->chunks[index] = dictionary;
  171. compound->chunk_offsets[index + 1] = compound->total_size;
  172. {
  173. uint32_t* slot_offsets = (uint32_t*)(&dictionary[1]);
  174. uint16_t* heads = (uint16_t*)(&slot_offsets[(size_t)1u << dictionary->slot_bits]);
  175. uint32_t* items = (uint32_t*)(&heads[(size_t)1u << dictionary->bucket_bits]);
  176. const void* tail = (void*)&items[dictionary->num_items];
  177. if (dictionary->magic == kPreparedDictionaryMagic) {
  178. compound->chunk_source[index] = (const uint8_t*)tail;
  179. } else {
  180. /* dictionary->magic == kLeanPreparedDictionaryMagic */
  181. compound->chunk_source[index] =
  182. (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
  183. }
  184. }
  185. compound->num_chunks++;
  186. return BROTLI_TRUE;
  187. }