hash_rolling_inc.h

/* NOLINT(build/header_guard) */
/* Copyright 2018 Google Inc. All Rights Reserved.
   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
/* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
/* JUMP = skip bytes for speedup */

/* Rolling hash for long-distance, long-string matches. Stores one position
   per bucket; the bucket key is computed over a long region. */

#define HashRolling HASHER()

static const uint32_t FN(kRollingHashMul32) = 69069;
static const uint32_t FN(kInvalidPos) = 0xffffffff;

/* This hasher uses a longer forward length, but returning a higher value here
   will hurt compression by the main hasher when combined with a composite
   hasher. The hasher tests for the longer forward match itself instead. */
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }

/* Computes a code from a single byte. A lookup table of 256 values could be
   used, but simply adding 1 works about as well. */
static uint32_t FN(HashByte)(uint8_t byte) {
  return (uint32_t)byte + 1u;
}

static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
                                               uint32_t factor) {
  return (uint32_t)(factor * state + FN(HashByte)(add));
}

static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
                                        uint8_t rem, uint32_t factor,
                                        uint32_t factor_remove) {
  return (uint32_t)(factor * state +
      FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
}
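
/* Together these two functions maintain, modulo 2**32, the invariant
     state = f^(k-1)*H(b[i]) + f^(k-2)*H(b[i+JUMP]) + ... + H(b[i+(k-1)*JUMP])
   with f = factor, k = CHUNKLEN / JUMP, H = FN(HashByte) and b the input.
   Multiplying state by f and adding the next byte code would leave a stale
   f^k * H(b[i]) term; subtracting factor_remove * H(rem) cancels exactly that
   term, because factor_remove is f^k (computed in FN(Initialize) below). */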

typedef struct HashRolling {
  uint32_t state;
  uint32_t* table;
  size_t next_ix;

  uint32_t chunk_len;
  uint32_t factor;
  uint32_t factor_remove;
} HashRolling;

static void FN(Initialize)(
    HasherCommon* common, HashRolling* BROTLI_RESTRICT self,
    const BrotliEncoderParams* params) {
  size_t i;
  self->state = 0;
  self->next_ix = 0;

  self->factor = FN(kRollingHashMul32);

  /* Compute the factor of the oldest byte to remove: factor**steps modulo
     2**32 (the multiplications rely on 32-bit unsigned overflow). */
  self->factor_remove = 1;
  for (i = 0; i < CHUNKLEN; i += JUMP) {
    self->factor_remove *= self->factor;
  }

  self->table = (uint32_t*)common->extra[0];
  for (i = 0; i < NUMBUCKETS; i++) {
    self->table[i] = FN(kInvalidPos);
  }

  BROTLI_UNUSED(params);
}
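
/* For illustration: with CHUNKLEN 32 and JUMP 4 (one plausible choice of the
   template parameters), the loop above runs 8 times, so factor_remove ends up
   as 69069**8 mod 2**32: the weight the oldest sample carries once the window
   has advanced past it. */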

static void FN(Prepare)(HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
  size_t i;
  /* Too small size, cannot use this hasher. */
  if (input_size < CHUNKLEN) return;
  self->state = 0;
  for (i = 0; i < CHUNKLEN; i += JUMP) {
    self->state = FN(HashRollingFunctionInitial)(
        self->state, data[i], self->factor);
  }
  BROTLI_UNUSED(one_shot);
}
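
/* FN(Prepare) evaluates the window polynomial directly by Horner's rule over
   every JUMP-th byte of the first CHUNKLEN bytes; this is also how the state
   is re-seeded after a stitch (see FN(StitchToPreviousBlock) below). */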

static BROTLI_INLINE void FN(HashMemAllocInBytes)(
    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
    size_t input_size, size_t* alloc_size) {
  BROTLI_UNUSED(params);
  BROTLI_UNUSED(one_shot);
  BROTLI_UNUSED(input_size);
  alloc_size[0] = NUMBUCKETS * sizeof(uint32_t);
}

static BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self,
    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
  BROTLI_UNUSED(self);
  BROTLI_UNUSED(data);
  BROTLI_UNUSED(mask);
  BROTLI_UNUSED(ix);
}

static BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self,
    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
    const size_t ix_start, const size_t ix_end) {
  BROTLI_UNUSED(self);
  BROTLI_UNUSED(data);
  BROTLI_UNUSED(mask);
  BROTLI_UNUSED(ix_start);
  BROTLI_UNUSED(ix_end);
}
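
/* FN(Store) and FN(StoreRange) are deliberate no-ops: the table is updated
   only inside FN(FindLongestMatch) below, where the rolling state for each
   JUMP-aligned position is available as a side effect of advancing the
   hash. */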

static BROTLI_INLINE void FN(StitchToPreviousBlock)(
    HashRolling* BROTLI_RESTRICT self,
    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
    size_t ring_buffer_mask) {
  /* In this case we must re-initialize the hasher from scratch from the
     current position. */
  size_t position_masked;
  size_t available = num_bytes;
  if ((position & (JUMP - 1)) != 0) {
    size_t diff = JUMP - (position & (JUMP - 1));
    available = (diff > available) ? 0 : (available - diff);
    position += diff;
  }
  position_masked = position & ring_buffer_mask;
  /* Wrapping around the ring buffer is not handled. */
  if (available > ring_buffer_mask - position_masked) {
    available = ring_buffer_mask - position_masked;
  }

  FN(Prepare)(self, BROTLI_FALSE, available,
      ringbuffer + (position & ring_buffer_mask));
  self->next_ix = position;
  BROTLI_UNUSED(num_bytes);
}
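
/* The round-up above keeps position a multiple of JUMP, matching
   FN(FindLongestMatch), which only advances in JUMP-sized steps; the bytes
   skipped by the alignment are subtracted from the window handed to
   FN(Prepare). */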

static BROTLI_INLINE void FN(PrepareDistanceCache)(
    HashRolling* BROTLI_RESTRICT self,
    int* BROTLI_RESTRICT distance_cache) {
  BROTLI_UNUSED(self);
  BROTLI_UNUSED(distance_cache);
}

static BROTLI_INLINE void FN(FindLongestMatch)(
    HashRolling* BROTLI_RESTRICT self,
    const BrotliEncoderDictionary* dictionary,
    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
    const size_t max_length, const size_t max_backward,
    const size_t dictionary_distance, const size_t max_distance,
    HasherSearchResult* BROTLI_RESTRICT out) {
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
  size_t pos;

  if ((cur_ix & (JUMP - 1)) != 0) return;

  /* Not enough lookahead. */
  if (max_length < CHUNKLEN) return;

  for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {
    uint32_t code = self->state & MASK;

    uint8_t rem = data[pos & ring_buffer_mask];
    uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
    size_t found_ix = FN(kInvalidPos);

    self->state = FN(HashRollingFunction)(
        self->state, add, rem, self->factor, self->factor_remove);
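
    /* code is the state reduced to MASK + 1 values, so the branch below is
       taken with probability roughly NUMBUCKETS / (MASK + 1), the storing and
       probing rate announced at the top of this file. */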
    if (code < NUMBUCKETS) {
      found_ix = self->table[code];
      self->table[code] = (uint32_t)pos;
      if (pos == cur_ix && found_ix != FN(kInvalidPos)) {
        /* The cast to 32-bit makes backward distances up to 4GB work even
           if cur_ix is above 4GB, despite using 32-bit values in the table. */
        size_t backward = (uint32_t)(cur_ix - found_ix);
        if (backward <= max_backward) {
          const size_t found_ix_masked = found_ix & ring_buffer_mask;
          const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
                                                      &data[cur_ix_masked],
                                                      max_length);
          if (len >= 4 && len > out->len) {
            score_t score = BackwardReferenceScore(len, backward);
            if (score > out->score) {
              out->len = len;
              out->distance = backward;
              out->score = score;
              out->len_code_delta = 0;
            }
          }
        }
      }
    }
  }

  self->next_ix = cur_ix + JUMP;

  /* NOTE: this hasher does not search in the dictionary. It is used as a
     backup hasher; the main hasher already searches in it. */
  BROTLI_UNUSED(dictionary);
  BROTLI_UNUSED(distance_cache);
  BROTLI_UNUSED(dictionary_distance);
  BROTLI_UNUSED(max_distance);
}

#undef HashRolling
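
/* A sketch of how this template might be instantiated (the macro names and
   values below are illustrative only; the real parameters are supplied by the
   file that includes this one):

     #define HASHER() HROLLING
     #define FN(X) X ## HROLLING
     #define JUMP 4                       (hash every 4th position)
     #define NUMBUCKETS (1 << 24)         (16M buckets, 64 MiB of uint32_t)
     #define MASK ((NUMBUCKETS * 64) - 1) (keep 1 out of every 64 codes)
     #define CHUNKLEN 32                  (bucket key covers 32 bytes)
     #include "hash_rolling_inc.h"

   followed by an #undef of each parameter. With these values,
   NUMBUCKETS / (MASK + 1) = 1/64, so roughly one in 64 rolling hash codes is
   stored and probed. */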