metablock_inc.h 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. /* NOLINT(build/header_guard) */
  2. /* Copyright 2015 Google Inc. All Rights Reserved.
  3. Distributed under MIT license.
  4. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
  5. */
  6. /* template parameters: FN */
  7. #define HistogramType FN(Histogram)
  8. /* Greedy block splitter for one block category (literal, command or distance).
  9. */
  10. typedef struct FN(BlockSplitter) {
  11. /* Alphabet size of particular block category. */
  12. size_t alphabet_size_;
  13. /* We collect at least this many symbols for each block. */
  14. size_t min_block_size_;
  15. /* We merge histograms A and B if
  16. entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
  17. where A is the current histogram and B is the histogram of the last or the
  18. second last block type. */
  19. double split_threshold_;
  20. size_t num_blocks_;
  21. BlockSplit* split_; /* not owned */
  22. HistogramType* histograms_; /* not owned */
  23. size_t* histograms_size_; /* not owned */
  24. /* Temporary storage for BlockSplitterFinishBlock. */
  25. HistogramType combined_histo[2];
  26. /* The number of symbols that we want to collect before deciding on whether
  27. or not to merge the block with a previous one or emit a new block. */
  28. size_t target_block_size_;
  29. /* The number of symbols in the current histogram. */
  30. size_t block_size_;
  31. /* Offset of the current histogram. */
  32. size_t curr_histogram_ix_;
  33. /* Offset of the histograms of the previous two block types. */
  34. size_t last_histogram_ix_[2];
  35. /* Entropy of the previous two block types. */
  36. double last_entropy_[2];
  37. /* The number of times we merged the current block with the last one. */
  38. size_t merge_last_count_;
  39. } FN(BlockSplitter);
  40. static void FN(InitBlockSplitter)(
  41. MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size,
  42. size_t min_block_size, double split_threshold, size_t num_symbols,
  43. BlockSplit* split, HistogramType** histograms, size_t* histograms_size) {
  44. size_t max_num_blocks = num_symbols / min_block_size + 1;
  45. /* We have to allocate one more histogram than the maximum number of block
  46. types for the current histogram when the meta-block is too big. */
  47. size_t max_num_types =
  48. BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);
  49. self->alphabet_size_ = alphabet_size;
  50. self->min_block_size_ = min_block_size;
  51. self->split_threshold_ = split_threshold;
  52. self->num_blocks_ = 0;
  53. self->split_ = split;
  54. self->histograms_size_ = histograms_size;
  55. self->target_block_size_ = min_block_size;
  56. self->block_size_ = 0;
  57. self->curr_histogram_ix_ = 0;
  58. self->merge_last_count_ = 0;
  59. BROTLI_ENSURE_CAPACITY(m, uint8_t,
  60. split->types, split->types_alloc_size, max_num_blocks);
  61. BROTLI_ENSURE_CAPACITY(m, uint32_t,
  62. split->lengths, split->lengths_alloc_size, max_num_blocks);
  63. if (BROTLI_IS_OOM(m)) return;
  64. self->split_->num_blocks = max_num_blocks;
  65. BROTLI_DCHECK(*histograms == 0);
  66. *histograms_size = max_num_types;
  67. *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);
  68. self->histograms_ = *histograms;
  69. if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(*histograms)) return;
  70. /* Clear only current histogram. */
  71. FN(HistogramClear)(&self->histograms_[0]);
  72. self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
  73. }
  74. /* Does either of three things:
  75. (1) emits the current block with a new block type;
  76. (2) emits the current block with the type of the second last block;
  77. (3) merges the current block with the last block. */
  78. static void FN(BlockSplitterFinishBlock)(
  79. FN(BlockSplitter)* self, BROTLI_BOOL is_final) {
  80. BlockSplit* split = self->split_;
  81. double* last_entropy = self->last_entropy_;
  82. HistogramType* histograms = self->histograms_;
  83. self->block_size_ =
  84. BROTLI_MAX(size_t, self->block_size_, self->min_block_size_);
  85. if (self->num_blocks_ == 0) {
  86. /* Create first block. */
  87. split->lengths[0] = (uint32_t)self->block_size_;
  88. split->types[0] = 0;
  89. last_entropy[0] =
  90. BitsEntropy(histograms[0].data_, self->alphabet_size_);
  91. last_entropy[1] = last_entropy[0];
  92. ++self->num_blocks_;
  93. ++split->num_types;
  94. ++self->curr_histogram_ix_;
  95. if (self->curr_histogram_ix_ < *self->histograms_size_)
  96. FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
  97. self->block_size_ = 0;
  98. } else if (self->block_size_ > 0) {
  99. double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_,
  100. self->alphabet_size_);
  101. double combined_entropy[2];
  102. double diff[2];
  103. size_t j;
  104. for (j = 0; j < 2; ++j) {
  105. size_t last_histogram_ix = self->last_histogram_ix_[j];
  106. self->combined_histo[j] = histograms[self->curr_histogram_ix_];
  107. FN(HistogramAddHistogram)(&self->combined_histo[j],
  108. &histograms[last_histogram_ix]);
  109. combined_entropy[j] = BitsEntropy(
  110. &self->combined_histo[j].data_[0], self->alphabet_size_);
  111. diff[j] = combined_entropy[j] - entropy - last_entropy[j];
  112. }
  113. if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&
  114. diff[0] > self->split_threshold_ &&
  115. diff[1] > self->split_threshold_) {
  116. /* Create new block. */
  117. split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
  118. split->types[self->num_blocks_] = (uint8_t)split->num_types;
  119. self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
  120. self->last_histogram_ix_[0] = (uint8_t)split->num_types;
  121. last_entropy[1] = last_entropy[0];
  122. last_entropy[0] = entropy;
  123. ++self->num_blocks_;
  124. ++split->num_types;
  125. ++self->curr_histogram_ix_;
  126. if (self->curr_histogram_ix_ < *self->histograms_size_)
  127. FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
  128. self->block_size_ = 0;
  129. self->merge_last_count_ = 0;
  130. self->target_block_size_ = self->min_block_size_;
  131. } else if (diff[1] < diff[0] - 20.0) {
  132. /* Combine this block with second last block. */
  133. split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
  134. split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
  135. BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
  136. histograms[self->last_histogram_ix_[0]] = self->combined_histo[1];
  137. last_entropy[1] = last_entropy[0];
  138. last_entropy[0] = combined_entropy[1];
  139. ++self->num_blocks_;
  140. self->block_size_ = 0;
  141. FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
  142. self->merge_last_count_ = 0;
  143. self->target_block_size_ = self->min_block_size_;
  144. } else {
  145. /* Combine this block with last block. */
  146. split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
  147. histograms[self->last_histogram_ix_[0]] = self->combined_histo[0];
  148. last_entropy[0] = combined_entropy[0];
  149. if (split->num_types == 1) {
  150. last_entropy[1] = last_entropy[0];
  151. }
  152. self->block_size_ = 0;
  153. FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
  154. if (++self->merge_last_count_ > 1) {
  155. self->target_block_size_ += self->min_block_size_;
  156. }
  157. }
  158. }
  159. if (is_final) {
  160. *self->histograms_size_ = split->num_types;
  161. split->num_blocks = self->num_blocks_;
  162. }
  163. }
  164. /* Adds the next symbol to the current histogram. When the current histogram
  165. reaches the target size, decides on merging the block. */
  166. static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) {
  167. FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol);
  168. ++self->block_size_;
  169. if (self->block_size_ == self->target_block_size_) {
  170. FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE);
  171. }
  172. }
  173. #undef HistogramType