block_splitter.c 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. /* Copyright 2013 Google Inc. All Rights Reserved.
  2. Distributed under MIT license.
  3. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
  4. */
  5. /* Block split point selection utilities. */
  6. #include "block_splitter.h"
  7. #include <string.h> /* memcpy, memset */
  8. #include "../common/platform.h"
  9. #include "bit_cost.h"
  10. #include "cluster.h"
  11. #include "command.h"
  12. #include "fast_log.h"
  13. #include "histogram.h"
  14. #include "memory.h"
  15. #include "quality.h"
  16. #if defined(__cplusplus) || defined(c_plusplus)
  17. extern "C" {
  18. #endif
  19. static const size_t kMaxLiteralHistograms = 100;
  20. static const size_t kMaxCommandHistograms = 50;
  21. static const double kLiteralBlockSwitchCost = 28.1;
  22. static const double kCommandBlockSwitchCost = 13.5;
  23. static const double kDistanceBlockSwitchCost = 14.6;
  24. static const size_t kLiteralStrideLength = 70;
  25. static const size_t kCommandStrideLength = 40;
  26. static const size_t kDistanceStrideLength = 40;
  27. static const size_t kSymbolsPerLiteralHistogram = 544;
  28. static const size_t kSymbolsPerCommandHistogram = 530;
  29. static const size_t kSymbolsPerDistanceHistogram = 544;
  30. static const size_t kMinLengthForBlockSplitting = 128;
  31. static const size_t kIterMulForRefining = 2;
  32. static const size_t kMinItersForRefining = 100;
  33. static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
  34. /* Count how many we have. */
  35. size_t total_length = 0;
  36. size_t i;
  37. for (i = 0; i < num_commands; ++i) {
  38. total_length += cmds[i].insert_len_;
  39. }
  40. return total_length;
  41. }
  42. static void CopyLiteralsToByteArray(const Command* cmds,
  43. const size_t num_commands,
  44. const uint8_t* data,
  45. const size_t offset,
  46. const size_t mask,
  47. uint8_t* literals) {
  48. size_t pos = 0;
  49. size_t from_pos = offset & mask;
  50. size_t i;
  51. for (i = 0; i < num_commands; ++i) {
  52. size_t insert_len = cmds[i].insert_len_;
  53. if (from_pos + insert_len > mask) {
  54. size_t head_size = mask + 1 - from_pos;
  55. memcpy(literals + pos, data + from_pos, head_size);
  56. from_pos = 0;
  57. pos += head_size;
  58. insert_len -= head_size;
  59. }
  60. if (insert_len > 0) {
  61. memcpy(literals + pos, data + from_pos, insert_len);
  62. pos += insert_len;
  63. }
  64. from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
  65. }
  66. }
  67. static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
  68. /* Initial seed should be 7. In this case, loop length is (1 << 29). */
  69. *seed *= 16807U;
  70. return *seed;
  71. }
  72. static BROTLI_INLINE double BitCost(size_t count) {
  73. return count == 0 ? -2.0 : FastLog2(count);
  74. }
/* NOTE(review): these two batch sizes are not referenced by the code visible
   in this file; presumably block_splitter_inc.h consumes them - confirm. */
#define HISTOGRAMS_PER_BATCH 64
#define CLUSTERS_PER_BATCH 16

/* block_splitter_inc.h is included three times below. Before each inclusion
   FN(X) is redefined to paste a different suffix (Literal, Command,
   Distance) onto X, and DataType selects the element type. Presumably the
   header is a template that defines the SplitByteVector<suffix> routines
   called by BrotliSplitBlock - confirm against the header. */

/* Instantiation 1: suffix "Literal", elements are uint8_t. */
#define FN(X) X ## Literal
#define DataType uint8_t
/* NOLINTNEXTLINE(build/include) */
#include "block_splitter_inc.h"
#undef DataType
#undef FN

/* Instantiation 2: suffix "Command", elements are uint16_t. DataType is not
   undefined here, so it is still uint16_t for the next instantiation. */
#define FN(X) X ## Command
#define DataType uint16_t
/* NOLINTNEXTLINE(build/include) */
#include "block_splitter_inc.h"
#undef FN

/* Instantiation 3: suffix "Distance", reusing DataType uint16_t from above. */
#define FN(X) X ## Distance
/* NOLINTNEXTLINE(build/include) */
#include "block_splitter_inc.h"
#undef DataType
#undef FN
  93. void BrotliInitBlockSplit(BlockSplit* self) {
  94. self->num_types = 0;
  95. self->num_blocks = 0;
  96. self->types = 0;
  97. self->lengths = 0;
  98. self->types_alloc_size = 0;
  99. self->lengths_alloc_size = 0;
  100. }
/* Hands the types and lengths buffers of *self to BROTLI_FREE together with
   memory manager m. This function does not itself touch num_types,
   num_blocks or the *_alloc_size fields.
   NOTE(review): BROTLI_FREE is defined outside this file; presumably it
   releases the buffer through m and tolerates the zero pointers set by
   BrotliInitBlockSplit - confirm. */
void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
  BROTLI_FREE(m, self->types);
  BROTLI_FREE(m, self->lengths);
}
  105. /* Extracts literals, command distance and prefix codes, then applies
  106. * SplitByteVector to create partitioning. */
  107. void BrotliSplitBlock(MemoryManager* m,
  108. const Command* cmds,
  109. const size_t num_commands,
  110. const uint8_t* data,
  111. const size_t pos,
  112. const size_t mask,
  113. const BrotliEncoderParams* params,
  114. BlockSplit* literal_split,
  115. BlockSplit* insert_and_copy_split,
  116. BlockSplit* dist_split) {
  117. {
  118. size_t literals_count = CountLiterals(cmds, num_commands);
  119. uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
  120. if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literals)) return;
  121. /* Create a continuous array of literals. */
  122. CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
  123. /* Create the block split on the array of literals.
  124. * Literal histograms can have alphabet size up to 256.
  125. * Though, to accomodate context modeling, less than half of maximum size
  126. * is allowed. */
  127. SplitByteVectorLiteral(
  128. m, literals, literals_count,
  129. kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
  130. kLiteralStrideLength, kLiteralBlockSwitchCost, params,
  131. literal_split);
  132. if (BROTLI_IS_OOM(m)) return;
  133. BROTLI_FREE(m, literals);
  134. /* NB: this might be a good place for injecting extra splitting without
  135. * increasing encoder complexity; however, output parition would be less
  136. * optimal than one produced with forced splitting inside
  137. * SplitByteVector (FindBlocks / ClusterBlocks). */
  138. }
  139. {
  140. /* Compute prefix codes for commands. */
  141. uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
  142. size_t i;
  143. if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(insert_and_copy_codes)) return;
  144. for (i = 0; i < num_commands; ++i) {
  145. insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
  146. }
  147. /* Create the block split on the array of command prefixes. */
  148. SplitByteVectorCommand(
  149. m, insert_and_copy_codes, num_commands,
  150. kSymbolsPerCommandHistogram, kMaxCommandHistograms,
  151. kCommandStrideLength, kCommandBlockSwitchCost, params,
  152. insert_and_copy_split);
  153. if (BROTLI_IS_OOM(m)) return;
  154. /* TODO(eustas): reuse for distances? */
  155. BROTLI_FREE(m, insert_and_copy_codes);
  156. }
  157. {
  158. /* Create a continuous array of distance prefixes. */
  159. uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
  160. size_t j = 0;
  161. size_t i;
  162. if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_prefixes)) return;
  163. for (i = 0; i < num_commands; ++i) {
  164. const Command* cmd = &cmds[i];
  165. if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
  166. distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF;
  167. }
  168. }
  169. /* Create the block split on the array of distance prefixes. */
  170. SplitByteVectorDistance(
  171. m, distance_prefixes, j,
  172. kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
  173. kDistanceStrideLength, kDistanceBlockSwitchCost, params,
  174. dist_split);
  175. if (BROTLI_IS_OOM(m)) return;
  176. BROTLI_FREE(m, distance_prefixes);
  177. }
  178. }
  179. #if defined(BROTLI_TEST)
  180. size_t CountLiteralsForTest(const Command*, const size_t);
  181. size_t CountLiteralsForTest(const Command* cmds, const size_t num_commands) {
  182. return CountLiterals(cmds, num_commands);
  183. }
  184. void CopyLiteralsToByteArrayForTest(const Command*,
  185. const size_t, const uint8_t*, const size_t, const size_t, uint8_t*);
  186. void CopyLiteralsToByteArrayForTest(const Command* cmds,
  187. const size_t num_commands, const uint8_t* data, const size_t offset,
  188. const size_t mask, uint8_t* literals) {
  189. CopyLiteralsToByteArray(cmds, num_commands, data, offset, mask, literals);
  190. }
  191. #endif
  192. #if defined(__cplusplus) || defined(c_plusplus)
  193. } /* extern "C" */
  194. #endif