/* hb-set-digest.hh */
/*
 * Copyright © 2012  Google, Inc.
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */
#ifndef HB_SET_DIGEST_HH
#define HB_SET_DIGEST_HH

#include "hb.hh"
#include "hb-machinery.hh"
/*
 * The set-digests here implement various "filters" that support
 * "approximate member query".  Conceptually these are like Bloom
 * Filter and Quotient Filter, however, much smaller, faster, and
 * designed to fit the requirements of our uses for glyph coverage
 * queries.
 *
 * Our filters are highly accurate if the lookup covers a fairly local
 * set of glyphs, but fully flooded and ineffective if coverage is
 * all over the place.
 *
 * The way these are used is that the filter is first populated by
 * a lookup's or subtable's Coverage table(s), and then when we
 * want to apply the lookup or subtable to a glyph, before trying
 * to apply, we ask the filter if the glyph may be covered.  If it's
 * not, we return early.
 *
 * We use these filters both at the lookup-level, and then again,
 * at the subtable-level.  Both provide a performance win.
 *
 * The main filter we use is a combination of three bits-pattern
 * filters.  A bits-pattern filter checks a number of bits (5 or 6)
 * of the input number (glyph-id in this case) and checks whether
 * its pattern is amongst the patterns of any of the accepted values.
 * The accepted patterns are represented as a "long" integer.  The
 * check is done using four bitwise operations only.
 */
  57. template <typename mask_t, unsigned int shift>
  58. struct hb_set_digest_bits_pattern_t
  59. {
  60. static constexpr unsigned mask_bytes = sizeof (mask_t);
  61. static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
  62. static constexpr unsigned num_bits = 0
  63. + (mask_bytes >= 1 ? 3 : 0)
  64. + (mask_bytes >= 2 ? 1 : 0)
  65. + (mask_bytes >= 4 ? 1 : 0)
  66. + (mask_bytes >= 8 ? 1 : 0)
  67. + (mask_bytes >= 16? 1 : 0)
  68. + 0;
  69. static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
  70. static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");
  71. void init () { mask = 0; }
  72. void add (const hb_set_digest_bits_pattern_t &o) { mask |= o.mask; }
  73. void add (hb_codepoint_t g) { mask |= mask_for (g); }
  74. bool add_range (hb_codepoint_t a, hb_codepoint_t b)
  75. {
  76. if ((b >> shift) - (a >> shift) >= mask_bits - 1)
  77. mask = (mask_t) -1;
  78. else {
  79. mask_t ma = mask_for (a);
  80. mask_t mb = mask_for (b);
  81. mask |= mb + (mb - ma) - (mb < ma);
  82. }
  83. return true;
  84. }
  85. template <typename T>
  86. void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  87. {
  88. for (unsigned int i = 0; i < count; i++)
  89. {
  90. add (*array);
  91. array = &StructAtOffsetUnaligned<T> ((const void *) array, stride);
  92. }
  93. }
  94. template <typename T>
  95. void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
  96. template <typename T>
  97. bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  98. {
  99. add_array (array, count, stride);
  100. return true;
  101. }
  102. template <typename T>
  103. bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
  104. bool may_have (const hb_set_digest_bits_pattern_t &o) const
  105. { return mask & o.mask; }
  106. bool may_have (hb_codepoint_t g) const
  107. { return mask & mask_for (g); }
  108. private:
  109. static mask_t mask_for (hb_codepoint_t g)
  110. { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); }
  111. mask_t mask;
  112. };
  113. template <typename head_t, typename tail_t>
  114. struct hb_set_digest_combiner_t
  115. {
  116. void init ()
  117. {
  118. head.init ();
  119. tail.init ();
  120. }
  121. void add (const hb_set_digest_combiner_t &o)
  122. {
  123. head.add (o.head);
  124. tail.add (o.tail);
  125. }
  126. void add (hb_codepoint_t g)
  127. {
  128. head.add (g);
  129. tail.add (g);
  130. }
  131. bool add_range (hb_codepoint_t a, hb_codepoint_t b)
  132. {
  133. return head.add_range (a, b) &&
  134. tail.add_range (a, b);
  135. }
  136. template <typename T>
  137. void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  138. {
  139. head.add_array (array, count, stride);
  140. tail.add_array (array, count, stride);
  141. }
  142. template <typename T>
  143. void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
  144. template <typename T>
  145. bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  146. {
  147. return head.add_sorted_array (array, count, stride) &&
  148. tail.add_sorted_array (array, count, stride);
  149. }
  150. template <typename T>
  151. bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
  152. bool may_have (const hb_set_digest_combiner_t &o) const
  153. {
  154. return head.may_have (o.head) && tail.may_have (o.tail);
  155. }
  156. bool may_have (hb_codepoint_t g) const
  157. {
  158. return head.may_have (g) && tail.may_have (g);
  159. }
  160. private:
  161. head_t head;
  162. tail_t tail;
  163. };
/*
 * hb_set_digest_t
 *
 * This is a combination of digests that performs "best".
 * There is not much science to this: it's a result of intuition
 * and testing.
 */
  171. using hb_set_digest_t =
  172. hb_set_digest_combiner_t
  173. <
  174. hb_set_digest_bits_pattern_t<unsigned long, 4>,
  175. hb_set_digest_combiner_t
  176. <
  177. hb_set_digest_bits_pattern_t<unsigned long, 0>,
  178. hb_set_digest_bits_pattern_t<unsigned long, 9>
  179. >
  180. >
  181. ;
#endif /* HB_SET_DIGEST_HH */