hb-ot-shaper-thai.cc 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. /*
  2. * Copyright © 2010,2012 Google, Inc.
  3. *
  4. * This is part of HarfBuzz, a text shaping library.
  5. *
  6. * Permission is hereby granted, without written agreement and without
  7. * license or royalty fees, to use, copy, modify, and distribute this
  8. * software and its documentation for any purpose, provided that the
  9. * above copyright notice and the following two paragraphs appear in
  10. * all copies of this software.
  11. *
  12. * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  13. * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  14. * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  15. * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  16. * DAMAGE.
  17. *
  18. * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  19. * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  20. * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
  21. * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  22. * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  23. *
  24. * Google Author(s): Behdad Esfahbod
  25. */
  26. #include "hb.hh"
  27. #ifndef HB_NO_OT_SHAPE
  28. #include "hb-ot-shaper.hh"
  29. /* Thai / Lao shaper */
  30. /* PUA shaping */
  /* Class of the base character a run of marks attaches to.  The classes
   * index the above/below start-state tables, so the numeric values matter. */
  enum thai_consonant_type_t
  {
    NC = 0,            /* Any U+0E01..U+0E2E codepoint not listed below */
    AC = 1,            /* U+0E1B, U+0E1D, U+0E1F */
    RC = 2,            /* U+0E0D, U+0E10 */
    DC = 3,            /* U+0E0E, U+0E0F */
    NOT_CONSONANT = 4, /* Everything outside U+0E01..U+0E2E */

    /* Number of real consonant classes; NOT_CONSONANT doubles as the count. */
    NUM_CONSONANT_TYPES = NOT_CONSONANT
  };
  40. static thai_consonant_type_t
  41. get_consonant_type (hb_codepoint_t u)
  42. {
  43. if (u == 0x0E1Bu || u == 0x0E1Du || u == 0x0E1Fu/* || u == 0x0E2Cu*/)
  44. return AC;
  45. if (u == 0x0E0Du || u == 0x0E10u)
  46. return RC;
  47. if (u == 0x0E0Eu || u == 0x0E0Fu)
  48. return DC;
  49. if (hb_in_range<hb_codepoint_t> (u, 0x0E01u, 0x0E2Eu))
  50. return NC;
  51. return NOT_CONSONANT;
  52. }
  /* Class of a combining mark.  The classes index the columns of the
   * above/below state-machine tables, so the numeric values matter. */
  enum thai_mark_type_t
  {
    AV = 0,       /* U+0E31, U+0E34..U+0E37, U+0E47, U+0E4D..U+0E4E */
    BV = 1,       /* U+0E38..U+0E3A */
    T = 2,        /* U+0E48..U+0E4C */
    NOT_MARK = 3, /* Anything else */

    /* Number of real mark classes; NOT_MARK doubles as the count. */
    NUM_MARK_TYPES = NOT_MARK
  };
  61. static thai_mark_type_t
  62. get_mark_type (hb_codepoint_t u)
  63. {
  64. if (u == 0x0E31u || hb_in_range<hb_codepoint_t> (u, 0x0E34u, 0x0E37u) ||
  65. u == 0x0E47u || hb_in_range<hb_codepoint_t> (u, 0x0E4Du, 0x0E4Eu))
  66. return AV;
  67. if (hb_in_range<hb_codepoint_t> (u, 0x0E38u, 0x0E3Au))
  68. return BV;
  69. if (hb_in_range<hb_codepoint_t> (u, 0x0E48u, 0x0E4Cu))
  70. return T;
  71. return NOT_MARK;
  72. }
  /* Substitution requested by a state-machine edge. */
  enum thai_action_t
  {
    NOP = 0, /* Leave the codepoint untouched */
    SD  = 1, /* Shift the combining mark down */
    SL  = 2, /* Shift the combining mark left */
    SDL = 3, /* Shift the combining mark down and left */
    RD  = 4  /* Remove the descender from the base */
  };
  81. static hb_codepoint_t
  82. thai_pua_shape (hb_codepoint_t u, thai_action_t action, hb_font_t *font)
  83. {
  84. struct thai_pua_mapping_t {
  85. uint16_t u;
  86. uint16_t win_pua;
  87. uint16_t mac_pua;
  88. } const *pua_mappings = nullptr;
  89. static const thai_pua_mapping_t SD_mappings[] = {
  90. {0x0E48u, 0xF70Au, 0xF88Bu}, /* MAI EK */
  91. {0x0E49u, 0xF70Bu, 0xF88Eu}, /* MAI THO */
  92. {0x0E4Au, 0xF70Cu, 0xF891u}, /* MAI TRI */
  93. {0x0E4Bu, 0xF70Du, 0xF894u}, /* MAI CHATTAWA */
  94. {0x0E4Cu, 0xF70Eu, 0xF897u}, /* THANTHAKHAT */
  95. {0x0E38u, 0xF718u, 0xF89Bu}, /* SARA U */
  96. {0x0E39u, 0xF719u, 0xF89Cu}, /* SARA UU */
  97. {0x0E3Au, 0xF71Au, 0xF89Du}, /* PHINTHU */
  98. {0x0000u, 0x0000u, 0x0000u}
  99. };
  100. static const thai_pua_mapping_t SDL_mappings[] = {
  101. {0x0E48u, 0xF705u, 0xF88Cu}, /* MAI EK */
  102. {0x0E49u, 0xF706u, 0xF88Fu}, /* MAI THO */
  103. {0x0E4Au, 0xF707u, 0xF892u}, /* MAI TRI */
  104. {0x0E4Bu, 0xF708u, 0xF895u}, /* MAI CHATTAWA */
  105. {0x0E4Cu, 0xF709u, 0xF898u}, /* THANTHAKHAT */
  106. {0x0000u, 0x0000u, 0x0000u}
  107. };
  108. static const thai_pua_mapping_t SL_mappings[] = {
  109. {0x0E48u, 0xF713u, 0xF88Au}, /* MAI EK */
  110. {0x0E49u, 0xF714u, 0xF88Du}, /* MAI THO */
  111. {0x0E4Au, 0xF715u, 0xF890u}, /* MAI TRI */
  112. {0x0E4Bu, 0xF716u, 0xF893u}, /* MAI CHATTAWA */
  113. {0x0E4Cu, 0xF717u, 0xF896u}, /* THANTHAKHAT */
  114. {0x0E31u, 0xF710u, 0xF884u}, /* MAI HAN-AKAT */
  115. {0x0E34u, 0xF701u, 0xF885u}, /* SARA I */
  116. {0x0E35u, 0xF702u, 0xF886u}, /* SARA II */
  117. {0x0E36u, 0xF703u, 0xF887u}, /* SARA UE */
  118. {0x0E37u, 0xF704u, 0xF888u}, /* SARA UEE */
  119. {0x0E47u, 0xF712u, 0xF889u}, /* MAITAIKHU */
  120. {0x0E4Du, 0xF711u, 0xF899u}, /* NIKHAHIT */
  121. {0x0000u, 0x0000u, 0x0000u}
  122. };
  123. static const thai_pua_mapping_t RD_mappings[] = {
  124. {0x0E0Du, 0xF70Fu, 0xF89Au}, /* YO YING */
  125. {0x0E10u, 0xF700u, 0xF89Eu}, /* THO THAN */
  126. {0x0000u, 0x0000u, 0x0000u}
  127. };
  128. switch (action) {
  129. case NOP: return u;
  130. case SD: pua_mappings = SD_mappings; break;
  131. case SDL: pua_mappings = SDL_mappings; break;
  132. case SL: pua_mappings = SL_mappings; break;
  133. case RD: pua_mappings = RD_mappings; break;
  134. }
  135. for (; pua_mappings->u; pua_mappings++)
  136. if (pua_mappings->u == u)
  137. {
  138. hb_codepoint_t glyph;
  139. if (hb_font_get_glyph (font, pua_mappings->win_pua, 0, &glyph))
  140. return pua_mappings->win_pua;
  141. if (hb_font_get_glyph (font, pua_mappings->mac_pua, 0, &glyph))
  142. return pua_mappings->mac_pua;
  143. break;
  144. }
  145. return u;
  146. }
  147. static enum thai_above_state_t
  148. { /* Cluster above looks like: */
  149. T0, /* ⣤ */
  150. T1, /* ⣼ */
  151. T2, /* ⣾ */
  152. T3, /* ⣿ */
  153. NUM_ABOVE_STATES
  154. } thai_above_start_state[NUM_CONSONANT_TYPES + 1/* For NOT_CONSONANT */] =
  155. {
  156. T0, /* NC */
  157. T1, /* AC */
  158. T0, /* RC */
  159. T0, /* DC */
  160. T3, /* NOT_CONSONANT */
  161. };
  162. static const struct thai_above_state_machine_edge_t {
  163. thai_action_t action;
  164. thai_above_state_t next_state;
  165. } thai_above_state_machine[NUM_ABOVE_STATES][NUM_MARK_TYPES] =
  166. { /*AV*/ /*BV*/ /*T*/
  167. /*T0*/ {{NOP,T3}, {NOP,T0}, {SD, T3}},
  168. /*T1*/ {{SL, T2}, {NOP,T1}, {SDL,T2}},
  169. /*T2*/ {{NOP,T3}, {NOP,T2}, {SL, T3}},
  170. /*T3*/ {{NOP,T3}, {NOP,T3}, {NOP,T3}},
  171. };
  172. static enum thai_below_state_t
  173. {
  174. B0, /* No descender */
  175. B1, /* Removable descender */
  176. B2, /* Strict descender */
  177. NUM_BELOW_STATES
  178. } thai_below_start_state[NUM_CONSONANT_TYPES + 1/* For NOT_CONSONANT */] =
  179. {
  180. B0, /* NC */
  181. B0, /* AC */
  182. B1, /* RC */
  183. B2, /* DC */
  184. B2, /* NOT_CONSONANT */
  185. };
  186. static const struct thai_below_state_machine_edge_t {
  187. thai_action_t action;
  188. thai_below_state_t next_state;
  189. } thai_below_state_machine[NUM_BELOW_STATES][NUM_MARK_TYPES] =
  190. { /*AV*/ /*BV*/ /*T*/
  191. /*B0*/ {{NOP,B0}, {NOP,B2}, {NOP, B0}},
  192. /*B1*/ {{NOP,B1}, {RD, B2}, {NOP, B1}},
  193. /*B2*/ {{NOP,B2}, {SD, B2}, {NOP, B2}},
  194. };
  195. static void
  196. do_thai_pua_shaping (const hb_ot_shape_plan_t *plan HB_UNUSED,
  197. hb_buffer_t *buffer,
  198. hb_font_t *font)
  199. {
  200. #ifdef HB_NO_OT_SHAPER_THAI_FALLBACK
  201. return;
  202. #endif
  203. thai_above_state_t above_state = thai_above_start_state[NOT_CONSONANT];
  204. thai_below_state_t below_state = thai_below_start_state[NOT_CONSONANT];
  205. unsigned int base = 0;
  206. hb_glyph_info_t *info = buffer->info;
  207. unsigned int count = buffer->len;
  208. for (unsigned int i = 0; i < count; i++)
  209. {
  210. thai_mark_type_t mt = get_mark_type (info[i].codepoint);
  211. if (mt == NOT_MARK) {
  212. thai_consonant_type_t ct = get_consonant_type (info[i].codepoint);
  213. above_state = thai_above_start_state[ct];
  214. below_state = thai_below_start_state[ct];
  215. base = i;
  216. continue;
  217. }
  218. const thai_above_state_machine_edge_t &above_edge = thai_above_state_machine[above_state][mt];
  219. const thai_below_state_machine_edge_t &below_edge = thai_below_state_machine[below_state][mt];
  220. above_state = above_edge.next_state;
  221. below_state = below_edge.next_state;
  222. /* At least one of the above/below actions is NOP. */
  223. thai_action_t action = above_edge.action != NOP ? above_edge.action : below_edge.action;
  224. buffer->unsafe_to_break (base, i);
  225. if (action == RD)
  226. info[base].codepoint = thai_pua_shape (info[base].codepoint, action, font);
  227. else
  228. info[i].codepoint = thai_pua_shape (info[i].codepoint, action, font);
  229. }
  230. }
  231. static void
  232. preprocess_text_thai (const hb_ot_shape_plan_t *plan,
  233. hb_buffer_t *buffer,
  234. hb_font_t *font)
  235. {
  236. /* This function implements the shaping logic documented here:
  237. *
  238. * https://linux.thai.net/~thep/th-otf/shaping.html
  239. *
  240. * The first shaping rule listed there is needed even if the font has Thai
  241. * OpenType tables. The rest do fallback positioning based on PUA codepoints.
  242. * We implement that only if there exist no Thai GSUB in the font.
  243. */
  244. /* The following is NOT specified in the MS OT Thai spec, however, it seems
  245. * to be what Uniscribe and other engines implement. According to Eric Muller:
  246. *
  247. * When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the
  248. * NIKHAHIT backwards over any above-base marks.
  249. *
  250. * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>
  251. *
  252. * This reordering is legit only when the NIKHAHIT comes from a SARA AM, not
  253. * when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably
  254. * not what a user wanted, but the rendering is nevertheless nikhahit above
  255. * chattawa.
  256. *
  257. * Same for Lao.
  258. *
  259. * Note:
  260. *
  261. * Uniscribe also does some below-marks reordering. Namely, it positions U+0E3A
  262. * after U+0E38 and U+0E39. We do that by modifying the ccc for U+0E3A.
  263. * See unicode->modified_combining_class (). Lao does NOT have a U+0E3A
  264. * equivalent.
  265. */
  266. /*
  267. * Here are the characters of significance:
  268. *
  269. * Thai Lao
  270. * SARA AM: U+0E33 U+0EB3
  271. * SARA AA: U+0E32 U+0EB2
  272. * Nikhahit: U+0E4D U+0ECD
  273. *
  274. * Testing shows that Uniscribe reorder the following marks:
  275. * Thai: <0E31,0E34..0E37, 0E47..0E4E>
  276. * Lao: <0EB1,0EB4..0EB7,0EBB,0EC8..0ECD>
  277. *
  278. * Note how the Lao versions are the same as Thai + 0x80.
  279. */
  280. /* We only get one script at a time, so a script-agnostic implementation
  281. * is adequate here. */
  282. #define IS_SARA_AM(x) (((x) & ~0x0080u) == 0x0E33u)
  283. #define NIKHAHIT_FROM_SARA_AM(x) ((x) - 0x0E33u + 0x0E4Du)
  284. #define SARA_AA_FROM_SARA_AM(x) ((x) - 1)
  285. #define IS_ABOVE_BASE_MARK(x) (hb_in_ranges<hb_codepoint_t> ((x) & ~0x0080u, 0x0E34u, 0x0E37u, 0x0E47u, 0x0E4Eu, 0x0E31u, 0x0E31u, 0x0E3Bu, 0x0E3Bu))
  286. buffer->clear_output ();
  287. unsigned int count = buffer->len;
  288. for (buffer->idx = 0; buffer->idx < count /* No need for: && buffer->successful */;)
  289. {
  290. hb_codepoint_t u = buffer->cur().codepoint;
  291. if (likely (!IS_SARA_AM (u)))
  292. {
  293. if (unlikely (!buffer->next_glyph ())) break;
  294. continue;
  295. }
  296. /* Is SARA AM. Decompose and reorder. */
  297. (void) buffer->output_glyph (NIKHAHIT_FROM_SARA_AM (u));
  298. _hb_glyph_info_set_continuation (&buffer->prev());
  299. if (unlikely (!buffer->replace_glyph (SARA_AA_FROM_SARA_AM (u)))) break;
  300. /* Make Nikhahit be recognized as a ccc=0 mark when zeroing widths. */
  301. unsigned int end = buffer->out_len;
  302. _hb_glyph_info_set_general_category (&buffer->out_info[end - 2], HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK);
  303. /* Ok, let's see... */
  304. unsigned int start = end - 2;
  305. while (start > 0 && IS_ABOVE_BASE_MARK (buffer->out_info[start - 1].codepoint))
  306. start--;
  307. if (start + 2 < end)
  308. {
  309. /* Move Nikhahit (end-2) to the beginning */
  310. buffer->merge_out_clusters (start, end);
  311. hb_glyph_info_t t = buffer->out_info[end - 2];
  312. memmove (buffer->out_info + start + 1,
  313. buffer->out_info + start,
  314. sizeof (buffer->out_info[0]) * (end - start - 2));
  315. buffer->out_info[start] = t;
  316. }
  317. else
  318. {
  319. /* Since we decomposed, and NIKHAHIT is combining, merge clusters with the
  320. * previous cluster. */
  321. if (start && buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
  322. buffer->merge_out_clusters (start - 1, end);
  323. }
  324. }
  325. buffer->sync ();
  326. /* If font has Thai GSUB, we are done. */
  327. if (plan->props.script == HB_SCRIPT_THAI && !plan->map.found_script[0])
  328. do_thai_pua_shaping (plan, buffer, font);
  329. }
  330. const hb_ot_shaper_t _hb_ot_shaper_thai =
  331. {
  332. nullptr, /* collect_features */
  333. nullptr, /* override_features */
  334. nullptr, /* data_create */
  335. nullptr, /* data_destroy */
  336. preprocess_text_thai,
  337. nullptr, /* postprocess_glyphs */
  338. nullptr, /* decompose */
  339. nullptr, /* compose */
  340. nullptr, /* setup_masks */
  341. nullptr, /* reorder_marks */
  342. HB_TAG_NONE, /* gpos_tag */
  343. HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
  344. HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
  345. false,/* fallback_position */
  346. };
  347. #endif