text.c 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. // Copyright (C) 2004-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include <string.h>
  24. fz_text *
  25. fz_new_text(fz_context *ctx)
  26. {
  27. fz_text *text = fz_malloc_struct(ctx, fz_text);
  28. text->refs = 1;
  29. return text;
  30. }
  31. fz_text *
  32. fz_keep_text(fz_context *ctx, const fz_text *textc)
  33. {
  34. fz_text *text = (fz_text *)textc; /* Explicit cast away of const */
  35. return fz_keep_imp(ctx, text, &text->refs);
  36. }
  37. void
  38. fz_drop_text(fz_context *ctx, const fz_text *textc)
  39. {
  40. fz_text *text = (fz_text *)textc; /* Explicit cast away of const */
  41. if (fz_drop_imp(ctx, text, &text->refs))
  42. {
  43. fz_text_span *span = text->head;
  44. while (span)
  45. {
  46. fz_text_span *next = span->next;
  47. fz_drop_font(ctx, span->font);
  48. fz_free(ctx, span->items);
  49. fz_free(ctx, span);
  50. span = next;
  51. }
  52. fz_free(ctx, text);
  53. }
  54. }
  55. static fz_text_span *
  56. fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm)
  57. {
  58. fz_text_span *span = fz_malloc_struct(ctx, fz_text_span);
  59. span->font = fz_keep_font(ctx, font);
  60. span->wmode = wmode;
  61. span->bidi_level = bidi_level;
  62. span->markup_dir = markup_dir;
  63. span->language = language;
  64. span->trm = trm;
  65. span->trm.e = 0;
  66. span->trm.f = 0;
  67. return span;
  68. }
  69. static fz_text_span *
  70. fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm)
  71. {
  72. if (!text->tail)
  73. {
  74. text->head = text->tail = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm);
  75. }
  76. else if (text->tail->font != font ||
  77. text->tail->wmode != (unsigned int)wmode ||
  78. text->tail->bidi_level != (unsigned int)bidi_level ||
  79. text->tail->markup_dir != (unsigned int)markup_dir ||
  80. text->tail->language != (unsigned int)language ||
  81. text->tail->trm.a != trm.a ||
  82. text->tail->trm.b != trm.b ||
  83. text->tail->trm.c != trm.c ||
  84. text->tail->trm.d != trm.d)
  85. {
  86. text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm);
  87. }
  88. return text->tail;
  89. }
  90. static void
  91. fz_grow_text_span(fz_context *ctx, fz_text_span *span, int n)
  92. {
  93. int new_cap = span->cap;
  94. if (span->len + n < new_cap)
  95. return;
  96. while (span->len + n > new_cap)
  97. new_cap = new_cap + 36;
  98. span->items = fz_realloc_array(ctx, span->items, new_cap, fz_text_item);
  99. span->cap = new_cap;
  100. }
  101. void
  102. fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, float adv, int gid, int ucs, int cid, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang)
  103. {
  104. fz_text_span *span;
  105. if (text->refs != 1)
  106. fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot modify shared text objects");
  107. span = fz_add_text_span(ctx, text, font, wmode, bidi_level, markup_dir, lang, trm);
  108. fz_grow_text_span(ctx, span, 1);
  109. span->items[span->len].ucs = ucs;
  110. span->items[span->len].gid = gid;
  111. span->items[span->len].cid = cid;
  112. span->items[span->len].x = trm.e;
  113. span->items[span->len].y = trm.f;
  114. span->items[span->len].adv = adv;
  115. span->len++;
  116. }
  117. void
  118. fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int gid, int ucs, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang)
  119. {
  120. float adv = (gid >= 0) ? fz_advance_glyph(ctx, font, gid, wmode) : 0;
  121. fz_show_glyph_aux(ctx, text, font, trm, adv, gid, ucs, ucs, wmode, bidi_level, markup_dir, lang);
  122. }
  123. fz_matrix
  124. fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix trm, const char *s,
  125. int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language)
  126. {
  127. fz_font *font;
  128. int gid, ucs;
  129. float adv;
  130. while (*s)
  131. {
  132. s += fz_chartorune(&ucs, s);
  133. gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font);
  134. if (gid >= 0)
  135. adv = fz_advance_glyph(ctx, font, gid, wmode);
  136. else
  137. adv = 0;
  138. fz_show_glyph_aux(ctx, text, font, trm, adv, gid, ucs, ucs, wmode, bidi_level, markup_dir, language);
  139. if (wmode == 0)
  140. trm = fz_pre_translate(trm, adv, 0);
  141. else
  142. trm = fz_pre_translate(trm, 0, -adv);
  143. }
  144. return trm;
  145. }
  146. fz_matrix
  147. fz_measure_string(fz_context *ctx, fz_font *user_font, fz_matrix trm, const char *s,
  148. int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language)
  149. {
  150. fz_font *font;
  151. int gid, ucs;
  152. float adv;
  153. while (*s)
  154. {
  155. s += fz_chartorune(&ucs, s);
  156. gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font);
  157. adv = fz_advance_glyph(ctx, font, gid, wmode);
  158. if (wmode == 0)
  159. trm = fz_pre_translate(trm, adv, 0);
  160. else
  161. trm = fz_pre_translate(trm, 0, -adv);
  162. }
  163. return trm;
  164. }
  165. fz_rect
  166. fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm)
  167. {
  168. fz_text_span *span;
  169. fz_matrix tm, trm;
  170. fz_rect gbox;
  171. fz_rect bbox;
  172. int i;
  173. bbox = fz_empty_rect;
  174. for (span = text->head; span; span = span->next)
  175. {
  176. if (span->len > 0)
  177. {
  178. tm = span->trm;
  179. for (i = 0; i < span->len; i++)
  180. {
  181. if (span->items[i].gid >= 0)
  182. {
  183. tm.e = span->items[i].x;
  184. tm.f = span->items[i].y;
  185. trm = fz_concat(tm, ctm);
  186. gbox = fz_bound_glyph(ctx, span->font, span->items[i].gid, trm);
  187. bbox = fz_union_rect(bbox, gbox);
  188. }
  189. }
  190. }
  191. }
  192. if (!fz_is_empty_rect(bbox))
  193. {
  194. if (stroke)
  195. bbox = fz_adjust_rect_for_stroke(ctx, bbox, stroke, ctm);
  196. /* Compensate for the glyph cache limited positioning precision */
  197. bbox.x0 -= 1;
  198. bbox.y0 -= 1;
  199. bbox.x1 += 1;
  200. bbox.y1 += 1;
  201. }
  202. return bbox;
  203. }
  204. fz_text_language fz_text_language_from_string(const char *str)
  205. {
  206. fz_text_language lang;
  207. if (str == NULL)
  208. return FZ_LANG_UNSET;
  209. if (!strcmp(str, "zh-Hant") ||
  210. !strcmp(str, "zh-HK") ||
  211. !strcmp(str, "zh-MO") ||
  212. !strcmp(str, "zh-SG") ||
  213. !strcmp(str, "zh-TW"))
  214. return FZ_LANG_zh_Hant;
  215. if (!strcmp(str, "zh-Hans") ||
  216. !strcmp(str, "zh-CN"))
  217. return FZ_LANG_zh_Hans;
  218. /* 1st char */
  219. if (str[0] >= 'a' && str[0] <= 'z')
  220. lang = str[0] - 'a' + 1;
  221. else if (str[0] >= 'A' && str[0] <= 'Z')
  222. lang = str[0] - 'A' + 1;
  223. else
  224. return 0;
  225. /* 2nd char */
  226. if (str[1] >= 'a' && str[1] <= 'z')
  227. lang += 27*(str[1] - 'a' + 1);
  228. else if (str[1] >= 'A' && str[1] <= 'Z')
  229. lang += 27*(str[1] - 'A' + 1);
  230. else
  231. return 0; /* There are no valid 1 char language codes */
  232. /* 3rd char */
  233. if (str[2] >= 'a' && str[2] <= 'z')
  234. lang += 27*27*(str[2] - 'a' + 1);
  235. else if (str[2] >= 'A' && str[2] <= 'Z')
  236. lang += 27*27*(str[2] - 'A' + 1);
  237. /* We don't support iso 639-6 4 char codes, cos the standard
  238. * has been withdrawn, and no one uses them. */
  239. return lang;
  240. }
  241. char *fz_string_from_text_language(char str[8], fz_text_language lang)
  242. {
  243. int c;
  244. /* str is supposed to be at least 8 chars in size */
  245. if (str == NULL)
  246. return NULL;
  247. if (lang == FZ_LANG_zh_Hant)
  248. fz_strlcpy(str, "zh-Hant", 8);
  249. else if (lang == FZ_LANG_zh_Hans)
  250. fz_strlcpy(str, "zh-Hans", 8);
  251. else
  252. {
  253. c = lang % 27;
  254. lang = lang / 27;
  255. str[0] = c == 0 ? 0 : c - 1 + 'a';
  256. c = lang % 27;
  257. lang = lang / 27;
  258. str[1] = c == 0 ? 0 : c - 1 + 'a';
  259. c = lang % 27;
  260. str[2] = c == 0 ? 0 : c - 1 + 'a';
  261. str[3] = 0;
  262. }
  263. return str;
  264. }