pdf-nametree.c 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. // Copyright (C) 2004-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include "mupdf/pdf.h"
  24. #include <string.h>
  25. static pdf_obj *
  26. pdf_lookup_name_imp(fz_context *ctx, pdf_obj *node, const char *needle, pdf_cycle_list *cycle_up)
  27. {
  28. pdf_cycle_list cycle;
  29. pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
  30. pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names));
  31. if (pdf_cycle(ctx, &cycle, cycle_up, node))
  32. return NULL;
  33. if (pdf_is_array(ctx, kids))
  34. {
  35. int l = 0;
  36. int r = pdf_array_len(ctx, kids) - 1;
  37. while (l <= r)
  38. {
  39. int m = (l + r) >> 1;
  40. pdf_obj *kid = pdf_array_get(ctx, kids, m);
  41. pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits));
  42. const char *first = pdf_array_get_text_string(ctx, limits, 0);
  43. const char *last = pdf_array_get_text_string(ctx, limits, 1);
  44. if (!pdf_is_indirect(ctx, kid))
  45. {
  46. fz_warn(ctx, "non-indirect internal node found in name tree");
  47. break;
  48. }
  49. if (strcmp(needle, first) < 0)
  50. r = m - 1;
  51. else if (strcmp(needle, last) > 0)
  52. l = m + 1;
  53. else
  54. {
  55. pdf_obj *obj = pdf_lookup_name_imp(ctx, kid, needle, &cycle);
  56. if (obj)
  57. return obj;
  58. else
  59. break;
  60. }
  61. }
  62. /* Spec says names should be sorted (hence the binary search,
  63. * above), but Acrobat copes with non-sorted. Drop back to a
  64. * simple search if the binary search fails. */
  65. r = pdf_array_len(ctx, kids);
  66. for (l = 0; l < r; l++)
  67. {
  68. pdf_obj *obj, *kid = pdf_array_get(ctx, kids, l);
  69. if (!pdf_is_indirect(ctx, kid))
  70. {
  71. fz_warn(ctx, "non-indirect internal node found in name tree");
  72. continue;
  73. }
  74. obj = pdf_lookup_name_imp(ctx, kid, needle, &cycle);
  75. if (obj)
  76. return obj;
  77. }
  78. }
  79. if (pdf_is_array(ctx, names))
  80. {
  81. int l = 0;
  82. int r = (pdf_array_len(ctx, names) / 2) - 1;
  83. while (l <= r)
  84. {
  85. int m = (l + r) >> 1;
  86. int c;
  87. const char *key = pdf_array_get_text_string(ctx, names, m * 2);
  88. pdf_obj *val = pdf_array_get(ctx, names, m * 2 + 1);
  89. c = strcmp(needle, key);
  90. if (c < 0)
  91. r = m - 1;
  92. else if (c > 0)
  93. l = m + 1;
  94. else
  95. return val;
  96. }
  97. /* Spec says names should be sorted (hence the binary search,
  98. * above), but Acrobat copes with non-sorted. Drop back to a
  99. * simple search if the binary search fails. */
  100. r = pdf_array_len(ctx, names)/2;
  101. for (l = 0; l < r; l++)
  102. if (!strcmp(needle, pdf_array_get_text_string(ctx, names, l * 2)))
  103. return pdf_array_get(ctx, names, l * 2 + 1);
  104. }
  105. return NULL;
  106. }
  107. pdf_obj *
  108. pdf_lookup_name(fz_context *ctx, pdf_document *doc, pdf_obj *which, pdf_obj *needle)
  109. {
  110. pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
  111. pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
  112. pdf_obj *tree = pdf_dict_get(ctx, names, which);
  113. return pdf_lookup_name_imp(ctx, tree, pdf_to_text_string(ctx, needle), NULL);
  114. }
  115. pdf_obj *
  116. pdf_lookup_dest(fz_context *ctx, pdf_document *doc, pdf_obj *needle)
  117. {
  118. pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
  119. pdf_obj *dests = pdf_dict_get(ctx, root, PDF_NAME(Dests));
  120. pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
  121. /* PDF 1.1 has destinations in a dictionary */
  122. if (dests)
  123. {
  124. if (pdf_is_name(ctx, needle))
  125. return pdf_dict_get(ctx, dests, needle);
  126. else
  127. return pdf_dict_gets(ctx, dests, pdf_to_str_buf(ctx, needle));
  128. }
  129. /* PDF 1.2 has destinations in a name tree */
  130. if (names)
  131. {
  132. pdf_obj *tree = pdf_dict_get(ctx, names, PDF_NAME(Dests));
  133. return pdf_lookup_name_imp(ctx, tree, pdf_to_text_string(ctx, needle), NULL);
  134. }
  135. return NULL;
  136. }
  137. static void
  138. pdf_load_name_tree_imp(fz_context *ctx, pdf_obj *dict, pdf_document *doc, pdf_obj *node, pdf_cycle_list *cycle_up)
  139. {
  140. pdf_cycle_list cycle;
  141. pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
  142. pdf_obj *names = pdf_dict_get(ctx, node, PDF_NAME(Names));
  143. int i;
  144. if (kids && !pdf_cycle(ctx, &cycle, cycle_up, node))
  145. {
  146. int len = pdf_array_len(ctx, kids);
  147. for (i = 0; i < len; i++)
  148. pdf_load_name_tree_imp(ctx, dict, doc, pdf_array_get(ctx, kids, i), &cycle);
  149. }
  150. if (names)
  151. {
  152. int len = pdf_array_len(ctx, names);
  153. for (i = 0; i + 1 < len; i += 2)
  154. {
  155. pdf_obj *key = pdf_array_get(ctx, names, i);
  156. pdf_obj *val = pdf_array_get(ctx, names, i + 1);
  157. if (pdf_is_string(ctx, key))
  158. {
  159. key = pdf_new_name(ctx, pdf_to_text_string(ctx, key));
  160. fz_try(ctx)
  161. pdf_dict_put(ctx, dict, key, val);
  162. fz_always(ctx)
  163. pdf_drop_obj(ctx, key);
  164. fz_catch(ctx)
  165. fz_rethrow(ctx);
  166. }
  167. else if (pdf_is_name(ctx, key))
  168. {
  169. pdf_dict_put(ctx, dict, key, val);
  170. }
  171. }
  172. }
  173. }
  174. /* FIXME: fz_try/fz_catch needed here */
  175. pdf_obj *
  176. pdf_load_name_tree(fz_context *ctx, pdf_document *doc, pdf_obj *which)
  177. {
  178. pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
  179. pdf_obj *names = pdf_dict_get(ctx, root, PDF_NAME(Names));
  180. pdf_obj *tree = pdf_dict_get(ctx, names, which);
  181. if (pdf_is_dict(ctx, tree))
  182. {
  183. pdf_obj *dict = pdf_new_dict(ctx, doc, 100);
  184. pdf_load_name_tree_imp(ctx, dict, doc, tree, NULL);
  185. return dict;
  186. }
  187. return NULL;
  188. }
  189. pdf_obj *
  190. pdf_lookup_number_imp(fz_context *ctx, pdf_obj *node, int needle, pdf_cycle_list *cycle_up)
  191. {
  192. pdf_cycle_list cycle;
  193. pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
  194. pdf_obj *nums = pdf_dict_get(ctx, node, PDF_NAME(Nums));
  195. if (pdf_is_array(ctx, kids))
  196. {
  197. int l = 0;
  198. int r = pdf_array_len(ctx, kids) - 1;
  199. while (l <= r)
  200. {
  201. int m = (l + r) >> 1;
  202. pdf_obj *kid = pdf_array_get(ctx, kids, m);
  203. pdf_obj *limits = pdf_dict_get(ctx, kid, PDF_NAME(Limits));
  204. int first = pdf_array_get_int(ctx, limits, 0);
  205. int last = pdf_array_get_int(ctx, limits, 1);
  206. if (needle < first)
  207. r = m - 1;
  208. else if (needle > last)
  209. l = m + 1;
  210. else
  211. {
  212. if (pdf_cycle(ctx, &cycle, cycle_up, node))
  213. break;
  214. return pdf_lookup_number_imp(ctx, kid, needle, &cycle);
  215. }
  216. }
  217. }
  218. if (pdf_is_array(ctx, nums))
  219. {
  220. int l = 0;
  221. int r = (pdf_array_len(ctx, nums) / 2) - 1;
  222. while (l <= r)
  223. {
  224. int m = (l + r) >> 1;
  225. int key = pdf_array_get_int(ctx, nums, m * 2);
  226. pdf_obj *val = pdf_array_get(ctx, nums, m * 2 + 1);
  227. if (needle < key)
  228. r = m - 1;
  229. else if (needle > key)
  230. l = m + 1;
  231. else
  232. return val;
  233. }
  234. /* Parallel the nametree lookup above by allowing for non-sorted lists. */
  235. r = pdf_array_len(ctx, nums)/2;
  236. for (l = 0; l < r; l++)
  237. if (needle == pdf_array_get_int(ctx, nums, l * 2))
  238. return pdf_array_get(ctx, nums, l * 2 + 1);
  239. }
  240. return NULL;
  241. }
  242. pdf_obj *
  243. pdf_lookup_number(fz_context *ctx, pdf_obj *node, int needle)
  244. {
  245. return pdf_lookup_number_imp(ctx, node, needle, NULL);
  246. }
  247. static void pdf_walk_tree_imp(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name,
  248. void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
  249. void (*leave)(fz_context *, pdf_obj *, void *),
  250. void *arg,
  251. pdf_obj **inherit_names,
  252. pdf_obj **inherit_vals,
  253. pdf_cycle_list *cycle_up);
  254. static void
  255. pdf_walk_tree_kid(fz_context *ctx,
  256. pdf_obj *obj,
  257. pdf_obj *kid_name,
  258. void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
  259. void (*leave)(fz_context *, pdf_obj *, void *),
  260. void *arg,
  261. pdf_obj **inherit_names,
  262. pdf_obj **inherit_vals,
  263. pdf_cycle_list *cycle_up)
  264. {
  265. pdf_cycle_list cycle;
  266. pdf_obj **new_vals = NULL;
  267. if (obj == NULL || pdf_cycle(ctx, &cycle, cycle_up, obj))
  268. return;
  269. fz_var(new_vals);
  270. fz_try(ctx)
  271. {
  272. /* First we run through the names we've been asked to collect
  273. * inherited values for updating the values. */
  274. if (inherit_names != NULL)
  275. {
  276. int i, n;
  277. for (n = 0; inherit_names[n] != NULL; n++);
  278. for (i = 0; i < n; i++)
  279. {
  280. pdf_obj *v = pdf_dict_get(ctx, obj, inherit_names[i]);
  281. if (v != NULL)
  282. {
  283. if (new_vals == NULL)
  284. {
  285. new_vals = fz_malloc_array(ctx, n, pdf_obj *);
  286. memcpy(new_vals, inherit_vals, n*sizeof(pdf_obj *));
  287. inherit_vals = new_vals;
  288. }
  289. inherit_vals[i] = v;
  290. }
  291. }
  292. }
  293. if (arrive)
  294. arrive(ctx, obj, arg, inherit_vals);
  295. pdf_walk_tree_imp(ctx, pdf_dict_get(ctx, obj, kid_name), kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle);
  296. if (leave)
  297. leave(ctx, obj, arg);
  298. }
  299. fz_always(ctx)
  300. fz_free(ctx, new_vals);
  301. fz_catch(ctx)
  302. fz_rethrow(ctx);
  303. }
  304. static void pdf_walk_tree_imp(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name,
  305. void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
  306. void (*leave)(fz_context *, pdf_obj *, void *),
  307. void *arg,
  308. pdf_obj **inherit_names,
  309. pdf_obj **inherit_vals,
  310. pdf_cycle_list *cycle_up)
  311. {
  312. pdf_cycle_list cycle;
  313. if (obj == NULL || pdf_cycle(ctx, &cycle, cycle_up, obj))
  314. return;
  315. if (pdf_is_array(ctx, obj))
  316. {
  317. int i, n = pdf_array_len(ctx, obj);
  318. for (i = 0; i < n; i++)
  319. pdf_walk_tree_kid(ctx, pdf_array_get(ctx, obj, i), kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle);
  320. }
  321. else
  322. {
  323. pdf_walk_tree_kid(ctx, obj, kid_name, arrive, leave, arg, inherit_names, inherit_vals, &cycle);
  324. }
  325. }
  326. void pdf_walk_tree(fz_context *ctx, pdf_obj *obj, pdf_obj *kid_name,
  327. void (*arrive)(fz_context *, pdf_obj *, void *, pdf_obj **),
  328. void (*leave)(fz_context *, pdf_obj *, void *),
  329. void *arg,
  330. pdf_obj **inherit_names,
  331. pdf_obj **inherit_vals)
  332. {
  333. pdf_walk_tree_imp(ctx, obj, kid_name, arrive, leave, arg, inherit_names, inherit_vals, NULL);
  334. }