pdf-label.c 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. // Copyright (C) 2004-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include "mupdf/pdf.h"
  24. #include <stdarg.h>
  25. #include <stdlib.h>
  26. #include <string.h>
  /* Forward declarations so the two structures can refer to each other by name. */
  typedef struct pdf_object_label_node pdf_object_label_node;
  typedef struct pdf_object_labels pdf_object_labels;

  /*
   * One entry in a per-object singly linked list of "who refers to me".
   *
   * A node stored in the list for object A records that the top-level
   * object 'num' contains a reference to A, and that the reference sits
   * at 'path' (for example "/Resources/Font/F1" or "/Kids/3") inside
   * object 'num'.
   */
  struct pdf_object_label_node
  {
      int num;                               /* object number of the referring object */
      char *path;                            /* location of the reference inside 'num' */
      struct pdf_object_label_node *next;    /* next referrer, or NULL */
  };
  35. struct pdf_object_labels
  36. {
  37. fz_pool *pool;
  38. int object_count;
  39. int root, info, encrypt;
  40. unsigned short *pages;
  41. char *seen;
  42. pdf_object_label_node **nodes;
  43. };
  44. static void
  45. add_object_label(fz_context *ctx, pdf_object_labels *g, char *path, int a, int b)
  46. {
  47. pdf_object_label_node *node, **root;
  48. node = fz_pool_alloc(ctx, g->pool, sizeof(pdf_object_label_node));
  49. node->path = fz_pool_strdup(ctx, g->pool, path);
  50. node->num = b;
  51. root = &g->nodes[a];
  52. node->next = *root;
  53. *root = node;
  54. }
  55. static void
  56. scan_object_label_rec(fz_context *ctx, pdf_object_labels *g, char *root_path, pdf_obj *obj, int top)
  57. {
  58. char path[100];
  59. int i, n;
  60. if (pdf_is_indirect(ctx, obj))
  61. ;
  62. else if (pdf_is_dict(ctx, obj))
  63. {
  64. n = pdf_dict_len(ctx, obj);
  65. for (i = 0; i < n; ++i)
  66. {
  67. pdf_obj *key = pdf_dict_get_key(ctx, obj, i);
  68. pdf_obj *val = pdf_dict_get_val(ctx, obj, i);
  69. if (val && key != PDF_NAME(Parent) && key != PDF_NAME(P) && key != PDF_NAME(Prev) && key != PDF_NAME(Last))
  70. {
  71. if (pdf_is_indirect(ctx, val))
  72. {
  73. fz_snprintf(path, sizeof path, "%s/%s", root_path, pdf_to_name(ctx, key));
  74. add_object_label(ctx, g, path, pdf_to_num(ctx, val), top);
  75. }
  76. else if (pdf_is_dict(ctx, val) || pdf_is_array(ctx, val))
  77. {
  78. fz_snprintf(path, sizeof path, "%s/%s", root_path, pdf_to_name(ctx, key));
  79. scan_object_label_rec(ctx, g, path, val, top);
  80. }
  81. }
  82. }
  83. }
  84. else if (pdf_is_array(ctx, obj))
  85. {
  86. n = pdf_array_len(ctx, obj);
  87. for (i = 0; i < n; ++i)
  88. {
  89. pdf_obj *val = pdf_array_get(ctx, obj, i);
  90. if (val)
  91. {
  92. if (pdf_is_indirect(ctx, val))
  93. {
  94. fz_snprintf(path, sizeof path, "%s/%d", root_path, i+1);
  95. add_object_label(ctx, g, path, pdf_to_num(ctx, val), top);
  96. }
  97. else if (pdf_is_dict(ctx, val) || pdf_is_array(ctx, val))
  98. {
  99. fz_snprintf(path, sizeof path, "%s/%d", root_path, i+1);
  100. scan_object_label_rec(ctx, g, path, val, top);
  101. }
  102. }
  103. }
  104. }
  105. }
  106. static void
  107. scan_object_label(fz_context *ctx, pdf_document *doc, pdf_object_labels *g, int num)
  108. {
  109. pdf_obj *obj = pdf_load_object(ctx, doc, num);
  110. fz_try(ctx)
  111. scan_object_label_rec(ctx, g, "", obj, num);
  112. fz_always(ctx)
  113. pdf_drop_obj(ctx, obj);
  114. fz_catch(ctx)
  115. fz_rethrow(ctx);
  116. }
  117. pdf_object_labels *
  118. pdf_load_object_labels(fz_context *ctx, pdf_document *doc)
  119. {
  120. pdf_object_labels *g = NULL;
  121. fz_pool *pool;
  122. int i, n, page_count;
  123. n = pdf_count_objects(ctx, doc);
  124. pool = fz_new_pool(ctx);
  125. fz_try(ctx)
  126. {
  127. g = fz_pool_alloc(ctx, pool, sizeof(pdf_object_labels));
  128. g->pool = pool;
  129. g->object_count = n;
  130. g->root = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)));
  131. g->info = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)));
  132. g->encrypt = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt)));
  133. g->seen = fz_pool_alloc(ctx, pool, n);
  134. g->nodes = fz_pool_alloc(ctx, pool, g->object_count * sizeof(pdf_object_label_node*));
  135. g->pages = fz_pool_alloc(ctx, pool, g->object_count * sizeof(unsigned short));
  136. page_count = pdf_count_pages(ctx, doc);
  137. for (i = 0; i < page_count; ++i)
  138. g->pages[pdf_to_num(ctx, pdf_lookup_page_obj(ctx, doc, i))] = i+1;
  139. for (i = 1; i < n; ++i)
  140. scan_object_label(ctx, doc, g, i);
  141. }
  142. fz_catch(ctx)
  143. {
  144. fz_drop_pool(ctx, pool);
  145. }
  146. return g;
  147. }
  148. void
  149. pdf_drop_object_labels(fz_context *ctx, pdf_object_labels *g)
  150. {
  151. if (g)
  152. fz_drop_pool(ctx, g->pool);
  153. }
  /*
   * Format a string and place it immediately in front of 'path'.
   *
   * The label is built right-to-left: 'path_buffer' is the start of the
   * buffer and 'path' points at the text accumulated so far, so the free
   * space is the gap between them. The formatted text is copied in
   * without a terminator, because the existing text already follows it.
   *
   * At least 3 bytes are always kept in reserve. If the formatted text
   * would eat into that reserve, "..." is written instead to mark the
   * label as truncated.
   *
   * Returns a pointer to the new start of the text.
   */
  static char *
  prepend(char *path_buffer, char *path, const char *fmt, ...)
  {
      char buf[256];
      size_t z, room;
      va_list args;

      va_start(args, fmt);
      z = fz_vsnprintf(buf, sizeof(buf), fmt, args);
      va_end(args);

      /*
       * Assuming snprintf-like semantics, the return value is the length
       * the text would have had without truncation. Never copy more than
       * buf actually holds.
       */
      if (z >= sizeof(buf))
          z = sizeof(buf) - 1;

      /* Compare sizes rather than forming a pointer that may lie outside the buffer. */
      room = (size_t)(path - path_buffer);

      /* We always want to leave ourselves at least 3 chars for
       * a future "..." */
      if (room >= z + 3)
      {
          path -= z;
          memcpy(path, buf, z);
          return path;
      }

      /*
       * The reserve should guarantee room for the marker; if a caller ever
       * breaks that, leave the text untouched rather than write in front
       * of the buffer.
       */
      if (room < 3)
          return path;

      /* Just put ... in now. */
      path -= 3;
      memcpy(path, "...", 3);
      return path;
  }
  178. static void
  179. find_paths(fz_context *ctx, pdf_object_labels *g, int here, char *path_buffer, char *leaf_path, pdf_label_object_fn *callback, void *arg)
  180. {
  181. pdf_object_label_node *node;
  182. int next;
  183. if (here == g->root)
  184. {
  185. prepend(path_buffer, leaf_path, "trailer/Root");
  186. callback(ctx, arg, prepend(path_buffer, leaf_path, "trailer/Root"));
  187. return;
  188. }
  189. if (here == g->info)
  190. {
  191. callback(ctx, arg, prepend(path_buffer, leaf_path, "trailer/Info"));
  192. return;
  193. }
  194. if (here == g->encrypt)
  195. {
  196. callback(ctx, arg, prepend(path_buffer, leaf_path, "trailer/Encrypt"));
  197. return;
  198. }
  199. if (g->pages[here])
  200. {
  201. callback(ctx, arg, prepend(path_buffer, leaf_path, "pages/%d", g->pages[here]));
  202. }
  203. for (node = g->nodes[here]; node; node = node->next)
  204. {
  205. next = node->num;
  206. if (next < 1 || next >= g->object_count)
  207. continue;
  208. if (g->seen[next])
  209. continue;
  210. if (g->pages[next])
  211. {
  212. callback(ctx, arg, prepend(path_buffer, leaf_path, "pages/%d%s", g->pages[next], node->path));
  213. }
  214. else
  215. {
  216. char *p = prepend(path_buffer, leaf_path, "%s", node->path);
  217. g->seen[next] = 1;
  218. // if we've run out of room in the path buffer, send this and stop.
  219. if (p[0] == '.' && p[1] == '.' && p[2] == '.')
  220. callback(ctx, arg, p);
  221. else
  222. find_paths(ctx, g, next, path_buffer, p, callback, arg);
  223. g->seen[next] = 0;
  224. }
  225. }
  226. }
  227. void
  228. pdf_label_object(fz_context *ctx, pdf_object_labels *g, int num, pdf_label_object_fn *callback, void *arg)
  229. {
  230. int i;
  231. char path[4096];
  232. if (num < 1 || num >= g->object_count)
  233. return;
  234. for (i = 1; i < g->object_count; ++i)
  235. g->seen[i] = 0;
  236. path[sizeof(path)-1] = 0;
  237. find_paths(ctx, g, num, path, &path[sizeof(path)-1], callback, arg);
  238. }