pdf-run.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694
  1. // Copyright (C) 2004-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include "pdf-annot-imp.h"
  24. static void
  25. pdf_run_annot_with_usage(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_annot *annot, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie)
  26. {
  27. fz_matrix page_ctm;
  28. fz_rect mediabox;
  29. pdf_processor *proc = NULL;
  30. fz_default_colorspaces *default_cs = NULL;
  31. int flags;
  32. int resources_pushed = 0;
  33. int struct_parent_num;
  34. pdf_obj *struct_parent;
  35. fz_var(proc);
  36. fz_var(default_cs);
  37. fz_var(resources_pushed);
  38. if (cookie && page->super.incomplete)
  39. cookie->incomplete = 1;
  40. pdf_annot_push_local_xref(ctx, annot);
  41. /* Widgets only get displayed if they have both a T and a TF flag,
  42. * apparently */
  43. if (pdf_name_eq(ctx, pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)), PDF_NAME(Widget)))
  44. {
  45. pdf_obj *ft = pdf_dict_get_inheritable(ctx, annot->obj, PDF_NAME(FT));
  46. pdf_obj *t = pdf_dict_get_inheritable(ctx, annot->obj, PDF_NAME(T));
  47. if (ft == NULL || t == NULL)
  48. {
  49. pdf_annot_pop_local_xref(ctx, annot);
  50. return;
  51. }
  52. }
  53. fz_try(ctx)
  54. {
  55. default_cs = pdf_load_default_colorspaces(ctx, doc, page);
  56. if (default_cs)
  57. fz_set_default_colorspaces(ctx, dev, default_cs);
  58. pdf_page_transform(ctx, page, &mediabox, &page_ctm);
  59. flags = pdf_dict_get_int(ctx, annot->obj, PDF_NAME(F));
  60. if (flags & PDF_ANNOT_IS_NO_ROTATE)
  61. {
  62. int rotate = pdf_dict_get_inheritable_int(ctx, page->obj, PDF_NAME(Rotate));
  63. fz_rect rect = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
  64. fz_point tp = fz_transform_point_xy(rect.x0, rect.y1, page_ctm);
  65. page_ctm = fz_concat(page_ctm, fz_translate(-tp.x, -tp.y));
  66. page_ctm = fz_concat(page_ctm, fz_rotate(-rotate));
  67. page_ctm = fz_concat(page_ctm, fz_translate(tp.x, tp.y));
  68. }
  69. ctm = fz_concat(page_ctm, ctm);
  70. struct_parent = pdf_dict_getl(ctx, page->obj, PDF_NAME(StructParent), NULL);
  71. struct_parent_num = pdf_to_int_default(ctx, struct_parent, -1);
  72. proc = pdf_new_run_processor(ctx, page->doc, dev, ctm, struct_parent_num, usage, NULL, default_cs, cookie, NULL, NULL);
  73. pdf_processor_push_resources(ctx, proc, pdf_page_resources(ctx, annot->page));
  74. resources_pushed = 1;
  75. pdf_process_annot(ctx, proc, annot, cookie);
  76. pdf_close_processor(ctx, proc);
  77. }
  78. fz_always(ctx)
  79. {
  80. if (resources_pushed)
  81. pdf_processor_pop_resources(ctx, proc);
  82. pdf_drop_processor(ctx, proc);
  83. fz_drop_default_colorspaces(ctx, default_cs);
  84. pdf_annot_pop_local_xref(ctx, annot);
  85. }
  86. fz_catch(ctx)
  87. fz_rethrow(ctx);
  88. }
  89. static fz_rect pdf_page_cropbox(fz_context *ctx, pdf_page *page)
  90. {
  91. pdf_obj *obj = pdf_dict_get_inheritable(ctx, page->obj, PDF_NAME(CropBox));
  92. if (!obj)
  93. obj = pdf_dict_get_inheritable(ctx, page->obj, PDF_NAME(MediaBox));
  94. return pdf_to_rect(ctx, obj);
  95. }
  96. static fz_rect pdf_page_mediabox(fz_context *ctx, pdf_page *page)
  97. {
  98. return pdf_dict_get_inheritable_rect(ctx, page->obj, PDF_NAME(MediaBox));
  99. }
  100. static void
  101. pdf_run_page_contents_with_usage_imp(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie)
  102. {
  103. fz_matrix page_ctm;
  104. pdf_obj *resources;
  105. pdf_obj *contents;
  106. fz_rect fitzbox;
  107. fz_rect mediabox, cropbox;
  108. pdf_processor *proc = NULL;
  109. fz_default_colorspaces *default_cs = NULL;
  110. fz_colorspace *colorspace = NULL;
  111. fz_path *path = NULL;
  112. int struct_parent_num;
  113. pdf_obj *struct_parent;
  114. fz_var(proc);
  115. fz_var(colorspace);
  116. fz_var(default_cs);
  117. fz_var(path);
  118. if (cookie && page->super.incomplete)
  119. cookie->incomplete = 1;
  120. fz_try(ctx)
  121. {
  122. default_cs = pdf_load_default_colorspaces(ctx, doc, page);
  123. if (default_cs)
  124. fz_set_default_colorspaces(ctx, dev, default_cs);
  125. pdf_page_transform(ctx, page, &fitzbox, &page_ctm);
  126. ctm = fz_concat(page_ctm, ctm);
  127. fitzbox = fz_transform_rect(fitzbox, ctm);
  128. resources = pdf_page_resources(ctx, page);
  129. contents = pdf_page_contents(ctx, page);
  130. mediabox = pdf_page_mediabox(ctx, page);
  131. cropbox = pdf_page_cropbox(ctx, page);
  132. if (page->transparency)
  133. {
  134. pdf_obj *group = pdf_page_group(ctx, page);
  135. if (group)
  136. {
  137. pdf_obj *cs = pdf_dict_get(ctx, group, PDF_NAME(CS));
  138. if (cs)
  139. {
  140. fz_try(ctx)
  141. colorspace = pdf_load_colorspace(ctx, cs);
  142. fz_catch(ctx)
  143. {
  144. fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
  145. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  146. fz_report_error(ctx);
  147. fz_warn(ctx, "Ignoring Page blending colorspace.");
  148. }
  149. if (!fz_is_valid_blend_colorspace(ctx, colorspace))
  150. {
  151. fz_warn(ctx, "Ignoring invalid Page blending colorspace: %s.", colorspace->name);
  152. fz_drop_colorspace(ctx, colorspace);
  153. colorspace = NULL;
  154. }
  155. }
  156. }
  157. else
  158. colorspace = fz_keep_colorspace(ctx, fz_default_output_intent(ctx, default_cs));
  159. fz_begin_group(ctx, dev, fitzbox, colorspace, 1, 0, 0, 1);
  160. }
  161. struct_parent = pdf_dict_get(ctx, page->obj, PDF_NAME(StructParents));
  162. struct_parent_num = pdf_to_int_default(ctx, struct_parent, -1);
  163. /* Clip content to CropBox if it is smaller than the MediaBox */
  164. if (cropbox.x0 > mediabox.x0 || cropbox.x1 < mediabox.x1 || cropbox.y0 > mediabox.y0 || cropbox.y1 < mediabox.y1)
  165. {
  166. path = fz_new_path(ctx);
  167. fz_rectto(ctx, path, cropbox.x0, cropbox.y0, cropbox.x1, cropbox.y1);
  168. fz_clip_path(ctx, dev, path, 1, ctm, fz_infinite_rect);
  169. }
  170. proc = pdf_new_run_processor(ctx, page->doc, dev, ctm, struct_parent_num, usage, NULL, default_cs, cookie, NULL, NULL);
  171. pdf_process_contents(ctx, proc, doc, resources, contents, cookie, NULL);
  172. pdf_close_processor(ctx, proc);
  173. if (cropbox.x0 > mediabox.x0 || cropbox.x1 < mediabox.x1 || cropbox.y0 > mediabox.y0 || cropbox.y1 < mediabox.y1)
  174. {
  175. fz_pop_clip(ctx, dev);
  176. }
  177. if (page->transparency)
  178. {
  179. fz_end_group(ctx, dev);
  180. }
  181. }
  182. fz_always(ctx)
  183. {
  184. fz_drop_path(ctx, path);
  185. pdf_drop_processor(ctx, proc);
  186. fz_drop_colorspace(ctx, colorspace);
  187. fz_drop_default_colorspaces(ctx, default_cs);
  188. }
  189. fz_catch(ctx)
  190. {
  191. fz_rethrow(ctx);
  192. }
  193. }
  194. void pdf_run_page_contents_with_usage(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie)
  195. {
  196. pdf_document *doc = page->doc;
  197. int nocache;
  198. nocache = !!(dev->hints & FZ_NO_CACHE);
  199. if (nocache)
  200. pdf_mark_xref(ctx, doc);
  201. fz_try(ctx)
  202. {
  203. pdf_run_page_contents_with_usage_imp(ctx, doc, page, dev, ctm, usage, cookie);
  204. }
  205. fz_always(ctx)
  206. {
  207. if (nocache)
  208. pdf_clear_xref_to_mark(ctx, doc);
  209. }
  210. fz_catch(ctx)
  211. {
  212. fz_rethrow(ctx);
  213. }
  214. }
  215. void pdf_run_page_contents(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, fz_cookie *cookie)
  216. {
  217. pdf_run_page_contents_with_usage(ctx, page, dev, ctm, "View", cookie);
  218. }
  219. void pdf_run_annot(fz_context *ctx, pdf_annot *annot, fz_device *dev, fz_matrix ctm, fz_cookie *cookie)
  220. {
  221. pdf_page *page = annot->page;
  222. pdf_document *doc;
  223. int nocache;
  224. if (!page)
  225. fz_throw(ctx, FZ_ERROR_ARGUMENT, "annotation not bound to any page");
  226. doc = page->doc;
  227. nocache = !!(dev->hints & FZ_NO_CACHE);
  228. if (nocache)
  229. pdf_mark_xref(ctx, doc);
  230. fz_try(ctx)
  231. {
  232. pdf_run_annot_with_usage(ctx, doc, page, annot, dev, ctm, "View", cookie);
  233. }
  234. fz_always(ctx)
  235. {
  236. if (nocache)
  237. pdf_clear_xref_to_mark(ctx, doc);
  238. }
  239. fz_catch(ctx)
  240. {
  241. fz_rethrow(ctx);
  242. }
  243. }
  244. static void
  245. pdf_run_page_widgets_with_usage_imp(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie)
  246. {
  247. pdf_annot *widget;
  248. if (cookie && cookie->progress_max != (size_t)-1)
  249. {
  250. int count = 1;
  251. for (widget = page->widgets; widget; widget = widget->next)
  252. count++;
  253. cookie->progress_max += count;
  254. }
  255. for (widget = page->widgets; widget; widget = widget->next)
  256. {
  257. /* Check the cookie for aborting */
  258. if (cookie)
  259. {
  260. if (cookie->abort)
  261. break;
  262. cookie->progress++;
  263. }
  264. pdf_run_annot_with_usage(ctx, doc, page, widget, dev, ctm, usage, cookie);
  265. }
  266. }
  267. static void
  268. pdf_run_page_annots_with_usage_imp(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie)
  269. {
  270. pdf_annot *annot;
  271. if (cookie && cookie->progress_max != (size_t)-1)
  272. {
  273. int count = 1;
  274. for (annot = page->annots; annot; annot = annot->next)
  275. count++;
  276. cookie->progress_max += count;
  277. }
  278. for (annot = page->annots; annot; annot = annot->next)
  279. {
  280. /* Check the cookie for aborting */
  281. if (cookie)
  282. {
  283. if (cookie->abort)
  284. break;
  285. cookie->progress++;
  286. }
  287. pdf_run_annot_with_usage(ctx, doc, page, annot, dev, ctm, usage, cookie);
  288. }
  289. }
  290. void pdf_run_page_annots_with_usage(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie)
  291. {
  292. pdf_document *doc = page->doc;
  293. int nocache;
  294. nocache = !!(dev->hints & FZ_NO_CACHE);
  295. if (nocache)
  296. pdf_mark_xref(ctx, doc);
  297. fz_try(ctx)
  298. {
  299. pdf_run_page_annots_with_usage_imp(ctx, doc, page, dev, ctm, usage, cookie);
  300. }
  301. fz_always(ctx)
  302. {
  303. if (nocache)
  304. pdf_clear_xref_to_mark(ctx, doc);
  305. }
  306. fz_catch(ctx)
  307. {
  308. fz_rethrow(ctx);
  309. }
  310. }
  311. void pdf_run_page_annots(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, fz_cookie *cookie)
  312. {
  313. pdf_run_page_annots_with_usage(ctx, page, dev, ctm, "View", cookie);
  314. }
  315. void pdf_run_page_widgets_with_usage(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie)
  316. {
  317. pdf_document *doc = page->doc;
  318. int nocache;
  319. nocache = !!(dev->hints & FZ_NO_CACHE);
  320. if (nocache)
  321. pdf_mark_xref(ctx, doc);
  322. fz_try(ctx)
  323. {
  324. pdf_run_page_widgets_with_usage_imp(ctx, doc, page, dev, ctm, usage, cookie);
  325. }
  326. fz_always(ctx)
  327. {
  328. if (nocache)
  329. pdf_clear_xref_to_mark(ctx, doc);
  330. }
  331. fz_catch(ctx)
  332. {
  333. fz_rethrow(ctx);
  334. }
  335. }
  336. void pdf_run_page_widgets(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, fz_cookie *cookie)
  337. {
  338. pdf_run_page_widgets_with_usage(ctx, page, dev, ctm, "View", cookie);
  339. }
  340. void
  341. pdf_run_page_with_usage(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, const char *usage, fz_cookie *cookie)
  342. {
  343. pdf_document *doc = page->doc;
  344. int nocache = !!(dev->hints & FZ_NO_CACHE);
  345. if (nocache)
  346. pdf_mark_xref(ctx, doc);
  347. fz_try(ctx)
  348. {
  349. pdf_run_page_contents_with_usage_imp(ctx, doc, page, dev, ctm, usage, cookie);
  350. pdf_run_page_annots_with_usage_imp(ctx, doc, page, dev, ctm, usage, cookie);
  351. pdf_run_page_widgets_with_usage_imp(ctx, doc, page, dev, ctm, usage, cookie);
  352. }
  353. fz_always(ctx)
  354. {
  355. if (nocache)
  356. pdf_clear_xref_to_mark(ctx, doc);
  357. }
  358. fz_catch(ctx)
  359. {
  360. fz_rethrow(ctx);
  361. }
  362. }
  363. void
  364. pdf_run_page(fz_context *ctx, pdf_page *page, fz_device *dev, fz_matrix ctm, fz_cookie *cookie)
  365. {
  366. pdf_run_page_with_usage(ctx, page, dev, ctm, "View", cookie);
  367. }
  368. void
  369. pdf_run_glyph(fz_context *ctx, pdf_document *doc, pdf_obj *resources, fz_buffer *contents, fz_device *dev, fz_matrix ctm, void *gstate, fz_default_colorspaces *default_cs, void *fill_gstate, void *stroke_gstate)
  370. {
  371. pdf_processor *proc;
  372. proc = pdf_new_run_processor(ctx, doc, dev, ctm, -1, "View", gstate, default_cs, NULL, fill_gstate, stroke_gstate);
  373. fz_try(ctx)
  374. {
  375. pdf_process_glyph(ctx, proc, doc, resources, contents);
  376. pdf_close_processor(ctx, proc);
  377. }
  378. fz_always(ctx)
  379. pdf_drop_processor(ctx, proc);
  380. fz_catch(ctx)
  381. fz_rethrow(ctx);
  382. }
  383. fz_structure
  384. pdf_structure_type(fz_context *ctx, pdf_obj *role_map, pdf_obj *tag)
  385. {
  386. /* Perform Structure mapping to go from tag to standard. */
  387. if (role_map)
  388. {
  389. pdf_obj *o = pdf_dict_get(ctx, role_map, tag);
  390. if (o)
  391. tag = o;
  392. }
  393. if (pdf_name_eq(ctx, tag, PDF_NAME(Document)))
  394. return FZ_STRUCTURE_DOCUMENT;
  395. if (pdf_name_eq(ctx, tag, PDF_NAME(Part)))
  396. return FZ_STRUCTURE_PART;
  397. if (pdf_name_eq(ctx, tag, PDF_NAME(Art)))
  398. return FZ_STRUCTURE_ART;
  399. if (pdf_name_eq(ctx, tag, PDF_NAME(Sect)))
  400. return FZ_STRUCTURE_SECT;
  401. if (pdf_name_eq(ctx, tag, PDF_NAME(Div)))
  402. return FZ_STRUCTURE_DIV;
  403. if (pdf_name_eq(ctx, tag, PDF_NAME(BlockQuote)))
  404. return FZ_STRUCTURE_BLOCKQUOTE;
  405. if (pdf_name_eq(ctx, tag, PDF_NAME(Caption)))
  406. return FZ_STRUCTURE_CAPTION;
  407. if (pdf_name_eq(ctx, tag, PDF_NAME(TOC)))
  408. return FZ_STRUCTURE_TOC;
  409. if (pdf_name_eq(ctx, tag, PDF_NAME(TOCI)))
  410. return FZ_STRUCTURE_TOCI;
  411. if (pdf_name_eq(ctx, tag, PDF_NAME(Index)))
  412. return FZ_STRUCTURE_INDEX;
  413. if (pdf_name_eq(ctx, tag, PDF_NAME(NonStruct)))
  414. return FZ_STRUCTURE_NONSTRUCT;
  415. if (pdf_name_eq(ctx, tag, PDF_NAME(Private)))
  416. return FZ_STRUCTURE_PRIVATE;
  417. /* Grouping elements (PDF 2.0 - Table 364) */
  418. if (pdf_name_eq(ctx, tag, PDF_NAME(DocumentFragment)))
  419. return FZ_STRUCTURE_DOCUMENTFRAGMENT;
  420. /* Grouping elements (PDF 2.0 - Table 365) */
  421. if (pdf_name_eq(ctx, tag, PDF_NAME(Aside)))
  422. return FZ_STRUCTURE_ASIDE;
  423. /* Grouping elements (PDF 2.0 - Table 366) */
  424. if (pdf_name_eq(ctx, tag, PDF_NAME(Title)))
  425. return FZ_STRUCTURE_TITLE;
  426. if (pdf_name_eq(ctx, tag, PDF_NAME(FENote)))
  427. return FZ_STRUCTURE_FENOTE;
  428. /* Grouping elements (PDF 2.0 - Table 367) */
  429. if (pdf_name_eq(ctx, tag, PDF_NAME(Sub)))
  430. return FZ_STRUCTURE_SUB;
  431. /* Paragraphlike elements (PDF 1.7 - Table 10.21) */
  432. if (pdf_name_eq(ctx, tag, PDF_NAME(P)))
  433. return FZ_STRUCTURE_P;
  434. if (pdf_name_eq(ctx, tag, PDF_NAME(H)))
  435. return FZ_STRUCTURE_H;
  436. if (pdf_name_eq(ctx, tag, PDF_NAME(H1)))
  437. return FZ_STRUCTURE_H1;
  438. if (pdf_name_eq(ctx, tag, PDF_NAME(H2)))
  439. return FZ_STRUCTURE_H2;
  440. if (pdf_name_eq(ctx, tag, PDF_NAME(H3)))
  441. return FZ_STRUCTURE_H3;
  442. if (pdf_name_eq(ctx, tag, PDF_NAME(H4)))
  443. return FZ_STRUCTURE_H4;
  444. if (pdf_name_eq(ctx, tag, PDF_NAME(H5)))
  445. return FZ_STRUCTURE_H5;
  446. if (pdf_name_eq(ctx, tag, PDF_NAME(H6)))
  447. return FZ_STRUCTURE_H6;
  448. /* List elements (PDF 1.7 - Table 10.23) */
  449. if (pdf_name_eq(ctx, tag, PDF_NAME(L)))
  450. return FZ_STRUCTURE_LIST;
  451. if (pdf_name_eq(ctx, tag, PDF_NAME(LI)))
  452. return FZ_STRUCTURE_LISTITEM;
  453. if (pdf_name_eq(ctx, tag, PDF_NAME(Lbl)))
  454. return FZ_STRUCTURE_LABEL;
  455. if (pdf_name_eq(ctx, tag, PDF_NAME(LBody)))
  456. return FZ_STRUCTURE_LISTBODY;
  457. /* Table elements (PDF 1.7 - Table 10.24) */
  458. if (pdf_name_eq(ctx, tag, PDF_NAME(Table)))
  459. return FZ_STRUCTURE_TABLE;
  460. if (pdf_name_eq(ctx, tag, PDF_NAME(TR)))
  461. return FZ_STRUCTURE_TR;
  462. if (pdf_name_eq(ctx, tag, PDF_NAME(TH)))
  463. return FZ_STRUCTURE_TH;
  464. if (pdf_name_eq(ctx, tag, PDF_NAME(TD)))
  465. return FZ_STRUCTURE_TD;
  466. if (pdf_name_eq(ctx, tag, PDF_NAME(THead)))
  467. return FZ_STRUCTURE_THEAD;
  468. if (pdf_name_eq(ctx, tag, PDF_NAME(TBody)))
  469. return FZ_STRUCTURE_TBODY;
  470. if (pdf_name_eq(ctx, tag, PDF_NAME(TFoot)))
  471. return FZ_STRUCTURE_TFOOT;
  472. /* Inline elements (PDF 1.7 - Table 10.25) */
  473. if (pdf_name_eq(ctx, tag, PDF_NAME(Span)))
  474. return FZ_STRUCTURE_SPAN;
  475. if (pdf_name_eq(ctx, tag, PDF_NAME(Quote)))
  476. return FZ_STRUCTURE_QUOTE;
  477. if (pdf_name_eq(ctx, tag, PDF_NAME(Note)))
  478. return FZ_STRUCTURE_NOTE;
  479. if (pdf_name_eq(ctx, tag, PDF_NAME(Reference)))
  480. return FZ_STRUCTURE_REFERENCE;
  481. if (pdf_name_eq(ctx, tag, PDF_NAME(BibEntry)))
  482. return FZ_STRUCTURE_BIBENTRY;
  483. if (pdf_name_eq(ctx, tag, PDF_NAME(Code)))
  484. return FZ_STRUCTURE_CODE;
  485. if (pdf_name_eq(ctx, tag, PDF_NAME(Link)))
  486. return FZ_STRUCTURE_LINK;
  487. if (pdf_name_eq(ctx, tag, PDF_NAME(Annot)))
  488. return FZ_STRUCTURE_ANNOT;
  489. /* Inline elements (PDF 2.0 - Table 368) */
  490. if (pdf_name_eq(ctx, tag, PDF_NAME(Em)))
  491. return FZ_STRUCTURE_EM;
  492. if (pdf_name_eq(ctx, tag, PDF_NAME(Strong)))
  493. return FZ_STRUCTURE_STRONG;
  494. /* Ruby inline element (PDF 1.7 - Table 10.26) */
  495. if (pdf_name_eq(ctx, tag, PDF_NAME(Ruby)))
  496. return FZ_STRUCTURE_RUBY;
  497. if (pdf_name_eq(ctx, tag, PDF_NAME(RB)))
  498. return FZ_STRUCTURE_RB;
  499. if (pdf_name_eq(ctx, tag, PDF_NAME(RT)))
  500. return FZ_STRUCTURE_RT;
  501. if (pdf_name_eq(ctx, tag, PDF_NAME(RP)))
  502. return FZ_STRUCTURE_RP;
  503. /* Warichu inline element (PDF 1.7 - Table 10.26) */
  504. if (pdf_name_eq(ctx, tag, PDF_NAME(Warichu)))
  505. return FZ_STRUCTURE_WARICHU;
  506. if (pdf_name_eq(ctx, tag, PDF_NAME(WT)))
  507. return FZ_STRUCTURE_WT;
  508. if (pdf_name_eq(ctx, tag, PDF_NAME(WP)))
  509. return FZ_STRUCTURE_WP;
  510. /* Illustration elements (PDF 1.7 - Table 10.27) */
  511. if (pdf_name_eq(ctx, tag, PDF_NAME(Figure)))
  512. return FZ_STRUCTURE_FIGURE;
  513. if (pdf_name_eq(ctx, tag, PDF_NAME(Formula)))
  514. return FZ_STRUCTURE_FORMULA;
  515. if (pdf_name_eq(ctx, tag, PDF_NAME(Form)))
  516. return FZ_STRUCTURE_FORM;
  517. /* Artifact structure type (PDF 2.0 - Table 375) */
  518. if (pdf_name_eq(ctx, tag, PDF_NAME(Artifact)))
  519. return FZ_STRUCTURE_ARTIFACT;
  520. return FZ_STRUCTURE_INVALID;
  521. }
  522. /* The recursive descent of the structure tree uses an fz_try at each level.
  523. * At the risk of creating a foot cannon... "no one will need more than ~64
  524. * levels of structure tree". */
  525. static void
  526. run_ds(fz_context *ctx, fz_device *dev, pdf_obj *role_map, pdf_obj *obj, int idx, fz_cookie *cookie)
  527. {
  528. pdf_obj *k;
  529. int i, n;
  530. /* Check the cookie for aborting */
  531. if (cookie)
  532. {
  533. if (cookie->abort)
  534. return;
  535. cookie->progress++;
  536. }
  537. if (pdf_is_number(ctx, obj))
  538. {
  539. /* A marked-content identifier denoting a marked content sequence. WHAT? */
  540. return;
  541. }
  542. if (pdf_mark_obj(ctx, obj))
  543. return;
  544. fz_try(ctx)
  545. {
  546. fz_structure standard;
  547. pdf_obj *tag = pdf_dict_get(ctx, obj, PDF_NAME(S));
  548. if (!tag)
  549. break;
  550. standard = pdf_structure_type(ctx, role_map, tag);
  551. if (standard == FZ_STRUCTURE_INVALID)
  552. break;
  553. fz_begin_structure(ctx, dev, standard, pdf_to_name(ctx, tag), idx);
  554. k = pdf_dict_get(ctx, obj, PDF_NAME(K));
  555. if (k)
  556. {
  557. n = pdf_array_len(ctx, k);
  558. if (n == 0)
  559. run_ds(ctx, dev, role_map, k, 0, cookie);
  560. else
  561. {
  562. for (i = 0; i < n; i++)
  563. run_ds(ctx, dev, role_map, pdf_array_get(ctx, k, i), i, cookie);
  564. }
  565. }
  566. fz_end_structure(ctx, dev);
  567. }
  568. fz_always(ctx)
  569. pdf_unmark_obj(ctx, obj);
  570. fz_catch(ctx)
  571. fz_rethrow(ctx);
  572. }
  573. void pdf_run_document_structure(fz_context *ctx, pdf_document *doc, fz_device *dev, fz_cookie *cookie)
  574. {
  575. int nocache;
  576. int marked = 0;
  577. pdf_obj *st, *rm, *k;
  578. fz_var(marked);
  579. nocache = !!(dev->hints & FZ_NO_CACHE);
  580. if (nocache)
  581. pdf_mark_xref(ctx, doc);
  582. fz_try(ctx)
  583. {
  584. st = pdf_dict_get(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)), PDF_NAME(StructTreeRoot));
  585. rm = pdf_dict_get(ctx, st, PDF_NAME(RoleMap));
  586. if (pdf_mark_obj(ctx, st))
  587. break;
  588. marked = 1;
  589. k = pdf_dict_get(ctx, st, PDF_NAME(K));
  590. if (k)
  591. {
  592. int n = pdf_array_len(ctx, k);
  593. if (n == 0)
  594. run_ds(ctx, dev, rm, k, 0, cookie);
  595. else
  596. {
  597. int i;
  598. for (i = 0; i < n; i++)
  599. run_ds(ctx, dev, rm, pdf_array_get(ctx, k, i), i, cookie);
  600. }
  601. }
  602. }
  603. fz_always(ctx)
  604. {
  605. if (marked)
  606. pdf_unmark_obj(ctx, st);
  607. if (nocache)
  608. pdf_clear_xref_to_mark(ctx, doc);
  609. }
  610. fz_catch(ctx)
  611. fz_rethrow(ctx);
  612. }