xps-doc.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622
  1. // Copyright (C) 2004-2024 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include "xps-imp.h"
  24. #include <string.h>
  25. #include <stdlib.h>
  /*
   * Relationship type URIs looked for in package relationship parts.
   * The first group uses the schemas.microsoft.com XPS namespace, the
   * second group the schemas.openxps.org (OpenXPS) namespace.
   */
  #define REL_START_PART "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation"
  #define REL_DOC_STRUCTURE "http://schemas.microsoft.com/xps/2005/06/documentstructure"
  #define REL_REQUIRED_RESOURCE "http://schemas.microsoft.com/xps/2005/06/required-resource"
  #define REL_REQUIRED_RESOURCE_RECURSIVE "http://schemas.microsoft.com/xps/2005/06/required-resource#recursive"

  #define REL_START_PART_OXPS "http://schemas.openxps.org/oxps/v1.0/fixedrepresentation"
  #define REL_DOC_STRUCTURE_OXPS "http://schemas.openxps.org/oxps/v1.0/documentstructure"
  38. static void
  39. xps_rels_for_part(fz_context *ctx, xps_document *doc, char *buf, char *name, int buflen)
  40. {
  41. char *p, *basename;
  42. p = strrchr(name, '/');
  43. basename = p ? p + 1 : name;
  44. fz_strlcpy(buf, name, buflen);
  45. p = strrchr(buf, '/');
  46. if (p) *p = 0;
  47. fz_strlcat(buf, "/_rels/", buflen);
  48. fz_strlcat(buf, basename, buflen);
  49. fz_strlcat(buf, ".rels", buflen);
  50. }
  51. /*
  52. * The FixedDocumentSequence and FixedDocument parts determine
  53. * which parts correspond to actual pages, and the page order.
  54. */
  55. static void
  56. xps_add_fixed_document(fz_context *ctx, xps_document *doc, char *name)
  57. {
  58. xps_fixdoc *fixdoc;
  59. /* Check for duplicates first */
  60. for (fixdoc = doc->first_fixdoc; fixdoc; fixdoc = fixdoc->next)
  61. if (!strcmp(fixdoc->name, name))
  62. return;
  63. fixdoc = fz_malloc_struct(ctx, xps_fixdoc);
  64. fz_try(ctx)
  65. {
  66. fixdoc->name = fz_strdup(ctx, name);
  67. fixdoc->outline = NULL;
  68. fixdoc->next = NULL;
  69. }
  70. fz_catch(ctx)
  71. {
  72. fz_free(ctx, fixdoc);
  73. fz_rethrow(ctx);
  74. }
  75. if (!doc->first_fixdoc)
  76. {
  77. doc->first_fixdoc = fixdoc;
  78. doc->last_fixdoc = fixdoc;
  79. }
  80. else
  81. {
  82. doc->last_fixdoc->next = fixdoc;
  83. doc->last_fixdoc = fixdoc;
  84. }
  85. }
  86. static void
  87. xps_add_fixed_page(fz_context *ctx, xps_document *doc, char *name, int width, int height)
  88. {
  89. xps_fixpage *page;
  90. /* Check for duplicates first */
  91. for (page = doc->first_page; page; page = page->next)
  92. if (!strcmp(page->name, name))
  93. return;
  94. page = fz_malloc_struct(ctx, xps_fixpage);
  95. page->name = NULL;
  96. fz_try(ctx)
  97. {
  98. page->name = fz_strdup(ctx, name);
  99. page->number = doc->page_count++;
  100. page->width = width;
  101. page->height = height;
  102. page->next = NULL;
  103. }
  104. fz_catch(ctx)
  105. {
  106. fz_free(ctx, page->name);
  107. fz_free(ctx, page);
  108. fz_rethrow(ctx);
  109. }
  110. if (!doc->first_page)
  111. {
  112. doc->first_page = page;
  113. doc->last_page = page;
  114. }
  115. else
  116. {
  117. doc->last_page->next = page;
  118. doc->last_page = page;
  119. }
  120. }
  121. static void
  122. xps_add_link_target(fz_context *ctx, xps_document *doc, char *name)
  123. {
  124. xps_fixpage *page = doc->last_page;
  125. xps_target *target;
  126. if (page == NULL)
  127. {
  128. fz_warn(ctx, "Dropping link target with no page");
  129. return;
  130. }
  131. target = fz_malloc_struct(ctx, xps_target);
  132. fz_try(ctx)
  133. {
  134. target->name = fz_strdup(ctx, name);
  135. target->page = page->number;
  136. target->next = doc->target;
  137. }
  138. fz_catch(ctx)
  139. {
  140. fz_free(ctx, target);
  141. fz_rethrow(ctx);
  142. }
  143. doc->target = target;
  144. }
  145. fz_link_dest
  146. xps_lookup_link_target(fz_context *ctx, fz_document *doc_, const char *target_uri)
  147. {
  148. xps_document *doc = (xps_document*)doc_;
  149. xps_target *target;
  150. const char *needle = strrchr(target_uri, '#');
  151. needle = needle ? needle + 1 : target_uri;
  152. for (target = doc->target; target; target = target->next)
  153. if (!strcmp(target->name, needle))
  154. return fz_make_link_dest_xyz(0, target->page, 0, 0, 0);
  155. return fz_make_link_dest_xyz(0, fz_atoi(needle) - 1, 0, 0, 0);
  156. }
  157. static void
  158. xps_drop_link_targets(fz_context *ctx, xps_document *doc)
  159. {
  160. xps_target *target = doc->target, *next;
  161. while (target)
  162. {
  163. next = target->next;
  164. fz_free(ctx, target->name);
  165. fz_free(ctx, target);
  166. target = next;
  167. }
  168. }
  169. static void
  170. xps_drop_fixed_pages(fz_context *ctx, xps_document *doc)
  171. {
  172. xps_fixpage *page = doc->first_page;
  173. while (page)
  174. {
  175. xps_fixpage *next = page->next;
  176. fz_free(ctx, page->name);
  177. fz_free(ctx, page);
  178. page = next;
  179. }
  180. doc->first_page = NULL;
  181. doc->last_page = NULL;
  182. }
  183. static void
  184. xps_drop_fixed_documents(fz_context *ctx, xps_document *doc)
  185. {
  186. xps_fixdoc *fixdoc = doc->first_fixdoc;
  187. while (fixdoc)
  188. {
  189. xps_fixdoc *next = fixdoc->next;
  190. fz_free(ctx, fixdoc->name);
  191. fz_free(ctx, fixdoc->outline);
  192. fz_free(ctx, fixdoc);
  193. fixdoc = next;
  194. }
  195. doc->first_fixdoc = NULL;
  196. doc->last_fixdoc = NULL;
  197. }
  198. void
  199. xps_drop_page_list(fz_context *ctx, xps_document *doc)
  200. {
  201. xps_drop_fixed_documents(ctx, doc);
  202. xps_drop_fixed_pages(ctx, doc);
  203. xps_drop_link_targets(ctx, doc);
  204. }
  205. /*
  206. * Parse the fixed document sequence structure and _rels/.rels to find the start part.
  207. */
  208. static void
  209. xps_parse_metadata_imp(fz_context *ctx, xps_document *doc, fz_xml *item, xps_fixdoc *fixdoc)
  210. {
  211. while (item)
  212. {
  213. if (fz_xml_is_tag(item, "Relationship"))
  214. {
  215. char *target = fz_xml_att(item, "Target");
  216. char *type = fz_xml_att(item, "Type");
  217. if (target && type)
  218. {
  219. char tgtbuf[1024];
  220. xps_resolve_url(ctx, doc, tgtbuf, doc->base_uri, target, sizeof tgtbuf);
  221. if (!strcmp(type, REL_START_PART) || !strcmp(type, REL_START_PART_OXPS))
  222. {
  223. fz_free(ctx, doc->start_part);
  224. doc->start_part = fz_strdup(ctx, tgtbuf);
  225. }
  226. if ((!strcmp(type, REL_DOC_STRUCTURE) || !strcmp(type, REL_DOC_STRUCTURE_OXPS)) && fixdoc)
  227. fixdoc->outline = fz_strdup(ctx, tgtbuf);
  228. if (!fz_xml_att(item, "Id"))
  229. fz_warn(ctx, "missing relationship id for %s", target);
  230. }
  231. }
  232. if (fz_xml_is_tag(item, "DocumentReference"))
  233. {
  234. char *source = fz_xml_att(item, "Source");
  235. if (source)
  236. {
  237. char srcbuf[1024];
  238. xps_resolve_url(ctx, doc, srcbuf, doc->base_uri, source, sizeof srcbuf);
  239. xps_add_fixed_document(ctx, doc, srcbuf);
  240. }
  241. }
  242. if (fz_xml_is_tag(item, "PageContent"))
  243. {
  244. char *source = fz_xml_att(item, "Source");
  245. char *width_att = fz_xml_att(item, "Width");
  246. char *height_att = fz_xml_att(item, "Height");
  247. int width = width_att ? atoi(width_att) : 0;
  248. int height = height_att ? atoi(height_att) : 0;
  249. if (source)
  250. {
  251. char srcbuf[1024];
  252. xps_resolve_url(ctx, doc, srcbuf, doc->base_uri, source, sizeof srcbuf);
  253. xps_add_fixed_page(ctx, doc, srcbuf, width, height);
  254. }
  255. }
  256. if (fz_xml_is_tag(item, "LinkTarget"))
  257. {
  258. char *name = fz_xml_att(item, "Name");
  259. if (name)
  260. xps_add_link_target(ctx, doc, name);
  261. }
  262. xps_parse_metadata_imp(ctx, doc, fz_xml_down(item), fixdoc);
  263. item = fz_xml_next(item);
  264. }
  265. }
  266. static void
  267. xps_parse_metadata(fz_context *ctx, xps_document *doc, xps_part *part, xps_fixdoc *fixdoc)
  268. {
  269. fz_xml_doc *xml;
  270. char buf[1024];
  271. char *s;
  272. /* Save directory name part */
  273. fz_strlcpy(buf, part->name, sizeof buf);
  274. s = strrchr(buf, '/');
  275. if (s)
  276. s[0] = 0;
  277. /* _rels parts are voodoo: their URI references are from
  278. * the part they are associated with, not the actual _rels
  279. * part being parsed.
  280. */
  281. s = strstr(buf, "/_rels");
  282. if (s)
  283. *s = 0;
  284. doc->base_uri = buf;
  285. doc->part_uri = part->name;
  286. xml = fz_parse_xml(ctx, part->data, 0);
  287. fz_try(ctx)
  288. {
  289. xps_parse_metadata_imp(ctx, doc, fz_xml_root(xml), fixdoc);
  290. }
  291. fz_always(ctx)
  292. {
  293. fz_drop_xml(ctx, xml);
  294. doc->base_uri = NULL;
  295. doc->part_uri = NULL;
  296. }
  297. fz_catch(ctx)
  298. fz_rethrow(ctx);
  299. }
  300. static void
  301. xps_read_and_process_metadata_part(fz_context *ctx, xps_document *doc, char *name, xps_fixdoc *fixdoc)
  302. {
  303. xps_part *part;
  304. if (!xps_has_part(ctx, doc, name))
  305. return;
  306. part = xps_read_part(ctx, doc, name);
  307. fz_try(ctx)
  308. {
  309. xps_parse_metadata(ctx, doc, part, fixdoc);
  310. }
  311. fz_always(ctx)
  312. {
  313. xps_drop_part(ctx, doc, part);
  314. }
  315. fz_catch(ctx)
  316. {
  317. fz_rethrow(ctx);
  318. }
  319. }
  320. void
  321. xps_read_page_list(fz_context *ctx, xps_document *doc)
  322. {
  323. xps_fixdoc *fixdoc;
  324. xps_read_and_process_metadata_part(ctx, doc, "/_rels/.rels", NULL);
  325. if (!doc->start_part)
  326. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find fixed document sequence start part");
  327. xps_read_and_process_metadata_part(ctx, doc, doc->start_part, NULL);
  328. for (fixdoc = doc->first_fixdoc; fixdoc; fixdoc = fixdoc->next)
  329. {
  330. char relbuf[1024];
  331. fz_try(ctx)
  332. {
  333. xps_rels_for_part(ctx, doc, relbuf, fixdoc->name, sizeof relbuf);
  334. xps_read_and_process_metadata_part(ctx, doc, relbuf, fixdoc);
  335. }
  336. fz_catch(ctx)
  337. {
  338. fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
  339. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  340. fz_report_error(ctx);
  341. fz_warn(ctx, "cannot process FixedDocument rels part");
  342. }
  343. xps_read_and_process_metadata_part(ctx, doc, fixdoc->name, fixdoc);
  344. }
  345. }
  346. int
  347. xps_count_pages(fz_context *ctx, fz_document *doc_, int chapter)
  348. {
  349. xps_document *doc = (xps_document*)doc_;
  350. return doc->page_count;
  351. }
  352. static fz_xml_doc *
  353. xps_load_fixed_page(fz_context *ctx, xps_document *doc, xps_fixpage *page)
  354. {
  355. xps_part *part;
  356. fz_xml_doc *xml = NULL;
  357. fz_xml *root;
  358. char *width_att;
  359. char *height_att;
  360. part = xps_read_part(ctx, doc, page->name);
  361. fz_try(ctx)
  362. {
  363. xml = fz_parse_xml(ctx, part->data, 0);
  364. root = fz_xml_root(xml);
  365. if (!root)
  366. fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing root element");
  367. if (fz_xml_is_tag(root, "AlternateContent"))
  368. {
  369. fz_xml *node = xps_lookup_alternate_content(ctx, doc, root);
  370. if (!node)
  371. fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing alternate root element");
  372. fz_detach_xml(ctx, node);
  373. root = node;
  374. }
  375. if (!fz_xml_is_tag(root, "FixedPage"))
  376. fz_throw(ctx, FZ_ERROR_FORMAT, "expected FixedPage element");
  377. width_att = fz_xml_att(root, "Width");
  378. if (!width_att)
  379. fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing required attribute: Width");
  380. height_att = fz_xml_att(root, "Height");
  381. if (!height_att)
  382. fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing required attribute: Height");
  383. page->width = atoi(width_att);
  384. page->height = atoi(height_att);
  385. }
  386. fz_always(ctx)
  387. {
  388. xps_drop_part(ctx, doc, part);
  389. }
  390. fz_catch(ctx)
  391. {
  392. fz_drop_xml(ctx, xml);
  393. fz_rethrow(ctx);
  394. }
  395. return xml;
  396. }
  397. static fz_rect
  398. xps_bound_page(fz_context *ctx, fz_page *page_, fz_box_type box)
  399. {
  400. xps_page *page = (xps_page*)page_;
  401. fz_rect bounds;
  402. bounds.x0 = bounds.y0 = 0;
  403. bounds.x1 = page->fix->width * 72.0f / 96.0f;
  404. bounds.y1 = page->fix->height * 72.0f / 96.0f;
  405. return bounds;
  406. }
  407. static void
  408. xps_drop_page_imp(fz_context *ctx, fz_page *page_)
  409. {
  410. xps_page *page = (xps_page*)page_;
  411. fz_drop_xml(ctx, page->xml);
  412. }
  413. fz_page *
  414. xps_load_page(fz_context *ctx, fz_document *doc_, int chapter, int number)
  415. {
  416. xps_document *doc = (xps_document*)doc_;
  417. xps_page *page = NULL;
  418. xps_fixpage *fix;
  419. fz_xml_doc *xml;
  420. int n = 0;
  421. fz_var(page);
  422. for (fix = doc->first_page; fix; fix = fix->next)
  423. {
  424. if (n == number)
  425. {
  426. xml = xps_load_fixed_page(ctx, doc, fix);
  427. fz_try(ctx)
  428. {
  429. page = fz_new_derived_page(ctx, xps_page, doc_);
  430. page->super.load_links = xps_load_links;
  431. page->super.bound_page = xps_bound_page;
  432. page->super.run_page_contents = xps_run_page;
  433. page->super.drop_page = xps_drop_page_imp;
  434. page->fix = fix;
  435. page->xml = xml;
  436. }
  437. fz_catch(ctx)
  438. {
  439. fz_drop_xml(ctx, xml);
  440. fz_rethrow(ctx);
  441. }
  442. return (fz_page*)page;
  443. }
  444. n ++;
  445. }
  446. fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot find page %d", number + 1);
  447. }
  /* NULL-terminated list of file name extensions for the XPS handler. */
  static const char *xps_extensions[] = { "oxps", "xps", NULL };
  /* NULL-terminated list of MIME types for the XPS handler. */
  static const char *xps_mimetypes[] = {
      "application/oxps",
      "application/vnd.ms-xpsdocument",
      "application/xps",
      NULL,
  };
  461. static int
  462. xps_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
  463. {
  464. fz_archive *arch = NULL;
  465. int ret = 0;
  466. fz_xml *xml = NULL;
  467. fz_xml *pos;
  468. if (state)
  469. *state = NULL;
  470. if (free_state)
  471. *free_state = NULL;
  472. fz_var(arch);
  473. fz_var(ret);
  474. fz_var(xml);
  475. fz_try(ctx)
  476. {
  477. int i, count;
  478. const char *name;
  479. if (stream == NULL)
  480. arch = fz_keep_archive(ctx, dir);
  481. else
  482. {
  483. arch = fz_try_open_archive_with_stream(ctx, stream);
  484. if (arch == NULL)
  485. break;
  486. }
  487. xml = fz_try_parse_xml_archive_entry(ctx, arch, "/_rels/.rels", 0);
  488. if (xml == NULL)
  489. xml = fz_try_parse_xml_archive_entry(ctx, arch, "\\_rels\\.rels", 0);
  490. if (xml)
  491. {
  492. pos = fz_xml_find_dfs(xml, "Relationship", "Type", "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation");
  493. if (pos)
  494. ret = 100;
  495. break;
  496. }
  497. /* Cope with tricksy XPS's have the rels in multiple bits. */
  498. count = fz_count_archive_entries(ctx, arch);
  499. for (i = 0; i < count; i++)
  500. {
  501. name = fz_list_archive_entry(ctx, arch, i);
  502. if (!name)
  503. continue;
  504. if (strncmp(name, "/_rels/.rels/", 13) == 0 ||
  505. strncmp(name, "_rels/.rels/", 12) == 0 ||
  506. strncmp(name, "\\_rels\\.rels\\", 13) == 0 ||
  507. strncmp(name, "_rels\\.rels\\", 12) == 0)
  508. {
  509. xml = fz_try_parse_xml_archive_entry(ctx, arch, name, 0);
  510. if (xml)
  511. {
  512. pos = fz_xml_find_dfs(xml, "Relationship", "Type", "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation");
  513. if (pos)
  514. {
  515. ret = 100;
  516. break;
  517. }
  518. fz_drop_xml(ctx, xml);
  519. xml = NULL;
  520. }
  521. }
  522. }
  523. }
  524. fz_always(ctx)
  525. {
  526. fz_drop_xml(ctx, xml);
  527. fz_drop_archive(ctx, arch);
  528. }
  529. fz_catch(ctx)
  530. fz_rethrow(ctx);
  531. return ret;
  532. }
  533. static fz_document *
  534. xps_open(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
  535. {
  536. if (file)
  537. return xps_open_document_with_stream(ctx, file);
  538. else
  539. return xps_open_document_with_directory(ctx, dir);
  540. }
  /*
   * Document handler entry for XPS: ties together the open callback,
   * the extension and MIME type tables and the content recognizer
   * defined above.
   * The initializer is positional, so its order has to follow the
   * member order of fz_document_handler.
   * NOTE(review): the meaning of the leading NULL slot is not visible
   * in this file; confirm against the fz_document_handler declaration.
   */
  fz_document_handler xps_document_handler =
  {
  NULL,
  xps_open,
  xps_extensions,
  xps_mimetypes,
  xps_recognize_doc_content
  };