| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622 |
- // Copyright (C) 2004-2024 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #include "xps-imp.h"
- #include <string.h>
- #include <stdlib.h>
/* Relationship type URIs in the schemas.microsoft.com/xps namespace. */
#define REL_START_PART "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation"
#define REL_DOC_STRUCTURE "http://schemas.microsoft.com/xps/2005/06/documentstructure"
#define REL_REQUIRED_RESOURCE "http://schemas.microsoft.com/xps/2005/06/required-resource"
#define REL_REQUIRED_RESOURCE_RECURSIVE "http://schemas.microsoft.com/xps/2005/06/required-resource#recursive"

/* Counterparts in the schemas.openxps.org/oxps namespace. */
#define REL_START_PART_OXPS "http://schemas.openxps.org/oxps/v1.0/fixedrepresentation"
#define REL_DOC_STRUCTURE_OXPS "http://schemas.openxps.org/oxps/v1.0/documentstructure"
- static void
- xps_rels_for_part(fz_context *ctx, xps_document *doc, char *buf, char *name, int buflen)
- {
- char *p, *basename;
- p = strrchr(name, '/');
- basename = p ? p + 1 : name;
- fz_strlcpy(buf, name, buflen);
- p = strrchr(buf, '/');
- if (p) *p = 0;
- fz_strlcat(buf, "/_rels/", buflen);
- fz_strlcat(buf, basename, buflen);
- fz_strlcat(buf, ".rels", buflen);
- }
- /*
- * The FixedDocumentSequence and FixedDocument parts determine
- * which parts correspond to actual pages, and the page order.
- */
- static void
- xps_add_fixed_document(fz_context *ctx, xps_document *doc, char *name)
- {
- xps_fixdoc *fixdoc;
- /* Check for duplicates first */
- for (fixdoc = doc->first_fixdoc; fixdoc; fixdoc = fixdoc->next)
- if (!strcmp(fixdoc->name, name))
- return;
- fixdoc = fz_malloc_struct(ctx, xps_fixdoc);
- fz_try(ctx)
- {
- fixdoc->name = fz_strdup(ctx, name);
- fixdoc->outline = NULL;
- fixdoc->next = NULL;
- }
- fz_catch(ctx)
- {
- fz_free(ctx, fixdoc);
- fz_rethrow(ctx);
- }
- if (!doc->first_fixdoc)
- {
- doc->first_fixdoc = fixdoc;
- doc->last_fixdoc = fixdoc;
- }
- else
- {
- doc->last_fixdoc->next = fixdoc;
- doc->last_fixdoc = fixdoc;
- }
- }
- static void
- xps_add_fixed_page(fz_context *ctx, xps_document *doc, char *name, int width, int height)
- {
- xps_fixpage *page;
- /* Check for duplicates first */
- for (page = doc->first_page; page; page = page->next)
- if (!strcmp(page->name, name))
- return;
- page = fz_malloc_struct(ctx, xps_fixpage);
- page->name = NULL;
- fz_try(ctx)
- {
- page->name = fz_strdup(ctx, name);
- page->number = doc->page_count++;
- page->width = width;
- page->height = height;
- page->next = NULL;
- }
- fz_catch(ctx)
- {
- fz_free(ctx, page->name);
- fz_free(ctx, page);
- fz_rethrow(ctx);
- }
- if (!doc->first_page)
- {
- doc->first_page = page;
- doc->last_page = page;
- }
- else
- {
- doc->last_page->next = page;
- doc->last_page = page;
- }
- }
- static void
- xps_add_link_target(fz_context *ctx, xps_document *doc, char *name)
- {
- xps_fixpage *page = doc->last_page;
- xps_target *target;
- if (page == NULL)
- {
- fz_warn(ctx, "Dropping link target with no page");
- return;
- }
- target = fz_malloc_struct(ctx, xps_target);
- fz_try(ctx)
- {
- target->name = fz_strdup(ctx, name);
- target->page = page->number;
- target->next = doc->target;
- }
- fz_catch(ctx)
- {
- fz_free(ctx, target);
- fz_rethrow(ctx);
- }
- doc->target = target;
- }
- fz_link_dest
- xps_lookup_link_target(fz_context *ctx, fz_document *doc_, const char *target_uri)
- {
- xps_document *doc = (xps_document*)doc_;
- xps_target *target;
- const char *needle = strrchr(target_uri, '#');
- needle = needle ? needle + 1 : target_uri;
- for (target = doc->target; target; target = target->next)
- if (!strcmp(target->name, needle))
- return fz_make_link_dest_xyz(0, target->page, 0, 0, 0);
- return fz_make_link_dest_xyz(0, fz_atoi(needle) - 1, 0, 0, 0);
- }
- static void
- xps_drop_link_targets(fz_context *ctx, xps_document *doc)
- {
- xps_target *target = doc->target, *next;
- while (target)
- {
- next = target->next;
- fz_free(ctx, target->name);
- fz_free(ctx, target);
- target = next;
- }
- }
- static void
- xps_drop_fixed_pages(fz_context *ctx, xps_document *doc)
- {
- xps_fixpage *page = doc->first_page;
- while (page)
- {
- xps_fixpage *next = page->next;
- fz_free(ctx, page->name);
- fz_free(ctx, page);
- page = next;
- }
- doc->first_page = NULL;
- doc->last_page = NULL;
- }
- static void
- xps_drop_fixed_documents(fz_context *ctx, xps_document *doc)
- {
- xps_fixdoc *fixdoc = doc->first_fixdoc;
- while (fixdoc)
- {
- xps_fixdoc *next = fixdoc->next;
- fz_free(ctx, fixdoc->name);
- fz_free(ctx, fixdoc->outline);
- fz_free(ctx, fixdoc);
- fixdoc = next;
- }
- doc->first_fixdoc = NULL;
- doc->last_fixdoc = NULL;
- }
- void
- xps_drop_page_list(fz_context *ctx, xps_document *doc)
- {
- xps_drop_fixed_documents(ctx, doc);
- xps_drop_fixed_pages(ctx, doc);
- xps_drop_link_targets(ctx, doc);
- }
- /*
- * Parse the fixed document sequence structure and _rels/.rels to find the start part.
- */
- static void
- xps_parse_metadata_imp(fz_context *ctx, xps_document *doc, fz_xml *item, xps_fixdoc *fixdoc)
- {
- while (item)
- {
- if (fz_xml_is_tag(item, "Relationship"))
- {
- char *target = fz_xml_att(item, "Target");
- char *type = fz_xml_att(item, "Type");
- if (target && type)
- {
- char tgtbuf[1024];
- xps_resolve_url(ctx, doc, tgtbuf, doc->base_uri, target, sizeof tgtbuf);
- if (!strcmp(type, REL_START_PART) || !strcmp(type, REL_START_PART_OXPS))
- {
- fz_free(ctx, doc->start_part);
- doc->start_part = fz_strdup(ctx, tgtbuf);
- }
- if ((!strcmp(type, REL_DOC_STRUCTURE) || !strcmp(type, REL_DOC_STRUCTURE_OXPS)) && fixdoc)
- fixdoc->outline = fz_strdup(ctx, tgtbuf);
- if (!fz_xml_att(item, "Id"))
- fz_warn(ctx, "missing relationship id for %s", target);
- }
- }
- if (fz_xml_is_tag(item, "DocumentReference"))
- {
- char *source = fz_xml_att(item, "Source");
- if (source)
- {
- char srcbuf[1024];
- xps_resolve_url(ctx, doc, srcbuf, doc->base_uri, source, sizeof srcbuf);
- xps_add_fixed_document(ctx, doc, srcbuf);
- }
- }
- if (fz_xml_is_tag(item, "PageContent"))
- {
- char *source = fz_xml_att(item, "Source");
- char *width_att = fz_xml_att(item, "Width");
- char *height_att = fz_xml_att(item, "Height");
- int width = width_att ? atoi(width_att) : 0;
- int height = height_att ? atoi(height_att) : 0;
- if (source)
- {
- char srcbuf[1024];
- xps_resolve_url(ctx, doc, srcbuf, doc->base_uri, source, sizeof srcbuf);
- xps_add_fixed_page(ctx, doc, srcbuf, width, height);
- }
- }
- if (fz_xml_is_tag(item, "LinkTarget"))
- {
- char *name = fz_xml_att(item, "Name");
- if (name)
- xps_add_link_target(ctx, doc, name);
- }
- xps_parse_metadata_imp(ctx, doc, fz_xml_down(item), fixdoc);
- item = fz_xml_next(item);
- }
- }
- static void
- xps_parse_metadata(fz_context *ctx, xps_document *doc, xps_part *part, xps_fixdoc *fixdoc)
- {
- fz_xml_doc *xml;
- char buf[1024];
- char *s;
- /* Save directory name part */
- fz_strlcpy(buf, part->name, sizeof buf);
- s = strrchr(buf, '/');
- if (s)
- s[0] = 0;
- /* _rels parts are voodoo: their URI references are from
- * the part they are associated with, not the actual _rels
- * part being parsed.
- */
- s = strstr(buf, "/_rels");
- if (s)
- *s = 0;
- doc->base_uri = buf;
- doc->part_uri = part->name;
- xml = fz_parse_xml(ctx, part->data, 0);
- fz_try(ctx)
- {
- xps_parse_metadata_imp(ctx, doc, fz_xml_root(xml), fixdoc);
- }
- fz_always(ctx)
- {
- fz_drop_xml(ctx, xml);
- doc->base_uri = NULL;
- doc->part_uri = NULL;
- }
- fz_catch(ctx)
- fz_rethrow(ctx);
- }
- static void
- xps_read_and_process_metadata_part(fz_context *ctx, xps_document *doc, char *name, xps_fixdoc *fixdoc)
- {
- xps_part *part;
- if (!xps_has_part(ctx, doc, name))
- return;
- part = xps_read_part(ctx, doc, name);
- fz_try(ctx)
- {
- xps_parse_metadata(ctx, doc, part, fixdoc);
- }
- fz_always(ctx)
- {
- xps_drop_part(ctx, doc, part);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- }
- void
- xps_read_page_list(fz_context *ctx, xps_document *doc)
- {
- xps_fixdoc *fixdoc;
- xps_read_and_process_metadata_part(ctx, doc, "/_rels/.rels", NULL);
- if (!doc->start_part)
- fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find fixed document sequence start part");
- xps_read_and_process_metadata_part(ctx, doc, doc->start_part, NULL);
- for (fixdoc = doc->first_fixdoc; fixdoc; fixdoc = fixdoc->next)
- {
- char relbuf[1024];
- fz_try(ctx)
- {
- xps_rels_for_part(ctx, doc, relbuf, fixdoc->name, sizeof relbuf);
- xps_read_and_process_metadata_part(ctx, doc, relbuf, fixdoc);
- }
- fz_catch(ctx)
- {
- fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
- fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
- fz_report_error(ctx);
- fz_warn(ctx, "cannot process FixedDocument rels part");
- }
- xps_read_and_process_metadata_part(ctx, doc, fixdoc->name, fixdoc);
- }
- }
- int
- xps_count_pages(fz_context *ctx, fz_document *doc_, int chapter)
- {
- xps_document *doc = (xps_document*)doc_;
- return doc->page_count;
- }
- static fz_xml_doc *
- xps_load_fixed_page(fz_context *ctx, xps_document *doc, xps_fixpage *page)
- {
- xps_part *part;
- fz_xml_doc *xml = NULL;
- fz_xml *root;
- char *width_att;
- char *height_att;
- part = xps_read_part(ctx, doc, page->name);
- fz_try(ctx)
- {
- xml = fz_parse_xml(ctx, part->data, 0);
- root = fz_xml_root(xml);
- if (!root)
- fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing root element");
- if (fz_xml_is_tag(root, "AlternateContent"))
- {
- fz_xml *node = xps_lookup_alternate_content(ctx, doc, root);
- if (!node)
- fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing alternate root element");
- fz_detach_xml(ctx, node);
- root = node;
- }
- if (!fz_xml_is_tag(root, "FixedPage"))
- fz_throw(ctx, FZ_ERROR_FORMAT, "expected FixedPage element");
- width_att = fz_xml_att(root, "Width");
- if (!width_att)
- fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing required attribute: Width");
- height_att = fz_xml_att(root, "Height");
- if (!height_att)
- fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing required attribute: Height");
- page->width = atoi(width_att);
- page->height = atoi(height_att);
- }
- fz_always(ctx)
- {
- xps_drop_part(ctx, doc, part);
- }
- fz_catch(ctx)
- {
- fz_drop_xml(ctx, xml);
- fz_rethrow(ctx);
- }
- return xml;
- }
- static fz_rect
- xps_bound_page(fz_context *ctx, fz_page *page_, fz_box_type box)
- {
- xps_page *page = (xps_page*)page_;
- fz_rect bounds;
- bounds.x0 = bounds.y0 = 0;
- bounds.x1 = page->fix->width * 72.0f / 96.0f;
- bounds.y1 = page->fix->height * 72.0f / 96.0f;
- return bounds;
- }
- static void
- xps_drop_page_imp(fz_context *ctx, fz_page *page_)
- {
- xps_page *page = (xps_page*)page_;
- fz_drop_xml(ctx, page->xml);
- }
- fz_page *
- xps_load_page(fz_context *ctx, fz_document *doc_, int chapter, int number)
- {
- xps_document *doc = (xps_document*)doc_;
- xps_page *page = NULL;
- xps_fixpage *fix;
- fz_xml_doc *xml;
- int n = 0;
- fz_var(page);
- for (fix = doc->first_page; fix; fix = fix->next)
- {
- if (n == number)
- {
- xml = xps_load_fixed_page(ctx, doc, fix);
- fz_try(ctx)
- {
- page = fz_new_derived_page(ctx, xps_page, doc_);
- page->super.load_links = xps_load_links;
- page->super.bound_page = xps_bound_page;
- page->super.run_page_contents = xps_run_page;
- page->super.drop_page = xps_drop_page_imp;
- page->fix = fix;
- page->xml = xml;
- }
- fz_catch(ctx)
- {
- fz_drop_xml(ctx, xml);
- fz_rethrow(ctx);
- }
- return (fz_page*)page;
- }
- n ++;
- }
- fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot find page %d", number + 1);
- }
/* NULL-terminated list of file extensions registered with the XPS handler. */
static const char *xps_extensions[] =
{
	"oxps",
	"xps",
	NULL
};
/* NULL-terminated list of MIME types registered with the XPS handler. */
static const char *xps_mimetypes[] =
{
	"application/oxps",
	"application/vnd.ms-xpsdocument",
	"application/xps",
	NULL
};
- static int
- xps_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
- {
- fz_archive *arch = NULL;
- int ret = 0;
- fz_xml *xml = NULL;
- fz_xml *pos;
- if (state)
- *state = NULL;
- if (free_state)
- *free_state = NULL;
- fz_var(arch);
- fz_var(ret);
- fz_var(xml);
- fz_try(ctx)
- {
- int i, count;
- const char *name;
- if (stream == NULL)
- arch = fz_keep_archive(ctx, dir);
- else
- {
- arch = fz_try_open_archive_with_stream(ctx, stream);
- if (arch == NULL)
- break;
- }
- xml = fz_try_parse_xml_archive_entry(ctx, arch, "/_rels/.rels", 0);
- if (xml == NULL)
- xml = fz_try_parse_xml_archive_entry(ctx, arch, "\\_rels\\.rels", 0);
- if (xml)
- {
- pos = fz_xml_find_dfs(xml, "Relationship", "Type", "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation");
- if (pos)
- ret = 100;
- break;
- }
- /* Cope with tricksy XPS's have the rels in multiple bits. */
- count = fz_count_archive_entries(ctx, arch);
- for (i = 0; i < count; i++)
- {
- name = fz_list_archive_entry(ctx, arch, i);
- if (!name)
- continue;
- if (strncmp(name, "/_rels/.rels/", 13) == 0 ||
- strncmp(name, "_rels/.rels/", 12) == 0 ||
- strncmp(name, "\\_rels\\.rels\\", 13) == 0 ||
- strncmp(name, "_rels\\.rels\\", 12) == 0)
- {
- xml = fz_try_parse_xml_archive_entry(ctx, arch, name, 0);
- if (xml)
- {
- pos = fz_xml_find_dfs(xml, "Relationship", "Type", "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation");
- if (pos)
- {
- ret = 100;
- break;
- }
- fz_drop_xml(ctx, xml);
- xml = NULL;
- }
- }
- }
- }
- fz_always(ctx)
- {
- fz_drop_xml(ctx, xml);
- fz_drop_archive(ctx, arch);
- }
- fz_catch(ctx)
- fz_rethrow(ctx);
- return ret;
- }
- static fz_document *
- xps_open(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
- {
- if (file)
- return xps_open_document_with_stream(ctx, file);
- else
- return xps_open_document_with_directory(ctx, dir);
- }
- fz_document_handler xps_document_handler =
- {
- NULL,
- xps_open,
- xps_extensions,
- xps_mimetypes,
- xps_recognize_doc_content
- };
|