| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655 |
- // Copyright (C) 2022-2025 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "html-imp.h"
- #include "string.h"
- fz_xml *fz_story_document(fz_context *ctx, fz_story *story)
- {
- if (story == NULL || story->dom == NULL)
- return NULL;
- return story->dom;
- }
- fz_xml *fz_dom_body(fz_context *ctx, fz_xml *dom)
- {
- if (dom == NULL)
- return NULL;
- return fz_xml_find_dfs(dom, "body", NULL, NULL);
- }
- fz_xml *fz_dom_document_element(fz_context *ctx, fz_xml *dom)
- {
- if (dom == NULL)
- return NULL;
- while (dom->up)
- dom = dom->up;
- return dom->down;
- }
- static fz_xml *
- doc_pointer(fz_xml *a)
- {
- while (a->up)
- a = a->up;
- return a;
- }
- static void
- check_same_doc(fz_context *ctx, fz_xml *a, fz_xml *b)
- {
- /* Sanity check: The child and parent must come from the same doc. */
- if (doc_pointer(a) != doc_pointer(b))
- fz_throw(ctx, FZ_ERROR_ARGUMENT, "Parent and child must be from the same document");
- }
- /* Helper function to skip forward if we are passed a
- * doc pointer in circumstances where we should not be. */
- static fz_xml *
- skip_doc_pointer(fz_xml *x)
- {
- return (x == NULL || !FZ_DOCUMENT_ITEM(x)) ? x : x->down;
- }
- fz_xml *
- fz_new_dom(fz_context *ctx, const char *tag)
- {
- fz_pool *pool = fz_new_pool(ctx);
- fz_xml *xml;
- fz_try(ctx)
- {
- xml = fz_pool_alloc(ctx, pool, sizeof *xml);
- xml->up = NULL;
- xml->down = NULL;
- xml->u.doc.refs = 1;
- xml->u.doc.pool = pool;
- xml->down = fz_new_dom_node(ctx, xml, tag);
- xml->down->up = xml;
- }
- fz_catch(ctx)
- {
- fz_drop_pool(ctx, pool);
- fz_rethrow(ctx);
- }
- return xml->down;
- }
- fz_xml *
- fz_new_dom_node(fz_context *ctx, fz_xml *dom, const char *tag)
- {
- const char *ns;
- fz_xml *xml;
- size_t size;
- dom = doc_pointer(dom);
- /* skip namespace prefix */
- for (ns = tag; *ns; ++ns)
- if (*ns == ':')
- tag = ns + 1;
- size = offsetof(fz_xml, u.node.u.d.name) + ns-tag+1;
- xml = fz_pool_alloc(ctx, dom->u.doc.pool, size);
- memcpy(xml->u.node.u.d.name, tag, ns-tag+1);
- xml->u.node.u.d.atts = NULL;
- xml->down = NULL;
- xml->up = dom;
- xml->u.node.next = NULL;
- xml->u.node.prev = NULL;
- #ifdef FZ_XML_SEQ
- /* We don't have sequence numbers here. */
- xml->seq = 0;
- #endif
- return xml;
- }
- fz_xml *
- fz_new_dom_text_node(fz_context *ctx, fz_xml *dom, const char *text)
- {
- fz_xml *xml;
- size_t len = text ? strlen(text) : 0;
- size_t size;
- dom = doc_pointer(dom);
- size = offsetof(fz_xml, u.node.u.text) + len + 1;
- xml = fz_pool_alloc(ctx, dom->u.doc.pool, size);
- if (text)
- memcpy(xml->u.node.u.text, text, len);
- xml->u.node.u.text[len] = 0;
- xml->down = MAGIC_TEXT;
- xml->up = dom;
- xml->u.node.next = NULL;
- xml->u.node.prev = NULL;
- #ifdef FZ_XML_SEQ
- /* We don't have sequence numbers here. */
- xml->u.node.seq = 0;
- #endif
- return xml;
- }
- static fz_xml *
- clone_xml(fz_context *ctx, fz_xml *dom, fz_xml *node)
- {
- fz_xml *clone;
- struct attribute **dst;
- struct attribute *attr;
- fz_xml *child, *prev;
- if (dom == NULL || node == NULL)
- return NULL;
- /* Text nodes are simple. No children. */
- if (FZ_TEXT_ITEM(node))
- {
- return fz_new_dom_text_node(ctx, dom, node->u.node.u.text);
- }
- /* Clone a non-text node. */
- clone = fz_new_dom_node(ctx, dom, node->u.node.u.d.name);
- /* Clone the attributes. */
- attr = node->u.node.u.d.atts;
- dst = &clone->u.node.u.d.atts;
- while (attr)
- {
- size_t len = strlen(attr->name) + 1;
- size_t size = offsetof(struct attribute, name) + len;
- struct attribute *a = fz_pool_alloc(ctx, dom->u.doc.pool, size);
- memcpy(a->name, attr->name, len);
- a->next = NULL;
- a->value = NULL;
- if (attr->value)
- {
- a->value = fz_pool_alloc(ctx, dom->u.doc.pool, strlen(attr->value)+1);
- strcpy(a->value, attr->value);
- }
- *dst = a;
- dst = &a->next;
- attr = attr->next;
- }
- /* If we have no children, we're done. */
- if (node->down == NULL)
- return clone;
- /* Copy the first child. */
- clone->down = clone_xml(ctx, dom, node->down);
- clone->down->up = clone;
- /* And then run along all the successive children. */
- prev = clone->down;
- child = node->down->u.node.next;
- while (child)
- {
- prev->u.node.next = clone_xml(ctx, dom, child);
- prev->u.node.prev = prev;
- prev = prev->u.node.next;
- prev->up = clone;
- child = child->u.node.next;
- }
- return clone;
- }
- fz_xml *fz_dom_clone(fz_context *ctx, fz_xml *elt)
- {
- fz_xml *dom;
- if (elt == NULL)
- return NULL;
- /* We shouldn't be passed a document item really, but
- * cope. */
- if (FZ_DOCUMENT_ITEM(elt))
- elt = elt->down;
- /* Find the document pointer. */
- dom = elt;
- while (dom->up)
- dom = dom->up;
- return clone_xml(ctx, dom, elt);
- }
- fz_xml *fz_dom_create_element(fz_context *ctx, fz_xml *dom, const char *tag)
- {
- if (dom == NULL || tag == NULL)
- return NULL;
- /* We make a new node, unconnected to anything else.
- * up will still point to the dom root though. */
- return fz_new_dom_node(ctx, dom, tag);
- }
- fz_xml *fz_dom_create_text_node(fz_context *ctx, fz_xml *dom, const char *text)
- {
- if (dom == NULL || text == NULL)
- return NULL;
- /* We make a new node, unconnected to anything else. */
- return fz_new_dom_text_node(ctx, dom, text);
- }
- fz_xml *fz_dom_find(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match)
- {
- if (elt == NULL)
- return NULL;
- return fz_xml_find_dfs(elt, tag, att, match);
- }
- fz_xml *fz_dom_find_next(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match)
- {
- if (elt == NULL)
- return NULL;
- return fz_xml_find_next_dfs(elt, tag, att, match);
- }
- void fz_dom_append_child(fz_context *ctx, fz_xml *parent, fz_xml *child)
- {
- fz_xml *x;
- child = skip_doc_pointer(child);
- if (parent == NULL || child == NULL)
- return;
- check_same_doc(ctx, parent, child);
- /* Sanity checks: We can't add child to parent if parent is
- * a child of child. */
- x = parent;
- while (x)
- {
- if (x == child)
- fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't add a parent to its child.");
- x = x->up;
- }
- /* First unlink child from anywhere it's currently linked in. */
- if (child->u.node.prev)
- child->u.node.prev->u.node.next = child->u.node.next;
- else if (child->up->down == child && !FZ_DOCUMENT_ITEM(child->up))
- child->up->down = child->u.node.next;
- if (child->u.node.next)
- child->u.node.next->u.node.prev = child->u.node.prev;
- child->u.node.next = NULL;
- child->u.node.prev = NULL;
- /* Now find where to insert the child. */
- if (parent->down == NULL)
- {
- /* Insert as first (and only) child. */
- parent->down = child;
- }
- else
- {
- /* Find x, the current last child. */
- x = parent->down;
- while (x->u.node.next)
- x = x->u.node.next;
- /* And insert xchild after that. */
- x->u.node.next = child;
- child->u.node.prev = x;
- }
- child->up = parent;
- }
- void fz_dom_insert_before(fz_context *ctx, fz_xml *existing, fz_xml *elt)
- {
- fz_xml *x;
- existing = skip_doc_pointer(existing);
- elt = skip_doc_pointer(elt);
- if (existing == NULL || elt == NULL)
- return;
- check_same_doc(ctx, existing, elt);
- /* Sanity check: We can't add elt before existing if existing is
- * a child of elt. */
- x = existing;
- while (x)
- {
- if (x == elt)
- fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't add a node before its child.");
- x = x->up;
- }
- /* First unlink elt from anywhere it's currently linked in. */
- if (elt->u.node.prev)
- elt->u.node.prev->u.node.next = elt->u.node.next;
- else if (elt->up && !FZ_DOCUMENT_ITEM(elt->up))
- elt->up->down = elt->u.node.next;
- if (elt->u.node.next)
- elt->u.node.next->u.node.prev = elt->u.node.prev;
- elt->u.node.next = NULL;
- elt->u.node.prev = NULL;
- elt->up = NULL;
- /* Now insert the element */
- elt->u.node.prev = existing->u.node.prev;
- if (elt->u.node.prev)
- elt->u.node.prev->u.node.next = elt;
- else if (existing->up && !FZ_DOCUMENT_ITEM(existing->up))
- existing->up->down = elt;
- elt->u.node.next = existing;
- existing->u.node.prev = elt;
- elt->up = existing->up;
- }
- void fz_dom_insert_after(fz_context *ctx, fz_xml *existing, fz_xml *elt)
- {
- fz_xml *x;
- existing = skip_doc_pointer(existing);
- elt = skip_doc_pointer(elt);
- if (existing == NULL || elt == NULL)
- return;
- check_same_doc(ctx, existing, elt);
- /* Sanity check: We can't add elt before existing if existing is
- * a child of elt. */
- x = existing;
- while (x)
- {
- if (x == elt)
- fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't add a node after its child.");
- x = x->up;
- }
- /* First unlink child from anywhere it's currently linked in. */
- if (elt->u.node.prev)
- elt->u.node.prev->u.node.next = elt->u.node.next;
- else if (elt->up && !FZ_DOCUMENT_ITEM(elt->up))
- elt->up->down = elt->u.node.next;
- if (elt->u.node.next)
- elt->u.node.next->u.node.prev = elt->u.node.prev;
- elt->u.node.next = NULL;
- elt->u.node.prev = NULL;
- /* Now insert the element */
- elt->u.node.next = existing->u.node.next;
- if (elt->u.node.next)
- elt->u.node.next->u.node.prev = elt;
- elt->u.node.prev = existing;
- existing->u.node.next = elt;
- elt->up = existing->up;
- }
- void fz_dom_remove(fz_context *ctx, fz_xml *elt)
- {
- elt = skip_doc_pointer(elt);
- if (elt == NULL)
- return;
- /* Unlink child from anywhere it's currently linked in. */
- if (elt->u.node.prev)
- elt->u.node.prev->u.node.next = elt->u.node.next;
- else if (elt->up && !FZ_DOCUMENT_ITEM(elt))
- elt->up->down = elt->u.node.next;
- if (elt->u.node.next)
- elt->u.node.next->u.node.prev = elt->u.node.prev;
- elt->u.node.next = NULL;
- elt->u.node.prev = NULL;
- elt->up = doc_pointer(elt);
- }
- fz_xml *fz_dom_first_child(fz_context *ctx, fz_xml *elt)
- {
- elt = skip_doc_pointer(elt);
- if (elt == NULL || FZ_TEXT_ITEM(elt))
- return NULL;
- return elt->down;
- }
- fz_xml *fz_dom_parent(fz_context *ctx, fz_xml *elt)
- {
- elt = skip_doc_pointer(elt);
- if (elt == NULL)
- return NULL;
- if (FZ_DOCUMENT_ITEM(elt->up))
- return NULL;
- return elt->up;
- }
- fz_xml *fz_dom_next(fz_context *ctx, fz_xml *elt)
- {
- elt = skip_doc_pointer(elt);
- if (elt == NULL)
- return NULL;
- return elt->u.node.next;
- }
- fz_xml *fz_dom_previous(fz_context *ctx, fz_xml *elt)
- {
- elt = skip_doc_pointer(elt);
- if (elt == NULL)
- return NULL;
- return elt->u.node.prev;
- }
- void fz_dom_add_attribute(fz_context *ctx, fz_xml *elt, const char *att, const char *value)
- {
- struct attribute *attr;
- size_t len, size;
- char *mvalue = NULL;
- fz_xml *doc;
- elt = skip_doc_pointer(elt);
- if (elt == NULL || att == NULL)
- return;
- if (FZ_TEXT_ITEM(elt))
- fz_throw(ctx, FZ_ERROR_ARGUMENT, "Cannot add attributes to text node.");
- /* Move value to being a malloced thing, with the entity parsing done. */
- if (value) {
- char *d;
- const char *s = value;
- d = mvalue = fz_malloc(ctx, strlen(value)+1);
- while (*s)
- {
- if (*s == '&') {
- int c;
- s += xml_parse_entity(&c, s);
- d += fz_runetochar(d, c);
- }
- else
- *d++ = *s++;
- }
- *d = 0;
- }
- /* Do we have an attribute we can reuse? */
- attr = elt->u.node.u.d.atts;
- while (attr)
- {
- if (strcmp(att, attr->name) == 0)
- {
- /* Reuse this one. */
- break;
- }
- attr = attr->next;
- }
- if (attr && attr->value)
- {
- if (mvalue == NULL)
- {
- /* Just rewrite the existing value to be NULL. This
- * 'leaks' the old value within the pool, so it will
- * be cleaned up at the end. */
- attr->value = NULL;
- return;
- }
- if (strcmp(mvalue, attr->value) == 0)
- {
- /* Old and new values match. Nothing to change. */
- return;
- }
- }
- doc = doc_pointer(elt);
- /* Move mvalue to be an fz_pool thing. */
- if (mvalue)
- {
- char *tmp;
- fz_try(ctx)
- {
- tmp = fz_pool_alloc(ctx, doc->u.doc.pool, strlen(mvalue)+1);
- strcpy(tmp, mvalue);
- }
- fz_always(ctx)
- fz_free(ctx, mvalue);
- fz_catch(ctx)
- fz_rethrow(ctx);
- mvalue = tmp;
- }
- /* Make a new one and prepend it. */
- len = strlen(att) + 1;
- size = offsetof(struct attribute, name) + len;
- attr = fz_pool_alloc(ctx, doc->u.doc.pool, size);
- memcpy(attr->name, att, len);
- attr->next = elt->u.node.u.d.atts;
- elt->u.node.u.d.atts = attr;
- attr->value = mvalue;
- }
- void fz_dom_remove_attribute(fz_context *ctx, fz_xml *elt, const char *att)
- {
- struct attribute **attr;
- elt = skip_doc_pointer(elt);
- if (elt == NULL || att == NULL)
- return;
- if (FZ_TEXT_ITEM(elt))
- fz_throw(ctx, FZ_ERROR_ARGUMENT, "Cannot add attributes to text node.");
- attr = &elt->u.node.u.d.atts;
- while (*attr)
- {
- if (strcmp(att, (*attr)->name) == 0)
- {
- /* Delete this one. */
- /* The old attr/value are 'leaked' within the pool. */
- *attr = (*attr)->next;
- break;
- }
- attr = &(*attr)->next;
- }
- }
- const char *fz_dom_attribute(fz_context *ctx, fz_xml *elt, const char *att)
- {
- struct attribute *attr;
- elt = skip_doc_pointer(elt);
- if (elt == NULL || att == NULL)
- return NULL;
- /* Text nodes don't have attributes. */
- if (FZ_TEXT_ITEM(elt))
- return NULL;
- attr = elt->u.node.u.d.atts;
- while (attr)
- {
- if (strcmp(att, attr->name) == 0)
- {
- /* Found! */
- return attr->value;
- }
- }
- return NULL;
- }
- const char *fz_dom_get_attribute(fz_context *ctx, fz_xml *elt, int i, const char **att)
- {
- struct attribute *attr;
- if (elt == NULL || att == NULL)
- {
- if (att)
- *att = NULL;
- return NULL;
- }
- /* Text nodes don't have attributes. */
- if (FZ_TEXT_ITEM(elt) || i < 0)
- {
- *att = NULL;
- return NULL;
- }
- attr = elt->u.node.u.d.atts;
- while (attr)
- {
- if (i == 0)
- {
- /* Found! */
- *att = attr->name;
- return attr->value;
- }
- i--;
- attr = attr->next;
- }
- *att = NULL;
- return NULL;
- }
|