| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440 |
- // Copyright (C) 2004-2024 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #include "html-imp.h"
- #include <string.h>
/*
	Indices into a four-entry box-side array. In this file only T and L
	are used, to read html->page_margin[] when shifting link rectangles.
	R and B are presumably the right and bottom sides -- confirm against
	the declaration of page_margin.
*/
enum
{
	T = 0,
	R = 1,
	B = 2,
	L = 3
};
/*
	Decide whether a link target should be resolved relative to the
	current document.

	Returns 0 when uri starts with "<scheme>://" (an external link) and
	1 otherwise.

	The scheme is matched using the RFC 3986 grammar: one letter followed
	by any mix of letters, digits, '+', '-' and '.'. Letters of either
	case are accepted, so "HTTP://host" and "svn+ssh://host" are treated
	as external just like "http://host".

	A string that begins directly with "://" (empty scheme) is also
	reported as external; this matches what the lowercase-only scan did.

	uri must not be NULL.
*/
static int is_internal_uri(const char *uri)
{
	const char *p = uri;

	/* Explicit ranges are used instead of <ctype.h> so the result does
	 * not depend on the current locale or on the signedness of char. */
	if ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z'))
	{
		++p;
		while ((*p >= 'a' && *p <= 'z') ||
			(*p >= 'A' && *p <= 'Z') ||
			(*p >= '0' && *p <= '9') ||
			*p == '+' || *p == '-' || *p == '.')
			++p;
	}

	if (p[0] == ':' && p[1] == '/' && p[2] == '/')
		return 0;
	return 1;
}
- static fz_link *load_link_flow(fz_context *ctx, fz_html_flow *flow, fz_link *head, int page, float page_h, const char *dir, const char *file)
- {
- fz_link *link;
- fz_html_flow *next;
- char path[2048];
- fz_rect bbox;
- const char *dest;
- const char *href;
- float end;
- float page_y0 = page * page_h;
- float page_y1 = (page + 1) * page_h;
- while (flow)
- {
- next = flow->next;
- if (flow->y >= page_y0 && flow->y <= page_y1)
- {
- href = flow->box->href;
- if (href)
- {
- /* Coalesce contiguous flow boxes into one link node */
- end = flow->x + flow->w;
- while (next &&
- next->y == flow->y &&
- next->h == flow->h &&
- next->box->href == href)
- {
- end = next->x + next->w;
- next = next->next;
- }
- bbox.x0 = flow->x;
- bbox.y0 = flow->y - page * page_h;
- bbox.x1 = end;
- bbox.y1 = bbox.y0 + flow->h;
- if (flow->type != FLOW_IMAGE)
- {
- /* flow->y is the baseline, adjust bbox appropriately */
- bbox.y0 -= 0.8f * flow->h;
- bbox.y1 -= 0.8f * flow->h;
- }
- if (is_internal_uri(href))
- {
- if (href[0] == '#')
- {
- fz_strlcpy(path, file, sizeof path);
- fz_strlcat(path, href, sizeof path);
- }
- else
- {
- fz_strlcpy(path, dir, sizeof path);
- fz_strlcat(path, "/", sizeof path);
- fz_strlcat(path, href, sizeof path);
- }
- fz_urldecode(path);
- fz_cleanname(path);
- dest = path;
- }
- else
- {
- dest = href;
- }
- link = fz_new_derived_link(ctx, fz_link, bbox, dest);
- link->next = head;
- head = link;
- }
- }
- flow = next;
- }
- return head;
- }
- static fz_link *load_link_box(fz_context *ctx, fz_html_box *box, fz_link *head, int page, float page_h, const char *dir, const char *file)
- {
- while (box)
- {
- if (box->type == BOX_FLOW)
- head = load_link_flow(ctx, box->u.flow.head, head, page, page_h, dir, file);
- if (box->down)
- head = load_link_box(ctx, box->down, head, page, page_h, dir, file);
- box = box->next;
- }
- return head;
- }
- fz_link *
- fz_load_html_links(fz_context *ctx, fz_html *html, int page, const char *file)
- {
- fz_link *link, *head;
- char dir[2048];
- fz_dirname(dir, file, sizeof dir);
- head = load_link_box(ctx, html->tree.root, NULL, page, html->page_h, dir, file);
- for (link = head; link; link = link->next)
- {
- /* Adjust for page margins */
- link->rect.x0 += html->page_margin[L];
- link->rect.x1 += html->page_margin[L];
- link->rect.y0 += html->page_margin[T];
- link->rect.y1 += html->page_margin[T];
- }
- return head;
- }
- static fz_html_flow *
- find_first_content(fz_html_box *box)
- {
- while (box)
- {
- if (box->type == BOX_FLOW)
- return box->u.flow.head;
- box = box->down;
- }
- return NULL;
- }
- static float
- find_flow_target(fz_html_flow *flow, const char *id)
- {
- while (flow)
- {
- if (flow->box->id && !strcmp(id, flow->box->id))
- return flow->y;
- flow = flow->next;
- }
- return -1;
- }
- static float
- find_box_target(fz_html_box *box, const char *id)
- {
- float y;
- while (box)
- {
- if (box->id && !strcmp(id, box->id))
- {
- fz_html_flow *flow = find_first_content(box);
- if (flow)
- return flow->y;
- return box->s.layout.y;
- }
- if (box->type == BOX_FLOW)
- {
- y = find_flow_target(box->u.flow.head, id);
- if (y >= 0)
- return y;
- }
- else
- {
- y = find_box_target(box->down, id);
- if (y >= 0)
- return y;
- }
- box = box->next;
- }
- return -1;
- }
- float
- fz_find_html_target(fz_context *ctx, fz_html *html, const char *id)
- {
- return find_box_target(html->tree.root, id);
- }
- static fz_html_flow *
- make_flow_bookmark(fz_context *ctx, fz_html_flow *flow, float y, fz_html_flow **candidate)
- {
- while (flow)
- {
- *candidate = flow;
- if (flow->y >= y)
- return flow;
- flow = flow->next;
- }
- return NULL;
- }
- static fz_html_flow *
- make_box_bookmark(fz_context *ctx, fz_html_box *box, float y, fz_html_flow **candidate)
- {
- fz_html_flow *mark;
- fz_html_flow *dummy = NULL;
- if (candidate == NULL)
- candidate = &dummy;
- while (box)
- {
- if (box->type == BOX_FLOW)
- {
- if (box->s.layout.y >= y)
- {
- mark = make_flow_bookmark(ctx, box->u.flow.head, y, candidate);
- if (mark)
- return mark;
- }
- else
- *candidate = make_flow_bookmark(ctx, box->u.flow.head, y, candidate);
- }
- else
- {
- mark = make_box_bookmark(ctx, box->down, y, candidate);
- if (mark)
- return mark;
- }
- box = box->next;
- }
- return *candidate;
- }
- fz_bookmark
- fz_make_html_bookmark(fz_context *ctx, fz_html *html, int page)
- {
- return (fz_bookmark)make_box_bookmark(ctx, html->tree.root, page * html->page_h, NULL);
- }
- static int
- lookup_flow_bookmark(fz_context *ctx, fz_html_flow *flow, fz_html_flow *mark)
- {
- while (flow)
- {
- if (flow == mark)
- return 1;
- flow = flow->next;
- }
- return 0;
- }
- static int
- lookup_box_bookmark(fz_context *ctx, fz_html_box *box, fz_html_flow *mark)
- {
- while (box)
- {
- if (box->type == BOX_FLOW)
- {
- if (lookup_flow_bookmark(ctx, box->u.flow.head, mark))
- return 1;
- }
- else
- {
- if (lookup_box_bookmark(ctx, box->down, mark))
- return 1;
- }
- box = box->next;
- }
- return 0;
- }
- int
- fz_lookup_html_bookmark(fz_context *ctx, fz_html *html, fz_bookmark mark)
- {
- fz_html_flow *flow = (fz_html_flow*)mark;
- if (flow && lookup_box_bookmark(ctx, html->tree.root, flow))
- return (int)(flow->y / html->page_h);
- return -1;
- }
- struct outline_parser
- {
- fz_html *html;
- fz_buffer *cat;
- fz_outline *head;
- fz_outline **tail[6];
- fz_outline **down[6];
- int level[6];
- int current;
- int id;
- };
- static void
- cat_html_flow(fz_context *ctx, fz_buffer *cat, fz_html_flow *flow)
- {
- while (flow)
- {
- switch (flow->type)
- {
- case FLOW_WORD:
- fz_append_string(ctx, cat, flow->content.text);
- break;
- case FLOW_SPACE:
- case FLOW_BREAK:
- fz_append_byte(ctx, cat, ' ');
- break;
- default:
- break;
- }
- flow = flow->next;
- }
- }
- static void
- cat_html_box(fz_context *ctx, fz_buffer *cat, fz_html_box *box)
- {
- while (box)
- {
- if (box->type == BOX_FLOW)
- cat_html_flow(ctx, cat, box->u.flow.head);
- cat_html_box(ctx, cat, box->down);
- box = box->next;
- }
- }
- static const char *
- cat_html_text(fz_context *ctx, struct outline_parser *x, fz_html_box *box)
- {
- if (!x->cat)
- x->cat = fz_new_buffer(ctx, 1024);
- else
- fz_clear_buffer(ctx, x->cat);
- cat_html_flow(ctx, x->cat, box->u.flow.head);
- cat_html_box(ctx, x->cat, box->down);
- return fz_string_from_buffer(ctx, x->cat);
- }
- static void
- add_html_outline(fz_context *ctx, struct outline_parser *x, fz_html_box *box)
- {
- fz_outline *node;
- char buf[100];
- int heading;
- node = fz_new_outline(ctx);
- fz_try(ctx)
- {
- node->title = Memento_label(fz_strdup(ctx, cat_html_text(ctx, x, box)), "outline_title");
- if (!box->id)
- {
- fz_snprintf(buf, sizeof buf, "'%d", x->id++);
- box->id = Memento_label(fz_pool_strdup(ctx, x->html->tree.pool, buf), "box_id");
- }
- node->uri = Memento_label(fz_asprintf(ctx, "#%s", box->id), "outline_uri");
- node->is_open = 1;
- }
- fz_catch(ctx)
- {
- fz_free(ctx, node);
- fz_rethrow(ctx);
- }
- heading = box->heading;
- if (x->level[x->current] < heading && x->current < 5)
- {
- x->tail[x->current+1] = x->down[x->current];
- x->current += 1;
- }
- else
- {
- while (x->current > 0 && x->level[x->current] > heading)
- {
- x->current -= 1;
- }
- }
- x->level[x->current] = heading;
- *(x->tail[x->current]) = node;
- x->tail[x->current] = &node->next;
- x->down[x->current] = &node->down;
- }
- static void
- load_html_outline(fz_context *ctx, struct outline_parser *x, fz_html_box *box)
- {
- while (box)
- {
- int heading = box->heading;
- if (heading)
- add_html_outline(ctx, x, box);
- if (box->down)
- load_html_outline(ctx, x, box->down);
- box = box->next;
- }
- }
- fz_outline *
- fz_load_html_outline(fz_context *ctx, fz_html *html)
- {
- struct outline_parser state;
- state.html = html;
- state.cat = NULL;
- state.head = NULL;
- state.tail[0] = &state.head;
- state.down[0] = NULL;
- state.level[0] = 99;
- state.current = 0;
- state.id = 1;
- fz_try(ctx)
- load_html_outline(ctx, &state, html->tree.root);
- fz_always(ctx)
- fz_drop_buffer(ctx, state.cat);
- fz_catch(ctx)
- {
- fz_drop_outline(ctx, state.head);
- state.head = NULL;
- }
- return state.head;
- }
|