| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137 |
- // Copyright (C) 2004-2024 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #include "html-imp.h"
- #include <string.h>
- #include <math.h>
- #include <zlib.h> /* for crc32 */
- enum { T, R, B, L }; /* Indices into an fz_html page_margin[] array: epub_bound_page adds L and R to the page width and T and B to the page height. */
- typedef struct epub_chapter epub_chapter; /* Forward declaration: one entry of the spine list. */
- typedef struct epub_page epub_page; /* Forward declaration: one loaded page of a chapter. */
- typedef struct /* Cache of per-chapter page counts plus the layout settings they were computed for.
- * Written by epub_output_accelerator and read back by epub_load_accelerator. */
- {
- int max_chapters; /* Allocated length of pages_in_chapter. */
- int num_chapters; /* Number of leading entries that are written out when saving. */
- float layout_w; /* Layout width the cached counts belong to. */
- float layout_h; /* Layout height the cached counts belong to. */
- float layout_em; /* Layout em size the cached counts belong to. */
- uint32_t css_sum; /* user_css_sum() value the cached counts belong to. */
- int use_doc_css; /* fz_use_document_css() value the cached counts belong to. */
- int *pages_in_chapter; /* Page count indexed by chapter number; -1 marks an unknown count. */
- } epub_accelerator;
- typedef struct /* State of one open EPUB document. */
- {
- fz_document super; /* Base document; must stay first so the struct can be cast to and from fz_document. */
- fz_archive *zip; /* Archive the chapters are read from (may be wrapped by the "encrypted" archive). */
- fz_html_font_set *set; /* Font set handed to fz_parse_html. */
- int count; /* NOTE(review): not referenced anywhere in this file - confirm whether it is still needed. */
- epub_chapter *spine; /* Head of the chapter list in reading order. */
- fz_outline *outline; /* Outline built from the NCX file, or NULL. */
- char *dc_title, *dc_creator; /* Copies of the OPF metadata "title" and "creator" texts, or NULL. */
- float layout_w, layout_h, layout_em; /* Layout parameters last passed to epub_layout. */
- epub_accelerator *accel; /* Cached page counts. */
- uint32_t css_sum; /* user_css_sum() value recorded for this document. */
- /* A common pattern of use is for us to open a document,
- * load a page, draw it, drop it, load the next page,
- * draw it, drop it etc. This means that the HTML for
- * a chapter might get thrown away between the drop and
- * the next load (if the chapter is large, and the
- * store size is low). Accordingly, we store a handle
- * to the most recently used html block here, thus
- * ensuring that the stored copy won't be evicted. */
- fz_html *most_recent_html;
- } epub_document;
- struct epub_chapter /* One spine entry; entries form a singly linked list in reading order. */
- {
- epub_document *doc; /* NOTE(review): epub_load_chapter never assigns this field - confirm whether anything reads it. */
- char *path; /* Heap copy of the chapter's path inside the archive. */
- int number; /* Zero-based chapter index; also indexes accel->pages_in_chapter. */
- epub_chapter *next; /* Next chapter, or NULL at the end of the spine. */
- };
- struct epub_page /* One loaded page of a chapter. */
- {
- fz_page super; /* Base page; must stay first so the struct can be cast to and from fz_page. */
- epub_chapter *ch; /* Chapter this page belongs to. */
- int number; /* Page index within the chapter, passed to fz_draw_html. */
- fz_html *html; /* Laid-out chapter HTML; released in epub_drop_page. */
- };
- static uint32_t
- user_css_sum(fz_context *ctx)
- {
- uint32_t sum = 0;
- const char *css = fz_user_css(ctx);
- sum = crc32(0, NULL, 0);
- if (css)
- sum = crc32(sum, (Byte*)css, (int)strlen(css));
- return sum;
- }
- static int dummy = 1; /* Its address is the non-NULL value stored for every encrypted URI; lookups only test for presence. */
- struct encrypted { /* Archive wrapper that refuses to open or read entries listed as encrypted. */
- fz_archive super; /* Base archive; must stay first so the struct can be cast to and from fz_archive. */
- fz_archive *chain; /* Wrapped archive that actually holds the data; dropped with the wrapper. */
- fz_tree *info; /* Tree keyed by the URI of each encrypted entry; dropped with the wrapper. */
- };
- static int has_encrypted_entry(fz_context *ctx, fz_archive *arch_, const char *name)
- {
- struct encrypted *arch = (struct encrypted *)arch_;
- return fz_has_archive_entry(ctx, arch->chain, name);
- }
- static fz_stream *open_encrypted_entry(fz_context *ctx, fz_archive *arch_, const char *name)
- {
- struct encrypted *arch = (struct encrypted *)arch_;
- if (fz_tree_lookup(ctx, arch->info, name))
- return NULL;
- return fz_open_archive_entry(ctx, arch->chain, name);
- }
- static fz_buffer *read_encrypted_entry(fz_context *ctx, fz_archive *arch_, const char *name)
- {
- struct encrypted *arch = (struct encrypted *)arch_;
- if (fz_tree_lookup(ctx, arch->info, name))
- return NULL;
- return fz_read_archive_entry(ctx, arch->chain, name);
- }
- static void drop_encrypted_archive(fz_context *ctx, fz_archive *arch_)
- {
- struct encrypted *arch = (struct encrypted *)arch_;
- fz_drop_tree(ctx, arch->info, NULL);
- fz_drop_archive(ctx, arch->chain);
- }
- static fz_archive *new_encrypted_archive(fz_context *ctx, fz_archive *chain, fz_tree *info)
- {
- struct encrypted *arch;
- arch = fz_new_derived_archive(ctx, NULL, struct encrypted);
- arch->super.format = "encrypted";
- arch->super.has_entry = has_encrypted_entry;
- arch->super.read_entry = read_encrypted_entry;
- arch->super.open_entry = open_encrypted_entry;
- arch->super.drop_archive = drop_encrypted_archive;
- arch->chain = chain;
- arch->info = info;
- return &arch->super;
- }
- static void
- epub_parse_encryption(fz_context *ctx, epub_document *doc, fz_xml *root)
- {
- fz_tree *info = NULL;
- fz_xml *edata;
- for (edata = fz_xml_find_down(root, "EncryptedData"); edata; edata = fz_xml_find_next(edata, "EncryptedData"))
- {
- fz_xml *cdata = fz_xml_find_down(edata, "CipherData");
- fz_xml *cref = fz_xml_find_down(cdata, "CipherReference");
- char *uri = fz_xml_att(cref, "URI");
- if (uri)
- {
- // TODO: Support reading EncryptedKey and EncryptionMethod to decrypt content.
- info = fz_tree_insert(ctx, info, uri, &dummy);
- }
- }
- if (info)
- {
- doc->zip = new_encrypted_archive(ctx, doc->zip, info);
- }
- }
- static fz_html *epub_get_laid_out_html(fz_context *ctx, epub_document *doc, epub_chapter *ch);
- static int count_laid_out_pages(fz_html *html)
- {
- if (html->tree.root->s.layout.b > 0)
- return ceilf(html->tree.root->s.layout.b / html->page_h);
- return 1;
- }
- static void
- invalidate_accelerator(fz_context *ctx, epub_accelerator *acc)
- {
- int i;
- for (i = 0; i < acc->max_chapters; i++)
- acc->pages_in_chapter[i] = -1;
- }
- static int count_chapter_pages(fz_context *ctx, epub_document *doc, epub_chapter *ch)
- {
- epub_accelerator *acc = doc->accel;
- int use_doc_css = fz_use_document_css(ctx);
- if (use_doc_css != acc->use_doc_css || doc->css_sum != acc->css_sum)
- {
- acc->use_doc_css = use_doc_css;
- acc->css_sum = doc->css_sum;
- invalidate_accelerator(ctx, acc);
- }
- if (ch->number < acc->num_chapters && acc->pages_in_chapter[ch->number] != -1)
- return acc->pages_in_chapter[ch->number];
- fz_drop_html(ctx, epub_get_laid_out_html(ctx, doc, ch));
- return acc->pages_in_chapter[ch->number];
- }
- static fz_link_dest
- epub_resolve_link(fz_context *ctx, fz_document *doc_, const char *dest)
- {
- epub_document *doc = (epub_document*)doc_;
- epub_chapter *ch;
- int i;
- const char *s = strchr(dest, '#');
- size_t n = s ? (size_t)(s - dest) : strlen(dest);
- if (s && s[1] == 0)
- s = NULL;
- for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next)
- {
- if (!strncmp(ch->path, dest, n) && ch->path[n] == 0)
- {
- if (s)
- {
- float y;
- fz_html *html = epub_get_laid_out_html(ctx, doc, ch);
- int ph = html->page_h;
- /* Search for a matching fragment */
- y = fz_find_html_target(ctx, html, s+1);
- fz_drop_html(ctx, html);
- if (y >= 0)
- {
- int page = y / ph;
- return fz_make_link_dest_xyz(i, page, 0, y - page * ph, 0);
- }
- return fz_make_link_dest_none();
- }
- return fz_make_link_dest_xyz(i, 0, 0, 0, 0);
- }
- }
- return fz_make_link_dest_none();
- }
- static void
- epub_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em)
- {
- epub_document *doc = (epub_document*)doc_;
- uint32_t css_sum = user_css_sum(ctx);
- int use_doc_css = fz_use_document_css(ctx);
- if (doc->layout_w == w && doc->layout_h == h && doc->layout_em == em && doc->css_sum == css_sum)
- return;
- doc->layout_w = w;
- doc->layout_h = h;
- doc->layout_em = em;
- if (doc->accel == NULL)
- return;
- /* When we load the saved accelerator, doc->accel
- * can be populated with different values than doc.
- * This is really useful as doc starts out with the
- * values being 0. If we've got the right values
- * already, then don't bin the data! */
- if (doc->accel->layout_w == w &&
- doc->accel->layout_h == h &&
- doc->accel->layout_em == em &&
- doc->accel->use_doc_css == use_doc_css &&
- doc->accel->css_sum == css_sum)
- return;
- doc->accel->layout_w = w;
- doc->accel->layout_h = h;
- doc->accel->layout_em = em;
- doc->accel->use_doc_css = use_doc_css;
- doc->accel->css_sum = css_sum;
- invalidate_accelerator(ctx, doc->accel);
- }
- static int
- epub_count_chapters(fz_context *ctx, fz_document *doc_)
- {
- epub_document *doc = (epub_document*)doc_;
- epub_chapter *ch;
- int count = 0;
- for (ch = doc->spine; ch; ch = ch->next)
- ++count;
- return count;
- }
- static int
- epub_count_pages(fz_context *ctx, fz_document *doc_, int chapter)
- {
- epub_document *doc = (epub_document*)doc_;
- epub_chapter *ch;
- int i;
- for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next)
- {
- if (i == chapter)
- {
- return count_chapter_pages(ctx, doc, ch);
- }
- }
- return 0;
- }
- #define MAGIC_ACCELERATOR 0xacce1e7a /* First 32-bit word of an accelerator file. */
- #define MAGIC_ACCEL_EPUB 0x62755065 /* Second word; its little-endian byte sequence is the ASCII text "ePub". */
- #define ACCEL_VERSION 0x00010001 /* Third word; a file carrying any other value is ignored by epub_load_accelerator. */
- static void epub_load_accelerator(fz_context *ctx, epub_document *doc, fz_stream *accel)
- {
- int v;
- float w, h, em;
- int num_chapters;
- epub_accelerator *acc = NULL;
- uint32_t css_sum;
- int use_doc_css;
- int make_new = (accel == NULL);
- fz_var(acc);
- if (accel)
- {
- /* Try to read the accelerator data. If we fail silently give up. */
- fz_try(ctx)
- {
- v = fz_read_int32_le(ctx, accel);
- if (v != (int32_t)MAGIC_ACCELERATOR)
- {
- make_new = 1;
- break;
- }
- v = fz_read_int32_le(ctx, accel);
- if (v != MAGIC_ACCEL_EPUB)
- {
- make_new = 1;
- break;
- }
- v = fz_read_int32_le(ctx, accel);
- if (v != ACCEL_VERSION)
- {
- make_new = 1;
- break;
- }
- w = fz_read_float_le(ctx, accel);
- h = fz_read_float_le(ctx, accel);
- em = fz_read_float_le(ctx, accel);
- css_sum = fz_read_uint32_le(ctx, accel);
- use_doc_css = fz_read_int32_le(ctx, accel);
- num_chapters = fz_read_int32_le(ctx, accel);
- if (num_chapters <= 0)
- {
- make_new = 1;
- break;
- }
- acc = fz_malloc_struct(ctx, epub_accelerator);
- acc->pages_in_chapter = Memento_label(fz_malloc_array(ctx, num_chapters, int), "accel_pages_in_chapter");
- acc->max_chapters = acc->num_chapters = num_chapters;
- acc->layout_w = w;
- acc->layout_h = h;
- acc->layout_em = em;
- acc->css_sum = css_sum;
- acc->use_doc_css = use_doc_css;
- for (v = 0; v < num_chapters; v++)
- acc->pages_in_chapter[v] = fz_read_int32_le(ctx, accel);
- }
- fz_catch(ctx)
- {
- if (acc)
- fz_free(ctx, acc->pages_in_chapter);
- fz_free(ctx, acc);
- /* Swallow the error and run unaccelerated */
- fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
- fz_report_error(ctx);
- make_new = 1;
- }
- }
- /* If we aren't given an accelerator to load (or the one we're given
- * is bad) create a blank stub and we can fill it out as we go. */
- if (make_new)
- {
- acc = fz_malloc_struct(ctx, epub_accelerator);
- acc->css_sum = doc->css_sum;
- acc->use_doc_css = fz_use_document_css(ctx);
- }
- doc->accel = acc;
- }
- static void
- accelerate_chapter(fz_context *ctx, epub_document *doc, epub_chapter *ch, fz_html *html)
- {
- epub_accelerator *acc = doc->accel;
- int p = count_laid_out_pages(html);
- if (ch->number < acc->num_chapters)
- {
- if (acc->pages_in_chapter[ch->number] != p && acc->pages_in_chapter[ch->number] != -1)
- {
- fz_warn(ctx, "Invalidating stale accelerator data.");
- invalidate_accelerator(ctx, doc->accel);
- }
- acc->pages_in_chapter[ch->number] = p;
- return;
- }
- if (ch->number >= acc->max_chapters)
- {
- int n = acc->max_chapters;
- int i;
- if (n == 0)
- n = 4;
- while (n <= ch->number)
- n *= 2;
- acc->pages_in_chapter = fz_realloc_array(ctx, acc->pages_in_chapter, n, int);
- for (i = acc->max_chapters; i < n; i++)
- acc->pages_in_chapter[i] = -1;
- acc->max_chapters = n;
- }
- acc->pages_in_chapter[ch->number] = p;
- if (acc->num_chapters < ch->number+1)
- acc->num_chapters = ch->number+1;
- }
- static void
- epub_drop_page(fz_context *ctx, fz_page *page_)
- {
- epub_page *page = (epub_page *)page_;
- fz_drop_html(ctx, page->html);
- }
- static epub_chapter *
- epub_load_chapter(fz_context *ctx, epub_document *doc, const char *path, int i)
- {
- epub_chapter *ch;
- ch = fz_malloc_struct(ctx, epub_chapter);
- fz_try(ctx)
- {
- ch->path = Memento_label(fz_strdup(ctx, path), "chapter_path");
- ch->number = i;
- }
- fz_catch(ctx)
- {
- fz_free(ctx, ch);
- fz_rethrow(ctx);
- }
- return ch;
- }
- static fz_html *
- epub_parse_chapter(fz_context *ctx, epub_document *doc, epub_chapter *ch)
- {
- fz_archive *zip = doc->zip;
- fz_buffer *buf;
- char base_uri[2048];
- fz_html *html;
- /* Look for one we made earlier */
- html = fz_find_html(ctx, doc, ch->number);
- if (html)
- return html;
- fz_dirname(base_uri, ch->path, sizeof base_uri);
- buf = fz_read_archive_entry(ctx, zip, ch->path);
- fz_try(ctx)
- html = fz_parse_html(ctx, doc->set, zip, base_uri, buf, fz_user_css(ctx), 1, 1, 0);
- fz_always(ctx)
- fz_drop_buffer(ctx, buf);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return fz_store_html(ctx, html, doc, ch->number);
- }
- static fz_html *
- epub_get_laid_out_html(fz_context *ctx, epub_document *doc, epub_chapter *ch)
- {
- fz_html *html = epub_parse_chapter(ctx, doc, ch);
- fz_try(ctx)
- {
- fz_layout_html(ctx, html, doc->layout_w, doc->layout_h, doc->layout_em);
- accelerate_chapter(ctx, doc, ch, html);
- }
- fz_catch(ctx)
- {
- fz_drop_html(ctx, html);
- fz_rethrow(ctx);
- }
- fz_drop_html(ctx, doc->most_recent_html);
- doc->most_recent_html = fz_keep_html(ctx, html);
- return html;
- }
- static fz_rect
- epub_bound_page(fz_context *ctx, fz_page *page_, fz_box_type box)
- {
- epub_document *doc = (epub_document*)page_->doc;
- epub_page *page = (epub_page*)page_;
- epub_chapter *ch = page->ch;
- fz_rect bbox;
- fz_html *html = epub_get_laid_out_html(ctx, doc, ch);
- bbox.x0 = 0;
- bbox.y0 = 0;
- bbox.x1 = html->page_w + html->page_margin[L] + html->page_margin[R];
- bbox.y1 = html->page_h + html->page_margin[T] + html->page_margin[B];
- fz_drop_html(ctx, html);
- return bbox;
- }
- static void
- epub_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, fz_matrix ctm, fz_cookie *cookie)
- {
- epub_page *page = (epub_page*)page_;
- fz_draw_html(ctx, dev, ctm, page->html, page->number);
- }
- static fz_link *
- epub_load_links(fz_context *ctx, fz_page *page_)
- {
- epub_page *page = (epub_page*)page_;
- epub_chapter *ch = page->ch;
- return fz_load_html_links(ctx, page->html, page->number, ch->path);
- }
- static fz_bookmark
- epub_make_bookmark(fz_context *ctx, fz_document *doc_, fz_location loc)
- {
- epub_document *doc = (epub_document*)doc_;
- epub_chapter *ch;
- int i;
- for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next)
- {
- if (i == loc.chapter)
- {
- fz_html *html = epub_get_laid_out_html(ctx, doc, ch);
- fz_bookmark mark = fz_make_html_bookmark(ctx, html, loc.page);
- fz_drop_html(ctx, html);
- return mark;
- }
- }
- return 0;
- }
- static fz_location
- epub_lookup_bookmark(fz_context *ctx, fz_document *doc_, fz_bookmark mark)
- {
- epub_document *doc = (epub_document*)doc_;
- epub_chapter *ch;
- int i;
- for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next)
- {
- fz_html *html = epub_get_laid_out_html(ctx, doc, ch);
- int p = fz_lookup_html_bookmark(ctx, html, mark);
- fz_drop_html(ctx, html);
- if (p != -1)
- return fz_make_location(i, p);
- }
- return fz_make_location(-1, -1);
- }
- static fz_page *
- epub_load_page(fz_context *ctx, fz_document *doc_, int chapter, int number)
- {
- epub_document *doc = (epub_document*)doc_;
- epub_chapter *ch;
- int i;
- for (i = 0, ch = doc->spine; ch; ++i, ch = ch->next)
- {
- if (i == chapter)
- {
- epub_page *page = fz_new_derived_page(ctx, epub_page, doc_);
- page->super.bound_page = epub_bound_page;
- page->super.run_page_contents = epub_run_page;
- page->super.load_links = epub_load_links;
- page->super.drop_page = epub_drop_page;
- page->ch = ch;
- page->number = number;
- page->html = epub_get_laid_out_html(ctx, doc, ch);
- return (fz_page*)page;
- }
- }
- return NULL;
- }
- static void
- epub_page_label(fz_context *ctx, fz_document *doc_, int chapter, int number, char *buf, size_t size)
- {
- fz_snprintf(buf, size, "ch. %d, p. %d", chapter+1, number+1);
- }
- static void
- epub_drop_accelerator(fz_context *ctx, epub_accelerator *acc)
- {
- if (acc == NULL)
- return;
- fz_free(ctx, acc->pages_in_chapter);
- fz_free(ctx, acc);
- }
- static void
- epub_drop_document(fz_context *ctx, fz_document *doc_)
- {
- epub_document *doc = (epub_document*)doc_;
- epub_chapter *ch, *next;
- ch = doc->spine;
- while (ch)
- {
- next = ch->next;
- fz_free(ctx, ch->path);
- fz_free(ctx, ch);
- ch = next;
- }
- epub_drop_accelerator(ctx, doc->accel);
- fz_drop_archive(ctx, doc->zip);
- fz_drop_html_font_set(ctx, doc->set);
- fz_drop_outline(ctx, doc->outline);
- fz_free(ctx, doc->dc_title);
- fz_free(ctx, doc->dc_creator);
- fz_drop_html(ctx, doc->most_recent_html);
- fz_purge_stored_html(ctx, doc);
- }
- static const char *
- rel_path_from_idref(fz_xml *manifest, const char *idref)
- {
- fz_xml *item;
- if (!idref)
- return NULL;
- item = fz_xml_find_down(manifest, "item");
- while (item)
- {
- const char *id = fz_xml_att(item, "id");
- if (id && !strcmp(id, idref))
- return fz_xml_att(item, "href");
- item = fz_xml_find_next(item, "item");
- }
- return NULL;
- }
- static const char *
- path_from_idref(char *path, fz_xml *manifest, const char *base_uri, const char *idref, int n)
- {
- const char *rel_path = rel_path_from_idref(manifest, idref);
- if (!rel_path)
- {
- path[0] = 0;
- return NULL;
- }
- fz_strlcpy(path, base_uri, n);
- fz_strlcat(path, "/", n);
- fz_strlcat(path, rel_path, n);
- return fz_cleanname(fz_urldecode(path));
- }
- static fz_outline *
- epub_parse_ncx_imp(fz_context *ctx, epub_document *doc, fz_xml *node, char *base_uri)
- {
- char path[2048];
- fz_outline *outline, *head, **tailp;
- head = NULL;
- tailp = &head;
- node = fz_xml_find_down(node, "navPoint");
- while (node)
- {
- char *text = fz_xml_text(fz_xml_down(fz_xml_find_down(fz_xml_find_down(node, "navLabel"), "text")));
- char *content = fz_xml_att(fz_xml_find_down(node, "content"), "src");
- if (text && content)
- {
- fz_strlcpy(path, base_uri, sizeof path);
- fz_strlcat(path, "/", sizeof path);
- fz_strlcat(path, content, sizeof path);
- fz_urldecode(path);
- fz_cleanname(path);
- fz_try(ctx)
- {
- *tailp = outline = fz_new_outline(ctx);
- tailp = &(*tailp)->next;
- outline->title = Memento_label(fz_strdup(ctx, text), "outline_title");
- outline->uri = Memento_label(fz_strdup(ctx, path), "outline_uri");
- outline->page = fz_make_location(-1, -1);
- outline->down = epub_parse_ncx_imp(ctx, doc, node, base_uri);
- outline->is_open = 1;
- }
- fz_catch(ctx)
- {
- fz_drop_outline(ctx, head);
- fz_rethrow(ctx);
- }
- }
- node = fz_xml_find_next(node, "navPoint");
- }
- return head;
- }
- static void
- epub_parse_ncx(fz_context *ctx, epub_document *doc, const char *path)
- {
- fz_archive *zip = doc->zip;
- fz_buffer *buf = NULL;
- fz_xml_doc *ncx = NULL;
- char base_uri[2048];
- fz_var(buf);
- fz_var(ncx);
- fz_try(ctx)
- {
- fz_dirname(base_uri, path, sizeof base_uri);
- buf = fz_read_archive_entry(ctx, zip, path);
- ncx = fz_parse_xml(ctx, buf, 0);
- doc->outline = epub_parse_ncx_imp(ctx, doc, fz_xml_find_down(fz_xml_root(ncx), "navMap"), base_uri);
- }
- fz_always(ctx)
- {
- fz_drop_buffer(ctx, buf);
- fz_drop_xml(ctx, ncx);
- }
- fz_catch(ctx)
- fz_rethrow(ctx);
- }
- static char *
- find_metadata(fz_context *ctx, fz_xml *metadata, char *key)
- {
- char *text = fz_xml_text(fz_xml_down(fz_xml_find_down(metadata, key)));
- if (text)
- return fz_strdup(ctx, text);
- return NULL;
- }
- static fz_buffer *
- read_container_and_prefix(fz_context *ctx, fz_archive *zip, char *prefix, size_t prefix_len)
- {
- int n = fz_count_archive_entries(ctx, zip);
- int i;
- prefix[0] = 0;
- /* First off, look for the container.xml at the top level. */
- for (i = 0; i < n; i++)
- {
- const char *p = fz_list_archive_entry(ctx, zip, i);
- if (!strcmp(p, "META-INF/container.xml"))
- return fz_read_archive_entry(ctx, zip, "META-INF/container.xml");
- }
- /* If that failed, look for the first such file in a subdirectory. */
- for (i = 0; i < n; i++)
- {
- const char *p = fz_list_archive_entry(ctx, zip, i);
- size_t z = strlen(p);
- size_t z0 = sizeof("META-INF/container.xml")-1;
- if (z < z0)
- continue;
- if (!strcmp(p + z - z0, "META-INF/container.xml"))
- {
- if (z - z0 >= prefix_len)
- {
- fz_warn(ctx, "Ignoring %s as path too long.", p);
- continue;
- }
- memcpy(prefix, p, z-z0);
- prefix[z-z0] = 0;
- return fz_read_archive_entry(ctx, zip, p);
- }
- }
- return fz_read_archive_entry(ctx, zip, "META-INF/container.xml");
- }
- static void /* Read the package description of the EPUB held in doc->zip:
- * locate container.xml (and any directory prefix), register encrypted
- * entries, find and parse the OPF package file, then fill in the title,
- * creator, outline and spine of doc. All temporaries are released in
- * fz_always; errors are rethrown. */
- epub_parse_header(fz_context *ctx, epub_document *doc)
- {
- fz_archive *zip = doc->zip;
- fz_buffer *buf = NULL;
- fz_xml_doc *encryption_xml = NULL;
- fz_xml_doc *container_xml = NULL;
- fz_xml_doc *content_opf = NULL;
- fz_xml *container, *rootfiles, *rootfile;
- fz_xml *package, *manifest, *spine, *itemref, *metadata;
- char base_uri[2048]; /* Used in turn for: the directory prefix, the encryption.xml name, and finally prefix + directory of the OPF file. */
- const char *full_path;
- const char *version;
- char ncx[2048], s[2048]; /* Scratch paths for the NCX file and for each spine item. */
- char *prefixed_full_path = NULL;
- size_t prefix_len;
- epub_chapter **tailp;
- int i;
- fz_var(buf);
- fz_var(encryption_xml);
- fz_var(container_xml);
- fz_var(content_opf);
- fz_var(prefixed_full_path);
- fz_try(ctx)
- {
- /* Step 1: parse META-INF/container.xml to find the OPF. */
- /* (META-INF/encryption.xml, listing encrypted entries, is handled right after.) */
- /* Reuse base_uri to read the prefix. */
- buf = read_container_and_prefix(ctx, zip, base_uri, sizeof(base_uri));
- container_xml = fz_parse_xml(ctx, buf, 0);
- fz_drop_buffer(ctx, buf);
- buf = NULL; /* Cleared so that fz_always does not drop the buffer a second time. */
- /* Some epub files can be prefixed by a directory name. This (normally
- * empty!) will be in base_uri. */
- prefix_len = strlen(base_uri);
- {
- /* Further abuse base_uri to hold a temporary name. */
- const size_t z0 = sizeof("META-INF/encryption.xml")-1;
- if (sizeof(base_uri) <= prefix_len + z0) /* The strcpy below needs room for the name and its terminator. */
- fz_throw(ctx, FZ_ERROR_FORMAT, "Prefix too long in epub");
- strcpy(base_uri + prefix_len, "META-INF/encryption.xml");
- if (fz_has_archive_entry(ctx, zip, base_uri))
- {
- fz_warn(ctx, "EPUB may be locked by DRM");
- buf = fz_read_archive_entry(ctx, zip, base_uri);
- encryption_xml = fz_parse_xml(ctx, buf, 0);
- fz_drop_buffer(ctx, buf);
- buf = NULL;
- epub_parse_encryption(ctx, doc, fz_xml_find(fz_xml_root(encryption_xml), "encryption"));
- zip = doc->zip; /* epub_parse_encryption may have replaced doc->zip with a wrapper archive. */
- }
- }
- container = fz_xml_find(fz_xml_root(container_xml), "container");
- rootfiles = fz_xml_find_down(container, "rootfiles");
- rootfile = fz_xml_find_down(rootfiles, "rootfile");
- full_path = fz_xml_att(rootfile, "full-path");
- if (!full_path)
- fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find root file in EPUB");
- fz_dirname(base_uri+prefix_len, full_path, sizeof(base_uri) - prefix_len); /* Overwrites the temporary encryption.xml name; the prefix itself is kept. */
- prefixed_full_path = fz_malloc(ctx, strlen(full_path) + prefix_len + 1);
- memcpy(prefixed_full_path, base_uri, prefix_len);
- strcpy(prefixed_full_path + prefix_len, full_path);
- /* parse OPF to find NCX and spine */
- buf = fz_read_archive_entry(ctx, zip, prefixed_full_path);
- content_opf = fz_parse_xml(ctx, buf, 0);
- fz_drop_buffer(ctx, buf);
- buf = NULL;
- package = fz_xml_find(fz_xml_root(content_opf), "package");
- version = fz_xml_att(package, "version");
- if (!version || strcmp(version, "2.0")) /* Only warns; parsing continues for any version. */
- fz_warn(ctx, "unknown epub version: %s", version ? version : "<none>");
- metadata = fz_xml_find_down(package, "metadata");
- if (metadata)
- {
- doc->dc_title = Memento_label(find_metadata(ctx, metadata, "title"), "epub_title");
- doc->dc_creator = Memento_label(find_metadata(ctx, metadata, "creator"), "epub_creator");
- }
- manifest = fz_xml_find_down(package, "manifest");
- spine = fz_xml_find_down(package, "spine");
- if (path_from_idref(ncx, manifest, base_uri, fz_xml_att(spine, "toc"), sizeof ncx)) /* The spine's "toc" attribute names the manifest item of the NCX file. */
- {
- epub_parse_ncx(ctx, doc, ncx);
- }
- doc->spine = NULL;
- tailp = &doc->spine; /* Always points at the link where the next chapter gets appended. */
- itemref = fz_xml_find_down(spine, "itemref");
- i = 0; /* Chapter number; only advanced for chapters that were actually added. */
- while (itemref)
- {
- if (path_from_idref(s, manifest, base_uri, fz_xml_att(itemref, "idref"), sizeof s))
- {
- fz_try(ctx)
- {
- *tailp = epub_load_chapter(ctx, doc, s, i);
- tailp = &(*tailp)->next;
- i++;
- }
- fz_catch(ctx)
- {
- fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
- fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
- fz_report_error(ctx); /* Any other error is reported and the chapter skipped. */
- fz_warn(ctx, "ignoring chapter %s", s);
- }
- }
- itemref = fz_xml_find_next(itemref, "itemref");
- }
- }
- fz_always(ctx)
- {
- fz_drop_xml(ctx, content_opf);
- fz_drop_xml(ctx, container_xml);
- fz_drop_xml(ctx, encryption_xml);
- fz_drop_buffer(ctx, buf);
- fz_free(ctx, prefixed_full_path);
- }
- fz_catch(ctx)
- fz_rethrow(ctx);
- }
- static fz_outline *
- epub_load_outline(fz_context *ctx, fz_document *doc_)
- {
- epub_document *doc = (epub_document*)doc_;
- return fz_keep_outline(ctx, doc->outline);
- }
- static int
- epub_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *buf, size_t size)
- {
- epub_document *doc = (epub_document*)doc_;
- if (!strcmp(key, FZ_META_FORMAT))
- return 1 + (int)fz_strlcpy(buf, "EPUB", size);
- if (!strcmp(key, FZ_META_INFO_TITLE) && doc->dc_title)
- return 1 + (int)fz_strlcpy(buf, doc->dc_title, size);
- if (!strcmp(key, FZ_META_INFO_AUTHOR) && doc->dc_creator)
- return 1 + (int)fz_strlcpy(buf, doc->dc_creator, size);
- return -1;
- }
- static void
- epub_output_accelerator(fz_context *ctx, fz_document *doc_, fz_output *out)
- {
- epub_document *doc = (epub_document*)doc_;
- int i;
- fz_try(ctx)
- {
- if (doc->accel == NULL)
- fz_throw(ctx, FZ_ERROR_ARGUMENT, "No accelerator data to write");
- fz_write_int32_le(ctx, out, MAGIC_ACCELERATOR);
- fz_write_int32_le(ctx, out, MAGIC_ACCEL_EPUB);
- fz_write_int32_le(ctx, out, ACCEL_VERSION);
- fz_write_float_le(ctx, out, doc->accel->layout_w);
- fz_write_float_le(ctx, out, doc->accel->layout_h);
- fz_write_float_le(ctx, out, doc->accel->layout_em);
- fz_write_uint32_le(ctx, out, doc->accel->css_sum);
- fz_write_int32_le(ctx, out, doc->accel->use_doc_css);
- fz_write_int32_le(ctx, out, doc->accel->num_chapters);
- for (i = 0; i < doc->accel->num_chapters; i++)
- fz_write_int32_le(ctx, out, doc->accel->pages_in_chapter[i]);
- fz_close_output(ctx, out);
- }
- fz_always(ctx)
- fz_drop_output(ctx, out);
- fz_catch(ctx)
- fz_rethrow(ctx);
- }
- /* Takes ownership of zip. Will always eventually drop it.
- * Never takes ownership of accel. */
- static fz_document *
- epub_init(fz_context *ctx, fz_archive *zip, fz_stream *accel)
- {
- epub_document *doc = NULL;
- fz_var(doc);
- fz_var(zip);
- fz_try(ctx)
- {
- doc = fz_new_derived_document(ctx, epub_document);
- doc->zip = zip;
- zip = NULL;
- doc->super.drop_document = epub_drop_document;
- doc->super.layout = epub_layout;
- doc->super.load_outline = epub_load_outline;
- doc->super.resolve_link_dest = epub_resolve_link;
- doc->super.make_bookmark = epub_make_bookmark;
- doc->super.lookup_bookmark = epub_lookup_bookmark;
- doc->super.count_chapters = epub_count_chapters;
- doc->super.count_pages = epub_count_pages;
- doc->super.load_page = epub_load_page;
- doc->super.page_label = epub_page_label;
- doc->super.lookup_metadata = epub_lookup_metadata;
- doc->super.output_accelerator = epub_output_accelerator;
- doc->super.is_reflowable = 1;
- doc->set = fz_new_html_font_set(ctx);
- doc->css_sum = user_css_sum(ctx);
- epub_load_accelerator(ctx, doc, accel);
- epub_parse_header(ctx, doc);
- }
- fz_catch(ctx)
- {
- fz_drop_archive(ctx, zip);
- fz_drop_document(ctx, &doc->super);
- fz_rethrow(ctx);
- }
- return (fz_document*)doc;
- }
- static fz_document *
- epub_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
- {
- fz_stream *file2 = NULL;
- fz_document *doc;
- fz_archive *zip = NULL;
- if (file == NULL)
- {
- /* Directory case: file == NULL and dir == the directory. */
- if (fz_has_archive_entry(ctx, dir, "META-INF/container.xml"))
- file2 = file = fz_open_archive_entry(ctx, dir, "META-INF/container.xml");
- else
- file2 = file = fz_open_archive_entry(ctx, dir, "META-INF\\container.xml");
- if (file == NULL)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Not an epub file");
- zip = fz_keep_archive(ctx, dir);
- }
- else
- {
- /* File case: file != NULL and dir can be ignored. */
- zip = fz_open_archive_with_stream(ctx, file);
- }
- fz_try(ctx)
- doc = epub_init(ctx, zip, file);
- fz_always(ctx)
- fz_drop_stream(ctx, file2);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return doc;
- }
- static int
- epub_recognize(fz_context *doc, const fz_document_handler *handler, const char *magic)
- {
- if (strstr(magic, "META-INF/container.xml") || strstr(magic, "META-INF\\container.xml"))
- return 200;
- return 0;
- }
- static int
- epub_recognize_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
- {
- fz_archive *arch = NULL;
- int ret = 0;
- fz_var(arch);
- fz_var(ret);
- if (state)
- *state = NULL;
- if (free_state)
- *free_state = NULL;
- fz_try(ctx)
- {
- if (stream == NULL)
- arch = fz_keep_archive(ctx, dir);
- else
- {
- arch = fz_try_open_archive_with_stream(ctx, stream);
- if (arch == NULL)
- break;
- }
- if (fz_has_archive_entry(ctx, arch, "META-INF/container.xml") ||
- fz_has_archive_entry(ctx, arch, "META-INF\\container.xml"))
- ret = 74; /* One less than the 75 that HWPX files are detected as. */
- }
- fz_always(ctx)
- fz_drop_archive(ctx, arch);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return ret;
- }
- static const char *epub_extensions[] = /* NULL-terminated list of file extensions listed in epub_document_handler. */
- {
- "epub",
- NULL
- };
- static const char *epub_mimetypes[] = /* NULL-terminated list of MIME types listed in epub_document_handler. */
- {
- "application/epub+zip",
- NULL
- };
- fz_document_handler epub_document_handler = /* Handler entry for EPUB; the only symbol this file defines without static. */
- {
- epub_recognize, /* Recognition from the magic/name string. */
- epub_open_document, /* Opens from a stream or from a directory archive. */
- epub_extensions,
- epub_mimetypes,
- epub_recognize_content /* Recognition by looking inside the archive. */
- };
|