html-doc.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751
  1. // Copyright (C) 2004-2024 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include "html-imp.h"
  24. #include <string.h>
  25. #include <math.h>
  26. enum { T, R, B, L };
  27. typedef struct
  28. {
  29. fz_document super;
  30. fz_archive *zip;
  31. fz_html_font_set *set;
  32. fz_html *html;
  33. fz_outline *outline;
  34. const fz_htdoc_format_t *format;
  35. } html_document;
  36. typedef struct
  37. {
  38. fz_page super;
  39. html_document *doc;
  40. int number;
  41. } html_page;
  42. static void
  43. htdoc_drop_document(fz_context *ctx, fz_document *doc_)
  44. {
  45. html_document *doc = (html_document*)doc_;
  46. fz_drop_archive(ctx, doc->zip);
  47. fz_drop_html(ctx, doc->html);
  48. fz_drop_html_font_set(ctx, doc->set);
  49. fz_drop_outline(ctx, doc->outline);
  50. }
  51. static fz_link_dest
  52. htdoc_resolve_link(fz_context *ctx, fz_document *doc_, const char *dest)
  53. {
  54. html_document *doc = (html_document*)doc_;
  55. const char *s = strchr(dest, '#');
  56. if (s && s[1] != 0)
  57. {
  58. float y = fz_find_html_target(ctx, doc->html, s+1);
  59. if (y >= 0)
  60. {
  61. int page = y / doc->html->page_h;
  62. return fz_make_link_dest_xyz(0, page, 0, y - page * doc->html->page_h, 0);
  63. }
  64. }
  65. return fz_make_link_dest_none();
  66. }
  67. static int
  68. htdoc_count_pages(fz_context *ctx, fz_document *doc_, int chapter)
  69. {
  70. html_document *doc = (html_document*)doc_;
  71. if (doc->html->tree.root->s.layout.b > 0)
  72. return ceilf(doc->html->tree.root->s.layout.b / doc->html->page_h);
  73. return 1;
  74. }
  75. static void
  76. htdoc_update_outline(fz_context *ctx, fz_document *doc, fz_outline *node)
  77. {
  78. while (node)
  79. {
  80. fz_link_dest dest = htdoc_resolve_link(ctx, doc, node->uri);
  81. node->page = dest.loc;
  82. node->x = dest.x;
  83. node->y = dest.y;
  84. htdoc_update_outline(ctx, doc, node->down);
  85. node = node->next;
  86. }
  87. }
  88. static void
  89. htdoc_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em)
  90. {
  91. html_document *doc = (html_document*)doc_;
  92. fz_layout_html(ctx, doc->html, w, h, em);
  93. htdoc_update_outline(ctx, doc_, doc->outline);
  94. }
  95. static void
  96. htdoc_drop_page(fz_context *ctx, fz_page *page_)
  97. {
  98. }
  99. static fz_rect
  100. htdoc_bound_page(fz_context *ctx, fz_page *page_, fz_box_type box)
  101. {
  102. html_page *page = (html_page*)page_;
  103. html_document *doc = page->doc;
  104. fz_rect bbox;
  105. bbox.x0 = 0;
  106. bbox.y0 = 0;
  107. bbox.x1 = doc->html->page_w + doc->html->page_margin[L] + doc->html->page_margin[R];
  108. bbox.y1 = doc->html->page_h + doc->html->page_margin[T] + doc->html->page_margin[B];
  109. return bbox;
  110. }
  111. static void
  112. htdoc_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, fz_matrix ctm, fz_cookie *cookie)
  113. {
  114. html_page *page = (html_page*)page_;
  115. html_document *doc = page->doc;
  116. fz_draw_html(ctx, dev, ctm, doc->html, page->number);
  117. }
  118. static fz_link *
  119. htdoc_load_links(fz_context *ctx, fz_page *page_)
  120. {
  121. html_page *page = (html_page*)page_;
  122. html_document *doc = page->doc;
  123. return fz_load_html_links(ctx, doc->html, page->number, "");
  124. }
  125. static fz_bookmark
  126. htdoc_make_bookmark(fz_context *ctx, fz_document *doc_, fz_location loc)
  127. {
  128. html_document *doc = (html_document*)doc_;
  129. return fz_make_html_bookmark(ctx, doc->html, loc.page);
  130. }
  131. static fz_location
  132. htdoc_lookup_bookmark(fz_context *ctx, fz_document *doc_, fz_bookmark mark)
  133. {
  134. html_document *doc = (html_document*)doc_;
  135. return fz_make_location(0, fz_lookup_html_bookmark(ctx, doc->html, mark));
  136. }
  137. static fz_page *
  138. htdoc_load_page(fz_context *ctx, fz_document *doc_, int chapter, int number)
  139. {
  140. html_document *doc = (html_document*)doc_;
  141. html_page *page = fz_new_derived_page(ctx, html_page, doc_);
  142. page->super.bound_page = htdoc_bound_page;
  143. page->super.run_page_contents = htdoc_run_page;
  144. page->super.load_links = htdoc_load_links;
  145. page->super.drop_page = htdoc_drop_page;
  146. page->doc = doc;
  147. page->number = number;
  148. return (fz_page*)page;
  149. }
  150. static fz_outline *
  151. htdoc_load_outline(fz_context *ctx, fz_document *doc_)
  152. {
  153. html_document *doc = (html_document*)doc_;
  154. return fz_keep_outline(ctx, doc->outline);
  155. }
  156. static int
  157. htdoc_lookup_metadata(fz_context *ctx, fz_document *doc_, const char *key, char *buf, size_t size)
  158. {
  159. html_document *doc = (html_document *)doc_;
  160. if (!strcmp(key, FZ_META_FORMAT))
  161. return 1 + (int)fz_strlcpy(buf, doc->format->format_name, size);
  162. if (!strcmp(key, FZ_META_INFO_TITLE) && doc->html->title)
  163. return 1 + (int)fz_strlcpy(buf, doc->html->title, size);
  164. return -1;
  165. }
  166. static fz_html *
  167. generic_parse(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buffer_in, const char *user_css, const fz_htdoc_format_t *format)
  168. {
  169. fz_buffer *buffer_html = NULL;
  170. fz_html *html = NULL;
  171. fz_try(ctx)
  172. {
  173. if (format->convert_to_html)
  174. buffer_html = format->convert_to_html(ctx, set, buffer_in, zip, user_css);
  175. else
  176. buffer_html = fz_keep_buffer(ctx, buffer_in);
  177. html = fz_parse_html(ctx, set, zip, base_uri, buffer_html, user_css, format->try_xml, format->try_html5, format->patch_mobi);
  178. }
  179. fz_always(ctx)
  180. {
  181. fz_drop_buffer(ctx, buffer_html);
  182. }
  183. fz_catch(ctx)
  184. {
  185. fz_drop_html(ctx, html);
  186. fz_rethrow(ctx);
  187. }
  188. return html;
  189. }
  190. fz_document *
  191. fz_htdoc_open_document_with_buffer(fz_context *ctx, fz_archive *dir, fz_buffer *buf, const fz_htdoc_format_t *format)
  192. {
  193. html_document *doc = NULL;
  194. fz_var(doc);
  195. fz_var(dir);
  196. fz_try(ctx)
  197. {
  198. doc = fz_new_derived_document(ctx, html_document);
  199. doc->super.drop_document = htdoc_drop_document;
  200. doc->super.layout = htdoc_layout;
  201. doc->super.load_outline = htdoc_load_outline;
  202. doc->super.resolve_link_dest = htdoc_resolve_link;
  203. doc->super.make_bookmark = htdoc_make_bookmark;
  204. doc->super.lookup_bookmark = htdoc_lookup_bookmark;
  205. doc->super.count_pages = htdoc_count_pages;
  206. doc->super.load_page = htdoc_load_page;
  207. doc->super.lookup_metadata = htdoc_lookup_metadata;
  208. doc->super.is_reflowable = 1;
  209. doc->zip = fz_keep_archive(ctx, dir);
  210. doc->format = format;
  211. doc->set = fz_new_html_font_set(ctx);
  212. doc->html = generic_parse(ctx, doc->set, doc->zip, ".", buf, fz_user_css(ctx), format);
  213. doc->outline = fz_load_html_outline(ctx, doc->html);
  214. }
  215. fz_always(ctx)
  216. fz_drop_buffer(ctx, buf);
  217. fz_catch(ctx)
  218. {
  219. fz_drop_document(ctx, &doc->super);
  220. fz_rethrow(ctx);
  221. }
  222. return (fz_document*)doc;
  223. }
  224. fz_document *
  225. fz_htdoc_open_document_with_stream_and_dir(fz_context *ctx, fz_stream *stm, fz_archive *dir, const fz_htdoc_format_t *format)
  226. {
  227. fz_buffer *buf = NULL;
  228. if (stm)
  229. buf = fz_read_all(ctx, stm, 0);
  230. return fz_htdoc_open_document_with_buffer(ctx, dir, buf, format);
  231. }
  232. /* Variant specific functions */
  233. /* Generic HTML document handler */
  234. static int isws(int c)
  235. {
  236. return c == 32 || c == 9 || c == 10 || c == 13 || c == 12;
  237. }
  238. static int recognize_html_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **hstate, fz_document_recognize_state_free_fn **free_state, int xhtml)
  239. {
  240. uint8_t buffer[4096];
  241. size_t i, n, m;
  242. enum {
  243. state_top,
  244. state_open,
  245. state_pling,
  246. state_query,
  247. state_maybe_doctype,
  248. state_maybe_doctype_ws,
  249. state_maybe_doctype_html,
  250. state_maybe_doctype_html_xhtml,
  251. state_maybe_comment,
  252. state_maybe_html,
  253. state_maybe_html_xhtml,
  254. state_comment
  255. };
  256. int state = state_top;
  257. int type = 0;
  258. if (hstate)
  259. *hstate = NULL;
  260. if (free_state)
  261. *free_state = NULL;
  262. if (stream == NULL)
  263. return 0;
  264. /* Simple state machine. Search for "<!doctype html" or "<html" in the first
  265. * 4K of the file, allowing for comments and whitespace and case insensitivity. */
  266. n = fz_read(ctx, stream, buffer, sizeof(buffer));
  267. fz_seek(ctx, stream, 0, SEEK_SET);
  268. if (n == 0)
  269. return 0;
  270. i = 0;
  271. if (n >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF)
  272. {
  273. /* UTF-8 encoded BOM. Just skip it. */
  274. i = 3;
  275. }
  276. else if (n >= 2 && buffer[0] == 0xFE && buffer[1] == 0xFF)
  277. {
  278. /* UTF-16, big endian. */
  279. type = 1;
  280. i = 2;
  281. n &= ~1;
  282. }
  283. else if (n >= 2 && buffer[0] == 0xFF && buffer[1] == 0xFE)
  284. {
  285. /* UTF-16, little endian. */
  286. i = 2;
  287. type = 2;
  288. n &= ~1;
  289. }
  290. while (i < n)
  291. {
  292. int c;
  293. switch (type)
  294. {
  295. case 0: /* UTF-8 */
  296. c = buffer[i++];
  297. break;
  298. case 1: /* UTF-16 - big endian */
  299. c = buffer[i++] << 8;
  300. c |= buffer[i++];
  301. break;
  302. case 2: /* UTF-16 - little endian */
  303. c = buffer[i++];
  304. c |= buffer[i++] << 8;
  305. break;
  306. }
  307. switch (state)
  308. {
  309. case state_top:
  310. if (isws(c))
  311. continue; /* whitespace */
  312. if (c == '<')
  313. state = state_open;
  314. else
  315. return 0; /* Non whitespace found at the top level prior to a known tag. Fail. */
  316. break;
  317. case state_open:
  318. if (isws(c))
  319. continue; /* whitespace */
  320. if (c == '!')
  321. state = state_pling;
  322. else if (c == '?')
  323. state = state_query;
  324. else if (c == 'h' || c == 'H')
  325. state = state_maybe_html;
  326. else
  327. return 0; /* Not an acceptable opening tag. */
  328. m = 0;
  329. break;
  330. case state_query:
  331. if (c == '>')
  332. state = state_top;
  333. break;
  334. case state_pling:
  335. if (isws(c))
  336. continue; /* whitespace */
  337. else if (c == '-')
  338. state = state_maybe_comment;
  339. else if (c == 'd' || c == 'D')
  340. state = state_maybe_doctype;
  341. else
  342. return 0; /* Not an acceptable opening tag. */
  343. break;
  344. case state_maybe_comment:
  345. if (c == '-')
  346. state = state_comment;
  347. else
  348. return 0; /* Not an acceptable opening tag. */
  349. break;
  350. case state_comment:
  351. if (c == '-')
  352. {
  353. m++;
  354. }
  355. else if (c == '>' && m >= 2)
  356. {
  357. state = state_top;
  358. }
  359. else
  360. m = 0;
  361. break;
  362. case state_maybe_doctype:
  363. if (c == "octype"[m] || c == "OCTYPE"[m])
  364. {
  365. m++;
  366. if (m == 6)
  367. {
  368. state = state_maybe_doctype_ws;
  369. m = 0;
  370. }
  371. }
  372. else
  373. return 0; /* Not an acceptable opening tag. */
  374. break;
  375. case state_maybe_doctype_ws:
  376. if (isws(c))
  377. m++;
  378. else if (m > 0 && (c == 'h' || c == 'H'))
  379. {
  380. state = state_maybe_doctype_html;
  381. m = 0;
  382. }
  383. else
  384. return 0; /* Not an acceptable opening tag. */
  385. break;
  386. case state_maybe_doctype_html:
  387. if (c == "tml"[m] || c == "TML"[m])
  388. {
  389. m++;
  390. if (m == 3)
  391. {
  392. state = state_maybe_doctype_html_xhtml;
  393. m = 0;
  394. }
  395. }
  396. else
  397. return 0; /* Not an acceptable opening tag. */
  398. break;
  399. case state_maybe_doctype_html_xhtml:
  400. if (c == '>')
  401. {
  402. /* Not xhtml - the xhtml agent can handle this at a pinch (so 25),
  403. * but we'd rather the html one did (75). */
  404. return xhtml ? 25 : 75;
  405. }
  406. if (c >= 'A' && c <= 'Z')
  407. c += 'a'-'A';
  408. if (c == "xhtml"[m])
  409. {
  410. m++;
  411. if (m == 5)
  412. {
  413. /* xhtml - the xhtml agent would be better (75) than the html
  414. * agent (25). */
  415. return xhtml ? 75 : 25;
  416. }
  417. }
  418. else
  419. m = 0;
  420. break;
  421. case state_maybe_html:
  422. if (c == "tml"[m] || c == "TML"[m])
  423. {
  424. m++;
  425. if (m == 3)
  426. {
  427. state = state_maybe_html_xhtml;
  428. m = 0;
  429. }
  430. }
  431. else
  432. return 0; /* Not an acceptable opening tag. */
  433. break;
  434. case state_maybe_html_xhtml:
  435. if (c == '>')
  436. {
  437. /* Not xhtml - the xhtml agent can handle this at a pinch (so 25),
  438. * but we'd rather the html one did (75). */
  439. return xhtml ? 25 : 75;
  440. }
  441. if (c >= 'A' && c <= 'Z')
  442. c += 'a'-'A';
  443. if (c == "xhtml"[m])
  444. {
  445. m++;
  446. if (m == 5)
  447. {
  448. /* xhtml - the xhtml agent would be better (75) than the html
  449. * agent (25). */
  450. return xhtml ? 75 : 25;
  451. }
  452. }
  453. else
  454. m = 0;
  455. break;
  456. }
  457. }
  458. return 0;
  459. }
  460. int htdoc_recognize_html_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **hstate, fz_document_recognize_state_free_fn **free_state)
  461. {
  462. return recognize_html_content(ctx, handler, stream, dir, hstate, free_state, 0);
  463. }
  464. static const fz_htdoc_format_t fz_htdoc_html5 =
  465. {
  466. "HTML5",
  467. NULL,
  468. 0, 1, 0
  469. };
  470. static fz_document *
  471. htdoc_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
  472. {
  473. return fz_htdoc_open_document_with_stream_and_dir(ctx, file, dir, &fz_htdoc_html5);
  474. }
  475. static const char *htdoc_extensions[] =
  476. {
  477. "htm",
  478. "html",
  479. NULL
  480. };
  481. static const char *htdoc_mimetypes[] =
  482. {
  483. "text/html",
  484. NULL
  485. };
  486. fz_document_handler html_document_handler =
  487. {
  488. NULL,
  489. htdoc_open_document,
  490. htdoc_extensions,
  491. htdoc_mimetypes,
  492. htdoc_recognize_html_content,
  493. 1
  494. };
  495. /* XHTML document handler */
  496. static const fz_htdoc_format_t fz_htdoc_xhtml =
  497. {
  498. "XHTML",
  499. NULL,
  500. 1, 1, 0
  501. };
  502. static fz_document *
  503. xhtdoc_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
  504. {
  505. return fz_htdoc_open_document_with_stream_and_dir(ctx, file, dir, &fz_htdoc_xhtml);
  506. }
  507. int xhtdoc_recognize_xhtml_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **hstate, fz_document_recognize_state_free_fn **free_state)
  508. {
  509. return recognize_html_content(ctx, handler, stream, dir, hstate, free_state, 1);
  510. }
  511. static const char *xhtdoc_extensions[] =
  512. {
  513. "xhtml",
  514. NULL
  515. };
  516. static const char *xhtdoc_mimetypes[] =
  517. {
  518. "application/xhtml+xml",
  519. NULL
  520. };
  521. fz_document_handler xhtml_document_handler =
  522. {
  523. NULL,
  524. xhtdoc_open_document,
  525. xhtdoc_extensions,
  526. xhtdoc_mimetypes,
  527. xhtdoc_recognize_xhtml_content,
  528. 1
  529. };
  530. /* FB2 document handler */
  531. static const fz_htdoc_format_t fz_htdoc_fb2 =
  532. {
  533. "FictionBook2",
  534. NULL,
  535. 1, 0, 0
  536. };
  537. static fz_document *
  538. fb2doc_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
  539. {
  540. return fz_htdoc_open_document_with_stream_and_dir(ctx, file, dir, &fz_htdoc_fb2);
  541. }
  542. static int
  543. fb2doc_recognize_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
  544. {
  545. const char *match = "<FictionBook";
  546. int pos = 0;
  547. int n = 4096;
  548. int c;
  549. if (state)
  550. *state = NULL;
  551. if (free_state)
  552. *free_state = NULL;
  553. if (stream == NULL)
  554. return 0;
  555. do
  556. {
  557. c = fz_read_byte(ctx, stream);
  558. if (c == EOF)
  559. return 0;
  560. if (c == match[pos])
  561. {
  562. pos++;
  563. if (pos == 12)
  564. return 100;
  565. }
  566. else
  567. {
  568. /* Restart matching, but recheck c against the start. */
  569. pos = (c == match[0]);
  570. }
  571. }
  572. while (--n > 0);
  573. return 0;
  574. }
  575. static const char *fb2doc_extensions[] =
  576. {
  577. "fb2",
  578. "xml",
  579. NULL
  580. };
  581. static const char *fb2doc_mimetypes[] =
  582. {
  583. "application/x-fictionbook",
  584. "application/xml",
  585. "text/xml",
  586. NULL
  587. };
  588. fz_document_handler fb2_document_handler =
  589. {
  590. NULL,
  591. fb2doc_open_document,
  592. fb2doc_extensions,
  593. fb2doc_mimetypes,
  594. fb2doc_recognize_content
  595. };
  596. /* Mobi document handler */
  597. static const fz_htdoc_format_t fz_htdoc_mobi =
  598. {
  599. "MOBI",
  600. NULL,
  601. 1, 1, 1
  602. };
  603. static fz_document *
  604. mobi_open_document_with_buffer(fz_context *ctx, fz_buffer *mobi)
  605. {
  606. fz_archive *dir = NULL;
  607. fz_buffer *html;
  608. fz_document *doc;
  609. fz_var(dir);
  610. fz_try(ctx)
  611. {
  612. dir = fz_extract_html_from_mobi(ctx, mobi);
  613. html = fz_read_archive_entry(ctx, dir, "index.html");
  614. doc = fz_htdoc_open_document_with_buffer(ctx, dir, html, &fz_htdoc_mobi);
  615. }
  616. fz_always(ctx)
  617. {
  618. fz_drop_buffer(ctx, mobi);
  619. fz_drop_archive(ctx, dir);
  620. }
  621. fz_catch(ctx)
  622. {
  623. fz_rethrow(ctx);
  624. }
  625. return doc;
  626. }
  627. static int
  628. mobi_recognize_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
  629. {
  630. char text[8];
  631. if (state)
  632. *state = NULL;
  633. if (free_state)
  634. *free_state = NULL;
  635. if (stream == NULL)
  636. return 0;
  637. fz_seek(ctx, stream, 32 + 28, SEEK_SET);
  638. if (fz_read(ctx, stream, (unsigned char *)text, 8) != 8)
  639. return 0;
  640. if (memcmp(text, "BOOKMOBI", 8) == 0)
  641. return 100;
  642. if (memcmp(text, "TEXtREAd", 8) == 0)
  643. return 100;
  644. return 0;
  645. }
  646. static fz_document *
  647. mobi_open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
  648. {
  649. return mobi_open_document_with_buffer(ctx, fz_read_all(ctx, file, 0));
  650. }
  651. static const char *mobi_extensions[] =
  652. {
  653. "mobi",
  654. "prc",
  655. "pdb",
  656. NULL
  657. };
  658. static const char *mobi_mimetypes[] =
  659. {
  660. "application/x-mobipocket-ebook",
  661. NULL
  662. };
  663. fz_document_handler mobi_document_handler =
  664. {
  665. NULL,
  666. mobi_open_document,
  667. mobi_extensions,
  668. mobi_mimetypes,
  669. mobi_recognize_content
  670. };