html-parse.c 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392
  1. // Copyright (C) 2004-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include "mupdf/ucdn.h"
  24. #include "html-imp.h"
  25. #include <string.h>
  26. #include <stdio.h>
  27. #include <assert.h>
/* Four small index constants (0..3).
 * NOTE(review): presumably Top, Right, Bottom, Left for four-sided
 * CSS values -- no use is visible in this part of the file; confirm. */
enum { T, R, B, L };

/* Default text direction constant used by this file. */
#define DEFAULT_DIR FZ_BIDI_LTR
/*
	Default CSS rules for HTML elements, stored as one concatenated string.

	Each literal below is a single rule (or the @page rule); adjacent
	literals are joined by the compiler, so no separators are needed.
	NOTE(review): presumably parsed as the built-in style sheet before any
	document or user CSS -- the call site is outside this chunk; confirm.
*/
static const char *html_default_css =
"@page{margin:3em 2em}"
"a{color:#06C;text-decoration:underline}"
"address{display:block;font-style:italic}"
"b{font-weight:bold}"
"bdo{direction:rtl;unicode-bidi:bidi-override}"
"blockquote{display:block;margin:1em 40px}"
"body{display:block;margin:1em}"
"cite{font-style:italic}"
"code{font-family:monospace}"
"dd{display:block;margin:0 0 0 40px}"
"del{text-decoration:line-through}"
"div{display:block}"
"dl{display:block;margin:1em 0}"
"dt{display:block}"
"em{font-style:italic}"
"h1{display:block;font-size:2em;font-weight:bold;margin:0.67em 0;page-break-after:avoid}"
"h2{display:block;font-size:1.5em;font-weight:bold;margin:0.83em 0;page-break-after:avoid}"
"h3{display:block;font-size:1.17em;font-weight:bold;margin:1em 0;page-break-after:avoid}"
"h4{display:block;font-size:1em;font-weight:bold;margin:1.33em 0;page-break-after:avoid}"
"h5{display:block;font-size:0.83em;font-weight:bold;margin:1.67em 0;page-break-after:avoid}"
"h6{display:block;font-size:0.67em;font-weight:bold;margin:2.33em 0;page-break-after:avoid}"
"head{display:none}"
"hr{border-style:solid;border-width:1px;display:block;margin-bottom:0.5em;margin-top:0.5em;text-align:center}"
"html{display:block}"
"i{font-style:italic}"
"ins{text-decoration:underline}"
"kbd{font-family:monospace}"
"li{display:list-item}"
"menu{display:block;list-style-type:disc;margin:1em 0;padding:0 0 0 30pt}"
"ol{display:block;list-style-type:decimal;margin:1em 0;padding:0 0 0 30pt}"
"p{display:block;margin:1em 0}"
"pre{display:block;font-family:monospace;margin:1em 0;white-space:pre}"
"samp{font-family:monospace}"
"script{display:none}"
"small{font-size:0.83em}"
"strong{font-weight:bold}"
"style{display:none}"
"sub{font-size:0.83em;vertical-align:sub}"
"sup{font-size:0.83em;vertical-align:super}"
"table{display:table;border-spacing:2px}"
"tbody{display:table-row-group}"
"td{display:table-cell;padding:1px;background-color:inherit}"
"tfoot{display:table-footer-group}"
"th{display:table-cell;font-weight:bold;padding:1px;text-align:center;background-color:inherit}"
"thead{display:table-header-group}"
"tr{display:table-row}"
"ul{display:block;list-style-type:disc;margin:1em 0;padding:0 0 0 30pt}"
"ul ul{list-style-type:circle}"
"ul ul ul{list-style-type:square}"
"var{font-style:italic}"
"colgroup{display:table-column-group}"
"col{display:table-column}"
"caption{display:block;text-align:center}"
;
/*
	Additional default CSS rules for MOBI content, stored as one
	concatenated string.

	It only defines a handful of elements (pagebreak, center, big, strike)
	and zeroes some margins.
	NOTE(review): presumably layered on top of html_default_css rather than
	used alone -- the call site is outside this chunk; confirm.
*/
static const char *mobi_default_css =
"pagebreak{display:block;page-break-before:always}"
"dl,ol,ul{margin:0}"
"p{margin:0}"
"blockquote{margin:0 40px}"
"center{display:block;text-align:center}"
"big{font-size:1.17em}"
"strike{text-decoration:line-through}"
;
/*
	Default CSS rules for FictionBook (FB2) elements, stored as one
	concatenated string.

	Rules appear in source order, so later rules for the same selector
	(e.g. the two "image" rules) add to or override earlier ones under
	normal CSS cascading.
*/
static const char *fb2_default_css =
"@page{margin:3em 2em}"
"FictionBook{display:block;margin:1em}"
"stylesheet,binary{display:none}"
"description>*{display:none}"
"description>title-info{display:block}"
"description>title-info>*{display:none}"
"description>title-info>coverpage{display:block;page-break-before:always;page-break-after:always}"
"body,section,title,subtitle,p,cite,epigraph,text-author,date,poem,stanza,v,empty-line{display:block}"
"image{display:block}"
"p>image{display:inline}"
"table{display:table}"
"tr{display:table-row}"
"th,td{display:table-cell}"
"a{color:#06C;text-decoration:underline}"
"a[type=note]{font-size:small;vertical-align:super}"
"code{white-space:pre;font-family:monospace}"
"emphasis{font-style:italic}"
"strikethrough{text-decoration:line-through}"
"strong{font-weight:bold}"
"sub{font-size:small;vertical-align:sub}"
"sup{font-size:small;vertical-align:super}"
"image{margin:1em 0;text-align:center}"
"cite,poem{margin:1em 2em}"
"subtitle,epigraph,stanza{margin:1em 0}"
"title>p{text-align:center;font-size:x-large}"
"subtitle{text-align:center;font-size:large}"
"p{margin-top:1em;text-align:justify}"
"empty-line{padding-top:1em}"
"p+p{margin-top:0;text-indent:1.5em}"
"empty-line+p{margin-top:0}"
"section>title{page-break-before:always}"
;
  127. static const char *known_html_tags[] = {
  128. // TODO: add known FB2 tags?
  129. // Sorted list of all HTML tags.
  130. "a", "abbr", "acronym", "address", "annotation-xml", "applet", "area",
  131. "article", "aside", "audio", "b", "base", "basefont", "bdi", "bdo",
  132. "bgsound", "big", "blink", "blockquote", "body", "br", "button",
  133. "canvas", "caption", "center", "cite", "code", "col", "colgroup",
  134. "data", "datalist", "dd", "del", "desc", "details", "dfn", "dir",
  135. "div", "dl", "dt", "em", "embed", "fieldset", "figcaption", "figure",
  136. "font", "footer", "foreignobject", "form", "frame", "frameset", "h1",
  137. "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
  138. "i", "iframe", "image", "img", "input", "ins", "isindex", "kbd",
  139. "keygen", "label", "legend", "li", "link", "listing", "main",
  140. "malignmark", "map", "mark", "marquee", "math", "menu", "menuitem",
  141. "meta", "meter", "mglyph", "mi", "mn", "mo", "ms", "mtext", "multicol",
  142. "nav", "nextid", "nobr", "noembed", "noframes", "noscript", "object",
  143. "ol", "optgroup", "option", "output", "p", "param", "plaintext", "pre",
  144. "progress", "q", "rb", "rp", "rt", "rtc", "ruby", "s", "samp",
  145. "script", "section", "select", "small", "source", "spacer", "span",
  146. "strike", "strong", "style", "sub", "summary", "sup", "svg", "table",
  147. "tbody", "td", "template", "textarea", "tfoot", "th", "thead", "time",
  148. "title", "tr", "track", "tt", "u", "ul", "var", "video", "wbr", "xmp",
  149. };
  150. static const char *known_fb2_tags[] = {
  151. "FictionBook", "a", "binary", "body", "cite", "code", "coverpage",
  152. "date", "description", "emphasis", "empty-line", "epigraph", "image",
  153. "p", "poem", "section", "stanza", "strikethrough", "strong",
  154. "stylesheet", "sub", "subtitle", "sup", "table", "td", "text-author",
  155. "th", "title", "title-info", "tr", "v",
  156. };
  157. static const char *find_known_html_tag(const char *tag)
  158. {
  159. int l = 0;
  160. int r = nelem(known_html_tags) / 2 - 1;
  161. while (l <= r)
  162. {
  163. int m = (l + r) >> 1;
  164. int c = strcmp(tag, known_html_tags[m]);
  165. if (c < 0)
  166. r = m - 1;
  167. else if (c > 0)
  168. l = m + 1;
  169. else
  170. return known_html_tags[m];
  171. }
  172. return NULL;
  173. }
  174. static const char *find_known_fb2_tag(const char *tag)
  175. {
  176. int l = 0;
  177. int r = nelem(known_fb2_tags) / 2 - 1;
  178. while (l <= r)
  179. {
  180. int m = (l + r) >> 1;
  181. int c = strcmp(tag, known_fb2_tags[m]);
  182. if (c < 0)
  183. r = m - 1;
  184. else if (c > 0)
  185. l = m + 1;
  186. else
  187. return known_fb2_tags[m];
  188. }
  189. return NULL;
  190. }
/*
	Mutable state threaded through box/flow generation.

	Fields without a comment are not exercised in the part of the file
	visible here, so their meaning is not documented.
*/
struct genstate
{
	fz_pool *pool; /* pool that boxes, flow nodes and copied strings are allocated from */
	fz_html_font_set *set;
	fz_archive *zip; /* NOTE(review): presumably the archive images are read from -- confirm */
	fz_tree *images;
	fz_xml_doc *xml;
	int is_fb2; /* non-zero: new_box also matches tag names against known_fb2_tags */
	const char *base_uri;
	fz_css *css;
	int at_bol; /* set to 1 after a forced line break, cleared after a word or image; suppresses pending whitespace */
	fz_html_box *emit_white; /* box owning collapsed whitespace that has not been emitted yet, or NULL */
	int last_brk_cls; /* line-break class of the previous character; row index into pairbrk */
	int list_counter;
	int section_depth;
	fz_bidi_direction markup_dir; /* copied into each box created by new_box */
	fz_text_language markup_lang;
	char *href;
	fz_css_style_splay *styles; /* passed to fz_css_enlist when a box's style is stored */
};
  211. static int iswhite(int c)
  212. {
  213. return c == ' ' || c == '\t' || c == '\r' || c == '\n';
  214. }
  215. static int is_all_white(const char *s)
  216. {
  217. while (*s)
  218. {
  219. if (!iswhite(*s))
  220. return 0;
  221. ++s;
  222. }
  223. return 1;
  224. }
  225. /* TODO: pool allocator for flow nodes */
  226. /* TODO: store text by pointing to a giant buffer */
  227. static void fz_drop_html_flow(fz_context *ctx, fz_html_flow *flow)
  228. {
  229. while (flow)
  230. {
  231. fz_html_flow *next = flow->next;
  232. if (flow->type == FLOW_IMAGE)
  233. fz_drop_image(ctx, flow->content.image);
  234. flow = next;
  235. }
  236. }
  237. static fz_html_flow *add_flow(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, int type, int extras)
  238. {
  239. size_t size = (type == FLOW_IMAGE ? sizeof(fz_html_flow) : offsetof(fz_html_flow, content) + extras);
  240. fz_html_flow *flow;
  241. /* Shouldn't happen, but bug 705324. */
  242. if (top == NULL || top->type != BOX_FLOW)
  243. return NULL;
  244. flow = fz_pool_alloc(ctx, pool, size);
  245. flow->type = type;
  246. flow->expand = 0;
  247. flow->bidi_level = 0;
  248. flow->markup_lang = 0;
  249. flow->breaks_line = 0;
  250. flow->box = inline_box;
  251. (*top->s.build.flow_tail) = flow;
  252. top->s.build.flow_tail = &flow->next;
  253. return flow;
  254. }
  255. static void add_flow_space(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
  256. {
  257. fz_html_flow *flow = add_flow(ctx, pool, top, inline_box, FLOW_SPACE, 0);
  258. if (flow)
  259. flow->expand = 1;
  260. }
  261. static void add_flow_break(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
  262. {
  263. (void)add_flow(ctx, pool, top, inline_box, FLOW_BREAK, 0);
  264. }
  265. static void add_flow_sbreak(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
  266. {
  267. (void)add_flow(ctx, pool, top, inline_box, FLOW_SBREAK, 0);
  268. }
  269. static void add_flow_shyphen(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
  270. {
  271. (void)add_flow(ctx, pool, top, inline_box, FLOW_SHYPHEN, 0);
  272. }
  273. static void add_flow_word(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, const char *a, const char *b, int lang)
  274. {
  275. fz_html_flow *flow = add_flow(ctx, pool, top, inline_box, FLOW_WORD, b - a + 1);
  276. if (flow == NULL)
  277. return;
  278. memcpy(flow->content.text, a, b - a);
  279. flow->content.text[b - a] = 0;
  280. flow->markup_lang = lang;
  281. }
  282. static void add_flow_image(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, fz_image *img)
  283. {
  284. fz_html_flow *flow = add_flow(ctx, pool, top, inline_box, FLOW_IMAGE, 0);
  285. if (flow)
  286. flow->content.image = fz_keep_image(ctx, img);
  287. }
  288. static void add_flow_anchor(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
  289. {
  290. (void)add_flow(ctx, pool, top, inline_box, FLOW_ANCHOR, 0);
  291. }
  292. fz_html_flow *fz_html_split_flow(fz_context *ctx, fz_pool *pool, fz_html_flow *flow, size_t offset)
  293. {
  294. fz_html_flow *new_flow;
  295. char *text;
  296. size_t len;
  297. assert(flow->type == FLOW_WORD);
  298. if (offset == 0)
  299. return flow;
  300. text = flow->content.text;
  301. while (*text && offset)
  302. {
  303. int rune;
  304. text += fz_chartorune(&rune, text);
  305. offset--;
  306. }
  307. len = strlen(text);
  308. new_flow = fz_pool_alloc(ctx, pool, offsetof(fz_html_flow, content) + len+1);
  309. memcpy(new_flow, flow, offsetof(fz_html_flow, content));
  310. new_flow->next = flow->next;
  311. flow->next = new_flow;
  312. strcpy(new_flow->content.text, text);
  313. *text = 0;
  314. return new_flow;
  315. }
  316. static void flush_space(fz_context *ctx, fz_html_box *flow, int lang, struct genstate *g)
  317. {
  318. static const char *space = " ";
  319. fz_pool *pool = g->pool;
  320. if (g->emit_white)
  321. {
  322. int bsp = g->emit_white->style->white_space & WS_ALLOW_BREAK_SPACE;
  323. if (!g->at_bol)
  324. {
  325. if (bsp)
  326. add_flow_space(ctx, pool, flow, g->emit_white);
  327. else
  328. add_flow_word(ctx, pool, flow, g->emit_white, space, space+1, lang);
  329. }
  330. g->emit_white = 0;
  331. }
  332. }
/* Pair-wise lookup table for UAX#14 line breaks.

	The table is indexed as pairbrk[B][A], where B is the line-break class
	of the previous character (the row, named in the comment at the end of
	each row) and A is the class of the current character (the column).
	Column labels are the same 32 classes in the same order, written
	vertically in the header comment below (read each column top to bottom).

	The linebreak table entries mean:
	^ prohibited break
		never break before A and after B, even with one or more spaces in between
	% indirect break
		do not break before A, unless one or more spaces follow B
	_ direct break
		break allowed before A
*/
static const char *pairbrk[32] =
{
/* -OCCQGNESIPPNAHIIHBBBZCWHHJJJREEZ- */
/* -PLPULSXYSROULLDNYAB2WMJ23LVTIBMW- */
/* -                               J- */
"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^", /* OP open punctuation */
"_^^%%^^^^%%____%%%__^^^________%", /* CL close punctuation */
"_^^%%^^^^%%%%%_%%%__^^^________%", /* CP close parenthesis */
"^^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* QU quotation */
"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* GL non-breaking glue */
"_^^%%%^^^______%%%__^^^________%", /* NS nonstarters */
"_^^%%%^^^______%%%__^^^________%", /* EX exclamation/interrogation */
"_^^%%%^^^__%_%_%%%__^^^________%", /* SY symbols allowing break after */
"_^^%%%^^^__%%%_%%%__^^^________%", /* IS infix numeric separator */
"%^^%%%^^^__%%%%%%%__^^^%%%%%_%%%", /* PR prefix numeric */
"%^^%%%^^^__%%%_%%%__^^^________%", /* PO postfix numeric */
"%^^%%%^^^%%%%%_%%%__^^^________%", /* NU numeric */
"%^^%%%^^^%%%%%_%%%__^^^________%", /* AL ordinary alphabetic and symbol characters */
"%^^%%%^^^%%%%%_%%%__^^^________%", /* HL hebrew letter */
"_^^%%%^^^_%____%%%__^^^________%", /* ID ideographic */
"_^^%%%^^^______%%%__^^^________%", /* IN inseparable characters */
"_^^%_%^^^__%___%%%__^^^________%", /* HY hyphens */
"_^^%_%^^^______%%%__^^^________%", /* BA break after */
"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* BB break before */
"_^^%%%^^^______%%%_^^^^________%", /* B2 break opportunity before and after */
"____________________^___________", /* ZW zero width space */
"%^^%%%^^^%_%%%_%%%__^^^________%", /* CM combining mark */
"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* WJ word joiner */
"_^^%%%^^^_%____%%%__^^^___%%___%", /* H2 hangul leading/vowel syllable */
"_^^%%%^^^_%____%%%__^^^____%___%", /* H3 hangul leading/vowel/trailing syllable */
"_^^%%%^^^_%____%%%__^^^%%%%____%", /* JL hangul leading jamo */
"_^^%%%^^^_%____%%%__^^^___%%___%", /* JV hangul vowel jamo */
"_^^%%%^^^_%____%%%__^^^____%___%", /* JT hangul trailing jamo */
"_^^%%%^^^______%%%__^^^_____%__%", /* RI regional indicator */
"_^^%%%^^^_%____%%%__^^^_______%%", /* EB emoji base */
"_^^%%%^^^_%____%%%__^^^________%", /* EM emoji modifier */
"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%%%%", /* ZWJ zero width joiner */
};
  380. static fz_html_box *
  381. find_flow_encloser(fz_context *ctx, fz_html_box *flow)
  382. {
  383. /* This code was written to assume that there will always be a
  384. * flow box enclosing callers of this. Bug 705324 shows that
  385. * this isn't always the case. In the absence of a reproducer
  386. * file, all I can do is try to patch around the issue so that
  387. * we won't crash. */
  388. while (flow->type != BOX_FLOW)
  389. {
  390. if (flow->up == NULL)
  391. {
  392. fz_warn(ctx, "Flow encloser not found. Please report this file!");
  393. break;
  394. }
  395. flow = flow->up;
  396. }
  397. return flow;
  398. }
/*
	Convert a run of text belonging to 'box' into flow nodes (words, spaces,
	forced breaks, soft breaks and soft hyphens) appended to the flow box
	that encloses 'box'.

	box  - the inline box the text belongs to; its white-space style selects
	       the whitespace handling below.
	text - NUL-terminated UTF-8 text (decoded with fz_chartorune).
	lang - markup language stored on every word node created.
	g    - generator state; at_bol, emit_white and last_brk_cls are read
	       and updated so that state carries over between calls.
*/
static void generate_text(fz_context *ctx, fz_html_box *box, const char *text, int lang, struct genstate *g)
{
	fz_html_box *flow;
	fz_pool *pool = g->pool;
	/* Three independent white-space style bits drive the loop below. */
	int collapse = box->style->white_space & WS_COLLAPSE;
	int bsp = box->style->white_space & WS_ALLOW_BREAK_SPACE;
	int bnl = box->style->white_space & WS_FORCE_BREAK_NEWLINE;
	static const char *space = " ";
	flow = find_flow_encloser(ctx, box);
	if (flow == NULL)
		return;
	while (*text)
	{
		/* Case 1: newlines force a line break. CR LF counts as one newline. */
		if (bnl && (*text == '\n' || *text == '\r'))
		{
			if (text[0] == '\r' && text[1] == '\n')
				text += 2;
			else
				text += 1;
			add_flow_break(ctx, pool, flow, box);
			g->at_bol = 1;
		}
		/* Case 2: whitespace. */
		else if (iswhite(*text))
		{
			if (collapse)
			{
				/* Swallow the whole run and defer a single space until the
				 * next word or image (see flush_space). When newlines force
				 * breaks, stop at them so case 1 handles them. */
				if (bnl)
					while (*text == ' ' || *text == '\t')
						++text;
				else
					while (iswhite(*text))
						++text;
				g->emit_white = box;
			}
			else
			{
				/* Not collapsing: emit one node per whitespace character. */
				// TODO: tabs
				if (bsp)
					add_flow_space(ctx, pool, flow, box);
				else
					add_flow_word(ctx, pool, flow, box, space, space+1, lang);
				++text;
			}
			g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a space */
		}
		/* Case 3: a run of non-whitespace characters. */
		else
		{
			/* 'mark' is the start of the word piece not yet emitted;
			 * 'prev' is the start of the character just decoded. */
			const char *prev, *mark = text;
			int c;
			flush_space(ctx, flow, lang, g);
			if (g->at_bol)
				g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ;
			while (*text && !iswhite(*text))
			{
				prev = text;
				text += fz_chartorune(&c, text);
				if (c == 0xAD) /* soft hyphen */
				{
					/* Emit the text before the hyphen, then the hyphen node;
					 * the hyphen character itself is not part of any word. */
					if (mark != prev)
						add_flow_word(ctx, pool, flow, box, mark, prev, lang);
					add_flow_shyphen(ctx, pool, flow, box);
					mark = text;
					g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a soft hyphen */
				}
				else if (bsp) /* allow soft breaks */
				{
					int this_brk_cls = ucdn_get_resolved_linebreak_class(c);
					/* Classes beyond ZWJ have no row/column in pairbrk and are skipped. */
					if (this_brk_cls <= UCDN_LINEBREAK_CLASS_ZWJ)
					{
						int brk = pairbrk[g->last_brk_cls][this_brk_cls];
						/* we handle spaces elsewhere, so ignore these classes */
						if (brk == '@') brk = '^';
						if (brk == '#') brk = '^';
						if (brk == '%') brk = '^';
						if (brk == '_')
						{
							/* Direct break allowed before this character: close the
							 * current piece and start the next piece at 'prev'. */
							if (mark != prev)
								add_flow_word(ctx, pool, flow, box, mark, prev, lang);
							add_flow_sbreak(ctx, pool, flow, box);
							mark = prev;
						}
						g->last_brk_cls = this_brk_cls;
					}
				}
			}
			/* Emit whatever is left of the run. */
			if (mark != text)
				add_flow_word(ctx, pool, flow, box, mark, text, lang);
			g->at_bol = 0;
		}
	}
}
/*
	Load the image named by an element's 'src' value.

	src may be a base64 data URI for JPEG, PNG or GIF; otherwise it is
	treated as a path relative to base_uri and read as an entry of 'zip'.

	Returns a new image reference, or NULL on any failure: the error is
	swallowed and reported as a warning, so this function does not
	propagate exceptions raised while loading.
*/
static fz_image *load_html_image(fz_context *ctx, fz_archive *zip, const char *base_uri, const char *src)
{
	char path[2048];
	fz_image *img = NULL;
	fz_buffer *buf = NULL;
	/* Both are assigned inside fz_try and read after it. */
	fz_var(img);
	fz_var(buf);
	fz_try(ctx)
	{
		/* The numeric literals are the lengths of the prefix strings being matched. */
		if (!strncmp(src, "data:image/jpeg;base64,", 23))
			buf = fz_new_buffer_from_base64(ctx, src+23, 0);
		else if (!strncmp(src, "data:image/png;base64,", 22))
			buf = fz_new_buffer_from_base64(ctx, src+22, 0);
		else if (!strncmp(src, "data:image/gif;base64,", 22))
			buf = fz_new_buffer_from_base64(ctx, src+22, 0);
		else
		{
			/* Build "base_uri/src" (truncated to the size of 'path'), then
			 * URL-decode and normalise it in place. */
			fz_strlcpy(path, base_uri, sizeof path);
			fz_strlcat(path, "/", sizeof path);
			fz_strlcat(path, src, sizeof path);
			fz_urldecode(path);
			fz_cleanname(path);
			buf = fz_read_archive_entry(ctx, zip, path);
		}
#if FZ_ENABLE_SVG
		/* NOTE(review): matches ".svg" anywhere in src, not only as a file extension. */
		if (strstr(src, ".svg"))
			img = fz_new_image_from_svg(ctx, buf, base_uri, zip);
		else
#endif
			img = fz_new_image_from_buffer(ctx, buf);
	}
	fz_always(ctx)
		/* The buffer is released on both the success and the error path. */
		fz_drop_buffer(ctx, buf);
	fz_catch(ctx)
	{
		fz_ignore_error(ctx);
		fz_warn(ctx, "html: cannot load image src='%s'", src);
	}
	return img;
}
/*
	Create an image from an SVG element embedded in the document's XML.

	Returns the new image, or NULL if creation fails (the error is
	swallowed and reported as a warning). When FZ_ENABLE_SVG is zero or
	undefined the body is compiled out and NULL is always returned.
*/
static fz_image *load_svg_image(fz_context *ctx, fz_archive *zip, const char *base_uri,
	fz_xml_doc *xmldoc, fz_xml *node)
{
	fz_image *img = NULL;
#if FZ_ENABLE_SVG
	fz_try(ctx)
		img = fz_new_image_from_svg_xml(ctx, xmldoc, node, base_uri, zip);
	fz_catch(ctx)
	{
		fz_ignore_error(ctx);
		fz_warn(ctx, "html: cannot load embedded svg document");
	}
#endif
	return img;
}
  545. static void generate_image(fz_context *ctx, fz_html_box *box, fz_image *img, struct genstate *g)
  546. {
  547. fz_html_box *flow;
  548. fz_pool *pool = g->pool;
  549. flow = find_flow_encloser(ctx, box);
  550. flush_space(ctx, flow, 0, g);
  551. if (!img)
  552. {
  553. const char *alt = "[image]";
  554. add_flow_word(ctx, pool, flow, box, alt, alt + 7, 0);
  555. }
  556. else
  557. {
  558. fz_try(ctx)
  559. {
  560. add_flow_sbreak(ctx, pool, flow, box);
  561. add_flow_image(ctx, pool, flow, box, img);
  562. add_flow_sbreak(ctx, pool, flow, box);
  563. }
  564. fz_always(ctx)
  565. {
  566. fz_drop_image(ctx, img);
  567. }
  568. fz_catch(ctx)
  569. fz_rethrow(ctx);
  570. }
  571. g->at_bol = 0;
  572. }
  573. static void fz_drop_html_box(fz_context *ctx, fz_html_box *box)
  574. {
  575. while (box)
  576. {
  577. fz_html_box *next = box->next;
  578. if (box->type == BOX_FLOW)
  579. fz_drop_html_flow(ctx, box->u.flow.head);
  580. fz_drop_html_box(ctx, box->down);
  581. box = next;
  582. }
  583. }
  584. static void fz_drop_html_imp(fz_context *ctx, fz_storable *stor)
  585. {
  586. fz_html *html = (fz_html *)stor;
  587. fz_drop_html_box(ctx, html->tree.root);
  588. fz_drop_pool(ctx, html->tree.pool);
  589. }
/*
	Storable drop callback for an fz_story: release everything the story
	owns. The statement order is significant -- see the note on the pool.
*/
static void fz_drop_story_imp(fz_context *ctx, fz_storable *stor)
{
	fz_story *story = (fz_story *)stor;
	fz_free(ctx, story->user_css);
	fz_drop_html_font_set(ctx, story->font_set);
	fz_drop_xml(ctx, story->dom);
	/* Drops image references held by the box tree before the pool goes away. */
	fz_drop_html_box(ctx, story->tree.root);
	fz_drop_buffer(ctx, story->warnings);
	fz_drop_archive(ctx, story->zip);
	/* The pool must be the last thing dropped. */
	fz_drop_pool(ctx, story->tree.pool);
}
/* Drop a structure derived from an html_tree. The exact things
 * freed here will depend upon the drop function with which it
 * was created.
 *
 * The drop of the storable is bracketed by fz_defer_reap_start /
 * fz_defer_reap_end. */
static void
fz_drop_html_tree(fz_context *ctx, fz_html_tree *tree)
{
	fz_defer_reap_start(ctx);
	fz_drop_storable(ctx, &tree->storable);
	fz_defer_reap_end(ctx);
}
  612. void fz_drop_html(fz_context *ctx, fz_html *html)
  613. {
  614. fz_drop_html_tree(ctx, &html->tree);
  615. }
  616. void fz_drop_story(fz_context *ctx, fz_story *story)
  617. {
  618. if (!story)
  619. return;
  620. fz_drop_html_tree(ctx, &story->tree);
  621. }
  622. fz_html *fz_keep_html(fz_context *ctx, fz_html *html)
  623. {
  624. return fz_keep_storable(ctx, &html->tree.storable);
  625. }
  626. static fz_html_box *new_box(fz_context *ctx, struct genstate *g, fz_xml *node, int type, fz_css_style *style)
  627. {
  628. fz_html_box *box;
  629. const char *tag = fz_xml_tag(node);
  630. const char *id = fz_xml_att(node, "id");
  631. const char *href;
  632. if (type == BOX_INLINE)
  633. box = fz_pool_alloc(ctx, g->pool, offsetof(fz_html_box, u));
  634. else if (type == BOX_FLOW)
  635. box = fz_pool_alloc(ctx, g->pool, offsetof(fz_html_box, u) + sizeof(box->u.flow));
  636. else
  637. box = fz_pool_alloc(ctx, g->pool, offsetof(fz_html_box, u) + sizeof(box->u.block));
  638. box->type = type;
  639. box->is_first_flow = 0;
  640. box->markup_dir = g->markup_dir;
  641. box->heading = 0;
  642. box->list_item = 0;
  643. box->style = fz_css_enlist(ctx, style, &g->styles, g->pool);
  644. if (tag)
  645. {
  646. box->tag = find_known_html_tag(tag);
  647. if (!box->tag && g->is_fb2)
  648. box->tag = find_known_fb2_tag(tag);
  649. if (!box->tag)
  650. box->tag = fz_pool_strdup(ctx, g->pool, tag);
  651. }
  652. else
  653. {
  654. box->tag = "#anon";
  655. }
  656. if (id)
  657. box->id = fz_pool_strdup(ctx, g->pool, id);
  658. if (tag && tag[0]=='a' && tag[1]==0)
  659. {
  660. // Support deprecated anchor syntax with id in "name" instead of "id" attribute.
  661. if (!id)
  662. {
  663. const char *name = fz_xml_att(node, "name");
  664. if (name)
  665. box->id = fz_pool_strdup(ctx, g->pool, name);
  666. }
  667. if (g->is_fb2)
  668. {
  669. href = fz_xml_att(node, "l:href");
  670. if (!href)
  671. href = fz_xml_att(node, "xlink:href");
  672. }
  673. else
  674. {
  675. href = fz_xml_att(node, "href");
  676. }
  677. if (href)
  678. g->href = fz_pool_strdup(ctx, g->pool, href);
  679. }
  680. if (g->href)
  681. box->href = g->href;
  682. if (type == BOX_FLOW)
  683. {
  684. box->u.flow.head = NULL;
  685. box->s.build.flow_tail = &box->u.flow.head;
  686. }
  687. return box;
  688. }
  689. static void append_box(fz_context *ctx, fz_html_box *parent, fz_html_box *child)
  690. {
  691. child->up = parent;
  692. if (!parent->down)
  693. parent->down = child;
  694. if (parent->s.build.last_child)
  695. parent->s.build.last_child->next = child;
  696. parent->s.build.last_child = child;
  697. }
  698. static fz_html_box *find_block_context(fz_context *ctx, fz_html_box *box)
  699. {
  700. while (box->type != BOX_BLOCK && box->type != BOX_TABLE_CELL)
  701. box = box->up;
  702. return box;
  703. }
  704. static fz_html_box *find_table_row_context(fz_context *ctx, fz_html_box *box)
  705. {
  706. fz_html_box *look = box;
  707. while (look && look->type != BOX_TABLE)
  708. look = look->up;
  709. if (look)
  710. return look;
  711. fz_warn(ctx, "table-row not inside table element");
  712. return NULL;
  713. }
  714. static fz_html_box *find_table_cell_context(fz_context *ctx, fz_html_box *box)
  715. {
  716. fz_html_box *look = box;
  717. while (look && look->type != BOX_TABLE_ROW)
  718. look = look->up;
  719. if (look)
  720. return look;
  721. fz_warn(ctx, "table-cell not inside table-row element");
  722. return NULL;
  723. }
  724. static fz_html_box *find_inline_context(fz_context *ctx, struct genstate *g, fz_html_box *box)
  725. {
  726. fz_css_style style;
  727. fz_html_box *flow_box;
  728. if (box->type == BOX_FLOW || box->type == BOX_INLINE)
  729. return box;
  730. // We have an inline element that is not in an existing flow/inline context.
  731. // Find the closest block level box to insert content into.
  732. while (box->type != BOX_BLOCK && box->type != BOX_TABLE_CELL)
  733. box = box->up;
  734. // Concatenate onto the last open flow box if we have one.
  735. if (box->s.build.last_child && box->s.build.last_child->type == BOX_FLOW)
  736. return box->s.build.last_child;
  737. // No flow box found, create and insert one!
  738. // TODO: null style instead of default for flow box?
  739. fz_default_css_style(ctx, &style);
  740. flow_box = new_box(ctx, g, NULL, BOX_FLOW, &style);
  741. flow_box->is_first_flow = !box->down;
  742. g->at_bol = 1;
  743. append_box(ctx, box, flow_box);
  744. return flow_box;
  745. }
  746. static void gen2_children(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *root_node, fz_css_match *root_match);
  747. static void gen2_text(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node)
  748. {
  749. fz_html_box *anon_box;
  750. fz_css_style style;
  751. const char *text;
  752. int collapse;
  753. text = fz_xml_text(node);
  754. collapse = root_box->style->white_space & WS_COLLAPSE;
  755. if (collapse && is_all_white(text))
  756. {
  757. g->emit_white = root_box;
  758. }
  759. else
  760. {
  761. if (root_box->type != BOX_INLINE)
  762. {
  763. /* Create anonymous inline box, with the same style as the top block box. */
  764. style = *root_box->style;
  765. // Make sure not to recursively multiply font sizes
  766. style.font_size.value = 1;
  767. style.font_size.unit = N_SCALE;
  768. root_box = find_inline_context(ctx, g, root_box);
  769. anon_box = new_box(ctx, g, NULL, BOX_INLINE, &style);
  770. append_box(ctx, root_box, anon_box);
  771. root_box = anon_box;
  772. }
  773. generate_text(ctx, root_box, text, g->markup_lang, g);
  774. }
  775. }
  776. static fz_html_box *gen2_inline(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style)
  777. {
  778. fz_html_box *this_box;
  779. fz_html_box *flow_box;
  780. root_box = find_inline_context(ctx, g, root_box);
  781. this_box = new_box(ctx, g, node, BOX_INLINE, style);
  782. append_box(ctx, root_box, this_box);
  783. if (this_box->id)
  784. {
  785. flow_box = find_flow_encloser(ctx, this_box);
  786. add_flow_anchor(ctx, g->pool, flow_box, this_box);
  787. }
  788. return this_box;
  789. }
  790. static void gen2_break(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node)
  791. {
  792. fz_html_box *this_box;
  793. fz_html_box *flow_box;
  794. if (root_box->type != BOX_INLINE)
  795. {
  796. /* Create inline box to hold the <br> tag, with the same style as containing block. */
  797. /* Make sure not to recursively multiply font sizes. */
  798. fz_css_style style = *root_box->style;
  799. style.font_size.value = 1;
  800. style.font_size.unit = N_SCALE;
  801. this_box = new_box(ctx, g, node, BOX_INLINE, &style);
  802. append_box(ctx, find_inline_context(ctx, g, root_box), this_box);
  803. }
  804. else
  805. {
  806. this_box = root_box;
  807. }
  808. flow_box = find_flow_encloser(ctx, this_box);
  809. add_flow_break(ctx, g->pool, flow_box, this_box);
  810. g->at_bol = 1;
  811. }
  812. static fz_html_box *gen2_block(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style)
  813. {
  814. fz_html_box *this_box;
  815. root_box = find_block_context(ctx, root_box);
  816. this_box = new_box(ctx, g, node, BOX_BLOCK, style);
  817. append_box(ctx, root_box, this_box);
  818. return this_box;
  819. }
  820. static fz_html_box *gen2_table(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style)
  821. {
  822. fz_html_box *this_box;
  823. root_box = find_block_context(ctx, root_box);
  824. this_box = new_box(ctx, g, node, BOX_TABLE, style);
  825. append_box(ctx, root_box, this_box);
  826. return this_box;
  827. }
  828. static fz_html_box *gen2_table_row(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style)
  829. {
  830. fz_html_box *this_box, *table_box;
  831. table_box = find_table_row_context(ctx, root_box);
  832. if (!table_box)
  833. return gen2_block(ctx, g, root_box, node, style);
  834. this_box = new_box(ctx, g, node, BOX_TABLE_ROW, style);
  835. append_box(ctx, table_box, this_box);
  836. return this_box;
  837. }
  838. static fz_html_box *gen2_table_cell(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_css_style *style)
  839. {
  840. fz_html_box *this_box, *row_box;
  841. row_box = find_table_cell_context(ctx, root_box);
  842. if (!row_box)
  843. return gen2_block(ctx, g, root_box, node, style);
  844. this_box = new_box(ctx, g, node, BOX_TABLE_CELL, style);
  845. append_box(ctx, row_box, this_box);
  846. return this_box;
  847. }
  848. static void gen2_image_common(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, fz_image *img, int display, fz_css_style *style)
  849. {
  850. fz_html_box *img_block_box;
  851. fz_html_box *img_inline_box;
  852. if (display == DIS_INLINE || display == DIS_INLINE_BLOCK)
  853. {
  854. root_box = find_inline_context(ctx, g, root_box);
  855. img_inline_box = new_box(ctx, g, node, BOX_INLINE, style);
  856. append_box(ctx, root_box, img_inline_box);
  857. generate_image(ctx, img_inline_box, img, g);
  858. }
  859. else
  860. {
  861. root_box = find_block_context(ctx, root_box);
  862. img_block_box = new_box(ctx, g, node, BOX_BLOCK, style);
  863. append_box(ctx, root_box, img_block_box);
  864. root_box = find_inline_context(ctx, g, img_block_box);
  865. img_inline_box = new_box(ctx, g, NULL, BOX_INLINE, style);
  866. append_box(ctx, root_box, img_inline_box);
  867. generate_image(ctx, img_inline_box, img, g);
  868. }
  869. }
  870. static void gen2_image_html(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, int display, fz_css_style *style)
  871. {
  872. const char *src = fz_xml_att(node, "src");
  873. if (src)
  874. {
  875. fz_css_style local_style = *style;
  876. fz_image *img;
  877. int w, h;
  878. const char *w_att = fz_xml_att(node, "width");
  879. const char *h_att = fz_xml_att(node, "height");
  880. if (w_att && (w = fz_atoi(w_att)) > 0)
  881. {
  882. local_style.width.value = w;
  883. local_style.width.unit = strchr(w_att, '%') ? N_PERCENT : N_LENGTH;
  884. }
  885. if (h_att && (h = fz_atoi(h_att)) > 0)
  886. {
  887. local_style.height.value = h;
  888. local_style.height.unit = strchr(h_att, '%') ? N_PERCENT : N_LENGTH;
  889. }
  890. img = load_html_image(ctx, g->zip, g->base_uri, src);
  891. gen2_image_common(ctx, g, root_box, node, img, display, &local_style);
  892. }
  893. }
  894. static void gen2_image_fb2(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, int display, fz_css_style *style)
  895. {
  896. const char *src = fz_xml_att(node, "l:href");
  897. if (!src)
  898. src = fz_xml_att(node, "xlink:href");
  899. if (src && src[0] == '#')
  900. {
  901. fz_image *img = fz_tree_lookup(ctx, g->images, src+1);
  902. gen2_image_common(ctx, g, root_box, node, fz_keep_image(ctx, img), display, style);
  903. }
  904. }
  905. static void gen2_image_svg(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node, int display, fz_css_style *style)
  906. {
  907. fz_image *img = load_svg_image(ctx, g->zip, g->base_uri, g->xml, node);
  908. gen2_image_common(ctx, g, root_box, node, img, display, style);
  909. }
  910. static int get_heading_from_tag(fz_context *ctx, struct genstate *g, const char *tag)
  911. {
  912. if (tag[0] == 'h' && tag[1] != 0 && tag[2] == 0)
  913. {
  914. switch (tag[1])
  915. {
  916. case '1': return 1;
  917. case '2': return 2;
  918. case '3': return 3;
  919. case '4': return 4;
  920. case '5': return 5;
  921. case '6': return 6;
  922. }
  923. }
  924. if (g->is_fb2)
  925. {
  926. if (!strcmp(tag, "title") || !strcmp(tag, "subtitle"))
  927. return fz_mini(g->section_depth, 6);
  928. }
  929. return 0;
  930. }
  931. static void gen2_tag(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *node,
  932. fz_css_match *match, int display, fz_css_style *style)
  933. {
  934. fz_html_box *this_box;
  935. const char *tag;
  936. const char *lang_att;
  937. const char *dir_att;
  938. int save_markup_dir = g->markup_dir;
  939. int save_markup_lang = g->markup_lang;
  940. char *save_href = g->href;
  941. if (display == DIS_NONE)
  942. return;
  943. tag = fz_xml_tag(node);
  944. dir_att = fz_xml_att(node, "dir");
  945. if (dir_att)
  946. {
  947. if (!strcmp(dir_att, "auto"))
  948. g->markup_dir = FZ_BIDI_NEUTRAL;
  949. else if (!strcmp(dir_att, "rtl"))
  950. g->markup_dir = FZ_BIDI_RTL;
  951. else if (!strcmp(dir_att, "ltr"))
  952. g->markup_dir = FZ_BIDI_LTR;
  953. else
  954. g->markup_dir = DEFAULT_DIR;
  955. }
  956. lang_att = fz_xml_att(node, "lang");
  957. if (lang_att)
  958. g->markup_lang = fz_text_language_from_string(lang_att);
  959. switch (display)
  960. {
  961. case DIS_INLINE_BLOCK:
  962. // TODO handle inline block as a flow node
  963. this_box = gen2_block(ctx, g, root_box, node, style);
  964. break;
  965. case DIS_BLOCK:
  966. this_box = gen2_block(ctx, g, root_box, node, style);
  967. this_box->heading = get_heading_from_tag(ctx, g, tag);
  968. break;
  969. case DIS_LIST_ITEM:
  970. this_box = gen2_block(ctx, g, root_box, node, style);
  971. this_box->list_item = ++g->list_counter;
  972. break;
  973. // TODO: https://www.w3.org/TR/CSS2/tables.html#anonymous-boxes
  974. //
  975. // The table generation code should insert and create anonymous boxes
  976. // for any missing child/parent elements.
  977. //
  978. // MISSING CHILDREN:
  979. // 1: Wrap consecutive BLOCK found in a TABLE in an anon TABLE_ROW.
  980. // 2: Wrap consecutive BLOCK found in a TABLE_ROW in an anon TABLE_CELL.
  981. //
  982. // MISSING PARENTS:
  983. // 1: Wrap consecutive TABLE_CELL found outside TABLE_ROW in an anon TABLE_ROW
  984. // 2: Wrap consecutive TABLE_ROW found outside TABLE in an anon TABLE
  985. //
  986. // For now we ignore this and treat any such elements that are out of
  987. // context as plain block elements.
  988. case DIS_TABLE:
  989. this_box = gen2_table(ctx, g, root_box, node, style);
  990. break;
  991. case DIS_TABLE_GROUP:
  992. // no box for table-row-group elements
  993. this_box = root_box;
  994. break;
  995. case DIS_TABLE_ROW:
  996. this_box = gen2_table_row(ctx, g, root_box, node, style);
  997. break;
  998. case DIS_TABLE_CELL:
  999. this_box = gen2_table_cell(ctx, g, root_box, node, style);
  1000. break;
  1001. case DIS_INLINE:
  1002. default:
  1003. this_box = gen2_inline(ctx, g, root_box, node, style);
  1004. break;
  1005. }
  1006. if (tag && (!strcmp(tag, "ol") || !strcmp(tag, "ul") || !strcmp(tag, "dl")))
  1007. {
  1008. int save_list_counter = g->list_counter;
  1009. g->list_counter = 0;
  1010. gen2_children(ctx, g, this_box, node, match);
  1011. g->list_counter = save_list_counter;
  1012. }
  1013. else if (tag && !strcmp(tag, "section"))
  1014. {
  1015. int save_section_depth = g->section_depth;
  1016. g->section_depth++;
  1017. gen2_children(ctx, g, this_box, node, match);
  1018. g->section_depth = save_section_depth;
  1019. }
  1020. else
  1021. {
  1022. gen2_children(ctx, g, this_box, node, match);
  1023. }
  1024. g->markup_dir = save_markup_dir;
  1025. g->markup_lang = save_markup_lang;
  1026. g->href = save_href;
  1027. }
  1028. static void gen2_children(fz_context *ctx, struct genstate *g, fz_html_box *root_box, fz_xml *root_node, fz_css_match *root_match)
  1029. {
  1030. fz_xml *node;
  1031. const char *tag;
  1032. fz_css_match match;
  1033. fz_css_style style;
  1034. int display;
  1035. for (node = fz_xml_down(root_node); node; node = fz_xml_next(node))
  1036. {
  1037. tag = fz_xml_tag(node);
  1038. if (tag)
  1039. {
  1040. fz_match_css(ctx, &match, root_match, g->css, node);
  1041. fz_apply_css_style(ctx, g->set, &style, &match);
  1042. display = fz_get_css_match_display(&match);
  1043. if (tag[0]=='b' && tag[1]=='r' && tag[2]==0)
  1044. {
  1045. gen2_break(ctx, g, root_box, node);
  1046. }
  1047. else if (tag[0]=='i' && tag[1]=='m' && tag[2]=='g' && tag[3]==0)
  1048. {
  1049. gen2_image_html(ctx, g, root_box, node, display, &style);
  1050. }
  1051. else if (g->is_fb2 && tag[0]=='i' && tag[1]=='m' && tag[2]=='a' && tag[3]=='g' && tag[4]=='e' && tag[5]==0)
  1052. {
  1053. gen2_image_fb2(ctx, g, root_box, node, display, &style);
  1054. }
  1055. else if (tag[0]=='s' && tag[1]=='v' && tag[2]=='g' && tag[3]==0)
  1056. {
  1057. gen2_image_svg(ctx, g, root_box, node, display, &style);
  1058. }
  1059. else
  1060. {
  1061. gen2_tag(ctx, g, root_box, node, &match, display, &style);
  1062. }
  1063. }
  1064. else
  1065. {
  1066. gen2_text(ctx, g, root_box, node);
  1067. }
  1068. }
  1069. }
  1070. static char *concat_text(fz_context *ctx, fz_xml *root)
  1071. {
  1072. fz_xml *node;
  1073. size_t i = 0, n = 1;
  1074. char *s;
  1075. for (node = fz_xml_down(root); node; node = fz_xml_next(node))
  1076. {
  1077. const char *text = fz_xml_text(node);
  1078. n += text ? strlen(text) : 0;
  1079. }
  1080. s = Memento_label(fz_malloc(ctx, n), "concat_html");
  1081. for (node = fz_xml_down(root); node; node = fz_xml_next(node))
  1082. {
  1083. const char *text = fz_xml_text(node);
  1084. if (text)
  1085. {
  1086. n = strlen(text);
  1087. memcpy(s+i, text, n);
  1088. i += n;
  1089. }
  1090. }
  1091. s[i] = 0;
  1092. return s;
  1093. }
  1094. static void
  1095. html_load_css_link(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_css *css, fz_xml *root, const char *href)
  1096. {
  1097. char path[2048];
  1098. char css_base_uri[2048];
  1099. fz_buffer *buf;
  1100. fz_var(buf);
  1101. fz_strlcpy(path, base_uri, sizeof path);
  1102. fz_strlcat(path, "/", sizeof path);
  1103. fz_strlcat(path, href, sizeof path);
  1104. fz_urldecode(path);
  1105. fz_cleanname(path);
  1106. fz_dirname(css_base_uri, path, sizeof css_base_uri);
  1107. buf = NULL;
  1108. fz_try(ctx)
  1109. {
  1110. buf = fz_read_archive_entry(ctx, zip, path);
  1111. fz_parse_css(ctx, css, fz_string_from_buffer(ctx, buf), path);
  1112. fz_add_css_font_faces(ctx, set, zip, css_base_uri, css);
  1113. }
  1114. fz_always(ctx)
  1115. fz_drop_buffer(ctx, buf);
  1116. fz_catch(ctx)
  1117. {
  1118. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  1119. fz_report_error(ctx);
  1120. fz_warn(ctx, "ignoring stylesheet %s", path);
  1121. }
  1122. }
  1123. static void
  1124. html_load_css(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_css *css, fz_xml *root)
  1125. {
  1126. fz_xml *html, *head, *node;
  1127. html = fz_xml_find(root, "html");
  1128. head = fz_xml_find_down(html, "head");
  1129. for (node = fz_xml_down(head); node; node = fz_xml_next(node))
  1130. {
  1131. if (fz_xml_is_tag(node, "link"))
  1132. {
  1133. char *rel = fz_xml_att(node, "rel");
  1134. if (rel && !fz_strcasecmp(rel, "stylesheet"))
  1135. {
  1136. char *type = fz_xml_att(node, "type");
  1137. if ((type && !strcmp(type, "text/css")) || !type)
  1138. {
  1139. char *href = fz_xml_att(node, "href");
  1140. if (href)
  1141. {
  1142. html_load_css_link(ctx, set, zip, base_uri, css, root, href);
  1143. }
  1144. }
  1145. }
  1146. }
  1147. else if (fz_xml_is_tag(node, "style"))
  1148. {
  1149. char *s = concat_text(ctx, node);
  1150. fz_try(ctx)
  1151. {
  1152. fz_parse_css(ctx, css, s, "<style>");
  1153. fz_add_css_font_faces(ctx, set, zip, base_uri, css);
  1154. }
  1155. fz_always(ctx)
  1156. fz_free(ctx, s);
  1157. fz_catch(ctx)
  1158. {
  1159. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  1160. fz_report_error(ctx);
  1161. fz_warn(ctx, "ignoring inline stylesheet");
  1162. }
  1163. }
  1164. }
  1165. }
  1166. static void
  1167. fb2_load_css(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_css *css, fz_xml *root)
  1168. {
  1169. fz_xml *fictionbook, *stylesheet;
  1170. fictionbook = fz_xml_find(root, "FictionBook");
  1171. stylesheet = fz_xml_find_down(fictionbook, "stylesheet");
  1172. if (stylesheet)
  1173. {
  1174. char *s = concat_text(ctx, stylesheet);
  1175. fz_try(ctx)
  1176. {
  1177. fz_parse_css(ctx, css, s, "<stylesheet>");
  1178. fz_add_css_font_faces(ctx, set, zip, base_uri, css);
  1179. }
  1180. fz_catch(ctx)
  1181. {
  1182. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  1183. fz_report_error(ctx);
  1184. fz_warn(ctx, "ignoring inline stylesheet");
  1185. }
  1186. fz_free(ctx, s);
  1187. }
  1188. }
  1189. static fz_tree *
  1190. load_fb2_images(fz_context *ctx, fz_xml *root)
  1191. {
  1192. fz_xml *fictionbook, *binary;
  1193. fz_tree *images = NULL;
  1194. fictionbook = fz_xml_find(root, "FictionBook");
  1195. for (binary = fz_xml_find_down(fictionbook, "binary"); binary; binary = fz_xml_find_next(binary, "binary"))
  1196. {
  1197. const char *id = fz_xml_att(binary, "id");
  1198. char *b64 = NULL;
  1199. fz_buffer *buf = NULL;
  1200. fz_image *img = NULL;
  1201. fz_var(b64);
  1202. fz_var(buf);
  1203. if (id == NULL)
  1204. {
  1205. fz_warn(ctx, "Skipping image with no id");
  1206. continue;
  1207. }
  1208. fz_try(ctx)
  1209. {
  1210. b64 = concat_text(ctx, binary);
  1211. buf = fz_new_buffer_from_base64(ctx, b64, strlen(b64));
  1212. img = fz_new_image_from_buffer(ctx, buf);
  1213. }
  1214. fz_always(ctx)
  1215. {
  1216. fz_drop_buffer(ctx, buf);
  1217. fz_free(ctx, b64);
  1218. }
  1219. fz_catch(ctx)
  1220. fz_rethrow(ctx);
  1221. images = fz_tree_insert(ctx, images, id, img);
  1222. }
  1223. return images;
  1224. }
  /* Growable buffer of 32-bit code points, reused across flows. */
  typedef struct
  {
  	uint32_t *data;		/* storage; NULL until first grown */
  	size_t cap, len;	/* allocated / used element counts */
  } uni_buf;
  1231. typedef struct
  1232. {
  1233. fz_context *ctx;
  1234. fz_pool *pool;
  1235. fz_html_flow *flow;
  1236. uni_buf *buffer;
  1237. } bidi_data;
  1238. static void fragment_cb(const uint32_t *fragment,
  1239. size_t fragment_len,
  1240. int bidi_level,
  1241. int script,
  1242. void *arg)
  1243. {
  1244. bidi_data *data = (bidi_data *)arg;
  1245. /* We are guaranteed that fragmentOffset will be at the beginning
  1246. * of flow. */
  1247. while (fragment_len > 0)
  1248. {
  1249. size_t len;
  1250. if (data->flow->type == FLOW_SPACE)
  1251. {
  1252. len = 1;
  1253. }
  1254. else if (data->flow->type == FLOW_BREAK || data->flow->type == FLOW_SBREAK ||
  1255. data->flow->type == FLOW_SHYPHEN || data->flow->type == FLOW_ANCHOR)
  1256. {
  1257. len = 0;
  1258. }
  1259. else
  1260. {
  1261. /* Must be text */
  1262. len = fz_utflen(data->flow->content.text);
  1263. if (len > fragment_len)
  1264. {
  1265. /* We need to split this flow box */
  1266. (void)fz_html_split_flow(data->ctx, data->pool, data->flow, fragment_len);
  1267. len = fz_utflen(data->flow->content.text);
  1268. }
  1269. }
  1270. /* This flow box is entirely contained within this fragment. */
  1271. data->flow->bidi_level = bidi_level;
  1272. data->flow->script = script;
  1273. data->flow = data->flow->next;
  1274. fragment_len -= len;
  1275. }
  1276. }
  1277. static fz_bidi_direction
  1278. detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_bidi_direction bidi_dir, fz_html_flow *flow)
  1279. {
  1280. fz_html_flow *end = flow;
  1281. bidi_data data;
  1282. while (end)
  1283. {
  1284. unsigned int level = end->bidi_level;
  1285. /* Gather the text from the flow up into a single buffer (at
  1286. * least, as much of it as has the same direction markup). */
  1287. buffer->len = 0;
  1288. while (end && (level & 1) == (end->bidi_level & 1))
  1289. {
  1290. size_t len = 0;
  1291. const char *text = "";
  1292. int broken = 0;
  1293. switch (end->type)
  1294. {
  1295. case FLOW_WORD:
  1296. len = fz_utflen(end->content.text);
  1297. text = end->content.text;
  1298. break;
  1299. case FLOW_SPACE:
  1300. len = 1;
  1301. text = " ";
  1302. break;
  1303. case FLOW_SHYPHEN:
  1304. case FLOW_SBREAK:
  1305. break;
  1306. case FLOW_BREAK:
  1307. case FLOW_IMAGE:
  1308. broken = 1;
  1309. break;
  1310. }
  1311. end = end->next;
  1312. if (broken)
  1313. break;
  1314. /* Make sure the buffer is large enough */
  1315. if (buffer->len + len > buffer->cap)
  1316. {
  1317. size_t newcap = buffer->cap;
  1318. if (newcap < 128)
  1319. newcap = 128; /* Sensible small default */
  1320. while (newcap < buffer->len + len)
  1321. newcap = (newcap * 3) / 2;
  1322. buffer->data = fz_realloc_array(ctx, buffer->data, newcap, uint32_t);
  1323. buffer->cap = newcap;
  1324. }
  1325. /* Expand the utf8 text into Unicode and store it in the buffer */
  1326. while (*text)
  1327. {
  1328. int rune;
  1329. text += fz_chartorune(&rune, text);
  1330. buffer->data[buffer->len++] = rune;
  1331. }
  1332. }
  1333. /* Detect directionality for the buffer */
  1334. data.ctx = ctx;
  1335. data.pool = pool;
  1336. data.flow = flow;
  1337. data.buffer = buffer;
  1338. fz_bidi_fragment_text(ctx, buffer->data, buffer->len, &bidi_dir, fragment_cb, &data, 0 /* Flags */);
  1339. flow = end;
  1340. }
  1341. return bidi_dir;
  1342. }
  1343. static void
  1344. detect_box_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_html_box *box)
  1345. {
  1346. while (box)
  1347. {
  1348. if (box->type == BOX_FLOW)
  1349. box->markup_dir = detect_flow_directionality(ctx, pool, buffer, box->markup_dir, box->u.flow.head);
  1350. detect_box_directionality(ctx, pool, buffer, box->down);
  1351. box = box->next;
  1352. }
  1353. }
  1354. static void
  1355. detect_directionality(fz_context *ctx, fz_pool *pool, fz_html_box *box)
  1356. {
  1357. uni_buf buffer = { NULL };
  1358. fz_try(ctx)
  1359. detect_box_directionality(ctx, pool, &buffer, box);
  1360. fz_always(ctx)
  1361. fz_free(ctx, buffer.data);
  1362. fz_catch(ctx)
  1363. fz_rethrow(ctx);
  1364. }
  1365. static fz_xml_doc *
  1366. parse_to_xml(fz_context *ctx, fz_buffer *buf, int try_xml, int try_html5)
  1367. {
  1368. fz_xml_doc *xml;
  1369. if (try_xml && try_html5)
  1370. {
  1371. fz_try(ctx)
  1372. xml = fz_parse_xml(ctx, buf, 1);
  1373. fz_catch(ctx)
  1374. {
  1375. if (fz_caught(ctx) == FZ_ERROR_SYNTAX)
  1376. {
  1377. fz_report_error(ctx);
  1378. fz_warn(ctx, "syntax error in XHTML; retrying using HTML5 parser");
  1379. xml = fz_parse_xml_from_html5(ctx, buf);
  1380. }
  1381. else
  1382. fz_rethrow(ctx);
  1383. }
  1384. }
  1385. else if (try_xml)
  1386. xml = fz_parse_xml(ctx, buf, 1);
  1387. else
  1388. {
  1389. assert(try_html5);
  1390. xml = fz_parse_xml_from_html5(ctx, buf);
  1391. }
  1392. return xml;
  1393. }
  1394. static void move_background_color_style_up(fz_context *ctx, struct genstate *g, fz_html_box *root, fz_html_box *from)
  1395. {
  1396. fz_css_color transparent = { 0, 0, 0, 0 };
  1397. fz_css_style s1, s2;
  1398. memcpy(&s1, root->style, sizeof s1);
  1399. memcpy(&s2, from->style, sizeof s2);
  1400. s1.background_color = s2.background_color;
  1401. s2.background_color = transparent;
  1402. root->style = fz_css_enlist(ctx, &s1, &g->styles, g->pool);
  1403. from->style = fz_css_enlist(ctx, &s2, &g->styles, g->pool);
  1404. }
  1405. static void move_background_color_up(fz_context *ctx, struct genstate *g, fz_html_box *root)
  1406. {
  1407. fz_html_box *html, *body;
  1408. if (root->style->background_color.a != 0)
  1409. {
  1410. return;
  1411. }
  1412. html = root->down;
  1413. if (html && !strcmp(html->tag, "html"))
  1414. {
  1415. if (html->style->background_color.a != 0)
  1416. {
  1417. move_background_color_style_up(ctx, g, root, html);
  1418. return;
  1419. }
  1420. body = html->down;
  1421. if (body && !strcmp(body->tag, "body"))
  1422. {
  1423. if (body->style->background_color.a != 0)
  1424. {
  1425. move_background_color_style_up(ctx, g, root, body);
  1426. return;
  1427. }
  1428. }
  1429. }
  1430. }
  1431. static void
  1432. xml_to_boxes(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, const char *user_css,
  1433. fz_xml_doc *xml, fz_html_tree *tree, char **rtitle, int try_fictionbook, int is_mobi)
  1434. {
  1435. fz_xml *root, *node;
  1436. char *title;
  1437. fz_css_match root_match, match;
  1438. struct genstate g = {0};
  1439. g.pool = NULL;
  1440. g.set = set;
  1441. g.zip = zip;
  1442. g.images = NULL;
  1443. g.xml = xml;
  1444. g.is_fb2 = 0;
  1445. g.base_uri = base_uri;
  1446. g.css = NULL;
  1447. g.at_bol = 0;
  1448. g.emit_white = 0;
  1449. g.last_brk_cls = UCDN_LINEBREAK_CLASS_OP;
  1450. g.list_counter = 0;
  1451. g.section_depth = 0;
  1452. g.markup_dir = FZ_BIDI_LTR;
  1453. g.markup_lang = FZ_LANG_UNSET;
  1454. g.href = NULL;
  1455. g.styles = NULL;
  1456. if (rtitle)
  1457. *rtitle = NULL;
  1458. root = fz_xml_root(g.xml);
  1459. g.css = fz_new_css(ctx);
  1460. #ifndef NDEBUG
  1461. if (fz_atoi(getenv("FZ_DEBUG_XML")))
  1462. fz_debug_xml(root, 0);
  1463. #endif
  1464. fz_try(ctx)
  1465. {
  1466. if (try_fictionbook && fz_xml_find(root, "FictionBook"))
  1467. {
  1468. g.is_fb2 = 1;
  1469. fz_parse_css(ctx, g.css, fb2_default_css, "<default:fb2>");
  1470. if (fz_use_document_css(ctx))
  1471. fb2_load_css(ctx, g.set, g.zip, g.base_uri, g.css, root);
  1472. g.images = load_fb2_images(ctx, root);
  1473. }
  1474. else if (is_mobi)
  1475. {
  1476. g.is_fb2 = 0;
  1477. fz_parse_css(ctx, g.css, html_default_css, "<default:html>");
  1478. fz_parse_css(ctx, g.css, mobi_default_css, "<default:mobi>");
  1479. if (fz_use_document_css(ctx))
  1480. html_load_css(ctx, g.set, g.zip, g.base_uri, g.css, root);
  1481. }
  1482. else
  1483. {
  1484. g.is_fb2 = 0;
  1485. fz_parse_css(ctx, g.css, html_default_css, "<default:html>");
  1486. if (fz_use_document_css(ctx))
  1487. html_load_css(ctx, g.set, g.zip, g.base_uri, g.css, root);
  1488. }
  1489. if (user_css)
  1490. {
  1491. fz_parse_css(ctx, g.css, user_css, "<user>");
  1492. fz_add_css_font_faces(ctx, g.set, g.zip, ".", g.css);
  1493. }
  1494. }
  1495. fz_catch(ctx)
  1496. {
  1497. fz_drop_tree(ctx, g.images, (void(*)(fz_context*,void*))fz_drop_image);
  1498. fz_drop_css(ctx, g.css);
  1499. fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
  1500. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  1501. fz_report_error(ctx);
  1502. fz_warn(ctx, "ignoring styles");
  1503. g.css = fz_new_css(ctx);
  1504. g.images = NULL;
  1505. }
  1506. #ifndef NDEBUG
  1507. if (fz_atoi(getenv("FZ_DEBUG_CSS")))
  1508. fz_debug_css(ctx, g.css);
  1509. #endif
  1510. fz_try(ctx)
  1511. {
  1512. fz_css_style style;
  1513. int display;
  1514. fz_match_css_at_page(ctx, &root_match, g.css);
  1515. fz_apply_css_style(ctx, g.set, &style, &root_match);
  1516. g.pool = tree->pool;
  1517. g.markup_dir = DEFAULT_DIR;
  1518. g.markup_lang = FZ_LANG_UNSET;
  1519. // Create root node
  1520. tree->root = new_box(ctx, &g, NULL, BOX_BLOCK, &style);
  1521. // TODO: transfer page margins out of this hacky box
  1522. tree->root->tag = ":root";
  1523. tree->root->s.layout.em = 0;
  1524. tree->root->s.layout.x = 0;
  1525. tree->root->s.layout.y = 0;
  1526. tree->root->s.layout.w = 0;
  1527. tree->root->s.layout.b = 0;
  1528. // Create document node (html).
  1529. fz_match_css(ctx, &match, &root_match, g.css, root);
  1530. fz_apply_css_style(ctx, g.set, &style, &match);
  1531. display = fz_get_css_match_display(&match);
  1532. gen2_tag(ctx, &g, tree->root, root, &match, display, &style);
  1533. detect_directionality(ctx, g.pool, tree->root);
  1534. if (g.is_fb2)
  1535. {
  1536. node = fz_xml_find(root, "FictionBook");
  1537. node = fz_xml_find_down(node, "description");
  1538. node = fz_xml_find_down(node, "title-info");
  1539. node = fz_xml_find_down(node, "book-title");
  1540. if (rtitle)
  1541. {
  1542. title = fz_xml_text(fz_xml_down(node));
  1543. if (title)
  1544. *rtitle = fz_pool_strdup(ctx, g.pool, title);
  1545. }
  1546. }
  1547. else
  1548. {
  1549. node = fz_xml_find(root, "html");
  1550. node = fz_xml_find_down(node, "head");
  1551. node = fz_xml_find_down(node, "title");
  1552. if (rtitle)
  1553. {
  1554. title = fz_xml_text(fz_xml_down(node));
  1555. if (title)
  1556. *rtitle = fz_pool_strdup(ctx, g.pool, title);
  1557. }
  1558. // Move html or body background-color to :root.
  1559. move_background_color_up(ctx, &g, tree->root);
  1560. }
  1561. }
  1562. fz_always(ctx)
  1563. {
  1564. fz_drop_tree(ctx, g.images, (void(*)(fz_context*,void*))fz_drop_image);
  1565. fz_drop_css(ctx, g.css);
  1566. }
  1567. fz_catch(ctx)
  1568. {
  1569. if (rtitle)
  1570. {
  1571. fz_free(ctx, *rtitle);
  1572. *rtitle = NULL;
  1573. }
  1574. fz_rethrow(ctx);
  1575. }
  1576. }
  /*
   * CSS font-size values used when translating MOBI <font size="..."> tags.
   * Slot N-1 is the value used for the absolute size "N" (1..7); relative
   * sizes ("+1", "-2", ...) are mapped onto the same slots by patch_mobi_html.
   */
  static const char *mobi_font_size[7] =
  {
      "0.67em", /* size 1 */
      "0.83em", /* size 2 */
      "1em",    /* size 3 */
      "1.17em", /* size 4 */
      "1.33em", /* size 5 */
      "1.5em",  /* size 6 */
      "1.67em", /* size 7 */
  };
  1586. static void
  1587. patch_mobi_html(fz_context *ctx, fz_pool *pool, fz_xml *node)
  1588. {
  1589. fz_xml *down;
  1590. char buf[500];
  1591. while (node)
  1592. {
  1593. char *tag = fz_xml_tag(node);
  1594. if (tag)
  1595. {
  1596. // Read MOBI attributes, convert to inline CSS style
  1597. if (!strcmp(tag, "font"))
  1598. {
  1599. const char *size = fz_xml_att(node, "size");
  1600. if (size)
  1601. {
  1602. if (!strcmp(size, "1")) size = mobi_font_size[0];
  1603. else if (!strcmp(size, "2")) size = mobi_font_size[1];
  1604. else if (!strcmp(size, "3")) size = mobi_font_size[2];
  1605. else if (!strcmp(size, "4")) size = mobi_font_size[3];
  1606. else if (!strcmp(size, "5")) size = mobi_font_size[4];
  1607. else if (!strcmp(size, "6")) size = mobi_font_size[5];
  1608. else if (!strcmp(size, "7")) size = mobi_font_size[6];
  1609. else if (!strcmp(size, "+1")) size = mobi_font_size[3];
  1610. else if (!strcmp(size, "+2")) size = mobi_font_size[4];
  1611. else if (!strcmp(size, "+3")) size = mobi_font_size[5];
  1612. else if (!strcmp(size, "+4")) size = mobi_font_size[6];
  1613. else if (!strcmp(size, "+5")) size = mobi_font_size[6];
  1614. else if (!strcmp(size, "+6")) size = mobi_font_size[6];
  1615. else if (!strcmp(size, "-1")) size = mobi_font_size[1];
  1616. else if (!strcmp(size, "-2")) size = mobi_font_size[0];
  1617. else if (!strcmp(size, "-3")) size = mobi_font_size[0];
  1618. else if (!strcmp(size, "-4")) size = mobi_font_size[0];
  1619. else if (!strcmp(size, "-5")) size = mobi_font_size[0];
  1620. else if (!strcmp(size, "-6")) size = mobi_font_size[0];
  1621. fz_snprintf(buf, sizeof buf, "font-size:%s", size);
  1622. fz_xml_add_att(ctx, pool, node, "style", buf);
  1623. }
  1624. }
  1625. else
  1626. {
  1627. char *height = fz_xml_att(node, "height");
  1628. char *width = fz_xml_att(node, "width");
  1629. char *align = fz_xml_att(node, "align");
  1630. if (height || width || align)
  1631. {
  1632. buf[0] = 0;
  1633. if (height)
  1634. {
  1635. fz_strlcat(buf, "margin-top:", sizeof buf);
  1636. fz_strlcat(buf, height, sizeof buf);
  1637. fz_strlcat(buf, ";", sizeof buf);
  1638. }
  1639. if (width)
  1640. {
  1641. fz_strlcat(buf, "text-indent:", sizeof buf);
  1642. fz_strlcat(buf, width, sizeof buf);
  1643. fz_strlcat(buf, ";", sizeof buf);
  1644. }
  1645. if (align)
  1646. {
  1647. fz_strlcat(buf, "text-align:", sizeof buf);
  1648. fz_strlcat(buf, align, sizeof buf);
  1649. fz_strlcat(buf, ";", sizeof buf);
  1650. }
  1651. fz_xml_add_att(ctx, pool, node, "style", buf);
  1652. }
  1653. if (!strcmp(tag, "img"))
  1654. {
  1655. char *recindex = fz_xml_att(node, "recindex");
  1656. if (recindex)
  1657. fz_xml_add_att(ctx, pool, node, "src", recindex);
  1658. }
  1659. }
  1660. }
  1661. down = fz_xml_down(node);
  1662. if (down)
  1663. patch_mobi_html(ctx, pool, down);
  1664. node = fz_xml_next(node);
  1665. }
  1666. }
  1667. static void
  1668. fz_parse_html_tree(fz_context *ctx,
  1669. fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css,
  1670. int try_xml, int try_html5, fz_html_tree *tree, char **rtitle, int try_fictionbook, int patch_mobi)
  1671. {
  1672. fz_xml_doc *xml;
  1673. if (rtitle)
  1674. *rtitle = NULL;
  1675. xml = parse_to_xml(ctx, buf, try_xml, try_html5);
  1676. if (patch_mobi)
  1677. patch_mobi_html(ctx, xml->u.doc.pool, xml);
  1678. fz_try(ctx)
  1679. xml_to_boxes(ctx, set, zip, base_uri, user_css, xml, tree, rtitle, try_fictionbook, patch_mobi);
  1680. fz_always(ctx)
  1681. fz_drop_xml(ctx, xml);
  1682. fz_catch(ctx)
  1683. fz_rethrow(ctx);
  1684. }
  1685. #define fz_new_derived_html_tree(CTX, TYPE, DROP) \
  1686. ((TYPE *)Memento_label(fz_new_html_tree_of_size(CTX, sizeof(TYPE), DROP), #TYPE))
  1687. static fz_html_tree *
  1688. fz_new_html_tree_of_size(fz_context *ctx, size_t size, fz_store_drop_fn *drop)
  1689. {
  1690. fz_pool *pool = fz_new_pool(ctx);
  1691. fz_html_tree *tree;
  1692. fz_try(ctx)
  1693. {
  1694. tree = fz_pool_alloc(ctx, pool, size);
  1695. FZ_INIT_STORABLE(tree, 1, drop);
  1696. tree->pool = pool;
  1697. }
  1698. fz_catch(ctx)
  1699. {
  1700. fz_drop_pool(ctx, pool);
  1701. fz_rethrow(ctx);
  1702. }
  1703. return tree;
  1704. }
  1705. fz_html *
  1706. fz_parse_html(fz_context *ctx,
  1707. fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css,
  1708. int try_xml, int try_html5, int patch_mobi)
  1709. {
  1710. fz_html *html = fz_new_derived_html_tree(ctx, fz_html, fz_drop_html_imp);
  1711. html->layout_w = 0;
  1712. html->layout_h = 0;
  1713. html->layout_em = 0;
  1714. fz_try(ctx)
  1715. fz_parse_html_tree(ctx, set, zip, base_uri, buf, user_css, try_xml, try_html5, &html->tree, &html->title, 1, patch_mobi);
  1716. fz_catch(ctx)
  1717. {
  1718. fz_drop_html(ctx, html);
  1719. fz_rethrow(ctx);
  1720. }
  1721. return html;
  1722. }
  1723. typedef struct
  1724. {
  1725. int saved;
  1726. fz_warning_cb *old;
  1727. void *arg;
  1728. fz_buffer *buffer;
  1729. fz_context *ctx;
  1730. } warning_save;
  1731. static void
  1732. warn_to_buffer(void *user, const char *message)
  1733. {
  1734. warning_save *save = (warning_save *)user;
  1735. fz_context *ctx = save->ctx;
  1736. fz_try(ctx)
  1737. {
  1738. fz_append_string(ctx, save->buffer, message);
  1739. fz_append_byte(ctx, save->buffer, '\n');
  1740. }
  1741. fz_catch(ctx)
  1742. {
  1743. /* Silently swallow the error. */
  1744. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  1745. fz_report_error(ctx);
  1746. }
  1747. }
  1748. static void
  1749. redirect_warnings_to_buffer(fz_context *ctx, fz_buffer *buf, warning_save *save)
  1750. {
  1751. save->saved = 1;
  1752. save->old = fz_warning_callback(ctx, &save->arg);
  1753. save->buffer = buf;
  1754. save->ctx = ctx;
  1755. fz_flush_warnings(ctx);
  1756. fz_set_warning_callback(ctx, warn_to_buffer, save);
  1757. }
  1758. static void
  1759. restore_warnings(fz_context *ctx, warning_save *save)
  1760. {
  1761. if (!save->saved)
  1762. return;
  1763. fz_flush_warnings(ctx);
  1764. fz_set_warning_callback(ctx, save->old, save->arg);
  1765. }
  1766. fz_story *
  1767. fz_new_story(fz_context *ctx, fz_buffer *buf, const char *user_css, float em, fz_archive *zip)
  1768. {
  1769. fz_story *story = fz_new_derived_html_tree(ctx, fz_story, fz_drop_story_imp);
  1770. warning_save saved = { 0 };
  1771. fz_buffer *local_buffer = NULL;
  1772. if (buf == NULL)
  1773. {
  1774. local_buffer = fz_new_buffer(ctx, 0);
  1775. buf = local_buffer;
  1776. }
  1777. fz_var(local_buffer);
  1778. fz_var(saved);
  1779. fz_try(ctx)
  1780. {
  1781. story->zip = fz_keep_archive(ctx, zip);
  1782. story->font_set = fz_new_html_font_set(ctx);
  1783. story->em = em;
  1784. story->user_css = user_css ? fz_strdup(ctx, user_css) : NULL;
  1785. story->warnings = fz_new_buffer(ctx, 128);
  1786. redirect_warnings_to_buffer(ctx, story->warnings, &saved);
  1787. story->dom = parse_to_xml(ctx, buf, 0, 1);
  1788. }
  1789. fz_always(ctx)
  1790. {
  1791. restore_warnings(ctx, &saved);
  1792. fz_drop_buffer(ctx, local_buffer);
  1793. }
  1794. fz_catch(ctx)
  1795. {
  1796. fz_drop_html_tree(ctx, &story->tree);
  1797. fz_rethrow(ctx);
  1798. }
  1799. return story;
  1800. }
  1801. fz_html *
  1802. fz_parse_xhtml(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css)
  1803. {
  1804. /* try as XML first, fall back to HTML5 */
  1805. return fz_parse_html(ctx, set, zip, base_uri, buf, user_css, 1, 1, 0);
  1806. }
  /* Write level tab characters to stdout; nothing is written for level <= 0. */
  static void indent(int level)
  {
      int i;

      for (i = 0; i < level; i++)
          putchar('\t');
  }
  1812. static void
  1813. fz_debug_html_flow(fz_context *ctx, fz_html_flow *flow, int level)
  1814. {
  1815. fz_html_box *sbox = NULL;
  1816. while (flow)
  1817. {
  1818. if (flow->box != sbox) {
  1819. sbox = flow->box;
  1820. indent(level);
  1821. #ifndef NDEBUG
  1822. printf("@style <%s> em=%g font='%s'", sbox->tag, sbox->s.layout.em, fz_font_name(ctx, sbox->style->font));
  1823. #else
  1824. printf("@style em=%g font='%s'", sbox->s.layout.em, fz_font_name(ctx, sbox->style->font));
  1825. #endif
  1826. if (fz_font_is_serif(ctx, sbox->style->font))
  1827. printf(" serif");
  1828. else
  1829. printf(" sans");
  1830. if (fz_font_is_monospaced(ctx, sbox->style->font))
  1831. printf(" monospaced");
  1832. if (fz_font_is_bold(ctx, sbox->style->font))
  1833. printf(" bold");
  1834. if (fz_font_is_italic(ctx, sbox->style->font))
  1835. printf(" italic");
  1836. if (sbox->style->small_caps)
  1837. printf(" small-caps");
  1838. printf("\n");
  1839. }
  1840. indent(level);
  1841. switch (flow->type) {
  1842. case FLOW_WORD: printf("word "); break;
  1843. case FLOW_SPACE: printf("space"); break;
  1844. case FLOW_SBREAK: printf("sbrk "); break;
  1845. case FLOW_SHYPHEN: printf("shy "); break;
  1846. case FLOW_BREAK: printf("break"); break;
  1847. case FLOW_IMAGE: printf("image"); break;
  1848. case FLOW_ANCHOR: printf("anchor"); break;
  1849. }
  1850. // printf(" y=%g x=%g w=%g", flow->y, flow->x, flow->w);
  1851. if (flow->type == FLOW_IMAGE)
  1852. printf(" h=%g", flow->h);
  1853. if (flow->type == FLOW_WORD)
  1854. printf(" text='%s'", flow->content.text);
  1855. printf("\n");
  1856. if (flow->breaks_line) {
  1857. indent(level);
  1858. printf("*\n");
  1859. }
  1860. flow = flow->next;
  1861. }
  1862. }
  1863. fz_structure fz_html_tag_to_structure(const char *tag)
  1864. {
  1865. if (!strcmp(tag, "body")) return FZ_STRUCTURE_DOCUMENT;
  1866. if (!strcmp(tag, "div")) return FZ_STRUCTURE_DIV;
  1867. if (!strcmp(tag, "span")) return FZ_STRUCTURE_SPAN;
  1868. if (!strcmp(tag, "blockquote")) return FZ_STRUCTURE_BLOCKQUOTE;
  1869. if (!strcmp(tag, "p")) return FZ_STRUCTURE_P;
  1870. if (!strcmp(tag, "h1")) return FZ_STRUCTURE_H1;
  1871. if (!strcmp(tag, "h2")) return FZ_STRUCTURE_H2;
  1872. if (!strcmp(tag, "h3")) return FZ_STRUCTURE_H3;
  1873. if (!strcmp(tag, "h4")) return FZ_STRUCTURE_H4;
  1874. if (!strcmp(tag, "h5")) return FZ_STRUCTURE_H5;
  1875. if (!strcmp(tag, "h6")) return FZ_STRUCTURE_H6;
  1876. if (!strcmp(tag, "ol")) return FZ_STRUCTURE_LIST;
  1877. if (!strcmp(tag, "ul")) return FZ_STRUCTURE_LIST;
  1878. if (!strcmp(tag, "dl")) return FZ_STRUCTURE_LIST;
  1879. if (!strcmp(tag, "li")) return FZ_STRUCTURE_LISTITEM;
  1880. if (!strcmp(tag, "table")) return FZ_STRUCTURE_TABLE;
  1881. if (!strcmp(tag, "tr")) return FZ_STRUCTURE_TR;
  1882. if (!strcmp(tag, "th")) return FZ_STRUCTURE_TH;
  1883. if (!strcmp(tag, "td")) return FZ_STRUCTURE_TD;
  1884. if (!strcmp(tag, "thead")) return FZ_STRUCTURE_THEAD;
  1885. if (!strcmp(tag, "tbody")) return FZ_STRUCTURE_TBODY;
  1886. if (!strcmp(tag, "tfoot")) return FZ_STRUCTURE_TFOOT;
  1887. return FZ_STRUCTURE_INVALID;
  1888. }
  1889. static void
  1890. fz_debug_html_box(fz_context *ctx, fz_html_box *box, int level)
  1891. {
  1892. while (box)
  1893. {
  1894. indent(level);
  1895. printf("box ");
  1896. switch (box->type) {
  1897. case BOX_BLOCK: printf("block"); break;
  1898. case BOX_FLOW: printf("flow"); break;
  1899. case BOX_INLINE: printf("inline"); break;
  1900. case BOX_TABLE: printf("table"); break;
  1901. case BOX_TABLE_ROW: printf("table-row"); break;
  1902. case BOX_TABLE_CELL: printf("table-cell"); break;
  1903. }
  1904. printf(" <%s>", box->tag);
  1905. // printf(" em=%g", box->em);
  1906. // printf(" x=%g y=%g w=%g b=%g", box->x, box->y, box->w, box->b);
  1907. if (box->is_first_flow)
  1908. printf(" is-first-flow");
  1909. if (box->list_item)
  1910. printf(" list=%d", box->list_item);
  1911. if (box->id)
  1912. printf(" id=(%s)", box->id);
  1913. if (box->href)
  1914. printf(" href=(%s)", box->href);
  1915. printf("\n");
  1916. if (box->type == BOX_BLOCK || box->type == BOX_TABLE) {
  1917. indent(level+1);
  1918. printf(">margin=(%g %g %g %g)\n", box->u.block.margin[0], box->u.block.margin[1], box->u.block.margin[2], box->u.block.margin[3]);
  1919. //indent(level+1);
  1920. //printf(">padding=(%g %g %g %g)\n", box->u.block.padding[0], box->u.block.padding[1], box->u.block.padding[2], box->u.block.padding[3]);
  1921. //indent(level+1);
  1922. //printf(">border=(%g %g %g %g)\n", box->u.block.border[0], box->u.block.border[1], box->u.block.border[2], box->u.block.border[3]);
  1923. }
  1924. if (box->down)
  1925. fz_debug_html_box(ctx, box->down, level + 1);
  1926. if (box->type == BOX_FLOW) {
  1927. indent(level+1);
  1928. printf("flow\n");
  1929. fz_debug_html_flow(ctx, box->u.flow.head, level + 2);
  1930. }
  1931. box = box->next;
  1932. }
  1933. }
  1934. void
  1935. fz_debug_html(fz_context *ctx, fz_html_box *box)
  1936. {
  1937. fz_debug_html_box(ctx, box, 0);
  1938. }
  1939. static size_t
  1940. fz_html_size(fz_context *ctx, fz_html *html)
  1941. {
  1942. return html ? fz_pool_size(ctx, html->tree.pool) : 0;
  1943. }
  /*
   * Magic to make html storable.
   *
   * Store key identifying one parsed chapter of one document.
   */
  typedef struct
  {
      int refs;        /* reference count, managed by fz_keep_html_key / fz_drop_html_key */
      void *doc;       /* owning document; compared by pointer identity */
      int chapter_num; /* chapter within that document */
  } fz_html_key;
  1950. static int
  1951. fz_make_hash_html_key(fz_context *ctx, fz_store_hash *hash, void *key_)
  1952. {
  1953. fz_html_key *key = (fz_html_key *)key_;
  1954. hash->u.pi.ptr = key->doc;
  1955. hash->u.pi.i = key->chapter_num;
  1956. return 1;
  1957. }
  1958. static void *
  1959. fz_keep_html_key(fz_context *ctx, void *key_)
  1960. {
  1961. fz_html_key *key = (fz_html_key *)key_;
  1962. return fz_keep_imp(ctx, key, &key->refs);
  1963. }
  1964. static void
  1965. fz_drop_html_key(fz_context *ctx, void *key_)
  1966. {
  1967. fz_html_key *key = (fz_html_key *)key_;
  1968. if (fz_drop_imp(ctx, key, &key->refs))
  1969. {
  1970. fz_free(ctx, key);
  1971. }
  1972. }
  1973. static int
  1974. fz_cmp_html_key(fz_context *ctx, void *k0_, void *k1_)
  1975. {
  1976. fz_html_key *k0 = (fz_html_key *)k0_;
  1977. fz_html_key *k1 = (fz_html_key *)k1_;
  1978. return k0->doc == k1->doc && k0->chapter_num == k1->chapter_num;
  1979. }
  1980. static void
  1981. fz_format_html_key(fz_context *ctx, char *s, size_t n, void *key_)
  1982. {
  1983. fz_html_key *key = (fz_html_key *)key_;
  1984. fz_snprintf(s, n, "(html doc=%p, ch=%d)", key->doc, key->chapter_num);
  1985. }
  1986. static const fz_store_type fz_html_store_type =
  1987. {
  1988. "fz_html",
  1989. fz_make_hash_html_key,
  1990. fz_keep_html_key,
  1991. fz_drop_html_key,
  1992. fz_cmp_html_key,
  1993. fz_format_html_key,
  1994. NULL
  1995. };
  1996. fz_html *fz_store_html(fz_context *ctx, fz_html *html, void *doc, int chapter)
  1997. {
  1998. fz_html_key *key = NULL;
  1999. fz_html *other_html;
  2000. /* Stick the parsed html in the store */
  2001. fz_var(key);
  2002. fz_try(ctx)
  2003. {
  2004. key = fz_malloc_struct(ctx, fz_html_key);
  2005. key->refs = 1;
  2006. key->doc = doc;
  2007. key->chapter_num = chapter;
  2008. other_html = fz_store_item(ctx, key, html, fz_html_size(ctx, html), &fz_html_store_type);
  2009. if (other_html)
  2010. {
  2011. fz_drop_html(ctx, html);
  2012. html = other_html;
  2013. }
  2014. }
  2015. fz_always(ctx)
  2016. fz_drop_html_key(ctx, key);
  2017. fz_catch(ctx)
  2018. {
  2019. /* Do nothing */
  2020. }
  2021. return html;
  2022. }
  2023. fz_html *fz_find_html(fz_context *ctx, void *doc, int chapter)
  2024. {
  2025. fz_html_key key;
  2026. key.refs = 1;
  2027. key.doc = doc;
  2028. key.chapter_num = chapter;
  2029. return fz_find_item(ctx, &fz_drop_html_imp, &key, &fz_html_store_type);
  2030. }
  2031. static int
  2032. html_filter_store(fz_context *ctx, void *doc, void *key_)
  2033. {
  2034. fz_html_key *key = (fz_html_key *)key_;
  2035. return (doc == key->doc);
  2036. }
  2037. void fz_purge_stored_html(fz_context *ctx, void *doc)
  2038. {
  2039. fz_filter_store(ctx, html_filter_store, doc, &fz_html_store_type);
  2040. }
  2041. static void
  2042. convert_to_boxes(fz_context *ctx, fz_story *story)
  2043. {
  2044. warning_save saved = { 0 };
  2045. if (story->dom == NULL)
  2046. return;
  2047. fz_var(saved);
  2048. fz_try(ctx)
  2049. {
  2050. redirect_warnings_to_buffer(ctx, story->warnings, &saved);
  2051. xml_to_boxes(ctx, story->font_set, story->zip, ".", story->user_css, story->dom, &story->tree, NULL, 0, 0);
  2052. }
  2053. fz_always(ctx)
  2054. {
  2055. fz_drop_xml(ctx, story->dom);
  2056. story->dom = NULL;
  2057. restore_warnings(ctx, &saved);
  2058. }
  2059. fz_catch(ctx)
  2060. fz_rethrow(ctx);
  2061. }
  2062. int fz_place_story(fz_context *ctx, fz_story *story, fz_rect where, fz_rect *filled)
  2063. {
  2064. return fz_place_story_flags(ctx, story, where, filled, 0);
  2065. }
  2066. int fz_place_story_flags(fz_context *ctx, fz_story *story, fz_rect where, fz_rect *filled, int flags)
  2067. {
  2068. float w, h;
  2069. if (filled)
  2070. *filled = fz_empty_rect;
  2071. if (story == NULL || story->complete)
  2072. return 0;
  2073. /* Convert from XML to box model on the first attempt to place.
  2074. * The DOM is unusable from here on in. */
  2075. convert_to_boxes(ctx, story);
  2076. w = where.x1 - where.x0;
  2077. h = where.y1 - where.y0;
  2078. /* Confusingly, we call the layout using restart_draw, not restart_place,
  2079. * because we don't want to destroy the current values in restart_place
  2080. * in case we have to retry later. This means the values are left in
  2081. * the correct struct though! */
  2082. story->restart_draw.start = story->restart_place.start;
  2083. story->restart_draw.start_flow = story->restart_place.start_flow;
  2084. story->restart_draw.end = NULL;
  2085. story->restart_draw.end_flow = NULL;
  2086. story->restart_draw.reason = FZ_HTML_RESTART_REASON_NONE;
  2087. story->restart_draw.flags = flags;
  2088. story->bbox = where;
  2089. fz_restartable_layout_html(ctx, &story->tree, where.x0, where.y0, w, h, story->em, &story->restart_draw);
  2090. story->restart_draw.start = story->restart_place.start;
  2091. story->restart_draw.start_flow = story->restart_place.start_flow;
  2092. if (filled)
  2093. {
  2094. fz_html_box *b = story->tree.root;
  2095. filled->x0 = b->s.layout.x - b->u.block.margin[L] - b->u.block.border[L] - b->u.block.padding[L];
  2096. filled->x1 = b->s.layout.w + b->u.block.margin[R] + b->u.block.border[R] + b->u.block.padding[R] + b->s.layout.x;
  2097. filled->y0 = b->s.layout.y - b->u.block.margin[T] - b->u.block.border[T] - b->u.block.padding[T];
  2098. filled->y1 = b->s.layout.b + b->u.block.margin[B] + b->u.block.border[B] + b->u.block.padding[B];
  2099. }
  2100. #ifndef NDEBUG
  2101. if (fz_atoi(getenv("FZ_DEBUG_HTML")))
  2102. fz_debug_html(ctx, story->tree.root);
  2103. #endif
  2104. if (story->restart_draw.end == NULL)
  2105. return FZ_HTML_RESTART_REASON_NONE;
  2106. if (story->restart_draw.reason == FZ_HTML_RESTART_REASON_LINE_WIDTH)
  2107. return FZ_HTML_RESTART_REASON_LINE_WIDTH;
  2108. return FZ_HTML_RESTART_REASON_LINE_HEIGHT;
  2109. }
  2110. const char *
  2111. fz_story_warnings(fz_context *ctx, fz_story *story)
  2112. {
  2113. unsigned char *data;
  2114. if (!story)
  2115. return NULL;
  2116. convert_to_boxes(ctx, story);
  2117. fz_terminate_buffer(ctx, story->warnings);
  2118. if (fz_buffer_storage(ctx, story->warnings, &data) == 0)
  2119. return NULL;
  2120. return (const char *)data;
  2121. }