| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
2771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359 |
- // Copyright (C) 2004-2025 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #define SUBSCRIPT_OFFSET 0.2f
- #define SUPERSCRIPT_OFFSET -0.2f
- #include <ft2build.h>
- #include FT_FREETYPE_H
- // Text black color when converted from DeviceCMYK to RGB
- #define CMYK_BLACK 0x221f1f
- static void
- scale_run(fz_context *ctx, fz_stext_block *block, float scale)
- {
- fz_matrix m = fz_scale(scale, scale);
- fz_stext_line *line;
- fz_stext_char *ch;
- while (block)
- {
- block->bbox = fz_transform_rect(block->bbox, m);
- switch (block->type)
- {
- case FZ_STEXT_BLOCK_TEXT:
- for (line = block->u.t.first_line; line; line = line->next)
- {
- line->bbox = fz_transform_rect(block->bbox, m);
- for (ch = line->first_char; ch; ch = ch->next)
- {
- ch->origin = fz_transform_point(ch->origin, m);
- ch->quad = fz_transform_quad(ch->quad, m);
- ch->size = ch->size * scale;
- }
- }
- break;
- case FZ_STEXT_BLOCK_IMAGE:
- block->u.i.transform = fz_post_scale(block->u.i.transform, scale, scale);
- break;
- case FZ_STEXT_BLOCK_STRUCT:
- if (block->u.s.down)
- scale_run(ctx, block->u.s.down->first_block, scale);
- break;
- }
- block = block->next;
- }
- }
- static void fz_scale_stext_page(fz_context *ctx, fz_stext_page *page, float scale)
- {
- scale_run(ctx, page->first_block, scale);
- }
- /* HTML output (visual formatting with preserved layout) */
- static int
- detect_super_script(fz_stext_line *line, fz_stext_char *ch)
- {
- if (line->wmode == 0 && line->dir.x == 1 && line->dir.y == 0)
- return ch->origin.y < line->first_char->origin.y - ch->size * 0.1f;
- return 0;
- }
- static const char *
- font_full_name(fz_context *ctx, fz_font *font)
- {
- const char *name = fz_font_name(ctx, font);
- const char *s = strchr(name, '+');
- return s ? s + 1 : name;
- }
/*
	Map a font name onto a common family name for use in HTML.

	Substring matches, tested in this order:
	  "Times"                 -> "Times New Roman"
	  "Arial" or "Helvetica"  -> "Arial Narrow" when the name also contains
	                             "Narrow" or "Condensed", otherwise "Arial"
	  "Courier"               -> "Courier"
	Any other name is returned unchanged (the same pointer that was
	passed in).
*/
static const char *
html_clean_font_name(const char *fontname)
{
	int arial_like;
	int narrow;

	if (strstr(fontname, "Times") != NULL)
		return "Times New Roman";

	arial_like = strstr(fontname, "Arial") != NULL || strstr(fontname, "Helvetica") != NULL;
	if (arial_like)
	{
		narrow = strstr(fontname, "Narrow") != NULL || strstr(fontname, "Condensed") != NULL;
		return narrow ? "Arial Narrow" : "Arial";
	}

	if (strstr(fontname, "Courier") != NULL)
		return "Courier";

	return fontname;
}
- static void
- font_family_name(fz_context *ctx, fz_font *font, char *buf, int size, int is_mono, int is_serif)
- {
- const char *name = html_clean_font_name(font_full_name(ctx, font));
- char *s;
- fz_strlcpy(buf, name, size);
- s = strrchr(buf, '-');
- if (s)
- *s = 0;
- if (is_mono)
- fz_strlcat(buf, ",monospace", size);
- else
- fz_strlcat(buf, is_serif ? ",serif" : ",sans-serif", size);
- }
- static void
- fz_print_style_begin_html(fz_context *ctx, fz_output *out, fz_font *font, float size, int sup, int color)
- {
- char family[80];
- int is_bold = fz_font_is_bold(ctx, font);
- int is_italic = fz_font_is_italic(ctx, font);
- int is_serif = fz_font_is_serif(ctx, font);
- int is_mono = fz_font_is_monospaced(ctx, font);
- font_family_name(ctx, font, family, sizeof family, is_mono, is_serif);
- if (sup) fz_write_string(ctx, out, "<sup>");
- if (is_mono) fz_write_string(ctx, out, "<tt>");
- if (is_bold) fz_write_string(ctx, out, "<b>");
- if (is_italic) fz_write_string(ctx, out, "<i>");
- fz_write_printf(ctx, out, "<span style=\"font-family:%s;font-size:%.1fpt", family, size);
- if (color != 0 && color != CMYK_BLACK)
- fz_write_printf(ctx, out, ";color:#%06x", color & 0xffffff);
- fz_write_printf(ctx, out, "\">");
- }
- static void
- fz_print_style_end_html(fz_context *ctx, fz_output *out, fz_font *font, float size, int sup, int color)
- {
- int is_mono = fz_font_is_monospaced(ctx, font);
- int is_bold = fz_font_is_bold(ctx,font);
- int is_italic = fz_font_is_italic(ctx, font);
- fz_write_string(ctx, out, "</span>");
- if (is_italic) fz_write_string(ctx, out, "</i>");
- if (is_bold) fz_write_string(ctx, out, "</b>");
- if (is_mono) fz_write_string(ctx, out, "</tt>");
- if (sup) fz_write_string(ctx, out, "</sup>");
- }
- static void
- fz_print_stext_image_as_html(fz_context *ctx, fz_output *out, fz_stext_block *block)
- {
- fz_matrix ctm = block->u.i.transform;
- #define USE_CSS_MATRIX_TRANSFORMS
- #ifdef USE_CSS_MATRIX_TRANSFORMS
- /* Matrix maths notes.
- * When we get here ctm maps the unit square to the position in device
- * space occupied by the image.
- *
- * That is to say that mapping the 4 corners of the unit square through
- * the transform, give us the 4 target corners. We extend the corners
- * by adding an extra '1' into them to allow transforms to work. Thus
- * (x,y) maps through ctm = (a b c d e f) as:
- *
- * (x y 1) (a b 0) = (X Y 1)
- * (c d 0)
- * (e f 1)
- *
- * To simplify reading of matrix maths, we use the trick where we
- * 'drop' the first matrix down the page. Thus the corners c0=(0,0),
- * c1=(1,0), c2=(1,1), c3=(0,1) map to C0, C1, C2, C3 respectively:
- *
- * ( a b 0)
- * ( c d 0)
- * ( e f 1)
- * (0 0 1) ( e f 1)
- * (0 1 1) ( c+e d+f 1)
- * (1 1 1) (a+c+e b+d+f 1)
- * (1 0 1) ( a+e b+f 1)
- *
- * where C0 = (e,f), C1=(c+e, d+f) C2=(a+c+e, b+d+f), C3=(a+e, b+f)
- *
- * Unfortunately, the CSS matrix transform, does not map the unit square.
- * Rather it does something moderately mad. As far as I can work out, the
- * top left corner of a (0,0) -> (w, h) box is transformed using the .e
- * and .f entries of the matrix. Then the image from within that square
- * is transformed using the centre of that square as the origin.
- *
- * So, an image placed at (0,0) in destination space with 1:1 transform
- * will result in an image a (0,0) as you'd expect. But an image at (0,0)
- * with a scale of 2, will result in 25% of the image off the left of the
- * screen, and 25% off the top.
- *
- * Accordingly, we have to adjust the ctm in several steps.
- */
- /* Move to moving the centre of the image. */
- ctm.e += (ctm.a+ctm.c)/2;
- ctm.f += (ctm.b+ctm.d)/2;
- /* Move from transforming the unit square to w/h */
- ctm.a /= block->u.i.image->w;
- ctm.b /= block->u.i.image->w;
- ctm.c /= block->u.i.image->h;
- ctm.d /= block->u.i.image->h;
- /* Move from points to pixels */
- ctm.a *= 96.0f/72;
- ctm.b *= 96.0f/72;
- ctm.c *= 96.0f/72;
- ctm.d *= 96.0f/72;
- ctm.e *= 96.0f/72;
- ctm.f *= 96.0f/72;
- /* Move to moving the top left of the untransformed image box, cos HTML is bonkers. */
- ctm.e -= block->u.i.image->w/2;
- ctm.f -= block->u.i.image->h/2;
- fz_write_printf(ctx, out, "<img style=\"position:absolute;transform:matrix(%g,%g,%g,%g,%g,%g)\" src=\"",
- ctm.a, ctm.b, ctm.c, ctm.d, ctm.e, ctm.f);
- #else
- /* Alternative version of the code that uses scaleX/Y and rotate
- * instead, but only copes with axis aligned cases. */
- int t;
- int x = block->bbox.x0;
- int y = block->bbox.y0;
- int w = block->bbox.x1 - block->bbox.x0;
- int h = block->bbox.y1 - block->bbox.y0;
- const char *flip = "";
- if (ctm.b == 0 && ctm.c == 0)
- {
- if (ctm.a < 0 && ctm.d < 0)
- flip = "transform: scaleX(-1) scaleY(-1);";
- else if (ctm.a < 0)
- {
- flip = "transform: scaleX(-1);";
- }
- else if (ctm.d < 0)
- {
- flip = "transform: scaleY(-1);";
- }
- } else if (ctm.a == 0 && ctm.d == 0) {
- if (ctm.b < 0 && ctm.c < 0)
- {
- flip = "transform: scaleY(-1) rotate(90deg);";
- x += (w-h)/2;
- y -= (w-h)/2;
- t = w; w = h; h = t;
- }
- else if (ctm.b < 0)
- {
- flip = "transform: scaleX(-1) scaleY(-1) rotate(90deg);";
- x += (w-h)/2;
- y -= (w-h)/2;
- t = w; w = h; h = t;
- }
- else if (ctm.c < 0)
- {
- flip = "transform: scaleX(-1) scaleY(-1) rotate(270deg);";
- x += (w-h)/2;
- y -= (w-h)/2;
- t = w; w = h; h = t;
- }
- else
- {
- flip = "transform: scaleY(-1) rotate(270deg);";
- x += (w-h)/2;
- y -= (w-h)/2;
- t = w; w = h; h = t;
- }
- }
- fz_write_printf(ctx, out, "<img style=\"position:absolute;%stop:%dpt;left:%dpt;width:%dpt;height:%dpt\" src=\"", flip, y, x, w, h);
- #endif
- fz_write_image_as_data_uri(ctx, out, block->u.i.image);
- fz_write_string(ctx, out, "\">\n");
- }
- void
- fz_print_stext_block_as_html(fz_context *ctx, fz_output *out, fz_stext_block *block)
- {
- fz_stext_line *line;
- fz_stext_char *ch;
- float x, y, h;
- fz_font *font = NULL;
- float size = 0;
- int sup = 0;
- uint32_t color = 0;
- for (line = block->u.t.first_line; line; line = line->next)
- {
- x = line->bbox.x0;
- y = line->bbox.y0;
- h = line->bbox.y1 - line->bbox.y0;
- if (line->first_char)
- {
- h = line->first_char->size;
- y = line->first_char->origin.y - h * 0.8f;
- }
- fz_write_printf(ctx, out, "<p style=\"top:%.1fpt;left:%.1fpt;line-height:%.1fpt\">", y, x, h);
- font = NULL;
- for (ch = line->first_char; ch; ch = ch->next)
- {
- int ch_sup = detect_super_script(line, ch);
- if (ch->font != font || ch->size != size || ch_sup != sup || ch->argb != color)
- {
- if (font)
- fz_print_style_end_html(ctx, out, font, size, sup, color);
- font = ch->font;
- size = ch->size;
- color = ch->argb;
- sup = ch_sup;
- fz_print_style_begin_html(ctx, out, font, size, sup, color);
- }
- switch (ch->c)
- {
- default:
- if (ch->c >= 32 && ch->c <= 127)
- fz_write_byte(ctx, out, ch->c);
- else
- fz_write_printf(ctx, out, "&#x%x;", ch->c);
- break;
- case '<': fz_write_string(ctx, out, "<"); break;
- case '>': fz_write_string(ctx, out, ">"); break;
- case '&': fz_write_string(ctx, out, "&"); break;
- case '"': fz_write_string(ctx, out, """); break;
- case '\'': fz_write_string(ctx, out, "'"); break;
- }
- }
- if (font)
- fz_print_style_end_html(ctx, out, font, size, sup, color);
- fz_write_string(ctx, out, "</p>\n");
- }
- }
- static const char *
- html_tag_for_struct(fz_stext_struct *s)
- {
- const char *raw;
- if (s == NULL)
- return "DIV";
- raw = s->raw;
- if (raw == NULL)
- raw = fz_structure_to_string(s->standard);
- if (!fz_strcasecmp(raw, "blockquote"))
- return "blockquote";
- if (!fz_strcasecmp(raw, "title"))
- return "h1";
- if (!fz_strcasecmp(raw, "sub"))
- return "sub";
- if (!fz_strcasecmp(raw, "p"))
- return "p";
- if (!fz_strcasecmp(raw, "h"))
- return "h1"; /* Pick one! */
- if (!fz_strcasecmp(raw, "h1"))
- return "h1";
- if (!fz_strcasecmp(raw, "h2"))
- return "h2";
- if (!fz_strcasecmp(raw, "h3"))
- return "h3";
- if (!fz_strcasecmp(raw, "h4"))
- return "h4";
- if (!fz_strcasecmp(raw, "h5"))
- return "h5";
- if (!fz_strcasecmp(raw, "h6"))
- return "h6";
- if (!fz_strcasecmp(raw, "list"))
- return "ul";
- if (!fz_strcasecmp(raw, "listitem"))
- return "li";
- if (!fz_strcasecmp(raw, "table"))
- return "table";
- if (!fz_strcasecmp(raw, "tr"))
- return "tr";
- if (!fz_strcasecmp(raw, "th"))
- return "th";
- if (!fz_strcasecmp(raw, "td"))
- return "td";
- if (!fz_strcasecmp(raw, "thead"))
- return "thead";
- if (!fz_strcasecmp(raw, "tbody"))
- return "tbody";
- if (!fz_strcasecmp(raw, "tfoot"))
- return "tfoot";
- if (!fz_strcasecmp(raw, "span"))
- return "span";
- if (!fz_strcasecmp(raw, "code"))
- return "code";
- if (!fz_strcasecmp(raw, "em"))
- return "em";
- if (!fz_strcasecmp(raw, "strong"))
- return "strong";
- return "div";
- }
- static void
- print_blocks_as_html(fz_context *ctx, fz_output *out, fz_stext_block *block);
- static void
- fz_print_stext_struct_as_html(fz_context *ctx, fz_output *out, fz_stext_block *block)
- {
- const char *tag;
- if (block->u.s.down == NULL)
- return;
- tag = html_tag_for_struct(block->u.s.down);
- fz_write_printf(ctx, out, "<%s>\n", tag);
- print_blocks_as_html(ctx, out, block->u.s.down->first_block);
- fz_write_printf(ctx, out, "</%s>\n", tag);
- }
- static void
- print_blocks_as_html(fz_context *ctx, fz_output *out, fz_stext_block *block)
- {
- for (; block; block = block->next)
- {
- if (block->type == FZ_STEXT_BLOCK_IMAGE)
- fz_print_stext_image_as_html(ctx, out, block);
- else if (block->type == FZ_STEXT_BLOCK_TEXT)
- fz_print_stext_block_as_html(ctx, out, block);
- else if (block->type == FZ_STEXT_BLOCK_STRUCT)
- fz_print_stext_struct_as_html(ctx, out, block);
- }
- }
- void
- fz_print_stext_page_as_html(fz_context *ctx, fz_output *out, fz_stext_page *page, int id)
- {
- float w = page->mediabox.x1 - page->mediabox.x0;
- float h = page->mediabox.y1 - page->mediabox.y0;
- fz_write_printf(ctx, out, "<div id=\"page%d\" style=\"width:%.1fpt;height:%.1fpt\">\n", id, w, h);
- print_blocks_as_html(ctx, out, page->first_block);
- fz_write_string(ctx, out, "</div>\n");
- }
- void
- fz_print_stext_header_as_html(fz_context *ctx, fz_output *out)
- {
- fz_write_string(ctx, out, "<!DOCTYPE html>\n");
- fz_write_string(ctx, out, "<html>\n");
- fz_write_string(ctx, out, "<head>\n");
- fz_write_string(ctx, out, "<style>\n");
- fz_write_string(ctx, out, "body{background-color:slategray}\n");
- fz_write_string(ctx, out, "div{position:relative;background-color:white;margin:1em auto;box-shadow:1px 1px 8px -2px black}\n");
- fz_write_string(ctx, out, "p{position:absolute;white-space:pre;margin:0}\n");
- fz_write_string(ctx, out, "</style>\n");
- fz_write_string(ctx, out, "</head>\n");
- fz_write_string(ctx, out, "<body>\n");
- }
- void
- fz_print_stext_trailer_as_html(fz_context *ctx, fz_output *out)
- {
- fz_write_string(ctx, out, "</body>\n");
- fz_write_string(ctx, out, "</html>\n");
- }
- /* XHTML output (semantic, little layout, suitable for reflow) */
- static void
- find_table_pos(fz_stext_grid_positions *xs, float x0, float x1, int *ix0, int *ix1)
- {
- int i;
- *ix0 = -1;
- *ix1 = -1;
- for (i = 1; i < xs->len; i++)
- if (x0 < xs->list[i].pos)
- {
- *ix0 = i-1;
- break;
- }
- for (; i < xs->len; i++)
- if (x1 < xs->list[i].pos)
- {
- *ix1 = i-1;
- break;
- }
- if (i == xs->len)
- *ix1 = i-1;
- }
- static void
- run_to_xhtml(fz_context *ctx, fz_stext_block *block, fz_output *out);
- static void
- fz_print_stext_table_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_block *block)
- {
- fz_stext_block *grid, *tr, *td;
- int w, h;
- int x, y;
- uint8_t *cells;
- int malformed = 0;
- for (grid = block; grid != NULL; grid = grid->next)
- if (grid->type == FZ_STEXT_BLOCK_GRID)
- break;
- if (grid == NULL)
- {
- fz_warn(ctx, "Malformed table data");
- return;
- }
- w = grid->u.b.xs->len;
- h = grid->u.b.ys->len;
- cells = fz_calloc(ctx, w, h);
- fz_try(ctx)
- {
- fz_write_printf(ctx, out, "<table>\n");
- y = 0;
- for (tr = grid->next; tr != NULL; tr = tr->next)
- {
- if (tr->type != FZ_STEXT_BLOCK_STRUCT || tr->u.s.down == NULL || tr->u.s.down->standard != FZ_STRUCTURE_TR)
- {
- malformed = 1;
- continue;
- }
- fz_write_printf(ctx, out, "<tr>\n");
- x = 0;
- for (td = tr->u.s.down->first_block; td != NULL; td = td->next)
- {
- int x0, y0, x1, y1;
- if (td->type != FZ_STEXT_BLOCK_STRUCT || td->u.s.down == NULL || td->u.s.down->standard != FZ_STRUCTURE_TD)
- {
- malformed = 1;
- continue;
- }
- find_table_pos(grid->u.b.xs, td->bbox.x0, td->bbox.x1, &x0, &x1);
- find_table_pos(grid->u.b.ys, td->bbox.y0, td->bbox.y1, &y0, &y1);
- if (x0 < 0 || x1 < 0 || x1 >= w)
- {
- malformed = 1;
- x0 = x;
- x1 = x+1;
- }
- if (y0 < 0 || y1 < 0 || y1 >= h)
- {
- malformed = 1;
- y0 = y;
- y1 = y+1;
- }
- if (y < y0)
- {
- malformed = 1;
- continue;
- }
- if (x > x0)
- {
- malformed = 1;
- }
- while (x < x0)
- {
- uint8_t *c = &cells[x + w*y];
- if (*c == 0)
- {
- fz_write_printf(ctx, out, "<td></td>");
- *c = 1;
- }
- x++;
- }
- fz_write_string(ctx, out, "<td");
- if (x1 > x0+1)
- fz_write_printf(ctx, out, " rowspan=%d", x1-x0);
- if (y1 > y0+1)
- fz_write_printf(ctx, out, " colspan=%d", y1-y0);
- fz_write_string(ctx, out, ">\n");
- run_to_xhtml(ctx, td->u.s.down->first_block, out);
- fz_write_printf(ctx, out, "</td>\n");
- for ( ; y0 < y1; y0++)
- for (x = x0; x < x1; x++)
- {
- uint8_t *c = &cells[x + w*y0];
- if (*c != 0)
- malformed = 1;
- *c = 1;
- }
- }
- fz_write_printf(ctx, out, "</tr>\n");
- y++;
- }
- fz_write_printf(ctx, out, "</table>\n");
- }
- fz_always(ctx)
- fz_free(ctx, cells);
- fz_catch(ctx)
- fz_rethrow(ctx);
- if (malformed)
- fz_warn(ctx, "Malformed table data");
- }
- static void
- fz_print_stext_image_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_block *block)
- {
- int w = block->bbox.x1 - block->bbox.x0;
- int h = block->bbox.y1 - block->bbox.y0;
- fz_write_printf(ctx, out, "<p><img width=\"%d\" height=\"%d\" src=\"", w, h);
- fz_write_image_as_data_uri(ctx, out, block->u.i.image);
- fz_write_string(ctx, out, "\"/></p>\n");
- }
- static void
- fz_print_style_begin_xhtml(fz_context *ctx, fz_output *out, fz_font *font, int sup)
- {
- int is_mono = fz_font_is_monospaced(ctx, font);
- int is_bold = fz_font_is_bold(ctx, font);
- int is_italic = fz_font_is_italic(ctx, font);
- if (sup)
- fz_write_string(ctx, out, "<sup>");
- if (is_mono)
- fz_write_string(ctx, out, "<tt>");
- if (is_bold)
- fz_write_string(ctx, out, "<b>");
- if (is_italic)
- fz_write_string(ctx, out, "<i>");
- }
- static void
- fz_print_style_end_xhtml(fz_context *ctx, fz_output *out, fz_font *font, int sup)
- {
- int is_mono = fz_font_is_monospaced(ctx, font);
- int is_bold = fz_font_is_bold(ctx, font);
- int is_italic = fz_font_is_italic(ctx, font);
- if (is_italic)
- fz_write_string(ctx, out, "</i>");
- if (is_bold)
- fz_write_string(ctx, out, "</b>");
- if (is_mono)
- fz_write_string(ctx, out, "</tt>");
- if (sup)
- fz_write_string(ctx, out, "</sup>");
- }
- static float avg_font_size_of_line(fz_stext_char *ch)
- {
- float size = 0;
- int n = 0;
- if (!ch)
- return 0;
- while (ch)
- {
- size += ch->size;
- ++n;
- ch = ch->next;
- }
- return size / n;
- }
/*
	Pick a block-level element from a font size: "h1" for sizes of at
	least 20, "h2" for at least 15, "h3" for at least 12, and "p" for
	anything smaller. Callers compare the returned pointers, so each tag
	is returned from a single string literal.
*/
static const char *tag_from_font_size(float size)
{
	return size >= 20 ? "h1"
		: size >= 15 ? "h2"
		: size >= 12 ? "h3"
		: "p";
}
- static void fz_print_stext_block_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_block *block)
- {
- fz_stext_line *line;
- fz_stext_char *ch;
- fz_font *font = NULL;
- int sup = 0;
- int sp = 1;
- const char *tag = NULL;
- const char *new_tag;
- for (line = block->u.t.first_line; line; line = line->next)
- {
- new_tag = tag_from_font_size(avg_font_size_of_line(line->first_char));
- if (tag != new_tag)
- {
- if (tag)
- {
- if (font)
- fz_print_style_end_xhtml(ctx, out, font, sup);
- fz_write_printf(ctx, out, "</%s>", tag);
- }
- tag = new_tag;
- fz_write_printf(ctx, out, "<%s>", tag);
- if (font)
- fz_print_style_begin_xhtml(ctx, out, font, sup);
- }
- if (!sp)
- fz_write_byte(ctx, out, ' ');
- for (ch = line->first_char; ch; ch = ch->next)
- {
- int ch_sup = detect_super_script(line, ch);
- if (ch->font != font || ch_sup != sup)
- {
- if (font)
- fz_print_style_end_xhtml(ctx, out, font, sup);
- font = ch->font;
- sup = ch_sup;
- fz_print_style_begin_xhtml(ctx, out, font, sup);
- }
- sp = (ch->c == ' ');
- switch (ch->c)
- {
- default:
- if (ch->c >= 32 && ch->c <= 127)
- fz_write_byte(ctx, out, ch->c);
- else
- fz_write_printf(ctx, out, "&#x%x;", ch->c);
- break;
- case '<': fz_write_string(ctx, out, "<"); break;
- case '>': fz_write_string(ctx, out, ">"); break;
- case '&': fz_write_string(ctx, out, "&"); break;
- case '"': fz_write_string(ctx, out, """); break;
- case '\'': fz_write_string(ctx, out, "'"); break;
- }
- }
- }
- if (font)
- fz_print_style_end_xhtml(ctx, out, font, sup);
- fz_write_printf(ctx, out, "</%s>\n", tag);
- }
- static void
- fz_print_struct_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_block *block)
- {
- const char *tag;
- if (block->u.s.down == NULL)
- return;
- if (block->u.s.down->standard == FZ_STRUCTURE_TABLE)
- {
- fz_print_stext_table_as_xhtml(ctx, out, block->u.s.down->first_block);
- return;
- }
- tag = html_tag_for_struct(block->u.s.down);
- fz_write_printf(ctx, out, "<%s>\n", tag);
- run_to_xhtml(ctx, block->u.s.down->first_block, out);
- fz_write_printf(ctx, out, "</%s>\n", tag);
- }
- static void
- run_to_xhtml(fz_context *ctx, fz_stext_block *block, fz_output *out)
- {
- while (block)
- {
- switch(block->type)
- {
- case FZ_STEXT_BLOCK_IMAGE:
- fz_print_stext_image_as_xhtml(ctx, out, block);
- break;
- case FZ_STEXT_BLOCK_TEXT:
- fz_print_stext_block_as_xhtml(ctx, out, block);
- break;
- case FZ_STEXT_BLOCK_STRUCT:
- fz_print_struct_as_xhtml(ctx, out, block);
- break;
- }
- block = block->next;
- }
- }
- void
- fz_print_stext_page_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_page *page, int id)
- {
- fz_write_printf(ctx, out, "<div id=\"page%d\">\n", id);
- run_to_xhtml(ctx, page->first_block, out);
- fz_write_string(ctx, out, "</div>\n");
- }
- void
- fz_print_stext_header_as_xhtml(fz_context *ctx, fz_output *out)
- {
- fz_write_string(ctx, out, "<?xml version=\"1.0\"?>\n");
- fz_write_string(ctx, out, "<!DOCTYPE html");
- fz_write_string(ctx, out, " PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"");
- fz_write_string(ctx, out, " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n");
- fz_write_string(ctx, out, "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n");
- fz_write_string(ctx, out, "<head>\n");
- fz_write_string(ctx, out, "<style>\n");
- fz_write_string(ctx, out, "p{white-space:pre-wrap}\n");
- fz_write_string(ctx, out, "</style>\n");
- fz_write_string(ctx, out, "</head>\n");
- fz_write_string(ctx, out, "<body>\n");
- }
- void
- fz_print_stext_trailer_as_xhtml(fz_context *ctx, fz_output *out)
- {
- fz_write_string(ctx, out, "</body>\n");
- fz_write_string(ctx, out, "</html>\n");
- }
- /* Detailed XML dump of the entire structured text data */
- static void
- xml_write_char(fz_context *ctx, fz_output *out, int c)
- {
- switch (c)
- {
- case '<': fz_write_string(ctx, out, "<"); break;
- case '>': fz_write_string(ctx, out, ">"); break;
- case '&': fz_write_string(ctx, out, "&"); break;
- case '"': fz_write_string(ctx, out, """); break;
- case '\'': fz_write_string(ctx, out, "'"); break;
- default:
- if (c >= 32 && c <= 127)
- fz_write_printf(ctx, out, "%c", c);
- else
- fz_write_printf(ctx, out, "&#x%x;", c);
- break;
- }
- }
- static void
- as_xml(fz_context *ctx, fz_stext_block *block, fz_output *out)
- {
- fz_stext_line *line;
- fz_stext_char *ch;
- int i;
- while (block)
- {
- switch (block->type)
- {
- case FZ_STEXT_BLOCK_TEXT:
- fz_write_printf(ctx, out, "<block bbox=\"%g %g %g %g\"",
- block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1);
- if (block->u.t.flags == FZ_STEXT_TEXT_JUSTIFY_UNKNOWN)
- fz_write_printf(ctx, out, " justify=\"unknown\"");
- if (block->u.t.flags == FZ_STEXT_TEXT_JUSTIFY_LEFT)
- fz_write_printf(ctx, out, " justify=\"left\"");
- if (block->u.t.flags == FZ_STEXT_TEXT_JUSTIFY_CENTRE)
- fz_write_printf(ctx, out, " justify=\"centre\"");
- if (block->u.t.flags == FZ_STEXT_TEXT_JUSTIFY_RIGHT)
- fz_write_printf(ctx, out, " justify=\"right\"");
- if (block->u.t.flags == FZ_STEXT_TEXT_JUSTIFY_FULL)
- fz_write_printf(ctx, out, " justify=\"full\"");
- fz_write_printf(ctx, out, ">\n");
- for (line = block->u.t.first_line; line; line = line->next)
- {
- fz_font *font = NULL;
- float size = 0;
- const char *name = NULL;
- fz_write_printf(ctx, out, "<line bbox=\"%g %g %g %g\" wmode=\"%d\" dir=\"%g %g\"",
- line->bbox.x0, line->bbox.y0, line->bbox.x1, line->bbox.y1,
- line->wmode,
- line->dir.x, line->dir.y);
- /* This is duplication of information, but it makes it MUCH easier to search for
- * text fragments in large output. */
- {
- int valid = 1;
- fz_write_printf(ctx, out, " text=\"");
- for (ch = line->first_char; ch; ch = ch->next)
- {
- if (valid)
- valid = fz_is_valid_xml_char(ch->c);
- xml_write_char(ctx, out, fz_range_limit_xml_char(ch->c));
- }
- if (!valid)
- {
- fz_write_printf(ctx, out, "\" hextext=\"");
- for (ch = line->first_char; ch; ch = ch->next)
- {
- char text[8];
- int n = fz_runetochar(text, ch->c);
- for (i = 0; i < n; i++)
- fz_write_printf(ctx, out, "%02x", text[i]);
- }
- }
- fz_write_printf(ctx, out, "\"");
- }
- fz_write_printf(ctx, out, ">\n");
- for (ch = line->first_char; ch; ch = ch->next)
- {
- if (ch->font != font || ch->size != size)
- {
- const char *s;
- if (font)
- fz_write_string(ctx, out, "</font>\n");
- font = ch->font;
- size = ch->size;
- s = name = font_full_name(ctx, font);
- while (*s)
- {
- int c = *s++;
- if (c < 32 || c >= 127)
- break;
- }
- if (*s)
- fz_write_printf(ctx, out, "<font hexname=%>", name);
- else
- fz_write_printf(ctx, out, "<font name=\"%s\"", name);
- fz_write_printf(ctx, out, " size=\"%g\">\n", size);
- }
- fz_write_printf(ctx, out, "<char quad=\"%g %g %g %g %g %g %g %g\" x=\"%g\" y=\"%g\" bidi=\"%d\" color=\"#%06x\" alpha=\"#%02x\" flags=\"%d\" c=\"",
- ch->quad.ul.x, ch->quad.ul.y,
- ch->quad.ur.x, ch->quad.ur.y,
- ch->quad.ll.x, ch->quad.ll.y,
- ch->quad.lr.x, ch->quad.lr.y,
- ch->origin.x, ch->origin.y,
- ch->bidi,
- ch->argb & 0xFFFFFF,
- ch->argb>>24,
- ch->flags);
- xml_write_char(ctx, out, ch->c);
- if (!fz_is_valid_xml_char(ch->c))
- {
- char text[8];
- int n = fz_runetochar(text, ch->c);
- fz_write_string(ctx, out, "\" hexc=\"");
- for (i = 0; i < n; i++)
- fz_write_printf(ctx, out, "%02x", text[i]);
- }
- fz_write_string(ctx, out, "\"/>\n");
- }
- if (font)
- fz_write_string(ctx, out, "</font>\n");
- fz_write_string(ctx, out, "</line>\n");
- }
- fz_write_string(ctx, out, "</block>\n");
- break;
- case FZ_STEXT_BLOCK_IMAGE:
- fz_write_printf(ctx, out, "<image bbox=\"%g %g %g %g\" />\n",
- block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1);
- break;
- case FZ_STEXT_BLOCK_STRUCT:
- fz_write_printf(ctx, out, "<struct idx=\"%d\" bbox=\"%g %g %g %g\"", block->u.s.index,
- block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1);
- if (block->u.s.down)
- fz_write_printf(ctx, out, " raw=\"%s\" std=\"%s\"",
- block->u.s.down->raw, fz_structure_to_string(block->u.s.down->standard));
- fz_write_printf(ctx, out, ">\n");
- if (block->u.s.down)
- as_xml(ctx, block->u.s.down->first_block, out);
- fz_write_printf(ctx, out, "</struct>\n");
- break;
- case FZ_STEXT_BLOCK_VECTOR:
- fz_write_printf(ctx, out, "<vector bbox=\"%g %g %g %g\" stroke=\"%d\" rectangle=\"%d\" continues=\"%d\" argb=\"%08x\"/>\n",
- block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1,
- !!(block->u.v.flags & FZ_STEXT_VECTOR_IS_STROKED),
- !!(block->u.v.flags & FZ_STEXT_VECTOR_IS_RECTANGLE),
- !!(block->u.v.flags & FZ_STEXT_VECTOR_CONTINUES),
- block->u.v.argb);
- break;
- case FZ_STEXT_BLOCK_GRID:
- fz_write_printf(ctx, out, "<grid xpos=\"");
- for (i = 0; i < block->u.b.xs->len; i++)
- fz_write_printf(ctx, out, "%g ", block->u.b.xs->list[i].pos);
- fz_write_printf(ctx, out, "\" xuncertainty=\"");
- for (i = 0; i < block->u.b.xs->len; i++)
- fz_write_printf(ctx, out, "%d ", block->u.b.xs->list[i].uncertainty);
- fz_write_printf(ctx, out, "\" xmaxuncertainty=\"%d\" ypos=\"", block->u.b.xs->max_uncertainty);
- for (i = 0; i < block->u.b.ys->len; i++)
- fz_write_printf(ctx, out, "%g ", block->u.b.ys->list[i].pos);
- fz_write_printf(ctx, out, "\" yuncertainty=\"");
- for (i = 0; i < block->u.b.ys->len; i++)
- fz_write_printf(ctx, out, "%d ", block->u.b.ys->list[i].uncertainty);
- fz_write_printf(ctx, out, "\" ymaxuncertainty=\"%d\" />\n", block->u.b.ys->max_uncertainty);
- break;
- }
- block = block->next;
- }
- }
- void
- fz_print_stext_page_as_xml(fz_context *ctx, fz_output *out, fz_stext_page *page, int id)
- {
- fz_write_printf(ctx, out, "<page id=\"page%d\" width=\"%g\" height=\"%g\">\n", id,
- page->mediabox.x1 - page->mediabox.x0,
- page->mediabox.y1 - page->mediabox.y0);
- as_xml(ctx, page->first_block, out);
- fz_write_string(ctx, out, "</page>\n");
- }
- /* JSON dump */
- static void
- as_json(fz_context *ctx, fz_stext_block *block, fz_output *out, float scale)
- {
- fz_stext_line *line;
- fz_stext_char *ch;
- int comma = 0;
- while (block)
- {
- if (comma)
- fz_write_string(ctx, out, ",");
- comma = 1;
- switch (block->type)
- {
- case FZ_STEXT_BLOCK_TEXT:
- fz_write_printf(ctx, out, "{%q:%q,", "type", "text");
- fz_write_printf(ctx, out, "%q:{", "bbox");
- fz_write_printf(ctx, out, "%q:%d,", "x", (int)(block->bbox.x0 * scale));
- fz_write_printf(ctx, out, "%q:%d,", "y", (int)(block->bbox.y0 * scale));
- fz_write_printf(ctx, out, "%q:%d,", "w", (int)((block->bbox.x1 - block->bbox.x0) * scale));
- fz_write_printf(ctx, out, "%q:%d},", "h", (int)((block->bbox.y1 - block->bbox.y0) * scale));
- fz_write_printf(ctx, out, "%q:[", "lines");
- for (line = block->u.t.first_line; line; line = line->next)
- {
- if (line != block->u.t.first_line)
- fz_write_string(ctx, out, ",");
- fz_write_printf(ctx, out, "{%q:%d,", "wmode", line->wmode);
- fz_write_printf(ctx, out, "%q:{", "bbox");
- fz_write_printf(ctx, out, "%q:%d,", "x", (int)(line->bbox.x0 * scale));
- fz_write_printf(ctx, out, "%q:%d,", "y", (int)(line->bbox.y0 * scale));
- fz_write_printf(ctx, out, "%q:%d,", "w", (int)((line->bbox.x1 - line->bbox.x0) * scale));
- fz_write_printf(ctx, out, "%q:%d},", "h", (int)((line->bbox.y1 - line->bbox.y0) * scale));
- /* Since we force preserve-spans, the first char has the style for the entire line. */
- if (line->first_char)
- {
- fz_font *font = line->first_char->font;
- char *font_family = "sans-serif";
- char *font_weight = "normal";
- char *font_style = "normal";
- if (fz_font_is_monospaced(ctx, font)) font_family = "monospace";
- else if (fz_font_is_serif(ctx, font)) font_family = "serif";
- if (fz_font_is_bold(ctx, font)) font_weight = "bold";
- if (fz_font_is_italic(ctx, font)) font_style = "italic";
- fz_write_printf(ctx, out, "%q:{", "font");
- fz_write_printf(ctx, out, "%q:%q,", "name", fz_font_name(ctx, font));
- fz_write_printf(ctx, out, "%q:%q,", "family", font_family);
- fz_write_printf(ctx, out, "%q:%q,", "weight", font_weight);
- fz_write_printf(ctx, out, "%q:%q,", "style", font_style);
- fz_write_printf(ctx, out, "%q:%d},", "size", (int)(line->first_char->size * scale));
- fz_write_printf(ctx, out, "%q:%d,", "x", (int)(line->first_char->origin.x * scale));
- fz_write_printf(ctx, out, "%q:%d,", "y", (int)(line->first_char->origin.y * scale));
- }
- fz_write_printf(ctx, out, "%q:\"", "text");
- for (ch = line->first_char; ch; ch = ch->next)
- {
- if (ch->c == '"' || ch->c == '\\')
- fz_write_printf(ctx, out, "\\%c", ch->c);
- else if (ch->c < 32)
- fz_write_printf(ctx, out, "\\u%04x", ch->c);
- else
- fz_write_printf(ctx, out, "%C", ch->c);
- }
- fz_write_printf(ctx, out, "\"}");
- }
- fz_write_string(ctx, out, "]}");
- break;
- case FZ_STEXT_BLOCK_IMAGE:
- fz_write_printf(ctx, out, "{%q:%q,", "type", "image");
- fz_write_printf(ctx, out, "%q:{", "bbox");
- fz_write_printf(ctx, out, "%q:%d,", "x", (int)(block->bbox.x0 * scale));
- fz_write_printf(ctx, out, "%q:%d,", "y", (int)(block->bbox.y0 * scale));
- fz_write_printf(ctx, out, "%q:%d,", "w", (int)((block->bbox.x1 - block->bbox.x0) * scale));
- fz_write_printf(ctx, out, "%q:%d}}", "h", (int)((block->bbox.y1 - block->bbox.y0) * scale));
- break;
- case FZ_STEXT_BLOCK_STRUCT:
- fz_write_printf(ctx, out, "{%q:%q,", "type", "structure");
- fz_write_printf(ctx, out, "%q:%d", "index", block->u.s.index);
- if (block->u.s.down)
- {
- fz_write_printf(ctx, out, ",%q:%q", "raw", block->u.s.down->raw);
- fz_write_printf(ctx, out, ",%q:%q", "std", fz_structure_to_string(block->u.s.down->standard));
- fz_write_printf(ctx, out, ",%q:[", "contents");
- as_json(ctx, block->u.s.down->first_block, out, scale);
- fz_write_printf(ctx, out, "]");
- }
- fz_write_printf(ctx, out, "}");
- break;
- }
- block = block->next;
- }
- }
- void
- fz_print_stext_page_as_json(fz_context *ctx, fz_output *out, fz_stext_page *page, float scale)
- {
- fz_write_printf(ctx, out, "{%q:[", "blocks");
- as_json(ctx, page->first_block, out, scale);
- fz_write_string(ctx, out, "]}");
- }
- /* Plain text */
- static void
- do_as_text(fz_context *ctx, fz_output *out, fz_stext_block *first_block)
- {
- fz_stext_block *block;
- fz_stext_line *line;
- fz_stext_char *ch;
- char utf[10];
- int i, n;
- for (block = first_block; block; block = block->next)
- {
- switch (block->type)
- {
- case FZ_STEXT_BLOCK_TEXT:
- for (line = block->u.t.first_line; line; line = line->next)
- {
- for (ch = line->first_char; ch; ch = ch->next)
- {
- n = fz_runetochar(utf, ch->c);
- for (i = 0; i < n; i++)
- fz_write_byte(ctx, out, utf[i]);
- }
- fz_write_string(ctx, out, "\n");
- }
- fz_write_string(ctx, out, "\n");
- break;
- case FZ_STEXT_BLOCK_STRUCT:
- if (block->u.s.down != NULL)
- do_as_text(ctx, out, block->u.s.down->first_block);
- break;
- }
- }
- }
- void
- fz_print_stext_page_as_text(fz_context *ctx, fz_output *out, fz_stext_page *page)
- {
- do_as_text(ctx, out, page->first_block);
- }
- /* Text output writer */
/* Output formats understood by the text document writer. */
enum
{
	FZ_FORMAT_TEXT = 0,		/* "text"; also used for unrecognised names */
	FZ_FORMAT_HTML = 1,		/* "html" */
	FZ_FORMAT_XHTML = 2,		/* "xhtml" */
	FZ_FORMAT_STEXT_XML = 3,	/* "stext" or "stext.xml" */
	FZ_FORMAT_STEXT_JSON = 4	/* "stext.json" */
};
- typedef struct
- {
- fz_document_writer super;
- int format;
- int number;
- fz_stext_options opts;
- fz_stext_page *page;
- fz_output *out;
- } fz_text_writer;
- static fz_device *
- text_begin_page(fz_context *ctx, fz_document_writer *wri_, fz_rect mediabox)
- {
- fz_text_writer *wri = (fz_text_writer*)wri_;
- float s = wri->opts.scale;
- if (wri->page)
- {
- fz_drop_stext_page(ctx, wri->page);
- wri->page = NULL;
- }
- wri->number++;
- wri->page = fz_new_stext_page(ctx, fz_transform_rect(mediabox, fz_scale(s, s)));
- return fz_new_stext_device(ctx, wri->page, &wri->opts);
- }
- static void
- text_end_page(fz_context *ctx, fz_document_writer *wri_, fz_device *dev)
- {
- fz_text_writer *wri = (fz_text_writer*)wri_;
- float s = wri->opts.scale;
- fz_scale_stext_page(ctx, wri->page, s);
- fz_try(ctx)
- {
- fz_close_device(ctx, dev);
- switch (wri->format)
- {
- default:
- case FZ_FORMAT_TEXT:
- fz_print_stext_page_as_text(ctx, wri->out, wri->page);
- break;
- case FZ_FORMAT_HTML:
- fz_print_stext_page_as_html(ctx, wri->out, wri->page, wri->number);
- break;
- case FZ_FORMAT_XHTML:
- fz_print_stext_page_as_xhtml(ctx, wri->out, wri->page, wri->number);
- break;
- case FZ_FORMAT_STEXT_XML:
- fz_print_stext_page_as_xml(ctx, wri->out, wri->page, wri->number);
- break;
- case FZ_FORMAT_STEXT_JSON:
- if (wri->number > 1)
- fz_write_string(ctx, wri->out, ",");
- fz_print_stext_page_as_json(ctx, wri->out, wri->page, 1);
- break;
- }
- }
- fz_always(ctx)
- {
- fz_drop_device(ctx, dev);
- fz_drop_stext_page(ctx, wri->page);
- wri->page = NULL;
- }
- fz_catch(ctx)
- fz_rethrow(ctx);
- }
- static void
- text_close_writer(fz_context *ctx, fz_document_writer *wri_)
- {
- fz_text_writer *wri = (fz_text_writer*)wri_;
- switch (wri->format)
- {
- case FZ_FORMAT_HTML:
- fz_print_stext_trailer_as_html(ctx, wri->out);
- break;
- case FZ_FORMAT_XHTML:
- fz_print_stext_trailer_as_xhtml(ctx, wri->out);
- break;
- case FZ_FORMAT_STEXT_XML:
- fz_write_string(ctx, wri->out, "</document>\n");
- break;
- case FZ_FORMAT_STEXT_JSON:
- fz_write_string(ctx, wri->out, "]\n");
- break;
- }
- fz_close_output(ctx, wri->out);
- }
- static void
- text_drop_writer(fz_context *ctx, fz_document_writer *wri_)
- {
- fz_text_writer *wri = (fz_text_writer*)wri_;
- fz_drop_stext_page(ctx, wri->page);
- fz_drop_output(ctx, wri->out);
- }
- fz_document_writer *
- fz_new_text_writer_with_output(fz_context *ctx, const char *format, fz_output *out, const char *options)
- {
- fz_text_writer *wri = NULL;
- fz_var(wri);
- fz_try(ctx)
- {
- wri = fz_new_derived_document_writer(ctx, fz_text_writer, text_begin_page, text_end_page, text_close_writer, text_drop_writer);
- fz_parse_stext_options(ctx, &wri->opts, options);
- wri->format = FZ_FORMAT_TEXT;
- if (!strcmp(format, "text"))
- wri->format = FZ_FORMAT_TEXT;
- else if (!strcmp(format, "html"))
- wri->format = FZ_FORMAT_HTML;
- else if (!strcmp(format, "xhtml"))
- wri->format = FZ_FORMAT_XHTML;
- else if (!strcmp(format, "stext"))
- wri->format = FZ_FORMAT_STEXT_XML;
- else if (!strcmp(format, "stext.xml"))
- wri->format = FZ_FORMAT_STEXT_XML;
- else if (!strcmp(format, "stext.json"))
- {
- wri->format = FZ_FORMAT_STEXT_JSON;
- wri->opts.flags |= FZ_STEXT_PRESERVE_SPANS;
- }
- wri->out = out;
- switch (wri->format)
- {
- case FZ_FORMAT_HTML:
- fz_print_stext_header_as_html(ctx, wri->out);
- break;
- case FZ_FORMAT_XHTML:
- fz_print_stext_header_as_xhtml(ctx, wri->out);
- break;
- case FZ_FORMAT_STEXT_XML:
- fz_write_string(ctx, wri->out, "<?xml version=\"1.0\"?>\n");
- fz_write_string(ctx, wri->out, "<document>\n");
- break;
- case FZ_FORMAT_STEXT_JSON:
- fz_write_string(ctx, wri->out, "[");
- break;
- }
- }
- fz_catch(ctx)
- {
- fz_drop_output(ctx, out);
- fz_free(ctx, wri);
- fz_rethrow(ctx);
- }
- return (fz_document_writer*)wri;
- }
- fz_document_writer *
- fz_new_text_writer(fz_context *ctx, const char *format, const char *path, const char *options)
- {
- fz_output *out = fz_new_output_with_path(ctx, path ? path : "out.txt", 0);
- return fz_new_text_writer_with_output(ctx, format, out, options);
- }
|