| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900 |
- // Copyright (C) 2004-2025 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #if FZ_ENABLE_DOCX_OUTPUT
- #include "glyphbox.h"
- #include "extract/extract.h"
- #include "extract/buffer.h"
- #include <assert.h>
- #include <errno.h>
- #include <string.h>
/*
 * State of the document writer implemented in this file. One instance is
 * created by fz_new_docx_writer_internal() and shared with every page device
 * handed out by writer_begin_page().
 */
typedef struct
{
	fz_document_writer super; /* Base writer; pointers are cast between the two types. */
	extract_alloc_t *alloc; /* Extract allocator; calls back into s_realloc_fn(). */
	/*
	 * .ctx is needed for the callbacks we get from the Extract library, for
	 * example s_realloc_fn(). Each of our main device callbacks sets .ctx on
	 * entry, and resets back to NULL before returning.
	 */
	fz_context *ctx;
	fz_output *output; /* Destination written to by buffer_write(). */
	extract_t *extract; /* Extract instance that receives all page content. */
	int spacing; /* "spacing" option; passed to extract_process(). */
	int rotation; /* "rotation" option; passed to extract_process(). */
	int images; /* "images" option; passed to extract_process(). */
	int mediabox_clip; /* "mediabox-clip" option; if set, dev_text() skips glyphs entirely outside .mediabox. */
	fz_rect mediabox; /* As passed to writer_begin_page(). */
	char output_cache[1024]; /* Cache memory handed to Extract by buffer_cache(). */
} fz_docx_writer;
/*
 * Per-page device created by writer_begin_page(). It only adds a pointer
 * back to the owning writer to the base device.
 */
typedef struct
{
	fz_device super; /* Base device; callbacks cast fz_device* to this type. */
	fz_docx_writer *writer; /* Owning writer; set by writer_begin_page(). */
} fz_docx_device;
- static void dev_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm,
- fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
- {
- fz_docx_device *dev = (fz_docx_device*) dev_;
- fz_text_span *span;
- assert(!dev->writer->ctx);
- dev->writer->ctx = ctx;
- fz_try(ctx)
- {
- for (span = text->head; span; span = span->next)
- {
- int i;
- fz_matrix combined, trm;
- fz_rect bbox;
- combined = fz_concat(span->trm, ctm);
- bbox = span->font->bbox;
- if (extract_span_begin(
- dev->writer->extract,
- span->font->name,
- span->font->flags.is_bold,
- span->font->flags.is_italic,
- span->wmode,
- combined.a,
- combined.b,
- combined.c,
- combined.d,
- bbox.x0,
- bbox.y0,
- bbox.x1,
- bbox.y1))
- {
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin span");
- }
- trm = span->trm;
- for (i=0; i<span->len; ++i)
- {
- fz_text_item *item = &span->items[i];
- float adv = 0;
- fz_rect bounds;
- trm.e = item->x;
- trm.f = item->y;
- combined = fz_concat(trm, ctm);
- if (dev->writer->mediabox_clip)
- if (fz_glyph_entirely_outside_box(ctx, &ctm, span, item, &dev->writer->mediabox))
- continue;
- if (span->items[i].gid >= 0)
- adv = span->items[i].adv;
- bounds = fz_bound_glyph(ctx, span->font, span->items[i].gid, combined);
- if (extract_add_char(dev->writer->extract, combined.e, combined.f, item->ucs, adv,
- bounds.x0, bounds.y0, bounds.x1, bounds.y1))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to add char");
- }
- if (extract_span_end(dev->writer->extract))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to end span");
- }
- }
- fz_always(ctx)
- {
- dev->writer->ctx = NULL;
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- }
- static void dev_fill_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm,
- fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
- {
- dev_text(ctx, dev_, text, ctm, colorspace, color, alpha, color_params);
- }
- static void dev_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm,
- fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
- {
- dev_text(ctx, dev_, text, ctm, colorspace, color, alpha, color_params);
- }
- static void dev_clip_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, fz_rect scissor)
- {
- dev_text(ctx, dev_, text, ctm, NULL, NULL, 0 /*alpha*/, fz_default_color_params);
- }
- static void dev_clip_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor)
- {
- dev_text(ctx, dev_, text, ctm, NULL, 0, 0, fz_default_color_params);
- }
- static void
- dev_ignore_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm)
- {
- }
- static void writer_image_free(void *handle, void *image_data)
- {
- fz_docx_writer *writer = handle;
- fz_free(writer->ctx, image_data);
- }
- static void dev_fill_image(fz_context *ctx, fz_device *dev_, fz_image *img, fz_matrix ctm, float alpha, fz_color_params color_params)
- {
- fz_docx_device *dev = (fz_docx_device*) dev_;
- const char *type = NULL;
- fz_compressed_buffer *compressed = fz_compressed_image_buffer(ctx, img);
- assert(!dev->writer->ctx);
- dev->writer->ctx = ctx;
- fz_try(ctx)
- {
- if (compressed)
- {
- if (0) { /* For alignment */ }
- else if (compressed->params.type == FZ_IMAGE_RAW) type = "raw";
- else if (compressed->params.type == FZ_IMAGE_FAX) type = "fax";
- else if (compressed->params.type == FZ_IMAGE_FLATE) type = "flate";
- else if (compressed->params.type == FZ_IMAGE_LZW) type = "lzw";
- else if (compressed->params.type == FZ_IMAGE_BROTLI) type = "brotli";
- else if (compressed->params.type == FZ_IMAGE_BMP) type = "bmp";
- else if (compressed->params.type == FZ_IMAGE_GIF) type = "gif";
- else if (compressed->params.type == FZ_IMAGE_JBIG2) type = "jbig2";
- else if (compressed->params.type == FZ_IMAGE_JPEG) type = "jpeg";
- else if (compressed->params.type == FZ_IMAGE_JPX) type = "jpx";
- else if (compressed->params.type == FZ_IMAGE_JXR) type = "jxr";
- else if (compressed->params.type == FZ_IMAGE_PNG) type = "png";
- else if (compressed->params.type == FZ_IMAGE_PNM) type = "pnm";
- else if (compressed->params.type == FZ_IMAGE_TIFF) type = "tiff";
- if (type)
- {
- /* Write out raw data. */
- unsigned char *data;
- size_t datasize = fz_buffer_extract(ctx, compressed->buffer, &data);
- if (extract_add_image(
- dev->writer->extract,
- type,
- ctm.e /*x*/,
- ctm.f /*y*/,
- img->w /*w*/,
- img->h /*h*/,
- data,
- datasize,
- writer_image_free,
- dev->writer
- ))
- {
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to add image type=%s", type);
- }
- }
- else
- {
- /* We don't recognise this image type, so ignore. */
- }
- }
- else
- {
- /*
- * Compressed data not available, so we could write out
- * raw pixel values. But for now we ignore.
- */
- }
- }
- fz_always(ctx)
- {
- dev->writer->ctx = NULL;
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- }
/*
 * Support for sending information to Extract when walking stroke/fill path
 * with fz_walk_path().
 *
 * NOTE(review): walker_info_t is not referenced by the code visible in this
 * file; s_walk_path() passes the extract_t pointer directly as the walker
 * argument. Confirm whether this type is still needed.
 */
typedef struct
{
	fz_path_walker walker; /* Path walker callbacks. */
	extract_t *extract; /* Extract instance the callbacks would write to. */
} walker_info_t;
- static void s_moveto(fz_context *ctx, void *arg, float x, float y)
- {
- extract_t* extract = arg;
- if (extract_moveto(extract, x, y))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_moveto() failed");
- }
- static void s_lineto(fz_context *ctx, void *arg, float x, float y)
- {
- extract_t* extract = arg;
- if (extract_lineto(extract, x, y))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_lineto() failed");
- }
- static void s_curveto(fz_context *ctx, void *arg, float x1, float y1,
- float x2, float y2, float x3, float y3)
- {
- /* We simply move to the end point of the curve so that subsequent
- (straight) lines will be handled correctly. */
- extract_t* extract = arg;
- if (extract_moveto(extract, x3, y3))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_moveto() failed");
- }
- static void s_closepath(fz_context *ctx, void *arg)
- {
- extract_t* extract = arg;
- if (extract_closepath(extract))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_closepath() failed");
- }
- /*
- * Calls extract_*() path functions on <path> using fz_walk_path() and the
- * above callbacks.
- */
- static void s_walk_path(fz_context *ctx, fz_docx_device *dev, extract_t *extract, const fz_path *path)
- {
- fz_path_walker walker;
- walker.moveto = s_moveto;
- walker.lineto = s_lineto;
- walker.curveto = s_curveto;
- walker.closepath = s_closepath;
- walker.quadto = NULL;
- walker.curvetov = NULL;
- walker.curvetoy = NULL;
- walker.rectto = NULL;
- assert(dev->writer->ctx == ctx);
- fz_walk_path(ctx, path, &walker, extract /*arg*/);
- }
- void dev_fill_path(fz_context *ctx, fz_device *dev_, const fz_path *path, int even_odd,
- fz_matrix matrix, fz_colorspace * colorspace, const float *color, float alpha,
- fz_color_params color_params)
- {
- fz_docx_device *dev = (fz_docx_device*) dev_;
- extract_t *extract = dev->writer->extract;
- assert(!dev->writer->ctx);
- dev->writer->ctx = ctx;
- fz_try(ctx)
- {
- if (extract_fill_begin(
- extract,
- matrix.a,
- matrix.b,
- matrix.c,
- matrix.d,
- matrix.e,
- matrix.f,
- color[0]
- ))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin fill");
- s_walk_path(ctx, dev, extract, path);
- if (extract_fill_end(extract))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_fill_end() failed");
- }
- fz_always(ctx)
- {
- dev->writer->ctx = NULL;
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- }
- static void
- dev_stroke_path(fz_context *ctx, fz_device *dev_, const fz_path *path,
- const fz_stroke_state *stroke, fz_matrix in_ctm,
- fz_colorspace *colorspace_in, const float *color, float alpha,
- fz_color_params color_params)
- {
- fz_docx_device *dev = (fz_docx_device*) dev_;
- extract_t *extract = dev->writer->extract;
- assert(!dev->writer->ctx);
- dev->writer->ctx = ctx;
- fz_try(ctx)
- {
- if (extract_stroke_begin(
- extract,
- in_ctm.a,
- in_ctm.b,
- in_ctm.c,
- in_ctm.d,
- in_ctm.e,
- in_ctm.f,
- stroke->linewidth,
- color[0]
- ))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin stroke");
- s_walk_path(ctx, dev, extract, path);
- if (extract_stroke_end(extract))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_stroke_end() failed");
- }
- fz_always(ctx)
- {
- dev->writer->ctx = NULL;
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- }
- static extract_struct_t
- fz_struct_to_extract(fz_structure type)
- {
- switch (type)
- {
- default:
- return extract_struct_INVALID;
- case FZ_STRUCTURE_DOCUMENT:
- return extract_struct_DOCUMENT;
- case FZ_STRUCTURE_PART:
- return extract_struct_PART;
- case FZ_STRUCTURE_ART:
- return extract_struct_ART;
- case FZ_STRUCTURE_SECT:
- return extract_struct_SECT;
- case FZ_STRUCTURE_DIV:
- return extract_struct_DIV;
- case FZ_STRUCTURE_BLOCKQUOTE:
- return extract_struct_BLOCKQUOTE;
- case FZ_STRUCTURE_CAPTION:
- return extract_struct_CAPTION;
- case FZ_STRUCTURE_TOC:
- return extract_struct_TOC;
- case FZ_STRUCTURE_TOCI:
- return extract_struct_TOCI;
- case FZ_STRUCTURE_INDEX:
- return extract_struct_INDEX;
- case FZ_STRUCTURE_NONSTRUCT:
- return extract_struct_NONSTRUCT;
- case FZ_STRUCTURE_PRIVATE:
- return extract_struct_PRIVATE;
- /* Grouping elements (PDF 2.0 - Table 364) */
- case FZ_STRUCTURE_DOCUMENTFRAGMENT:
- return extract_struct_DOCUMENTFRAGMENT;
- /* Grouping elements (PDF 2.0 - Table 365) */
- case FZ_STRUCTURE_ASIDE:
- return extract_struct_ASIDE;
- /* Grouping elements (PDF 2.0 - Table 366) */
- case FZ_STRUCTURE_TITLE:
- return extract_struct_TITLE;
- case FZ_STRUCTURE_FENOTE:
- return extract_struct_FENOTE;
- /* Grouping elements (PDF 2.0 - Table 367) */
- case FZ_STRUCTURE_SUB:
- return extract_struct_SUB;
- /* Paragraphlike elements (PDF 1.7 - Table 10.21) */
- case FZ_STRUCTURE_P:
- return extract_struct_P;
- case FZ_STRUCTURE_H:
- return extract_struct_H;
- case FZ_STRUCTURE_H1:
- return extract_struct_H1;
- case FZ_STRUCTURE_H2:
- return extract_struct_H2;
- case FZ_STRUCTURE_H3:
- return extract_struct_H3;
- case FZ_STRUCTURE_H4:
- return extract_struct_H4;
- case FZ_STRUCTURE_H5:
- return extract_struct_H5;
- case FZ_STRUCTURE_H6:
- return extract_struct_H6;
- /* List elements (PDF 1.7 - Table 10.23) */
- case FZ_STRUCTURE_LIST:
- return extract_struct_LIST;
- case FZ_STRUCTURE_LISTITEM:
- return extract_struct_LISTITEM;
- case FZ_STRUCTURE_LABEL:
- return extract_struct_LABEL;
- case FZ_STRUCTURE_LISTBODY:
- return extract_struct_LISTBODY;
- /* Table elements (PDF 1.7 - Table 10.24) */
- case FZ_STRUCTURE_TABLE:
- return extract_struct_TABLE;
- case FZ_STRUCTURE_TR:
- return extract_struct_TR;
- case FZ_STRUCTURE_TH:
- return extract_struct_TH;
- case FZ_STRUCTURE_TD:
- return extract_struct_TD;
- case FZ_STRUCTURE_THEAD:
- return extract_struct_THEAD;
- case FZ_STRUCTURE_TBODY:
- return extract_struct_TBODY;
- case FZ_STRUCTURE_TFOOT:
- return extract_struct_TFOOT;
- /* Inline elements (PDF 1.7 - Table 10.25) */
- case FZ_STRUCTURE_SPAN:
- return extract_struct_SPAN;
- case FZ_STRUCTURE_QUOTE:
- return extract_struct_QUOTE;
- case FZ_STRUCTURE_NOTE:
- return extract_struct_NOTE;
- case FZ_STRUCTURE_REFERENCE:
- return extract_struct_REFERENCE;
- case FZ_STRUCTURE_BIBENTRY:
- return extract_struct_BIBENTRY;
- case FZ_STRUCTURE_CODE:
- return extract_struct_CODE;
- case FZ_STRUCTURE_LINK:
- return extract_struct_LINK;
- case FZ_STRUCTURE_ANNOT:
- return extract_struct_ANNOT;
- /* Inline elements (PDF 2.0 - Table 368) */
- case FZ_STRUCTURE_EM:
- return extract_struct_EM;
- case FZ_STRUCTURE_STRONG:
- return extract_struct_STRONG;
- /* Ruby inline element (PDF 1.7 - Table 10.26) */
- case FZ_STRUCTURE_RUBY:
- return extract_struct_RUBY;
- case FZ_STRUCTURE_RB:
- return extract_struct_RB;
- case FZ_STRUCTURE_RT:
- return extract_struct_RT;
- case FZ_STRUCTURE_RP:
- return extract_struct_RP;
- /* Warichu inline element (PDF 1.7 - Table 10.26) */
- case FZ_STRUCTURE_WARICHU:
- return extract_struct_WARICHU;
- case FZ_STRUCTURE_WT:
- return extract_struct_WT;
- case FZ_STRUCTURE_WP:
- return extract_struct_WP;
- /* Illustration elements (PDF 1.7 - Table 10.27) */
- case FZ_STRUCTURE_FIGURE:
- return extract_struct_FIGURE;
- case FZ_STRUCTURE_FORMULA:
- return extract_struct_FORMULA;
- case FZ_STRUCTURE_FORM:
- return extract_struct_FORM;
- /* Artifact structure type (PDF 2.0 - Table 375) */
- case FZ_STRUCTURE_ARTIFACT:
- return extract_struct_ARTIFACT;
- }
- }
- static void
- dev_begin_structure(fz_context *ctx, fz_device *dev_, fz_structure standard, const char *raw, int idx)
- {
- fz_docx_device *dev = (fz_docx_device *)dev_;
- extract_t *extract = dev->writer->extract;
- assert(!dev->writer->ctx);
- dev->writer->ctx = ctx;
- fz_try(ctx)
- {
- if (extract_begin_struct(extract, fz_struct_to_extract(standard), idx, -1))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin struct");
- }
- fz_always(ctx)
- dev->writer->ctx = NULL;
- fz_catch(ctx)
- fz_rethrow(ctx);
- }
- static void
- dev_end_structure(fz_context *ctx, fz_device *dev_)
- {
- fz_docx_device *dev = (fz_docx_device *)dev_;
- extract_t *extract = dev->writer->extract;
- assert(!dev->writer->ctx);
- dev->writer->ctx = ctx;
- fz_try(ctx)
- {
- if (extract_end_struct(extract))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to end struct");
- }
- fz_always(ctx)
- dev->writer->ctx = NULL;
- fz_catch(ctx)
- fz_rethrow(ctx);
- }
- static fz_device *writer_begin_page(fz_context *ctx, fz_document_writer *writer_, fz_rect mediabox)
- {
- fz_docx_writer *writer = (fz_docx_writer*) writer_;
- fz_docx_device *dev;
- assert(!writer->ctx);
- writer->ctx = ctx;
- writer->mediabox = mediabox;
- fz_var(dev);
- fz_try(ctx)
- {
- if (extract_page_begin(writer->extract, mediabox.x0, mediabox.y0, mediabox.x1, mediabox.y1))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to begin page");
- dev = fz_new_derived_device(ctx, fz_docx_device);
- dev->super.fill_text = dev_fill_text;
- dev->super.stroke_text = dev_stroke_text;
- dev->super.clip_text = dev_clip_text;
- dev->super.clip_stroke_text = dev_clip_stroke_text;
- dev->super.ignore_text = dev_ignore_text;
- dev->super.fill_image = dev_fill_image;
- dev->super.fill_path = dev_fill_path;
- dev->super.stroke_path = dev_stroke_path;
- dev->super.begin_structure = dev_begin_structure;
- dev->super.end_structure = dev_end_structure;
- dev->writer = writer;
- }
- fz_always(ctx)
- {
- writer->ctx = NULL;
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- return &dev->super;
- }
- static void writer_end_page(fz_context *ctx, fz_document_writer *writer_, fz_device *dev)
- {
- fz_docx_writer *writer = (fz_docx_writer*) writer_;
- assert(!writer->ctx);
- writer->ctx = ctx;
- fz_try(ctx)
- {
- fz_close_device(ctx, dev);
- if (extract_page_end(writer->extract))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to end page");
- if (extract_process(writer->extract, writer->spacing, writer->rotation, writer->images))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to process page");
- }
- fz_always(ctx)
- {
- writer->ctx = NULL;
- fz_drop_device(ctx, dev);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- }
- static int buffer_write(void *handle, const void *source, size_t numbytes, size_t *o_actual)
- /*
- * extract_buffer_t callback that calls fz_write_data(). <source> will be docx
- * archive data.
- */
- {
- int e = 0;
- fz_docx_writer *writer = handle;
- fz_var(e);
- fz_try(writer->ctx)
- {
- fz_write_data(writer->ctx, writer->output, source, numbytes);
- *o_actual = numbytes;
- }
- fz_catch(writer->ctx)
- {
- errno = EIO;
- e = -1;
- }
- return e;
- }
- static int buffer_cache(void *handle, void **o_cache, size_t *o_numbytes)
- /*
- * extract_buffer_t cache function. We simply return writer->output_cache.
- */
- {
- fz_docx_writer *writer = handle;
- *o_cache = writer->output_cache;
- *o_numbytes = sizeof(writer->output_cache);
- return 0;
- }
- static void writer_close(fz_context *ctx, fz_document_writer *writer_)
- {
- fz_docx_writer *writer = (fz_docx_writer*) writer_;
- extract_buffer_t *extract_buffer_output = NULL;
- fz_var(extract_buffer_output);
- fz_var(writer);
- assert(!writer->ctx);
- writer->ctx = ctx;
- fz_try(ctx)
- {
- /*
- * Write docx to writer->output. Need to create an
- * extract_buffer_t that writes to writer->output, for use by
- * extract_write().
- */
- if (extract_buffer_open(
- writer->alloc,
- writer,
- NULL /*fn_read*/,
- buffer_write,
- buffer_cache,
- NULL /*fn_close*/,
- &extract_buffer_output
- ))
- {
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to create extract_buffer_output: %s", strerror(errno));
- }
- if (extract_write(writer->extract, extract_buffer_output))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to generate docx content: %s", strerror(errno));
- if (extract_buffer_close(&extract_buffer_output))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to close extract_buffer: %s", strerror(errno));
- extract_end(&writer->extract);
- fz_close_output(ctx, writer->output);
- writer->ctx = NULL;
- }
- fz_catch(ctx)
- {
- /*
- * We don't call fz_close_output() because it can throw and in
- * this error case we can safely leave cleanup to our s_drop()
- * function's calls to fz_drop_output().
- */
- extract_buffer_close(&extract_buffer_output);
- extract_end(&writer->extract);
- writer->ctx = NULL;
- fz_rethrow(ctx);
- }
- }
- static void writer_drop(fz_context *ctx, fz_document_writer *writer_)
- {
- fz_docx_writer *writer = (fz_docx_writer*) writer_;
- fz_drop_output(ctx, writer->output);
- writer->output = NULL;
- assert(!writer->ctx);
- writer->ctx = ctx;
- extract_end(&writer->extract);
- extract_alloc_destroy(&writer->alloc);
- writer->ctx = NULL;
- }
- static int get_bool_option(fz_context *ctx, const char *options, const char *name, int default_)
- {
- const char *value;
- if (fz_has_option(ctx, options, name, &value))
- {
- if (fz_option_eq(value, "yes")) return 1;
- if (fz_option_eq(value, "no")) return 0;
- else fz_throw(ctx, FZ_ERROR_SYNTAX, "option '%s' should be yes or no in options='%s'", name, options);
- }
- else
- return default_;
- }
- static double get_double_option(fz_context *ctx, const char *options, const char *name, double default_)
- {
- const char *value;
- if (fz_has_option(ctx, options, name, &value))
- {
- double ret = atof(value);
- return ret;
- }
- else
- return default_;
- }
- static void *s_realloc_fn(void *state, void *prev, size_t size)
- {
- fz_docx_writer *writer = state;
- assert(writer);
- assert(writer->ctx);
- return fz_realloc_no_throw(writer->ctx, prev, size);
- }
- /* Will drop <out> if an error occurs. */
- static fz_document_writer *fz_new_docx_writer_internal(fz_context *ctx, fz_output *out,
- const char *options, extract_format_t format)
- {
- fz_docx_writer *writer = NULL;
- fz_var(writer);
- fz_try(ctx)
- {
- double space_guess = get_double_option(ctx, options, "space-guess", 0);
- writer = fz_new_derived_document_writer(
- ctx,
- fz_docx_writer,
- writer_begin_page,
- writer_end_page,
- writer_close,
- writer_drop
- );
- writer->ctx = ctx;
- writer->output = out;
- if (get_bool_option(ctx, options, "html", 0)) format = extract_format_HTML;
- if (get_bool_option(ctx, options, "text", 0)) format = extract_format_TEXT;
- if (get_bool_option(ctx, options, "json", 0)) format = extract_format_JSON;
- if (extract_alloc_create(s_realloc_fn, writer, &writer->alloc))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to create extract_alloc instance");
- if (extract_begin(writer->alloc, format, &writer->extract))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to create extract instance");
- if (space_guess)
- extract_set_space_guess(writer->extract, space_guess);
- writer->spacing = get_bool_option(ctx, options, "spacing", 0);
- writer->rotation = get_bool_option(ctx, options, "rotation", 1);
- writer->images = get_bool_option(ctx, options, "images", 1);
- writer->mediabox_clip = get_bool_option(ctx, options, "mediabox-clip", 1);
- if (extract_set_layout_analysis(writer->extract, get_bool_option(ctx, options, "analyse", 0)))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_enable_analysis failed.");
- {
- const char* v;
- if (fz_has_option(ctx, options, "tables-csv-format", &v))
- {
- size_t len = strlen(v) + 1; /* Might include trailing options. */
- char* formatbuf = fz_malloc(ctx, len);
- fz_copy_option(ctx, v, formatbuf, len);
- fprintf(stderr, "tables-csv-format: %s\n", formatbuf);
- if (extract_tables_csv_format(writer->extract, formatbuf))
- {
- fz_free(ctx, formatbuf);
- fz_throw(ctx, FZ_ERROR_LIBRARY, "extract_tables_csv_format() failed.");
- }
- fz_free(ctx, formatbuf);
- }
- }
- writer->ctx = NULL;
- }
- fz_catch(ctx)
- {
- /* fz_drop_document_writer() drops its output so we only need to call
- fz_drop_output() if we failed before creating the writer. */
- if (writer)
- {
- writer->ctx = ctx;
- fz_drop_document_writer(ctx, &writer->super);
- writer->ctx = NULL;
- }
- else
- fz_drop_output(ctx, out);
- fz_rethrow(ctx);
- }
- return &writer->super;
- }
- fz_document_writer *fz_new_docx_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
- {
- return fz_new_docx_writer_internal(ctx, out, options, extract_format_DOCX);
- }
- fz_document_writer *fz_new_docx_writer(fz_context *ctx, const char *path, const char *options)
- {
- /* No need to drop <out> if fz_new_docx_writer_internal() throws, because
- it always drops <out> if it fails. */
- fz_output *out = fz_new_output_with_path(ctx, path, 0 /*append*/);
- return fz_new_docx_writer_internal(ctx, out, options, extract_format_DOCX);
- }
- #if FZ_ENABLE_ODT_OUTPUT
- fz_document_writer *fz_new_odt_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
- {
- return fz_new_docx_writer_internal(ctx, out, options, extract_format_ODT);
- }
- fz_document_writer *fz_new_odt_writer(fz_context *ctx, const char *path, const char *options)
- {
- /* No need to drop <out> if fz_new_docx_writer_internal() throws, because
- it always drops <out> if it fails. */
- fz_output *out = fz_new_output_with_path(ctx, path, 0 /*append*/);
- return fz_new_docx_writer_internal(ctx, out, options, extract_format_ODT);
- }
- #else
- fz_document_writer *fz_new_odt_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
- {
- fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "ODT writer not enabled");
- return NULL;
- }
- fz_document_writer *fz_new_odt_writer(fz_context *ctx, const char *path, const char *options)
- {
- fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "ODT writer not enabled");
- return NULL;
- }
- #endif
- #else
- fz_document_writer *fz_new_odt_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
- {
- fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "DOCX/ODT writer not enabled");
- return NULL;
- }
- fz_document_writer *fz_new_odt_writer(fz_context *ctx, const char *path, const char *options)
- {
- fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "DOCX/ODT writer not enabled");
- return NULL;
- }
- fz_document_writer *fz_new_docx_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
- {
- fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "DOCX writer not enabled");
- return NULL;
- }
- fz_document_writer *fz_new_docx_writer(fz_context *ctx, const char *path, const char *options)
- {
- fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "DOCX writer not enabled");
- return NULL;
- }
- #endif
|