writer.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. // Copyright (C) 2004-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include <string.h>
  24. /* Return non-null terminated pointers to key/value entries in comma separated
  25. * option string. A plain key has the default value 'yes'. Use strncmp to compare
  26. * key/value strings. */
  27. static const char *
  28. fz_get_option(fz_context *ctx, const char **key, const char **val, const char *opts)
  29. {
  30. if (!opts || *opts == 0)
  31. return NULL;
  32. if (*opts == ',')
  33. ++opts;
  34. *key = opts;
  35. while (*opts != 0 && *opts != ',' && *opts != '=')
  36. ++opts;
  37. if (*opts == '=')
  38. {
  39. *val = ++opts;
  40. while (*opts != 0 && *opts != ',')
  41. ++opts;
  42. }
  43. else
  44. {
  45. *val = "yes";
  46. }
  47. return opts;
  48. }
  49. int
  50. fz_has_option(fz_context *ctx, const char *opts, const char *key, const char **val)
  51. {
  52. const char *straw;
  53. size_t n = strlen(key);
  54. while ((opts = fz_get_option(ctx, &straw, val, opts)))
  55. if (!strncmp(straw, key, n) && (straw[n] == '=' || straw[n] == ',' || straw[n] == 0))
  56. return 1;
  57. return 0;
  58. }
  /* Test whether the option value starting at a equals the NUL terminated
   * string b. The value at a is considered to end at the first ',' or NUL.
   * Returns 1 on an exact match, 0 otherwise. */
  int
  fz_option_eq(const char *a, const char *b)
  {
      size_t blen = strlen(b);
      char next;

      if (strncmp(a, b, blen) != 0)
          return 0;

      /* The first blen bytes matched, so a[blen] is within the string. */
      next = a[blen];
      return next == ',' || next == 0;
  }
  65. size_t
  66. fz_copy_option(fz_context *ctx, const char *val, char *dest, size_t maxlen)
  67. {
  68. const char *e = val;
  69. size_t len, len2;
  70. if (val == NULL) {
  71. if (maxlen)
  72. *dest = 0;
  73. return 0;
  74. }
  75. while (*e != ',' && *e != 0)
  76. e++;
  77. len = e-val;
  78. len2 = len+1; /* Allow for terminator */
  79. if (len > maxlen)
  80. len = maxlen;
  81. memcpy(dest, val, len);
  82. if (len < maxlen)
  83. memset(dest+len, 0, maxlen-len);
  84. return len2 >= maxlen ? len2 - maxlen : 0;
  85. }
  86. fz_document_writer *fz_new_document_writer_of_size(fz_context *ctx, size_t size, fz_document_writer_begin_page_fn *begin_page,
  87. fz_document_writer_end_page_fn *end_page, fz_document_writer_close_writer_fn *close, fz_document_writer_drop_writer_fn *drop)
  88. {
  89. fz_document_writer *wri = Memento_label(fz_calloc(ctx, 1, size), "fz_document_writer");
  90. wri->begin_page = begin_page;
  91. wri->end_page = end_page;
  92. wri->close_writer = close;
  93. wri->drop_writer = drop;
  94. return wri;
  95. }
  96. static void fz_save_pixmap_as_jpeg_default(fz_context *ctx, fz_pixmap *pixmap, const char *filename)
  97. {
  98. fz_save_pixmap_as_jpeg(ctx, pixmap, filename, 90);
  99. }
  100. fz_document_writer *fz_new_jpeg_pixmap_writer(fz_context *ctx, const char *path, const char *options)
  101. {
  102. return fz_new_pixmap_writer(ctx, path, options, "out-%04d.jpeg", 0, fz_save_pixmap_as_jpeg_default);
  103. }
  104. fz_document_writer *fz_new_png_pixmap_writer(fz_context *ctx, const char *path, const char *options)
  105. {
  106. return fz_new_pixmap_writer(ctx, path, options, "out-%04d.png", 0, fz_save_pixmap_as_png);
  107. }
  108. fz_document_writer *fz_new_pam_pixmap_writer(fz_context *ctx, const char *path, const char *options)
  109. {
  110. return fz_new_pixmap_writer(ctx, path, options, "out-%04d.pam", 0, fz_save_pixmap_as_pam);
  111. }
  112. fz_document_writer *fz_new_pnm_pixmap_writer(fz_context *ctx, const char *path, const char *options)
  113. {
  114. return fz_new_pixmap_writer(ctx, path, options, "out-%04d.pnm", 0, fz_save_pixmap_as_pnm);
  115. }
  116. fz_document_writer *fz_new_pgm_pixmap_writer(fz_context *ctx, const char *path, const char *options)
  117. {
  118. return fz_new_pixmap_writer(ctx, path, options, "out-%04d.pgm", 1, fz_save_pixmap_as_pnm);
  119. }
  120. fz_document_writer *fz_new_ppm_pixmap_writer(fz_context *ctx, const char *path, const char *options)
  121. {
  122. return fz_new_pixmap_writer(ctx, path, options, "out-%04d.ppm", 3, fz_save_pixmap_as_pnm);
  123. }
  124. fz_document_writer *fz_new_pbm_pixmap_writer(fz_context *ctx, const char *path, const char *options)
  125. {
  126. return fz_new_pixmap_writer(ctx, path, options, "out-%04d.pbm", 1, fz_save_pixmap_as_pbm);
  127. }
  128. fz_document_writer *fz_new_pkm_pixmap_writer(fz_context *ctx, const char *path, const char *options)
  129. {
  130. return fz_new_pixmap_writer(ctx, path, options, "out-%04d.pkm", 4, fz_save_pixmap_as_pkm);
  131. }
  /* Return non-zero when a names the extension ext. A single leading '.' in a
   * is ignored, and the comparison is done with fz_strcasecmp (presumably
   * case-insensitive). A NULL a never matches. */
  static int is_extension(const char *a, const char *ext)
  {
      const char *name = a;

      if (name == NULL)
          return 0;

      if (*name == '.')
          name++;

      return fz_strcasecmp(name, ext) == 0;
  }
  /* Search backwards from just before p for a '.' in the string beginning at
   * start.
   *
   * Only the positions strictly between start and p are examined: the
   * character at start itself is never considered, and neither is the one at
   * p. Returns a pointer to the nearest such '.', or NULL if there is none.
   *
   * The distance is tested before p is decremented so that no pointer before
   * start is ever formed, even when called with p == start. */
  static const char *prev_period(const char *start, const char *p)
  {
      while (p - start > 1)
      {
          --p;
          if (*p == '.')
              return p;
      }
      return NULL;
  }
  147. fz_document_writer *
  148. fz_new_document_writer(fz_context *ctx, const char *path, const char *explicit_format, const char *options)
  149. {
  150. const char *format = explicit_format;
  151. if (!format)
  152. format = strrchr(path, '.');
  153. while (format)
  154. {
  155. #if FZ_ENABLE_OCR_OUTPUT
  156. if (is_extension(format, "ocr"))
  157. return fz_new_pdfocr_writer(ctx, path, options);
  158. #endif
  159. #if FZ_ENABLE_PDF
  160. if (is_extension(format, "pdf"))
  161. return fz_new_pdf_writer(ctx, path, options);
  162. #endif
  163. if (is_extension(format, "cbz"))
  164. return fz_new_cbz_writer(ctx, path, options);
  165. if (is_extension(format, "csv"))
  166. return fz_new_csv_writer(ctx, path, options);
  167. if (is_extension(format, "svg"))
  168. return fz_new_svg_writer(ctx, path, options);
  169. if (is_extension(format, "png"))
  170. return fz_new_png_pixmap_writer(ctx, path, options);
  171. if (is_extension(format, "pam"))
  172. return fz_new_pam_pixmap_writer(ctx, path, options);
  173. if (is_extension(format, "pnm"))
  174. return fz_new_pnm_pixmap_writer(ctx, path, options);
  175. if (is_extension(format, "pgm"))
  176. return fz_new_pgm_pixmap_writer(ctx, path, options);
  177. if (is_extension(format, "ppm"))
  178. return fz_new_ppm_pixmap_writer(ctx, path, options);
  179. if (is_extension(format, "pbm"))
  180. return fz_new_pbm_pixmap_writer(ctx, path, options);
  181. if (is_extension(format, "pkm"))
  182. return fz_new_pkm_pixmap_writer(ctx, path, options);
  183. if (is_extension(format, "jpeg") || is_extension(format, "jpg"))
  184. return fz_new_jpeg_pixmap_writer(ctx, path, options);
  185. if (is_extension(format, "pcl"))
  186. return fz_new_pcl_writer(ctx, path, options);
  187. if (is_extension(format, "pclm"))
  188. return fz_new_pclm_writer(ctx, path, options);
  189. if (is_extension(format, "ps"))
  190. return fz_new_ps_writer(ctx, path, options);
  191. if (is_extension(format, "pwg"))
  192. return fz_new_pwg_writer(ctx, path, options);
  193. if (is_extension(format, "txt") || is_extension(format, "text"))
  194. return fz_new_text_writer(ctx, "text", path, options);
  195. if (is_extension(format, "html"))
  196. return fz_new_text_writer(ctx, "html", path, options);
  197. if (is_extension(format, "xhtml"))
  198. return fz_new_text_writer(ctx, "xhtml", path, options);
  199. if (is_extension(format, "stext") || is_extension(format, "stext.xml"))
  200. return fz_new_text_writer(ctx, "stext.xml", path, options);
  201. if (is_extension(format, "stext.json"))
  202. return fz_new_text_writer(ctx, "stext.json", path, options);
  203. #if FZ_ENABLE_ODT_OUTPUT
  204. if (is_extension(format, "odt"))
  205. return fz_new_odt_writer(ctx, path, options);
  206. #endif
  207. #if FZ_ENABLE_DOCX_OUTPUT
  208. if (is_extension(format, "docx"))
  209. return fz_new_docx_writer(ctx, path, options);
  210. #endif
  211. if (format != explicit_format)
  212. format = prev_period(path, format);
  213. else
  214. format = NULL;
  215. }
  216. fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot detect document format");
  217. }
  218. fz_document_writer *
  219. fz_new_document_writer_with_output(fz_context *ctx, fz_output *out, const char *format, const char *options)
  220. {
  221. #if FZ_ENABLE_OCR_OUTPUT
  222. if (is_extension(format, "ocr"))
  223. return fz_new_pdfocr_writer_with_output(ctx, out, options);
  224. #endif
  225. #if FZ_ENABLE_PDF
  226. if (is_extension(format, "pdf"))
  227. return fz_new_pdf_writer_with_output(ctx, out, options);
  228. #endif
  229. if (is_extension(format, "cbz"))
  230. return fz_new_cbz_writer_with_output(ctx, out, options);
  231. if (is_extension(format, "csv"))
  232. return fz_new_csv_writer_with_output(ctx, out, options);
  233. if (is_extension(format, "svg"))
  234. return fz_new_svg_writer_with_output(ctx, out, options);
  235. if (is_extension(format, "pcl"))
  236. return fz_new_pcl_writer_with_output(ctx, out, options);
  237. if (is_extension(format, "pclm"))
  238. return fz_new_pclm_writer_with_output(ctx, out, options);
  239. if (is_extension(format, "ps"))
  240. return fz_new_ps_writer_with_output(ctx, out, options);
  241. if (is_extension(format, "pwg"))
  242. return fz_new_pwg_writer_with_output(ctx, out, options);
  243. if (is_extension(format, "txt") || is_extension(format, "text"))
  244. return fz_new_text_writer_with_output(ctx, "text", out, options);
  245. if (is_extension(format, "html"))
  246. return fz_new_text_writer_with_output(ctx, "html", out, options);
  247. if (is_extension(format, "xhtml"))
  248. return fz_new_text_writer_with_output(ctx, "xhtml", out, options);
  249. if (is_extension(format, "stext") || is_extension(format, "stext.xml"))
  250. return fz_new_text_writer_with_output(ctx, "stext.xml", out, options);
  251. if (is_extension(format, "stext.json"))
  252. return fz_new_text_writer_with_output(ctx, "stext.json", out, options);
  253. #if FZ_ENABLE_ODT_OUTPUT
  254. if (is_extension(format, "odt"))
  255. return fz_new_odt_writer_with_output(ctx, out, options);
  256. #endif
  257. #if FZ_ENABLE_DOCX_OUTPUT
  258. if (is_extension(format, "docx"))
  259. return fz_new_docx_writer_with_output(ctx, out, options);
  260. #endif
  261. fz_throw(ctx, FZ_ERROR_ARGUMENT, "unknown output document format: %s", format);
  262. }
  263. fz_document_writer *
  264. fz_new_document_writer_with_buffer(fz_context *ctx, fz_buffer *buffer, const char *format, const char *options)
  265. {
  266. fz_document_writer *wri;
  267. fz_output *out = fz_new_output_with_buffer(ctx, buffer);
  268. fz_try(ctx) {
  269. wri = fz_new_document_writer_with_output(ctx, out, format, options);
  270. }
  271. fz_catch(ctx) {
  272. fz_drop_output(ctx, out);
  273. fz_rethrow(ctx);
  274. }
  275. return wri;
  276. }
  277. void
  278. fz_close_document_writer(fz_context *ctx, fz_document_writer *wri)
  279. {
  280. if (wri->close_writer)
  281. wri->close_writer(ctx, wri);
  282. wri->close_writer = NULL;
  283. }
  284. void
  285. fz_drop_document_writer(fz_context *ctx, fz_document_writer *wri)
  286. {
  287. if (!wri)
  288. return;
  289. if (wri->close_writer)
  290. fz_warn(ctx, "dropping unclosed document writer");
  291. if (wri->dev)
  292. fz_drop_device(ctx, wri->dev);
  293. if (wri->drop_writer)
  294. wri->drop_writer(ctx, wri);
  295. fz_free(ctx, wri);
  296. }
  297. fz_device *
  298. fz_begin_page(fz_context *ctx, fz_document_writer *wri, fz_rect mediabox)
  299. {
  300. if (!wri)
  301. return NULL;
  302. if (wri->dev)
  303. fz_throw(ctx, FZ_ERROR_ARGUMENT, "called begin page without ending the previous page");
  304. wri->dev = wri->begin_page(ctx, wri, mediabox);
  305. return wri->dev;
  306. }
  307. void
  308. fz_end_page(fz_context *ctx, fz_document_writer *wri)
  309. {
  310. fz_device *dev;
  311. if (!wri)
  312. return;
  313. dev = wri->dev;
  314. wri->dev = NULL;
  315. wri->end_page(ctx, wri, dev);
  316. }
  317. void
  318. fz_write_document(fz_context *ctx, fz_document_writer *wri, fz_document *doc)
  319. {
  320. int i, n;
  321. fz_page *page = NULL;
  322. fz_device *dev;
  323. fz_var(page);
  324. n = fz_count_pages(ctx, doc);
  325. fz_try(ctx)
  326. {
  327. for (i = 0; i < n; i++)
  328. {
  329. page = fz_load_page(ctx, doc, i);
  330. dev = fz_begin_page(ctx, wri, fz_bound_page(ctx, page));
  331. fz_run_page(ctx, page, dev, fz_identity, NULL);
  332. fz_drop_page(ctx, page);
  333. page = NULL;
  334. fz_end_page(ctx, wri);
  335. }
  336. }
  337. fz_catch(ctx)
  338. {
  339. fz_drop_page(ctx, page);
  340. fz_rethrow(ctx);
  341. }
  342. }