xmltext-device.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. // Copyright (C) 2004-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. static int s_xml_starttag_begin(fz_context *ctx, fz_output *out, const char *id)
  24. {
  25. fz_write_printf(ctx, out, "<%s", id);
  26. return 0;
  27. }
  28. static int s_xml_starttag_end(fz_context *ctx, fz_output *out)
  29. {
  30. fz_write_printf(ctx, out, ">\n");
  31. return 0;
  32. }
  33. static int s_xml_starttag_empty_end(fz_context *ctx, fz_output *out)
  34. {
  35. fz_write_printf(ctx, out, "/>\n");
  36. return 0;
  37. }
  38. static int s_xml_endtag(fz_context *ctx, fz_output *out, const char *id)
  39. {
  40. fz_write_printf(ctx, out, "</%s>\n", id);
  41. return 0;
  42. }
  43. static int s_write_attribute_int(fz_context *ctx, fz_output *out, const char *id, int value)
  44. {
  45. fz_write_printf(ctx, out, " %s=\"%i\"", id, value);
  46. return 0;
  47. }
  48. static int s_write_attribute_size(fz_context *ctx, fz_output *out, const char *id, size_t value)
  49. {
  50. fz_write_printf(ctx, out, " %s=\"%zi\"", id, value);
  51. return 0;
  52. }
  53. static int s_write_attribute_float(fz_context *ctx, fz_output *out, const char *id, float value)
  54. {
  55. fz_write_printf(ctx, out, " %s=\"%g\"", id, value);
  56. return 0;
  57. }
  58. static int s_write_attribute_string(fz_context *ctx, fz_output *out, const char *id, const char *value)
  59. {
  60. fz_write_printf(ctx, out, " %s=\"%s\"", id, value);
  61. return 0;
  62. }
  63. static int s_write_attribute_char(fz_context *ctx, fz_output *out, const char *id, char value)
  64. {
  65. if (value == '"') fz_write_printf(ctx, out, " %s=\"\\%c\"", id, value);
  66. else fz_write_printf(ctx, out, " %s=\"%c\"", id, value);
  67. return 0;
  68. }
  69. static int s_write_attribute_matrix(fz_context *ctx, fz_output *out, const char *id, const fz_matrix *matrix)
  70. {
  71. fz_write_printf(ctx, out,
  72. " %s=\"%g %g %g %g %g %g\"",
  73. id,
  74. matrix->a,
  75. matrix->b,
  76. matrix->c,
  77. matrix->d,
  78. matrix->e,
  79. matrix->f
  80. );
  81. return 0;
  82. }
  83. typedef struct
  84. {
  85. fz_device super;
  86. fz_output *out;
  87. } fz_xmltext_device;
  88. static void
  89. fz_xmltext_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm,
  90. fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
  91. {
  92. fz_xmltext_device *dev = (fz_xmltext_device*) dev_;
  93. fz_text_span *span;
  94. for (span = text->head; span; span = span->next)
  95. {
  96. int i;
  97. s_xml_starttag_begin(ctx, dev->out, "span");
  98. s_write_attribute_matrix(ctx, dev->out, "ctm", &ctm);
  99. s_write_attribute_string(ctx, dev->out, "font_name", span->font->name);
  100. if (span->font->flags.is_mono) s_write_attribute_int(ctx, dev->out, "is_mono", 1);
  101. if (span->font->flags.is_serif) s_write_attribute_int(ctx, dev->out, "is_serif", 1);
  102. if (span->font->flags.is_italic) s_write_attribute_int(ctx, dev->out, "is_italic", 1);
  103. if (span->font->flags.ft_substitute) s_write_attribute_int(ctx, dev->out, "ft_substitute", 1);
  104. if (span->font->flags.ft_stretch) s_write_attribute_int(ctx, dev->out, "ft_stretch", 1);
  105. if (span->font->flags.fake_bold) s_write_attribute_int(ctx, dev->out, "fake_bold", 1);
  106. if (span->font->flags.fake_italic) s_write_attribute_int(ctx, dev->out, "fake_italic", 1);
  107. if (span->font->flags.has_opentype) s_write_attribute_int(ctx, dev->out, "has_opentype", 1);
  108. if (span->font->flags.invalid_bbox) s_write_attribute_int(ctx, dev->out, "invalid_bbox", 1);
  109. s_write_attribute_matrix(ctx, dev->out, "trm", &span->trm);
  110. s_write_attribute_int(ctx, dev->out, "len", span->len);
  111. s_write_attribute_int(ctx, dev->out, "wmode", span->wmode);
  112. s_write_attribute_int(ctx, dev->out, "bidi_level", span->bidi_level);
  113. s_write_attribute_int(ctx, dev->out, "markup_dir", span->markup_dir);
  114. s_write_attribute_int(ctx, dev->out, "language", span->language);
  115. s_write_attribute_int(ctx, dev->out, "cap", span->cap);
  116. s_xml_starttag_end(ctx, dev->out);
  117. for (i=0; i<span->len; ++i)
  118. {
  119. fz_text_item *item = &span->items[i];
  120. s_xml_starttag_begin(ctx, dev->out, "char");
  121. s_write_attribute_float(ctx, dev->out, "x", item->x);
  122. s_write_attribute_float(ctx, dev->out, "y", item->y);
  123. s_write_attribute_int(ctx, dev->out, "gid", item->gid);
  124. s_write_attribute_int(ctx, dev->out, "ucs", item->ucs);
  125. /*
  126. * Firefox complains if we put special characters here; it's only for debugging
  127. * so this isn't really a problem.
  128. */
  129. s_write_attribute_char(ctx, dev->out, "debug_char",
  130. (item->ucs >= 32 && item->ucs < 128 && item->ucs != '"')
  131. ? item->ucs : ' '
  132. );
  133. s_write_attribute_float(ctx, dev->out, "adv", span->items[i].adv);
  134. s_xml_starttag_empty_end(ctx, dev->out);
  135. }
  136. s_xml_endtag(ctx, dev->out, "span");
  137. }
  138. }
  139. static void
  140. fz_xmltext_fill_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm,
  141. fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
  142. {
  143. fz_xmltext_text(ctx, dev_, text, ctm, colorspace, color, alpha, color_params);
  144. }
  145. static void
  146. fz_xmltext_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm,
  147. fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
  148. {
  149. fz_xmltext_text(ctx, dev_, text, ctm, colorspace, color, alpha, color_params);
  150. }
  151. static void
  152. fz_xmltext_clip_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm, fz_rect scissor)
  153. {
  154. fz_xmltext_text(ctx, dev_, text, ctm, NULL, NULL, 0 /*alpha*/, fz_default_color_params);
  155. }
  156. static void
  157. fz_xmltext_clip_stroke_text(fz_context *ctx, fz_device *dev_, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor)
  158. {
  159. fz_xmltext_text(ctx, dev_, text, ctm, NULL, 0, 0, fz_default_color_params);
  160. }
  161. static void
  162. fz_xmltext_ignore_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix ctm)
  163. {
  164. }
  165. static void
  166. fz_stext_close_device(fz_context *ctx, fz_device *dev_)
  167. {
  168. }
  169. static void fz_xmltext_fill_image(fz_context *ctx, fz_device *dev_, fz_image *img, fz_matrix ctm, float alpha, fz_color_params color_params)
  170. {
  171. fz_xmltext_device *dev = (fz_xmltext_device*) dev_;
  172. fz_pixmap *pixmap = NULL;
  173. fz_try(ctx)
  174. {
  175. const char *type = NULL;
  176. fz_compressed_buffer *compressed;
  177. s_xml_starttag_begin(ctx, dev->out, "image");
  178. /* First try to write compressed data. */
  179. compressed = fz_compressed_image_buffer(ctx, img);
  180. if (compressed)
  181. {
  182. if (compressed->params.type == FZ_IMAGE_UNKNOWN)
  183. {
  184. /* unknown image type. */
  185. }
  186. else if (compressed->params.type == FZ_IMAGE_RAW)
  187. {
  188. type = "raw";
  189. s_write_attribute_string(ctx, dev->out, "type", type);
  190. }
  191. else if (compressed->params.type == FZ_IMAGE_FAX)
  192. {
  193. type = "fax";
  194. s_write_attribute_string(ctx, dev->out, "type", type);
  195. s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.fax.columns);
  196. s_write_attribute_int(ctx, dev->out, "rows", compressed->params.u.fax.rows);
  197. s_write_attribute_int(ctx, dev->out, "k", compressed->params.u.fax.k);
  198. s_write_attribute_int(ctx, dev->out, "end_of_line", compressed->params.u.fax.end_of_line);
  199. s_write_attribute_int(ctx, dev->out, "encoded_byte_align", compressed->params.u.fax.encoded_byte_align);
  200. s_write_attribute_int(ctx, dev->out, "end_of_block", compressed->params.u.fax.end_of_block);
  201. s_write_attribute_int(ctx, dev->out, "black_is_1", compressed->params.u.fax.black_is_1);
  202. s_write_attribute_int(ctx, dev->out, "damaged_rows_before_error", compressed->params.u.fax.damaged_rows_before_error);
  203. }
  204. else if (compressed->params.type == FZ_IMAGE_FLATE)
  205. {
  206. type = "flate";
  207. s_write_attribute_string(ctx, dev->out, "type", type);
  208. s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.flate.columns);
  209. s_write_attribute_int(ctx, dev->out, "colors", compressed->params.u.flate.colors);
  210. s_write_attribute_int(ctx, dev->out, "predictor", compressed->params.u.flate.predictor);
  211. s_write_attribute_int(ctx, dev->out, "bpc", compressed->params.u.flate.bpc);
  212. }
  213. else if (compressed->params.type == FZ_IMAGE_BROTLI)
  214. {
  215. type = "brotli";
  216. s_write_attribute_string(ctx, dev->out, "type", type);
  217. s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.brotli.columns);
  218. s_write_attribute_int(ctx, dev->out, "colors", compressed->params.u.brotli.colors);
  219. s_write_attribute_int(ctx, dev->out, "predictor", compressed->params.u.brotli.predictor);
  220. s_write_attribute_int(ctx, dev->out, "bpc", compressed->params.u.brotli.bpc);
  221. }
  222. else if (compressed->params.type == FZ_IMAGE_LZW)
  223. {
  224. type = "lzw";
  225. s_write_attribute_string(ctx, dev->out, "type", type);
  226. s_write_attribute_int(ctx, dev->out, "columns", compressed->params.u.lzw.columns);
  227. s_write_attribute_int(ctx, dev->out, "colors", compressed->params.u.lzw.colors);
  228. s_write_attribute_int(ctx, dev->out, "predictor", compressed->params.u.lzw.predictor);
  229. s_write_attribute_int(ctx, dev->out, "bpc", compressed->params.u.lzw.bpc);
  230. s_write_attribute_int(ctx, dev->out, "early_change", compressed->params.u.lzw.early_change);
  231. }
  232. else if (compressed->params.type == FZ_IMAGE_BMP)
  233. {
  234. type = "bmp";
  235. s_write_attribute_string(ctx, dev->out, "type", type);
  236. }
  237. else if (compressed->params.type == FZ_IMAGE_GIF)
  238. {
  239. type = "gif";
  240. s_write_attribute_string(ctx, dev->out, "type", type);
  241. }
  242. else if (compressed->params.type == FZ_IMAGE_JBIG2)
  243. {
  244. type = "jbig2";
  245. s_write_attribute_string(ctx, dev->out, "type", type);
  246. /* do we need to write out *compressed->params.globals somehow? */
  247. }
  248. else if (compressed->params.type == FZ_IMAGE_JPEG)
  249. {
  250. type = "jpeg";
  251. s_write_attribute_string(ctx, dev->out, "type", type);
  252. s_write_attribute_int(ctx, dev->out, "color_transform", compressed->params.u.jpeg.color_transform);
  253. if (compressed->params.u.jpeg.invert_cmyk)
  254. s_write_attribute_int(ctx, dev->out, "invert_cmyk", 1);
  255. }
  256. else if (compressed->params.type == FZ_IMAGE_JPX)
  257. {
  258. type = "jpx";
  259. s_write_attribute_string(ctx, dev->out, "type", type);
  260. s_write_attribute_int(ctx, dev->out, "smask_in_data", compressed->params.u.jpx.smask_in_data);
  261. }
  262. else if (compressed->params.type == FZ_IMAGE_JXR)
  263. {
  264. type = "jxr";
  265. s_write_attribute_string(ctx, dev->out, "type", type);
  266. }
  267. else if (compressed->params.type == FZ_IMAGE_PNG)
  268. {
  269. type = "png";
  270. s_write_attribute_string(ctx, dev->out, "type", type);
  271. }
  272. else if (compressed->params.type == FZ_IMAGE_PNM)
  273. {
  274. type = "pnm";
  275. s_write_attribute_string(ctx, dev->out, "type", type);
  276. }
  277. else if (compressed->params.type == FZ_IMAGE_TIFF)
  278. {
  279. type = "tiff";
  280. s_write_attribute_string(ctx, dev->out, "type", type);
  281. }
  282. else
  283. {
  284. /* Unrecognised. */
  285. }
  286. if (type)
  287. {
  288. /* Write out raw data. */
  289. unsigned char *data;
  290. size_t datasize = fz_buffer_storage(ctx, compressed->buffer, &data);
  291. size_t i;
  292. s_write_attribute_size(ctx, dev->out, "datasize", datasize);
  293. s_xml_starttag_end(ctx, dev->out);
  294. for (i=0; i<datasize; ++i)
  295. {
  296. if (i % 32 == 0) fz_write_printf(ctx, dev->out, "\n ");
  297. if (i % 4 == 0) fz_write_printf(ctx, dev->out, " ");
  298. fz_write_printf(ctx, dev->out, "%02x", data[i]);
  299. }
  300. fz_write_printf(ctx, dev->out, "\n");
  301. }
  302. }
  303. if (!type)
  304. {
  305. /* Compressed data not available, so write out raw pixel values. */
  306. int l2factor = 0;
  307. int y;
  308. s_write_attribute_string(ctx, dev->out, "type", "pixmap");
  309. s_xml_starttag_end(ctx, dev->out);
  310. pixmap = img->get_pixmap(ctx, img, NULL /*subarea*/, img->w, img->h, &l2factor);
  311. s_write_attribute_int(ctx, dev->out, "x", pixmap->x);
  312. s_write_attribute_int(ctx, dev->out, "y", pixmap->y);
  313. s_write_attribute_int(ctx, dev->out, "w", pixmap->w);
  314. s_write_attribute_int(ctx, dev->out, "h", pixmap->h);
  315. s_write_attribute_int(ctx, dev->out, "n", pixmap->n);
  316. s_write_attribute_int(ctx, dev->out, "s", pixmap->s);
  317. s_write_attribute_int(ctx, dev->out, "alpha", pixmap->alpha);
  318. s_write_attribute_int(ctx, dev->out, "flags", pixmap->flags);
  319. s_write_attribute_int(ctx, dev->out, "xres", pixmap->xres);
  320. s_write_attribute_int(ctx, dev->out, "yres", pixmap->yres);
  321. s_write_attribute_matrix(ctx, dev->out, "ctm", &ctm);
  322. s_xml_starttag_end(ctx, dev->out);
  323. for (y=0; y<pixmap->h; ++y)
  324. {
  325. int x;
  326. s_xml_starttag_begin(ctx, dev->out, "line");
  327. s_write_attribute_int(ctx, dev->out, "y", y);
  328. s_xml_starttag_end(ctx, dev->out);
  329. for (x=0; x<pixmap->w; ++x)
  330. {
  331. int b;
  332. fz_write_printf(ctx, dev->out, " ");
  333. for (b=0; b<pixmap->n; ++b)
  334. {
  335. fz_write_printf(ctx, dev->out, "%02x", pixmap->samples[y*(size_t)pixmap->stride + x*(size_t)pixmap->n + b]);
  336. }
  337. }
  338. s_xml_endtag(ctx, dev->out, "line");
  339. }
  340. }
  341. s_xml_endtag(ctx, dev->out, "image");
  342. }
  343. fz_always(ctx)
  344. {
  345. fz_drop_pixmap(ctx, pixmap);
  346. }
  347. fz_catch(ctx)
  348. {
  349. fz_rethrow(ctx);
  350. }
  351. }
  352. fz_device *fz_new_xmltext_device(fz_context *ctx, fz_output *out)
  353. {
  354. fz_xmltext_device *dev = fz_new_derived_device(ctx, fz_xmltext_device);
  355. dev->super.close_device = fz_stext_close_device;
  356. dev->super.fill_text = fz_xmltext_fill_text;
  357. dev->super.stroke_text = fz_xmltext_stroke_text;
  358. dev->super.clip_text = fz_xmltext_clip_text;
  359. dev->super.clip_stroke_text = fz_xmltext_clip_stroke_text;
  360. dev->super.ignore_text = fz_xmltext_ignore_text;
  361. dev->super.fill_image = fz_xmltext_fill_image;
  362. dev->out = out;
  363. return (fz_device*)dev;
  364. }