// Copyright (C) 2004-2023 Artifex Software, Inc.
//
// This file is part of MuPDF.
//
// MuPDF is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
//
// You should have received a copy of the GNU Affero General Public License
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
//
// Alternative licensing terms are available from the licensor.
// For commercial licensing, see <https://www.artifex.com/> or contact
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
// CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include <math.h>
  24. #include <stdio.h>
  25. #include <string.h>
  26. #include <limits.h>
  27. #include <jpeglib.h>
  28. #ifdef SHARE_JPEG
  29. #define JZ_CTX_FROM_CINFO(c) (fz_context *)((c)->client_data)
  30. static void fz_jpg_mem_init(j_common_ptr cinfo, fz_context *ctx)
  31. {
  32. cinfo->client_data = ctx;
  33. }
  34. #define fz_jpg_mem_term(cinfo)
  35. #else /* SHARE_JPEG */
  36. typedef void * backing_store_ptr;
  37. #include "jmemcust.h"
  38. #define JZ_CTX_FROM_CINFO(c) (fz_context *)(GET_CUST_MEM_DATA(c)->priv)
  39. static void *
  40. fz_jpg_mem_alloc(j_common_ptr cinfo, size_t size)
  41. {
  42. fz_context *ctx = JZ_CTX_FROM_CINFO(cinfo);
  43. return fz_malloc_no_throw(ctx, size);
  44. }
  45. static void
  46. fz_jpg_mem_free(j_common_ptr cinfo, void *object, size_t size)
  47. {
  48. fz_context *ctx = JZ_CTX_FROM_CINFO(cinfo);
  49. fz_free(ctx, object);
  50. }
  51. static void
  52. fz_jpg_mem_init(j_common_ptr cinfo, fz_context *ctx)
  53. {
  54. jpeg_cust_mem_data *custmptr;
  55. custmptr = fz_malloc_struct(ctx, jpeg_cust_mem_data);
  56. if (!jpeg_cust_mem_init(custmptr, (void *) ctx, NULL, NULL, NULL,
  57. fz_jpg_mem_alloc, fz_jpg_mem_free,
  58. fz_jpg_mem_alloc, fz_jpg_mem_free, NULL))
  59. {
  60. fz_free(ctx, custmptr);
  61. fz_throw(ctx, FZ_ERROR_LIBRARY, "cannot initialize custom JPEG memory handler");
  62. }
  63. cinfo->client_data = custmptr;
  64. }
  65. static void
  66. fz_jpg_mem_term(j_common_ptr cinfo)
  67. {
  68. if (cinfo->client_data)
  69. {
  70. fz_context *ctx = JZ_CTX_FROM_CINFO(cinfo);
  71. fz_free(ctx, cinfo->client_data);
  72. cinfo->client_data = NULL;
  73. }
  74. }
  75. #endif /* SHARE_JPEG */
  76. static void output_message(j_common_ptr cinfo)
  77. {
  78. /* swallow message */
  79. }
  80. static void error_exit(j_common_ptr cinfo)
  81. {
  82. char msg[JMSG_LENGTH_MAX];
  83. fz_context *ctx = JZ_CTX_FROM_CINFO(cinfo);
  84. cinfo->err->format_message(cinfo, msg);
  85. fz_throw(ctx, FZ_ERROR_LIBRARY, "jpeg error: %s", msg);
  86. }
  87. static void init_source(j_decompress_ptr cinfo)
  88. {
  89. /* nothing to do */
  90. }
  91. static void term_source(j_decompress_ptr cinfo)
  92. {
  93. /* nothing to do */
  94. }
  95. static boolean fill_input_buffer(j_decompress_ptr cinfo)
  96. {
  97. static unsigned char eoi[2] = { 0xFF, JPEG_EOI };
  98. struct jpeg_source_mgr *src = cinfo->src;
  99. src->next_input_byte = eoi;
  100. src->bytes_in_buffer = 2;
  101. return 1;
  102. }
  103. static void skip_input_data(j_decompress_ptr cinfo, long num_bytes)
  104. {
  105. struct jpeg_source_mgr *src = cinfo->src;
  106. if (num_bytes > 0)
  107. {
  108. size_t skip = (size_t)num_bytes; /* size_t may be 64bit */
  109. if (skip > src->bytes_in_buffer)
  110. skip = (size_t)src->bytes_in_buffer;
  111. src->next_input_byte += skip;
  112. src->bytes_in_buffer -= skip;
  113. }
  114. }
  115. static inline int read_value(const unsigned char *data, int bytes, int is_big_endian)
  116. {
  117. int value = 0;
  118. if (!is_big_endian)
  119. data += bytes;
  120. for (; bytes > 0; bytes--)
  121. value = (value << 8) | (is_big_endian ? *data++ : *--data);
  122. return value;
  123. }
  124. enum {
  125. MAX_ICC_PARTS = 256
  126. };
  127. static fz_colorspace *extract_icc_profile(fz_context *ctx, jpeg_saved_marker_ptr init_marker, int output_components, fz_colorspace *colorspace)
  128. {
  129. #if FZ_ENABLE_ICC
  130. const char idseq[] = { 'I', 'C', 'C', '_', 'P', 'R', 'O', 'F', 'I', 'L', 'E', '\0'};
  131. jpeg_saved_marker_ptr marker = init_marker;
  132. fz_buffer *buf = NULL;
  133. fz_colorspace *icc;
  134. int part = 1;
  135. int parts = MAX_ICC_PARTS;
  136. const unsigned char *data;
  137. size_t size;
  138. fz_var(buf);
  139. if (init_marker == NULL)
  140. return colorspace;
  141. fz_try(ctx)
  142. {
  143. while (part < parts && marker != NULL)
  144. {
  145. for (marker = init_marker; marker != NULL; marker = marker->next)
  146. {
  147. if (marker->marker != JPEG_APP0 + 2)
  148. continue;
  149. if (marker->data_length < nelem(idseq) + 2)
  150. continue;
  151. if (memcmp(marker->data, idseq, nelem(idseq)))
  152. continue;
  153. if (marker->data[nelem(idseq)] != part)
  154. continue;
  155. if (parts == MAX_ICC_PARTS)
  156. parts = marker->data[nelem(idseq) + 1];
  157. else if (marker->data[nelem(idseq) + 1] != parts)
  158. fz_warn(ctx, "inconsistent number of icc profile chunks in jpeg");
  159. if (part > parts)
  160. {
  161. fz_warn(ctx, "skipping out of range icc profile chunk in jpeg");
  162. continue;
  163. }
  164. data = marker->data + 14;
  165. size = marker->data_length - 14;
  166. if (!buf)
  167. buf = fz_new_buffer_from_copied_data(ctx, data, size);
  168. else
  169. fz_append_data(ctx, buf, data, size);
  170. part++;
  171. break;
  172. }
  173. }
  174. if (buf)
  175. {
  176. icc = fz_new_icc_colorspace(ctx, fz_colorspace_type(ctx, colorspace), 0, NULL, buf);
  177. fz_drop_colorspace(ctx, colorspace);
  178. colorspace = icc;
  179. }
  180. }
  181. fz_always(ctx)
  182. fz_drop_buffer(ctx, buf);
  183. fz_catch(ctx)
  184. {
  185. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  186. fz_report_error(ctx);
  187. fz_warn(ctx, "ignoring embedded ICC profile in JPEG");
  188. }
  189. return colorspace;
  190. #else
  191. return colorspace;
  192. #endif
  193. }
  194. /* Returns true if <x> can be represented as an integer without overflow.
  195. *
  196. * We can't use comparisons such as 'return x < INT_MAX' because INT_MAX is
  197. * not safely convertible to float - it ends up as INT_MAX+1 so the comparison
  198. * doesn't do what we want.
  199. *
  200. * Instead we do a round-trip conversion and return true if this differs by
  201. * less than 1. This relies on high adjacent float values that differ by more
  202. * than 1, actually being exact integers, so the round-trip doesn't change the
  203. * value.
  204. */
  205. static int float_can_be_int(float x)
  206. {
  207. return fabsf(x - (float)(int) x) < 1;
  208. }
  209. static uint8_t exif_orientation_to_mupdf[9] = { 0, 1, 5, 3, 7, 6, 4, 8, 2 };
  210. static int extract_exif_resolution(jpeg_saved_marker_ptr marker,
  211. int *xres, int *yres, uint8_t *orientation)
  212. {
  213. int is_big_endian, orient;
  214. const unsigned char *data;
  215. unsigned int offset, ifd_len, res_type = 0;
  216. float x_res = 0, y_res = 0;
  217. if (!marker || marker->marker != JPEG_APP0 + 1 || marker->data_length < 14)
  218. return 0;
  219. data = (const unsigned char *)marker->data;
  220. if (read_value(data, 4, 1) != 0x45786966 /* Exif */ || read_value(data + 4, 2, 1) != 0x0000)
  221. return 0;
  222. if (read_value(data + 6, 4, 1) == 0x49492A00)
  223. is_big_endian = 0;
  224. else if (read_value(data + 6, 4, 1) == 0x4D4D002A)
  225. is_big_endian = 1;
  226. else
  227. return 0;
  228. offset = read_value(data + 10, 4, is_big_endian) + 6;
  229. if (offset < 14 || offset > marker->data_length - 2)
  230. return 0;
  231. ifd_len = read_value(data + offset, 2, is_big_endian);
  232. for (offset += 2; ifd_len > 0 && offset + 12 < marker->data_length; ifd_len--, offset += 12)
  233. {
  234. int tag = read_value(data + offset, 2, is_big_endian);
  235. int type = read_value(data + offset + 2, 2, is_big_endian);
  236. int count = read_value(data + offset + 4, 4, is_big_endian);
  237. unsigned int value_off = read_value(data + offset + 8, 4, is_big_endian) + 6;
  238. switch (tag)
  239. {
  240. case 0x112:
  241. if (type == 3 && count == 1) {
  242. orient = read_value(data + offset + 8, 2, is_big_endian);
  243. if (orient >= 1 && orient <= 8 && orientation)
  244. *orientation = exif_orientation_to_mupdf[orient];
  245. }
  246. break;
  247. case 0x11A:
  248. if (type == 5 && value_off > offset && value_off <= marker->data_length - 8)
  249. x_res = 1.0f * read_value(data + value_off, 4, is_big_endian) / read_value(data + value_off + 4, 4, is_big_endian);
  250. break;
  251. case 0x11B:
  252. if (type == 5 && value_off > offset && value_off <= marker->data_length - 8)
  253. y_res = 1.0f * read_value(data + value_off, 4, is_big_endian) / read_value(data + value_off + 4, 4, is_big_endian);
  254. break;
  255. case 0x128:
  256. if (type == 3 && count == 1)
  257. res_type = read_value(data + offset + 8, 2, is_big_endian);
  258. break;
  259. }
  260. }
  261. if (x_res <= 0 || !float_can_be_int(x_res) || y_res <= 0 || !float_can_be_int(y_res))
  262. return 0;
  263. if (res_type == 2)
  264. {
  265. *xres = (int)x_res;
  266. *yres = (int)y_res;
  267. }
  268. else if (res_type == 3)
  269. {
  270. *xres = (int)(x_res * 254 / 100);
  271. *yres = (int)(y_res * 254 / 100);
  272. }
  273. else
  274. {
  275. *xres = 0;
  276. *yres = 0;
  277. }
  278. return 1;
  279. }
  280. static int extract_app13_resolution(jpeg_saved_marker_ptr marker, int *xres, int *yres)
  281. {
  282. const unsigned char *data, *data_end;
  283. if (!marker || marker->marker != JPEG_APP0 + 13 || marker->data_length < 42 ||
  284. strcmp((const char *)marker->data, "Photoshop 3.0") != 0)
  285. {
  286. return 0;
  287. }
  288. data = (const unsigned char *)marker->data;
  289. data_end = data + marker->data_length;
  290. for (data += 14; data + 12 < data_end; ) {
  291. int data_size = -1;
  292. int tag = read_value(data + 4, 2, 1);
  293. int value_off = 11 + read_value(data + 6, 2, 1);
  294. if (value_off % 2 == 1)
  295. value_off++;
  296. if (read_value(data, 4, 1) == 0x3842494D /* 8BIM */ && value_off <= data_end - data)
  297. data_size = read_value(data + value_off - 4, 4, 1);
  298. if (data_size < 0 || data_size > data_end - data - value_off)
  299. return 0;
  300. if (tag == 0x3ED && data_size == 16)
  301. {
  302. *xres = read_value(data + value_off, 2, 1);
  303. *yres = read_value(data + value_off + 8, 2, 1);
  304. return 1;
  305. }
  306. if (data_size % 2 == 1)
  307. data_size++;
  308. data += value_off + data_size;
  309. }
  310. return 0;
  311. }
  312. static void invert_cmyk(unsigned char *p, int n)
  313. {
  314. int i;
  315. for (i = 0; i < n; ++i)
  316. p[i] = 255 - p[i];
  317. }
  318. fz_pixmap *
  319. fz_load_jpeg(fz_context *ctx, const unsigned char *rbuf, size_t rlen)
  320. {
  321. struct jpeg_decompress_struct cinfo;
  322. struct jpeg_error_mgr err;
  323. struct jpeg_source_mgr src;
  324. unsigned char *row[1], *sp, *dp;
  325. fz_colorspace *colorspace = NULL;
  326. unsigned int x;
  327. int k;
  328. size_t stride;
  329. fz_pixmap *image = NULL;
  330. fz_var(colorspace);
  331. fz_var(image);
  332. fz_var(row);
  333. row[0] = NULL;
  334. cinfo.mem = NULL;
  335. cinfo.global_state = 0;
  336. cinfo.err = jpeg_std_error(&err);
  337. err.output_message = output_message;
  338. err.error_exit = error_exit;
  339. cinfo.client_data = NULL;
  340. fz_jpg_mem_init((j_common_ptr)&cinfo, ctx);
  341. fz_try(ctx)
  342. {
  343. jpeg_create_decompress(&cinfo);
  344. cinfo.src = &src;
  345. src.init_source = init_source;
  346. src.fill_input_buffer = fill_input_buffer;
  347. src.skip_input_data = skip_input_data;
  348. src.resync_to_restart = jpeg_resync_to_restart;
  349. src.term_source = term_source;
  350. src.next_input_byte = rbuf;
  351. src.bytes_in_buffer = rlen;
  352. jpeg_save_markers(&cinfo, JPEG_APP0+1, 0xffff);
  353. jpeg_save_markers(&cinfo, JPEG_APP0+13, 0xffff);
  354. jpeg_save_markers(&cinfo, JPEG_APP0+2, 0xffff);
  355. jpeg_read_header(&cinfo, 1);
  356. jpeg_start_decompress(&cinfo);
  357. if (cinfo.output_components == 1)
  358. colorspace = fz_keep_colorspace(ctx, fz_device_gray(ctx));
  359. else if (cinfo.output_components == 3)
  360. colorspace = fz_keep_colorspace(ctx, fz_device_rgb(ctx));
  361. else if (cinfo.output_components == 4)
  362. colorspace = fz_keep_colorspace(ctx, fz_device_cmyk(ctx));
  363. colorspace = extract_icc_profile(ctx, cinfo.marker_list, cinfo.output_components, colorspace);
  364. if (!colorspace)
  365. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot determine colorspace");
  366. image = fz_new_pixmap(ctx, colorspace, cinfo.output_width, cinfo.output_height, NULL, 0);
  367. if (extract_exif_resolution(cinfo.marker_list, &image->xres, &image->yres, NULL))
  368. /* XPS prefers EXIF resolution to JFIF density */;
  369. else if (extract_app13_resolution(cinfo.marker_list, &image->xres, &image->yres))
  370. /* XPS prefers APP13 resolution to JFIF density */;
  371. else if (cinfo.density_unit == 1)
  372. {
  373. image->xres = cinfo.X_density;
  374. image->yres = cinfo.Y_density;
  375. }
  376. else if (cinfo.density_unit == 2)
  377. {
  378. image->xres = cinfo.X_density * 254 / 100;
  379. image->yres = cinfo.Y_density * 254 / 100;
  380. }
  381. if (image->xres <= 0) image->xres = 96;
  382. if (image->yres <= 0) image->yres = 96;
  383. fz_clear_pixmap(ctx, image);
  384. row[0] = fz_malloc(ctx, (size_t)cinfo.output_components * cinfo.output_width);
  385. dp = image->samples;
  386. stride = image->stride - image->w * (size_t)image->n;
  387. while (cinfo.output_scanline < cinfo.output_height)
  388. {
  389. jpeg_read_scanlines(&cinfo, row, 1);
  390. // Invert CMYK polarity for some CMYK images (see comment in filter-dct for details).
  391. if (cinfo.out_color_space == JCS_CMYK && cinfo.Adobe_transform == 2)
  392. invert_cmyk(row[0], image->stride);
  393. sp = row[0];
  394. for (x = 0; x < cinfo.output_width; x++)
  395. {
  396. for (k = 0; k < cinfo.output_components; k++)
  397. *dp++ = *sp++;
  398. }
  399. dp += stride;
  400. }
  401. }
  402. fz_always(ctx)
  403. {
  404. fz_drop_colorspace(ctx, colorspace);
  405. fz_free(ctx, row[0]);
  406. row[0] = NULL;
  407. /* We call jpeg_abort rather than the more usual
  408. * jpeg_finish_decompress here. This has the same effect,
  409. * but doesn't spew warnings if we didn't read enough data etc.
  410. * Annoyingly jpeg_abort can throw
  411. */
  412. fz_try(ctx)
  413. jpeg_abort((j_common_ptr)&cinfo);
  414. fz_catch(ctx)
  415. {
  416. /* Ignore any errors here */
  417. }
  418. jpeg_destroy_decompress(&cinfo);
  419. fz_jpg_mem_term((j_common_ptr)&cinfo);
  420. }
  421. fz_catch(ctx)
  422. {
  423. fz_drop_pixmap(ctx, image);
  424. fz_rethrow(ctx);
  425. }
  426. return image;
  427. }
  428. void
  429. fz_load_jpeg_info(fz_context *ctx, const unsigned char *rbuf, size_t rlen, int *xp, int *yp, int *xresp, int *yresp, fz_colorspace **cspacep, uint8_t *orientation)
  430. {
  431. struct jpeg_decompress_struct cinfo;
  432. struct jpeg_error_mgr err;
  433. struct jpeg_source_mgr src;
  434. fz_colorspace *icc = NULL;
  435. *cspacep = NULL;
  436. if (orientation)
  437. *orientation = 0;
  438. cinfo.mem = NULL;
  439. cinfo.global_state = 0;
  440. cinfo.err = jpeg_std_error(&err);
  441. err.error_exit = error_exit;
  442. cinfo.client_data = NULL;
  443. fz_jpg_mem_init((j_common_ptr)&cinfo, ctx);
  444. fz_try(ctx)
  445. {
  446. jpeg_create_decompress(&cinfo);
  447. cinfo.src = &src;
  448. src.init_source = init_source;
  449. src.fill_input_buffer = fill_input_buffer;
  450. src.skip_input_data = skip_input_data;
  451. src.resync_to_restart = jpeg_resync_to_restart;
  452. src.term_source = term_source;
  453. src.next_input_byte = rbuf;
  454. src.bytes_in_buffer = rlen;
  455. jpeg_save_markers(&cinfo, JPEG_APP0+1, 0xffff);
  456. jpeg_save_markers(&cinfo, JPEG_APP0+13, 0xffff);
  457. jpeg_save_markers(&cinfo, JPEG_APP0+2, 0xffff);
  458. jpeg_read_header(&cinfo, 1);
  459. *xp = cinfo.image_width;
  460. *yp = cinfo.image_height;
  461. if (cinfo.num_components == 1)
  462. *cspacep = fz_keep_colorspace(ctx, fz_device_gray(ctx));
  463. else if (cinfo.num_components == 3)
  464. *cspacep = fz_keep_colorspace(ctx, fz_device_rgb(ctx));
  465. else if (cinfo.num_components == 4)
  466. *cspacep = fz_keep_colorspace(ctx, fz_device_cmyk(ctx));
  467. *cspacep = extract_icc_profile(ctx, cinfo.marker_list, cinfo.num_components, *cspacep);
  468. if (!*cspacep)
  469. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot determine colorspace");
  470. if (extract_exif_resolution(cinfo.marker_list, xresp, yresp, orientation))
  471. /* XPS prefers EXIF resolution to JFIF density */;
  472. else if (extract_app13_resolution(cinfo.marker_list, xresp, yresp))
  473. /* XPS prefers APP13 resolution to JFIF density */;
  474. else if (cinfo.density_unit == 1)
  475. {
  476. *xresp = cinfo.X_density;
  477. *yresp = cinfo.Y_density;
  478. }
  479. else if (cinfo.density_unit == 2)
  480. {
  481. *xresp = cinfo.X_density * 254 / 100;
  482. *yresp = cinfo.Y_density * 254 / 100;
  483. }
  484. else
  485. {
  486. *xresp = 0;
  487. *yresp = 0;
  488. }
  489. if (*xresp <= 0) *xresp = 96;
  490. if (*yresp <= 0) *yresp = 96;
  491. }
  492. fz_always(ctx)
  493. {
  494. jpeg_destroy_decompress(&cinfo);
  495. fz_jpg_mem_term((j_common_ptr)&cinfo);
  496. }
  497. fz_catch(ctx)
  498. {
  499. fz_drop_colorspace(ctx, icc);
  500. fz_rethrow(ctx);
  501. }
  502. }