filter-fax.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854
  1. // Copyright (C) 2004-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include <string.h>
  24. #include <limits.h>
  25. /* Fax G3/G4 decoder */
  26. /* TODO: uncompressed */
  27. /*
  28. <raph> the first 2^(initialbits) entries map bit patterns to decodes
  29. <raph> let's say initial_bits is 8 for the sake of example
  30. <raph> and that the code is 1001
  31. <raph> that means that entries 0x90 .. 0x9f have the entry { val, 4 }
  32. <raph> because those are all the bytes that start with the code
  33. <raph> and the 4 is the length of the code
  34. ... if (n_bits > initial_bits) ...
  35. <raph> anyway, in that case, it basically points to a mini table
  36. <raph> the n_bits is the maximum length of all codes beginning with that byte
  37. <raph> so 2^(n_bits - initial_bits) is the size of the mini-table
  38. <raph> peter came up with this, and it makes sense
  39. */
  40. typedef struct
  41. {
  42. short val;
  43. short nbits;
  44. } cfd_node;
  45. enum
  46. {
  47. cfd_white_initial_bits = 8,
  48. cfd_black_initial_bits = 7,
  49. cfd_2d_initial_bits = 7,
  50. cfd_uncompressed_initial_bits = 6 /* must be 6 */
  51. };
  52. /* non-run codes in tables */
  53. enum
  54. {
  55. ERROR = -1,
  56. ZEROS = -2, /* EOL follows, possibly with more padding first */
  57. UNCOMPRESSED = -3
  58. };
  59. /* semantic codes for cf_2d_decode */
  60. enum
  61. {
  62. P = -4,
  63. H = -5,
  64. VR3 = 0,
  65. VR2 = 1,
  66. VR1 = 2,
  67. V0 = 3,
  68. VL1 = 4,
  69. VL2 = 5,
  70. VL3 = 6
  71. };
  72. /* White decoding table. */
  73. static const cfd_node cf_white_decode[] = {
  74. {256,12},{272,12},{29,8},{30,8},{45,8},{46,8},{22,7},{22,7},
  75. {23,7},{23,7},{47,8},{48,8},{13,6},{13,6},{13,6},{13,6},{20,7},
  76. {20,7},{33,8},{34,8},{35,8},{36,8},{37,8},{38,8},{19,7},{19,7},
  77. {31,8},{32,8},{1,6},{1,6},{1,6},{1,6},{12,6},{12,6},{12,6},{12,6},
  78. {53,8},{54,8},{26,7},{26,7},{39,8},{40,8},{41,8},{42,8},{43,8},
  79. {44,8},{21,7},{21,7},{28,7},{28,7},{61,8},{62,8},{63,8},{0,8},
  80. {320,8},{384,8},{10,5},{10,5},{10,5},{10,5},{10,5},{10,5},{10,5},
  81. {10,5},{11,5},{11,5},{11,5},{11,5},{11,5},{11,5},{11,5},{11,5},
  82. {27,7},{27,7},{59,8},{60,8},{288,9},{290,9},{18,7},{18,7},{24,7},
  83. {24,7},{49,8},{50,8},{51,8},{52,8},{25,7},{25,7},{55,8},{56,8},
  84. {57,8},{58,8},{192,6},{192,6},{192,6},{192,6},{1664,6},{1664,6},
  85. {1664,6},{1664,6},{448,8},{512,8},{292,9},{640,8},{576,8},{294,9},
  86. {296,9},{298,9},{300,9},{302,9},{256,7},{256,7},{2,4},{2,4},{2,4},
  87. {2,4},{2,4},{2,4},{2,4},{2,4},{2,4},{2,4},{2,4},{2,4},{2,4},{2,4},
  88. {2,4},{2,4},{3,4},{3,4},{3,4},{3,4},{3,4},{3,4},{3,4},{3,4},{3,4},
  89. {3,4},{3,4},{3,4},{3,4},{3,4},{3,4},{3,4},{128,5},{128,5},{128,5},
  90. {128,5},{128,5},{128,5},{128,5},{128,5},{8,5},{8,5},{8,5},{8,5},
  91. {8,5},{8,5},{8,5},{8,5},{9,5},{9,5},{9,5},{9,5},{9,5},{9,5},{9,5},
  92. {9,5},{16,6},{16,6},{16,6},{16,6},{17,6},{17,6},{17,6},{17,6},
  93. {4,4},{4,4},{4,4},{4,4},{4,4},{4,4},{4,4},{4,4},{4,4},{4,4},{4,4},
  94. {4,4},{4,4},{4,4},{4,4},{4,4},{5,4},{5,4},{5,4},{5,4},{5,4},{5,4},
  95. {5,4},{5,4},{5,4},{5,4},{5,4},{5,4},{5,4},{5,4},{5,4},{5,4},
  96. {14,6},{14,6},{14,6},{14,6},{15,6},{15,6},{15,6},{15,6},{64,5},
  97. {64,5},{64,5},{64,5},{64,5},{64,5},{64,5},{64,5},{6,4},{6,4},
  98. {6,4},{6,4},{6,4},{6,4},{6,4},{6,4},{6,4},{6,4},{6,4},{6,4},{6,4},
  99. {6,4},{6,4},{6,4},{7,4},{7,4},{7,4},{7,4},{7,4},{7,4},{7,4},{7,4},
  100. {7,4},{7,4},{7,4},{7,4},{7,4},{7,4},{7,4},{7,4},{-2,3},{-2,3},
  101. {-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},
  102. {-1,0},{-1,0},{-1,0},{-1,0},{-3,4},{1792,3},{1792,3},{1984,4},
  103. {2048,4},{2112,4},{2176,4},{2240,4},{2304,4},{1856,3},{1856,3},
  104. {1920,3},{1920,3},{2368,4},{2432,4},{2496,4},{2560,4},{1472,1},
  105. {1536,1},{1600,1},{1728,1},{704,1},{768,1},{832,1},{896,1},
  106. {960,1},{1024,1},{1088,1},{1152,1},{1216,1},{1280,1},{1344,1},
  107. {1408,1}
  108. };
  109. /* Black decoding table. */
  110. static const cfd_node cf_black_decode[] = {
  111. {128,12},{160,13},{224,12},{256,12},{10,7},{11,7},{288,12},{12,7},
  112. {9,6},{9,6},{8,6},{8,6},{7,5},{7,5},{7,5},{7,5},{6,4},{6,4},{6,4},
  113. {6,4},{6,4},{6,4},{6,4},{6,4},{5,4},{5,4},{5,4},{5,4},{5,4},{5,4},
  114. {5,4},{5,4},{1,3},{1,3},{1,3},{1,3},{1,3},{1,3},{1,3},{1,3},{1,3},
  115. {1,3},{1,3},{1,3},{1,3},{1,3},{1,3},{1,3},{4,3},{4,3},{4,3},{4,3},
  116. {4,3},{4,3},{4,3},{4,3},{4,3},{4,3},{4,3},{4,3},{4,3},{4,3},{4,3},
  117. {4,3},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},
  118. {3,2},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},
  119. {3,2},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},{3,2},
  120. {2,2},{2,2},{2,2},{2,2},{2,2},{2,2},{2,2},{2,2},{2,2},{2,2},{2,2},
  121. {2,2},{2,2},{2,2},{2,2},{2,2},{2,2},{2,2},{2,2},{2,2},{2,2},{2,2},
  122. {2,2},{2,2},{2,2},{2,2},{2,2},{2,2},{2,2},{2,2},{2,2},{2,2},
  123. {-2,4},{-2,4},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},
  124. {-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-3,5},{1792,4},
  125. {1792,4},{1984,5},{2048,5},{2112,5},{2176,5},{2240,5},{2304,5},
  126. {1856,4},{1856,4},{1920,4},{1920,4},{2368,5},{2432,5},{2496,5},
  127. {2560,5},{18,3},{18,3},{18,3},{18,3},{18,3},{18,3},{18,3},{18,3},
  128. {52,5},{52,5},{640,6},{704,6},{768,6},{832,6},{55,5},{55,5},
  129. {56,5},{56,5},{1280,6},{1344,6},{1408,6},{1472,6},{59,5},{59,5},
  130. {60,5},{60,5},{1536,6},{1600,6},{24,4},{24,4},{24,4},{24,4},
  131. {25,4},{25,4},{25,4},{25,4},{1664,6},{1728,6},{320,5},{320,5},
  132. {384,5},{384,5},{448,5},{448,5},{512,6},{576,6},{53,5},{53,5},
  133. {54,5},{54,5},{896,6},{960,6},{1024,6},{1088,6},{1152,6},{1216,6},
  134. {64,3},{64,3},{64,3},{64,3},{64,3},{64,3},{64,3},{64,3},{13,1},
  135. {13,1},{13,1},{13,1},{13,1},{13,1},{13,1},{13,1},{13,1},{13,1},
  136. {13,1},{13,1},{13,1},{13,1},{13,1},{13,1},{23,4},{23,4},{50,5},
  137. {51,5},{44,5},{45,5},{46,5},{47,5},{57,5},{58,5},{61,5},{256,5},
  138. {16,3},{16,3},{16,3},{16,3},{17,3},{17,3},{17,3},{17,3},{48,5},
  139. {49,5},{62,5},{63,5},{30,5},{31,5},{32,5},{33,5},{40,5},{41,5},
  140. {22,4},{22,4},{14,1},{14,1},{14,1},{14,1},{14,1},{14,1},{14,1},
  141. {14,1},{14,1},{14,1},{14,1},{14,1},{14,1},{14,1},{14,1},{14,1},
  142. {15,2},{15,2},{15,2},{15,2},{15,2},{15,2},{15,2},{15,2},{128,5},
  143. {192,5},{26,5},{27,5},{28,5},{29,5},{19,4},{19,4},{20,4},{20,4},
  144. {34,5},{35,5},{36,5},{37,5},{38,5},{39,5},{21,4},{21,4},{42,5},
  145. {43,5},{0,3},{0,3},{0,3},{0,3}
  146. };
  147. /* 2-D decoding table. */
  148. static const cfd_node cf_2d_decode[] = {
  149. {128,11},{144,10},{6,7},{0,7},{5,6},{5,6},{1,6},{1,6},{-4,4},
  150. {-4,4},{-4,4},{-4,4},{-4,4},{-4,4},{-4,4},{-4,4},{-5,3},{-5,3},
  151. {-5,3},{-5,3},{-5,3},{-5,3},{-5,3},{-5,3},{-5,3},{-5,3},{-5,3},
  152. {-5,3},{-5,3},{-5,3},{-5,3},{-5,3},{4,3},{4,3},{4,3},{4,3},{4,3},
  153. {4,3},{4,3},{4,3},{4,3},{4,3},{4,3},{4,3},{4,3},{4,3},{4,3},{4,3},
  154. {2,3},{2,3},{2,3},{2,3},{2,3},{2,3},{2,3},{2,3},{2,3},{2,3},{2,3},
  155. {2,3},{2,3},{2,3},{2,3},{2,3},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},
  156. {3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},
  157. {3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},
  158. {3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},
  159. {3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},
  160. {3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},{3,1},
  161. {3,1},{3,1},{3,1},{-2,4},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},
  162. {-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},
  163. {-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-1,0},{-3,3}
  164. };
  165. /* bit magic */
  166. static inline int getbit(const unsigned char *buf, int x)
  167. {
  168. return ( buf[x >> 3] >> ( 7 - (x & 7) ) ) & 1;
  169. }
  170. static const unsigned char mask[8] = {
  171. 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0
  172. };
  173. static const unsigned char clz[256] = {
  174. 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
  175. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  176. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  177. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  178. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  179. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  180. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  181. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  182. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  183. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  184. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  185. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  186. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  187. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  188. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  189. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  190. };
  191. static inline int
  192. find_changing(const unsigned char *line, int x, int w)
  193. {
  194. int a, b, m, W;
  195. if (!line)
  196. return w;
  197. /* We assume w > 0, -1 <= x < w */
  198. if (x < 0)
  199. {
  200. x = 0;
  201. m = 0xFF;
  202. }
  203. else
  204. {
  205. /* Mask out the bits we've already used (including the one
  206. * we started from) */
  207. m = mask[x & 7];
  208. }
  209. /* We have 'w' pixels (bits) in line. The last pixel that can be
  210. * safely accessed is the (w-1)th bit of line.
  211. * By taking W = w>>3, we know that the first W bytes of line are
  212. * full, with w&7 stray bits following. */
  213. W = w>>3;
  214. x >>= 3;
  215. a = line[x]; /* Safe as x < w => x <= w-1 => x>>3 <= (w-1)>>3 */
  216. b = a ^ (a>>1);
  217. b &= m;
  218. if (x >= W)
  219. {
  220. /* Within the last byte already */
  221. x = (x<<3) + clz[b];
  222. if (x > w)
  223. x = w;
  224. return x;
  225. }
  226. while (b == 0)
  227. {
  228. if (++x >= W)
  229. goto nearend;
  230. b = a & 1;
  231. a = line[x];
  232. b = (b<<7) ^ a ^ (a>>1);
  233. }
  234. return (x<<3) + clz[b];
  235. nearend:
  236. /* We have less than a byte to go. If no stray bits, exit now. */
  237. if ((x<<3) == w)
  238. return w;
  239. b = a&1;
  240. a = line[x];
  241. b = (b<<7) ^ a ^ (a>>1);
  242. x = (x<<3) + clz[b];
  243. if (x > w)
  244. x = w;
  245. return x;
  246. }
  247. static inline int
  248. find_changing_color(const unsigned char *line, int x, int w, int color)
  249. {
  250. if (!line || x >= w)
  251. return w;
  252. x = find_changing(line, (x > 0 || !color) ? x : -1, w);
  253. if (x < w && getbit(line, x) != color)
  254. x = find_changing(line, x, w);
  255. return x;
  256. }
  257. static const unsigned char lm[8] = {
  258. 0xFF, 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01
  259. };
  260. static const unsigned char rm[8] = {
  261. 0x00, 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE
  262. };
  263. static inline void setbits(unsigned char *line, int x0, int x1)
  264. {
  265. int a0, a1, b0, b1, a;
  266. if (x1 <= x0)
  267. return;
  268. a0 = x0 >> 3;
  269. a1 = x1 >> 3;
  270. b0 = x0 & 7;
  271. b1 = x1 & 7;
  272. if (a0 == a1)
  273. {
  274. if (b1)
  275. line[a0] |= lm[b0] & rm[b1];
  276. }
  277. else
  278. {
  279. line[a0] |= lm[b0];
  280. for (a = a0 + 1; a < a1; a++)
  281. line[a] = 0xFF;
  282. if (b1)
  283. line[a1] |= rm[b1];
  284. }
  285. }
  286. enum
  287. {
  288. STATE_INIT, /* initial state, optionally waiting for EOL */
  289. STATE_NORMAL, /* neutral state, waiting for any code */
  290. STATE_MAKEUP, /* got a 1d makeup code, waiting for terminating code */
  291. STATE_EOL, /* at eol, needs output buffer space */
  292. STATE_H1, STATE_H2, /* in H part 1 and 2 (both makeup and terminating codes) */
  293. STATE_DONE /* all done */
  294. };
  295. typedef struct
  296. {
  297. fz_stream *chain;
  298. int k;
  299. int end_of_line;
  300. int encoded_byte_align;
  301. int columns;
  302. int rows;
  303. int end_of_block;
  304. int black_is_1;
  305. int stride;
  306. int ridx;
  307. int bidx;
  308. unsigned int word;
  309. int stage;
  310. int a, c, dim, eolc;
  311. unsigned char *ref;
  312. unsigned char *dst;
  313. unsigned char *rp, *wp;
  314. unsigned char buffer[4096];
  315. } fz_faxd;
  316. static inline void eat_bits(fz_faxd *fax, int nbits)
  317. {
  318. fax->word <<= nbits;
  319. fax->bidx += nbits;
  320. }
  321. static int
  322. fill_bits(fz_context *ctx, fz_faxd *fax)
  323. {
  324. /* The longest length of bits we'll ever need is 13. Never read more
  325. * than we need to avoid unnecessary overreading of the end of the
  326. * stream. */
  327. while (fax->bidx > (32-13))
  328. {
  329. int c = fz_read_byte(ctx, fax->chain);
  330. if (c == EOF)
  331. return EOF;
  332. fax->bidx -= 8;
  333. fax->word |= c << fax->bidx;
  334. }
  335. return 0;
  336. }
  337. static int
  338. get_code(fz_context *ctx, fz_faxd *fax, const cfd_node *table, int initialbits)
  339. {
  340. unsigned int word = fax->word;
  341. int tidx = word >> (32 - initialbits);
  342. int val = table[tidx].val;
  343. int nbits = table[tidx].nbits;
  344. if (nbits > initialbits)
  345. {
  346. int wordmask = (1 << (32 - initialbits)) - 1;
  347. tidx = val + ((word & wordmask) >> (32 - nbits));
  348. val = table[tidx].val;
  349. nbits = initialbits + table[tidx].nbits;
  350. }
  351. eat_bits(fax, nbits);
  352. return val;
  353. }
  354. /* decode one 1d code */
  355. static void
  356. dec1d(fz_context *ctx, fz_faxd *fax)
  357. {
  358. int code;
  359. if (fax->a == -1)
  360. fax->a = 0;
  361. if (fax->c)
  362. code = get_code(ctx, fax, cf_black_decode, cfd_black_initial_bits);
  363. else
  364. code = get_code(ctx, fax, cf_white_decode, cfd_white_initial_bits);
  365. if (code == UNCOMPRESSED)
  366. fz_throw(ctx, FZ_ERROR_FORMAT, "uncompressed data in faxd");
  367. if (code < 0)
  368. fz_throw(ctx, FZ_ERROR_FORMAT, "negative code in 1d faxd");
  369. if (fax->a + code > fax->columns)
  370. fz_throw(ctx, FZ_ERROR_FORMAT, "overflow in 1d faxd");
  371. if (fax->c)
  372. setbits(fax->dst, fax->a, fax->a + code);
  373. fax->a += code;
  374. if (code < 64)
  375. {
  376. fax->c = !fax->c;
  377. fax->stage = STATE_NORMAL;
  378. }
  379. else
  380. fax->stage = STATE_MAKEUP;
  381. }
  382. /* decode one 2d code */
  383. static void
  384. dec2d(fz_context *ctx, fz_faxd *fax)
  385. {
  386. int code, b1, b2;
  387. if (fax->stage == STATE_H1 || fax->stage == STATE_H2)
  388. {
  389. if (fax->a == -1)
  390. fax->a = 0;
  391. if (fax->c)
  392. code = get_code(ctx, fax, cf_black_decode, cfd_black_initial_bits);
  393. else
  394. code = get_code(ctx, fax, cf_white_decode, cfd_white_initial_bits);
  395. if (code == UNCOMPRESSED)
  396. fz_throw(ctx, FZ_ERROR_FORMAT, "uncompressed data in faxd");
  397. if (code < 0)
  398. fz_throw(ctx, FZ_ERROR_FORMAT, "negative code in 2d faxd");
  399. if (fax->a + code > fax->columns)
  400. fz_throw(ctx, FZ_ERROR_FORMAT, "overflow in 2d faxd");
  401. if (fax->c)
  402. setbits(fax->dst, fax->a, fax->a + code);
  403. fax->a += code;
  404. if (code < 64)
  405. {
  406. fax->c = !fax->c;
  407. if (fax->stage == STATE_H1)
  408. fax->stage = STATE_H2;
  409. else if (fax->stage == STATE_H2)
  410. fax->stage = STATE_NORMAL;
  411. }
  412. return;
  413. }
  414. code = get_code(ctx, fax, cf_2d_decode, cfd_2d_initial_bits);
  415. switch (code)
  416. {
  417. case H:
  418. fax->stage = STATE_H1;
  419. break;
  420. case P:
  421. b1 = find_changing_color(fax->ref, fax->a, fax->columns, !fax->c);
  422. if (b1 >= fax->columns)
  423. b2 = fax->columns;
  424. else
  425. b2 = find_changing(fax->ref, b1, fax->columns);
  426. if (fax->c) setbits(fax->dst, fax->a, b2);
  427. fax->a = b2;
  428. break;
  429. case V0:
  430. b1 = find_changing_color(fax->ref, fax->a, fax->columns, !fax->c);
  431. if (fax->c) setbits(fax->dst, fax->a, b1);
  432. fax->a = b1;
  433. fax->c = !fax->c;
  434. break;
  435. case VR1:
  436. b1 = 1 + find_changing_color(fax->ref, fax->a, fax->columns, !fax->c);
  437. if (b1 >= fax->columns) b1 = fax->columns;
  438. if (fax->c) setbits(fax->dst, fax->a, b1);
  439. fax->a = b1;
  440. fax->c = !fax->c;
  441. break;
  442. case VR2:
  443. b1 = 2 + find_changing_color(fax->ref, fax->a, fax->columns, !fax->c);
  444. if (b1 >= fax->columns) b1 = fax->columns;
  445. if (fax->c) setbits(fax->dst, fax->a, b1);
  446. fax->a = b1;
  447. fax->c = !fax->c;
  448. break;
  449. case VR3:
  450. b1 = 3 + find_changing_color(fax->ref, fax->a, fax->columns, !fax->c);
  451. if (b1 >= fax->columns) b1 = fax->columns;
  452. if (fax->c) setbits(fax->dst, fax->a, b1);
  453. fax->a = b1;
  454. fax->c = !fax->c;
  455. break;
  456. case VL1:
  457. b1 = -1 + find_changing_color(fax->ref, fax->a, fax->columns, !fax->c);
  458. if (b1 < 0) b1 = 0;
  459. if (fax->c) setbits(fax->dst, fax->a, b1);
  460. fax->a = b1;
  461. fax->c = !fax->c;
  462. break;
  463. case VL2:
  464. b1 = -2 + find_changing_color(fax->ref, fax->a, fax->columns, !fax->c);
  465. if (b1 < 0) b1 = 0;
  466. if (fax->c) setbits(fax->dst, fax->a, b1);
  467. fax->a = b1;
  468. fax->c = !fax->c;
  469. break;
  470. case VL3:
  471. b1 = -3 + find_changing_color(fax->ref, fax->a, fax->columns, !fax->c);
  472. if (b1 < 0) b1 = 0;
  473. if (fax->c) setbits(fax->dst, fax->a, b1);
  474. fax->a = b1;
  475. fax->c = !fax->c;
  476. break;
  477. case UNCOMPRESSED:
  478. fz_throw(ctx, FZ_ERROR_FORMAT, "uncompressed data in faxd");
  479. case ERROR:
  480. fz_throw(ctx, FZ_ERROR_FORMAT, "invalid code in 2d faxd");
  481. default:
  482. fz_throw(ctx, FZ_ERROR_FORMAT, "invalid code in 2d faxd (%d)", code);
  483. }
  484. }
  485. static int
  486. next_faxd(fz_context *ctx, fz_stream *stm, size_t max)
  487. {
  488. fz_faxd *fax = stm->state;
  489. unsigned char *p = fax->buffer;
  490. unsigned char *ep;
  491. unsigned char *tmp;
  492. if (max > sizeof(fax->buffer))
  493. max = sizeof(fax->buffer);
  494. ep = p + max;
  495. if (fax->stage == STATE_INIT && fax->end_of_line)
  496. {
  497. fill_bits(ctx, fax);
  498. if ((fax->word >> (32 - 12)) != 1)
  499. {
  500. fz_warn(ctx, "faxd stream doesn't start with EOL");
  501. while (!fill_bits(ctx, fax) && (fax->word >> (32 - 12)) != 1)
  502. eat_bits(fax, 1);
  503. }
  504. if ((fax->word >> (32 - 12)) != 1)
  505. fz_throw(ctx, FZ_ERROR_FORMAT, "initial EOL not found");
  506. }
  507. if (fax->stage == STATE_INIT)
  508. fax->stage = STATE_NORMAL;
  509. if (fax->stage == STATE_DONE)
  510. return EOF;
  511. if (fax->stage == STATE_EOL)
  512. goto eol;
  513. loop:
  514. if (fill_bits(ctx, fax))
  515. {
  516. if (fax->bidx > 31)
  517. {
  518. if (fax->a > 0)
  519. goto eol;
  520. goto rtc;
  521. }
  522. }
  523. if ((fax->word >> (32 - 12)) == 0)
  524. {
  525. eat_bits(fax, 1);
  526. goto loop;
  527. }
  528. if ((fax->word >> (32 - 12)) == 1)
  529. {
  530. eat_bits(fax, 12);
  531. fax->eolc ++;
  532. if (fax->k > 0)
  533. {
  534. if (fax->a == -1)
  535. fax->a = 0;
  536. if ((fax->word >> (32 - 1)) == 1)
  537. fax->dim = 1;
  538. else
  539. fax->dim = 2;
  540. eat_bits(fax, 1);
  541. }
  542. }
  543. else if (fax->k > 0 && fax->a == -1)
  544. {
  545. fax->a = 0;
  546. if ((fax->word >> (32 - 1)) == 1)
  547. fax->dim = 1;
  548. else
  549. fax->dim = 2;
  550. eat_bits(fax, 1);
  551. }
  552. else if (fax->dim == 1)
  553. {
  554. fax->eolc = 0;
  555. fz_try(ctx)
  556. {
  557. dec1d(ctx, fax);
  558. }
  559. fz_catch(ctx)
  560. {
  561. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  562. fz_report_error(ctx);
  563. goto error;
  564. }
  565. }
  566. else if (fax->dim == 2)
  567. {
  568. fax->eolc = 0;
  569. fz_try(ctx)
  570. {
  571. dec2d(ctx, fax);
  572. }
  573. fz_catch(ctx)
  574. {
  575. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  576. fz_report_error(ctx);
  577. goto error;
  578. }
  579. }
  580. /* Some Fax streams appear to give up at the end. We could detect for this
  581. * with this:
  582. * if (fax->a >= fax->columns && fax->rows == fax->ridx+1)
  583. * goto eol;
  584. */
  585. /* no eol check after makeup codes nor in the middle of an H code */
  586. if (fax->stage == STATE_MAKEUP || fax->stage == STATE_H1 || fax->stage == STATE_H2)
  587. goto loop;
  588. /* check for eol conditions */
  589. if (fax->eolc || fax->a >= fax->columns)
  590. {
  591. if (fax->a > 0)
  592. goto eol;
  593. if (fax->eolc == (fax->k < 0 ? 2 : 6))
  594. goto rtc;
  595. }
  596. goto loop;
  597. eol:
  598. fax->stage = STATE_EOL;
  599. if (fax->black_is_1)
  600. {
  601. while (fax->rp < fax->wp && p < ep)
  602. *p++ = *fax->rp++;
  603. }
  604. else
  605. {
  606. while (fax->rp < fax->wp && p < ep)
  607. *p++ = *fax->rp++ ^ 0xff;
  608. }
  609. if (fax->rp < fax->wp)
  610. {
  611. stm->rp = fax->buffer;
  612. stm->wp = p;
  613. stm->pos += (p - fax->buffer);
  614. if (p == fax->buffer)
  615. return EOF;
  616. return *stm->rp++;
  617. }
  618. tmp = fax->ref;
  619. fax->ref = fax->dst;
  620. fax->dst = tmp;
  621. memset(fax->dst, 0, fax->stride);
  622. fax->rp = fax->dst;
  623. fax->wp = fax->dst + fax->stride;
  624. fax->stage = STATE_NORMAL;
  625. fax->c = 0;
  626. fax->a = -1;
  627. fax->ridx ++;
  628. if (!fax->end_of_block && fax->rows && fax->ridx >= fax->rows)
  629. goto rtc;
  630. /* we have not read dim from eol, make a guess */
  631. if (fax->k > 0 && !fax->eolc && fax->a == -1)
  632. {
  633. if (fax->ridx % fax->k == 0)
  634. fax->dim = 1;
  635. else
  636. fax->dim = 2;
  637. }
  638. /* If end_of_line & encoded_byte_align - we don't know what to do here.
  639. * GS doesn't offer us any hints either. Previously, we used to do:
  640. * eat_bits(fax, (12 - fax->bidx) & 7);
  641. * but we can't understand what we were trying to do, and it fails with
  642. * at least one file. Removing it doesn't harm anything in the cluster,
  643. * and brings us into line with gs. */
  644. if (fax->encoded_byte_align && !fax->end_of_line)
  645. eat_bits(fax, (8 - fax->bidx) & 7);
  646. /* no more space in output, don't decode the next row yet */
  647. if (p == fax->buffer + max)
  648. {
  649. stm->rp = fax->buffer;
  650. stm->wp = p;
  651. stm->pos += (p - fax->buffer);
  652. if (p == fax->buffer)
  653. return EOF;
  654. return *stm->rp++;
  655. }
  656. goto loop;
  657. error:
  658. /* decode the remaining pixels up to where the error occurred */
  659. if (fax->black_is_1)
  660. {
  661. while (fax->rp < fax->wp && p < ep)
  662. *p++ = *fax->rp++;
  663. }
  664. else
  665. {
  666. while (fax->rp < fax->wp && p < ep)
  667. *p++ = *fax->rp++ ^ 0xff;
  668. }
  669. /* fallthrough */
  670. rtc:
  671. fax->stage = STATE_DONE;
  672. stm->rp = fax->buffer;
  673. stm->wp = p;
  674. stm->pos += (p - fax->buffer);
  675. if (p == fax->buffer)
  676. return EOF;
  677. return *stm->rp++;
  678. }
  679. static void
  680. close_faxd(fz_context *ctx, void *state_)
  681. {
  682. fz_faxd *fax = (fz_faxd *)state_;
  683. int i;
  684. /* if we read any extra bytes, try to put them back */
  685. i = (32 - fax->bidx) / 8;
  686. while (i--)
  687. fz_unread_byte(ctx, fax->chain);
  688. fz_drop_stream(ctx, fax->chain);
  689. fz_free(ctx, fax->ref);
  690. fz_free(ctx, fax->dst);
  691. fz_free(ctx, fax);
  692. }
  693. fz_stream *
  694. fz_open_faxd(fz_context *ctx, fz_stream *chain,
  695. int k, int end_of_line, int encoded_byte_align,
  696. int columns, int rows, int end_of_block, int black_is_1)
  697. {
  698. fz_faxd *fax;
  699. if (columns < 0 || columns >= INT_MAX - 7)
  700. fz_throw(ctx, FZ_ERROR_LIMIT, "too many columns integer overflow (%d)", columns);
  701. fax = fz_malloc_struct(ctx, fz_faxd);
  702. fz_try(ctx)
  703. {
  704. fax->ref = NULL;
  705. fax->dst = NULL;
  706. fax->k = k;
  707. fax->end_of_line = end_of_line;
  708. fax->encoded_byte_align = encoded_byte_align;
  709. fax->columns = columns;
  710. fax->rows = rows;
  711. fax->end_of_block = end_of_block;
  712. fax->black_is_1 = black_is_1;
  713. fax->stride = ((fax->columns - 1) >> 3) + 1;
  714. fax->ridx = 0;
  715. fax->bidx = 32;
  716. fax->word = 0;
  717. fax->stage = STATE_INIT;
  718. fax->a = -1;
  719. fax->c = 0;
  720. fax->dim = fax->k < 0 ? 2 : 1;
  721. fax->eolc = 0;
  722. fax->ref = Memento_label(fz_malloc(ctx, fax->stride), "fax_ref");
  723. fax->dst = Memento_label(fz_malloc(ctx, fax->stride), "fax_dst");
  724. fax->rp = fax->dst;
  725. fax->wp = fax->dst + fax->stride;
  726. memset(fax->ref, 0, fax->stride);
  727. memset(fax->dst, 0, fax->stride);
  728. fax->chain = fz_keep_stream(ctx, chain);
  729. }
  730. fz_catch(ctx)
  731. {
  732. fz_free(ctx, fax->dst);
  733. fz_free(ctx, fax->ref);
  734. fz_free(ctx, fax);
  735. fz_rethrow(ctx);
  736. }
  737. return fz_new_stream(ctx, fax, next_faxd, close_faxd);
  738. }