filter-lzw.c 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. // Copyright (C) 2004-2022 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include <assert.h>
  24. /* TODO: error checking */
  25. #define LZW_CLEAR(lzw) (1 << ((lzw)->min_bits - 1))
  26. #define LZW_EOD(lzw) (LZW_CLEAR(lzw) + 1)
  27. #define LZW_FIRST(lzw) (LZW_CLEAR(lzw) + 2)
  28. enum
  29. {
  30. MAX_BITS = 12,
  31. NUM_CODES = (1 << MAX_BITS),
  32. MAX_LENGTH = 4097
  33. };
  34. typedef struct
  35. {
  36. int prev; /* prev code (in string) */
  37. unsigned short length; /* string len, including this token */
  38. unsigned char value; /* data value */
  39. unsigned char first_char; /* first token of string */
  40. } lzw_code;
  41. typedef struct
  42. {
  43. fz_stream *chain;
  44. int eod;
  45. int early_change;
  46. int reverse_bits;
  47. int old_tiff;
  48. int min_bits; /* minimum num bits/code */
  49. int code_bits; /* num bits/code */
  50. int code; /* current code */
  51. int old_code; /* previously recognized code */
  52. int next_code; /* next free entry */
  53. lzw_code table[NUM_CODES];
  54. unsigned char bp[MAX_LENGTH];
  55. unsigned char *rp, *wp;
  56. unsigned char buffer[4096];
  57. } fz_lzwd;
  58. static int
  59. next_lzwd(fz_context *ctx, fz_stream *stm, size_t len)
  60. {
  61. fz_lzwd *lzw = stm->state;
  62. lzw_code *table = lzw->table;
  63. unsigned char *buf = lzw->buffer;
  64. unsigned char *p = buf;
  65. unsigned char *ep;
  66. unsigned char *s;
  67. int codelen;
  68. int code_bits = lzw->code_bits;
  69. int code = lzw->code;
  70. int old_code = lzw->old_code;
  71. int next_code = lzw->next_code;
  72. if (len > sizeof(lzw->buffer))
  73. len = sizeof(lzw->buffer);
  74. ep = buf + len;
  75. while (lzw->rp < lzw->wp && p < ep)
  76. *p++ = *lzw->rp++;
  77. while (p < ep)
  78. {
  79. if (lzw->eod)
  80. return EOF;
  81. if (fz_is_eof_bits(ctx, lzw->chain))
  82. {
  83. fz_warn(ctx, "premature end in lzw decode");
  84. lzw->eod = 1;
  85. break;
  86. }
  87. if (lzw->reverse_bits)
  88. code = fz_read_rbits(ctx, lzw->chain, code_bits);
  89. else
  90. code = fz_read_bits(ctx, lzw->chain, code_bits);
  91. if (code == LZW_EOD(lzw))
  92. {
  93. lzw->eod = 1;
  94. break;
  95. }
  96. /* Old Tiffs are allowed to NOT send the clear code, and to
  97. * overrun at the end. */
  98. if (!lzw->old_tiff && next_code > NUM_CODES && code != LZW_CLEAR(lzw))
  99. {
  100. fz_warn(ctx, "missing clear code in lzw decode");
  101. code = LZW_CLEAR(lzw);
  102. }
  103. if (code == LZW_CLEAR(lzw))
  104. {
  105. code_bits = lzw->min_bits;
  106. next_code = LZW_FIRST(lzw);
  107. old_code = -1;
  108. continue;
  109. }
  110. /* if stream starts without a clear code, old_code is undefined... */
  111. if (old_code == -1)
  112. {
  113. old_code = code;
  114. }
  115. else if (!lzw->old_tiff && next_code == NUM_CODES)
  116. {
  117. /* TODO: Ghostscript checks for a following clear code before tolerating */
  118. fz_warn(ctx, "tolerating a single out of range code in lzw decode");
  119. next_code++;
  120. }
  121. else if (code > next_code || (!lzw->old_tiff && next_code >= NUM_CODES))
  122. {
  123. fz_throw(ctx, FZ_ERROR_FORMAT, "out of range code encountered in lzw decode");
  124. }
  125. else if (next_code < NUM_CODES)
  126. {
  127. /* add new entry to the code table */
  128. table[next_code].prev = old_code;
  129. table[next_code].first_char = table[old_code].first_char;
  130. table[next_code].length = table[old_code].length + 1;
  131. if (code < next_code)
  132. table[next_code].value = table[code].first_char;
  133. else if (code == next_code)
  134. table[next_code].value = table[next_code].first_char;
  135. else
  136. fz_throw(ctx, FZ_ERROR_FORMAT, "out of range code encountered in lzw decode");
  137. next_code ++;
  138. if (next_code > (1 << code_bits) - lzw->early_change - 1)
  139. {
  140. code_bits ++;
  141. if (code_bits > MAX_BITS)
  142. code_bits = MAX_BITS;
  143. }
  144. old_code = code;
  145. }
  146. /* code maps to a string, copy to output (in reverse...) */
  147. if (code >= LZW_CLEAR(lzw))
  148. {
  149. codelen = table[code].length;
  150. lzw->rp = lzw->bp;
  151. lzw->wp = lzw->bp + codelen;
  152. assert(codelen < MAX_LENGTH);
  153. s = lzw->wp;
  154. do {
  155. *(--s) = table[code].value;
  156. code = table[code].prev;
  157. } while (code >= 0 && s > lzw->bp);
  158. }
  159. /* ... or just a single character */
  160. else
  161. {
  162. lzw->bp[0] = code;
  163. lzw->rp = lzw->bp;
  164. lzw->wp = lzw->bp + 1;
  165. }
  166. /* copy to output */
  167. while (lzw->rp < lzw->wp && p < ep)
  168. *p++ = *lzw->rp++;
  169. }
  170. lzw->code_bits = code_bits;
  171. lzw->code = code;
  172. lzw->old_code = old_code;
  173. lzw->next_code = next_code;
  174. stm->rp = buf;
  175. stm->wp = p;
  176. if (buf == p)
  177. return EOF;
  178. stm->pos += p - buf;
  179. return *stm->rp++;
  180. }
  181. static void
  182. close_lzwd(fz_context *ctx, void *state_)
  183. {
  184. fz_lzwd *lzw = (fz_lzwd *)state_;
  185. fz_sync_bits(ctx, lzw->chain);
  186. fz_drop_stream(ctx, lzw->chain);
  187. fz_free(ctx, lzw);
  188. }
  189. fz_stream *
  190. fz_open_lzwd(fz_context *ctx, fz_stream *chain, int early_change, int min_bits, int reverse_bits, int old_tiff)
  191. {
  192. fz_lzwd *lzw;
  193. int i;
  194. if (min_bits > MAX_BITS)
  195. {
  196. fz_warn(ctx, "out of range initial lzw code size");
  197. min_bits = MAX_BITS;
  198. }
  199. lzw = fz_malloc_struct(ctx, fz_lzwd);
  200. lzw->eod = 0;
  201. lzw->early_change = early_change;
  202. lzw->reverse_bits = reverse_bits;
  203. lzw->old_tiff = old_tiff;
  204. lzw->min_bits = min_bits;
  205. lzw->code_bits = lzw->min_bits;
  206. lzw->code = -1;
  207. lzw->next_code = LZW_FIRST(lzw);
  208. lzw->old_code = -1;
  209. lzw->rp = lzw->bp;
  210. lzw->wp = lzw->bp;
  211. for (i = 0; i < LZW_CLEAR(lzw); i++)
  212. {
  213. lzw->table[i].value = i;
  214. lzw->table[i].first_char = i;
  215. lzw->table[i].length = 1;
  216. lzw->table[i].prev = -1;
  217. }
  218. for (i = LZW_CLEAR(lzw); i < NUM_CODES; i++)
  219. {
  220. lzw->table[i].value = 0;
  221. lzw->table[i].first_char = 0;
  222. lzw->table[i].length = 0;
  223. lzw->table[i].prev = -1;
  224. }
  225. lzw->chain = fz_keep_stream(ctx, chain);
  226. return fz_new_stream(ctx, lzw, next_lzwd, close_lzwd);
  227. }