tif_lzw.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. /* tif_lzw.h - LZW compression for TIFF */
  2. /*
  3. libzint - the open source barcode library
  4. Copyright (C) 2021-2024 Robin Stuart <rstuart114@gmail.com>
  5. Redistribution and use in source and binary forms, with or without
  6. modification, are permitted provided that the following conditions
  7. are met:
  8. 1. Redistributions of source code must retain the above copyright
  9. notice, this list of conditions and the following disclaimer.
  10. 2. Redistributions in binary form must reproduce the above copyright
  11. notice, this list of conditions and the following disclaimer in the
  12. documentation and/or other materials provided with the distribution.
  13. 3. Neither the name of the project nor the names of its contributors
  14. may be used to endorse or promote products derived from this software
  15. without specific prior written permission.
  16. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  20. FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22. OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23. HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24. LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25. OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26. SUCH DAMAGE.
  27. */
  28. /* SPDX-License-Identifier: BSD-3-Clause */
  29. #ifndef Z_TIF_LZW_H
  30. #define Z_TIF_LZW_H
  31. #ifdef __cplusplus
  32. extern "C" {
  33. #endif /* __cplusplus */
  34. /*
  35. * Adapted from TIFF Library 4.2.0 libtiff/tif_lzw.c */
  36. /*
  37. * Copyright (c) 1988-1997 Sam Leffler
  38. * Copyright (c) 1991-1997 Silicon Graphics, Inc.
  39. *
  40. * Permission to use, copy, modify, distribute, and sell this software and
  41. * its documentation for any purpose is hereby granted without fee, provided
  42. * that (i) the above copyright notices and this permission notice appear in
  43. * all copies of the software and related documentation, and (ii) the names of
  44. * Sam Leffler and Silicon Graphics may not be used in any advertising or
  45. * publicity relating to the software without the specific, prior written
  46. * permission of Sam Leffler and Silicon Graphics.
  47. *
  48. * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
  49. * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
  50. * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
  51. *
  52. * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
  53. * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
  54. * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  55. * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
  56. * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  57. * OF THIS SOFTWARE.
  58. */
  59. /*
  60. * TIFF Library.
  61. * Rev 5.0 Lempel-Ziv & Welch Compression Support
  62. *
  63. * This code is derived from the compress program whose code is
  64. * derived from software contributed to Berkeley by James A. Woods,
  65. * derived from original work by Spencer Thomas and Joseph Orost.
  66. *
  67. * The original Berkeley copyright notice appears below in its entirety.
  68. */
  69. /*
  70. * Copyright (c) 1985, 1986 The Regents of the University of California.
  71. * All rights reserved.
  72. *
  73. * This code is derived from software contributed to Berkeley by
  74. * James A. Woods, derived from original work by Spencer Thomas
  75. * and Joseph Orost.
  76. *
  77. * Redistribution and use in source and binary forms are permitted
  78. * provided that the above copyright notice and this paragraph are
  79. * duplicated in all such forms and that any documentation,
  80. * advertising materials, and other materials related to such
  81. * distribution and use acknowledge that the software was developed
  82. * by the University of California, Berkeley. The name of the
  83. * University may not be used to endorse or promote products derived
  84. * from this software without specific prior written permission.
  85. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
  86. * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
  87. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  88. */
  89. #define MAXCODE(n) ((1L << (n)) - 1)
  90. /*
  91. * The TIFF spec specifies that encoded bit
  92. * strings range from 9 to 12 bits.
  93. */
  94. #define BITS_MIN 9 /* start with 9 bits */
  95. #define BITS_MAX 12 /* max of 12 bit strings */
  96. /* predefined codes */
  97. #define CODE_CLEAR 256 /* code to clear string table */
  98. #define CODE_EOI 257 /* end-of-information code */
  99. #define CODE_FIRST 258 /* first free code entry */
  100. #define CODE_MAX MAXCODE(BITS_MAX)
  101. #define HSIZE 9001L /* 91% occupancy */
  102. #define HSHIFT (13 - 8)
  103. /*
  104. * Encoding-specific state.
  105. */
  106. typedef uint16_t tif_lzw_hcode; /* codes fit in 16 bits */
  107. typedef struct {
  108. long hash;
  109. tif_lzw_hcode code;
  110. } tif_lzw_hash;
  111. #define CHECK_GAP 10000 /* ratio check interval */
  112. /*
  113. * State block.
  114. */
  115. typedef struct {
  116. tif_lzw_hash *enc_hashtab; /* kept separate for small machines */
  117. } tif_lzw_state;
  118. /*
  119. * LZW Encoding.
  120. */
  121. /*
  122. * Reset encoding hash table.
  123. */
  124. static void tif_lzw_cl_hash(tif_lzw_state *sp) {
  125. register tif_lzw_hash *hp = &sp->enc_hashtab[HSIZE - 1];
  126. register long i = HSIZE - 8;
  127. do {
  128. i -= 8;
  129. hp[-7].hash = -1;
  130. hp[-6].hash = -1;
  131. hp[-5].hash = -1;
  132. hp[-4].hash = -1;
  133. hp[-3].hash = -1;
  134. hp[-2].hash = -1;
  135. hp[-1].hash = -1;
  136. hp[ 0].hash = -1;
  137. hp -= 8;
  138. } while (i >= 0);
  139. for (i += 8; i > 0; i--, hp--) {
  140. hp->hash = -1;
  141. }
  142. }
  143. #define CALCRATIO(sp, rat) { \
  144. if (incount > 0x007fffff) { /* NB: shift will overflow */ \
  145. rat = outcount >> 8; \
  146. rat = (rat == 0 ? 0x7fffffff : incount / rat); \
  147. } else \
  148. rat = (incount << 8) / outcount; \
  149. }
  150. /* Explicit 0xff masking to make icc -check=conversions happy */
  151. #define PutNextCode(op_fmp, c) { \
  152. nextdata = (nextdata << nbits) | c; \
  153. nextbits += nbits; \
  154. fm_putc((nextdata >> (nextbits - 8)) & 0xff, op_fmp); \
  155. nextbits -= 8; \
  156. if (nextbits >= 8) { \
  157. fm_putc((nextdata >> (nextbits - 8)) & 0xff, op_fmp); \
  158. nextbits -= 8; \
  159. } \
  160. outcount += nbits; \
  161. }
  162. /*
  163. * Encode a chunk of pixels.
  164. *
  165. * Uses an open addressing double hashing (no chaining) on the
  166. * prefix code/next character combination. We do a variant of
  167. * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's
  168. * relatively-prime secondary probe. Here, the modular division
  169. * first probe is gives way to a faster exclusive-or manipulation.
  170. * Also do block compression with an adaptive reset, whereby the
  171. * code table is cleared when the compression ratio decreases,
  172. * but after the table fills. The variable-length output codes
  173. * are re-sized at this point, and a CODE_CLEAR is generated
  174. * for the decoder.
  175. */
  176. static int tif_lzw_encode(tif_lzw_state *sp, struct filemem *op_fmp, const unsigned char *bp, int cc) {
  177. register long fcode;
  178. register tif_lzw_hash *hp;
  179. register int h, c;
  180. tif_lzw_hcode ent;
  181. long disp;
  182. int nbits; /* # of bits/code */
  183. int maxcode; /* maximum code for nbits */
  184. int free_ent; /* next free entry in hash table */
  185. unsigned long nextdata; /* next bits of i/o */
  186. long nextbits; /* # of valid bits in nextdata */
  187. long checkpoint; /* point at which to clear table */
  188. long ratio; /* current compression ratio */
  189. long incount; /* (input) data bytes encoded */
  190. long outcount; /* encoded (output) bytes */
  191. /*
  192. * Reset encoding state at the start of a strip.
  193. */
  194. if (sp->enc_hashtab == NULL) {
  195. sp->enc_hashtab = (tif_lzw_hash *) malloc(HSIZE * sizeof(tif_lzw_hash));
  196. if (sp->enc_hashtab == NULL) {
  197. return 0;
  198. }
  199. }
  200. tif_lzw_cl_hash(sp); /* clear hash table */
  201. nbits = BITS_MIN;
  202. maxcode = MAXCODE(BITS_MIN);
  203. free_ent = CODE_FIRST;
  204. nextdata = 0;
  205. nextbits = 0;
  206. checkpoint = CHECK_GAP;
  207. ratio = 0;
  208. incount = 0;
  209. outcount = 0;
  210. ent = (tif_lzw_hcode) -1;
  211. if (cc > 0) {
  212. PutNextCode(op_fmp, CODE_CLEAR);
  213. ent = *bp++; cc--; incount++;
  214. }
  215. while (cc > 0) {
  216. c = *bp++; cc--; incount++;
  217. fcode = ((long)c << BITS_MAX) + ent;
  218. h = (c << HSHIFT) ^ ent; /* xor hashing */
  219. #ifdef _WINDOWS
  220. /*
  221. * Check hash index for an overflow.
  222. */
  223. if (h >= HSIZE) {
  224. h -= HSIZE;
  225. }
  226. #endif
  227. hp = &sp->enc_hashtab[h];
  228. if (hp->hash == fcode) {
  229. ent = hp->code;
  230. continue;
  231. }
  232. if (hp->hash >= 0) {
  233. /*
  234. * Primary hash failed, check secondary hash.
  235. */
  236. disp = HSIZE - h;
  237. if (h == 0) {
  238. disp = 1;
  239. }
  240. do {
  241. /*
  242. * Avoid pointer arithmetic because of
  243. * wraparound problems with segments.
  244. */
  245. if ((h -= disp) < 0) {
  246. h += HSIZE;
  247. }
  248. hp = &sp->enc_hashtab[h];
  249. if (hp->hash == fcode) {
  250. ent = hp->code;
  251. goto hit;
  252. }
  253. } while (hp->hash >= 0);
  254. }
  255. /*
  256. * New entry, emit code and add to table.
  257. */
  258. PutNextCode(op_fmp, ent);
  259. ent = (tif_lzw_hcode) c;
  260. hp->code = (tif_lzw_hcode) (free_ent++);
  261. hp->hash = fcode;
  262. if (free_ent == CODE_MAX - 1) {
  263. /* table is full, emit clear code and reset */
  264. tif_lzw_cl_hash(sp);
  265. ratio = 0;
  266. incount = 0;
  267. outcount = 0;
  268. free_ent = CODE_FIRST;
  269. PutNextCode(op_fmp, CODE_CLEAR);
  270. nbits = BITS_MIN;
  271. maxcode = MAXCODE(BITS_MIN);
  272. } else {
  273. /*
  274. * If the next entry is going to be too big for
  275. * the code size, then increase it, if possible.
  276. */
  277. if (free_ent > maxcode) {
  278. nbits++;
  279. assert(nbits <= BITS_MAX);
  280. maxcode = (int) MAXCODE(nbits);
  281. } else if (incount >= checkpoint) {
  282. long rat;
  283. /*
  284. * Check compression ratio and, if things seem
  285. * to be slipping, clear the hash table and
  286. * reset state. The compression ratio is a
  287. * 24+8-bit fractional number.
  288. */
  289. checkpoint = incount + CHECK_GAP;
  290. CALCRATIO(sp, rat);
  291. if (rat <= ratio) {
  292. tif_lzw_cl_hash(sp);
  293. ratio = 0;
  294. incount = 0;
  295. outcount = 0;
  296. free_ent = CODE_FIRST;
  297. PutNextCode(op_fmp, CODE_CLEAR);
  298. nbits = BITS_MIN;
  299. maxcode = MAXCODE(BITS_MIN);
  300. } else {
  301. ratio = rat;
  302. }
  303. }
  304. }
  305. hit:
  306. ;
  307. }
  308. /*
  309. * Finish off an encoded strip by flushing the last
  310. * string and tacking on an End Of Information code.
  311. */
  312. if (ent != (tif_lzw_hcode) -1) {
  313. PutNextCode(op_fmp, ent);
  314. free_ent++;
  315. if (free_ent == CODE_MAX - 1) {
  316. /* table is full, emit clear code and reset */
  317. outcount = 0;
  318. PutNextCode(op_fmp, CODE_CLEAR);
  319. nbits = BITS_MIN;
  320. } else {
  321. /*
  322. * If the next entry is going to be too big for
  323. * the code size, then increase it, if possible.
  324. */
  325. if (free_ent > maxcode) {
  326. nbits++;
  327. assert(nbits <= BITS_MAX);
  328. }
  329. }
  330. }
  331. PutNextCode(op_fmp, CODE_EOI);
  332. /* Explicit 0xff masking to make icc -check=conversions happy */
  333. if (nextbits > 0) {
  334. fm_putc((nextdata << (8 - nextbits)) & 0xff, op_fmp);
  335. }
  336. return 1;
  337. }
  338. static void tif_lzw_cleanup(tif_lzw_state *sp) {
  339. if (sp->enc_hashtab) {
  340. free(sp->enc_hashtab);
  341. }
  342. }
  343. static void tif_lzw_init(tif_lzw_state *sp) {
  344. sp->enc_hashtab = NULL;
  345. }
  346. #ifdef __cplusplus
  347. }
  348. #endif /* __cplusplus */
  349. /* vim: set ts=4 sw=4 et : */
  350. #endif /* Z_TIF_LZW_H */