buffer.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. // Copyright (C) 2004-2024 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include <string.h>
  24. #include <stdarg.h>
  25. fz_buffer *
  26. fz_new_buffer(fz_context *ctx, size_t size)
  27. {
  28. fz_buffer *b;
  29. size = size > 1 ? size : 16;
  30. b = fz_malloc_struct(ctx, fz_buffer);
  31. b->refs = 1;
  32. fz_try(ctx)
  33. {
  34. b->data = Memento_label(fz_malloc(ctx, size), "fz_buffer_data");
  35. }
  36. fz_catch(ctx)
  37. {
  38. fz_free(ctx, b);
  39. fz_rethrow(ctx);
  40. }
  41. b->cap = size;
  42. b->len = 0;
  43. b->unused_bits = 0;
  44. return b;
  45. }
  46. fz_buffer *
  47. fz_new_buffer_from_data(fz_context *ctx, unsigned char *data, size_t size)
  48. {
  49. fz_buffer *b = NULL;
  50. fz_try(ctx)
  51. {
  52. b = fz_malloc_struct(ctx, fz_buffer);
  53. b->refs = 1;
  54. b->data = data;
  55. b->cap = size;
  56. b->len = size;
  57. b->unused_bits = 0;
  58. }
  59. fz_catch(ctx)
  60. {
  61. fz_free(ctx, data);
  62. fz_rethrow(ctx);
  63. }
  64. return b;
  65. }
  66. fz_buffer *
  67. fz_new_buffer_from_shared_data(fz_context *ctx, const unsigned char *data, size_t size)
  68. {
  69. fz_buffer *b;
  70. b = fz_malloc_struct(ctx, fz_buffer);
  71. b->refs = 1;
  72. b->data = (unsigned char *)data; /* cast away const */
  73. b->cap = size;
  74. b->len = size;
  75. b->unused_bits = 0;
  76. b->shared = 1;
  77. return b;
  78. }
  79. fz_buffer *
  80. fz_new_buffer_from_copied_data(fz_context *ctx, const unsigned char *data, size_t size)
  81. {
  82. fz_buffer *b;
  83. if (size > 0 && data == NULL)
  84. fz_throw(ctx, FZ_ERROR_ARGUMENT, "no data provided");
  85. b = fz_new_buffer(ctx, size);
  86. b->len = size;
  87. memcpy(b->data, data, size);
  88. return b;
  89. }
  90. fz_buffer *fz_clone_buffer(fz_context *ctx, fz_buffer *buf)
  91. {
  92. return fz_new_buffer_from_copied_data(ctx, buf ? buf->data : NULL, buf ? buf->len : 0);
  93. }
  /*
   * Return 1 if 'a' is one of newline, carriage return, tab, space or
   * form feed; return 0 for every other value.
   */
  static inline int iswhite(int a)
  {
      return a == '\n' || a == '\r' || a == '\t' || a == ' ' || a == '\f';
  }
  103. fz_buffer *
  104. fz_new_buffer_from_base64(fz_context *ctx, const char *data, size_t size)
  105. {
  106. fz_buffer *out = fz_new_buffer(ctx, size > 0 ? size : strlen(data));
  107. const char *end = data + (size > 0 ? size : strlen(data));
  108. const char *s = data;
  109. uint32_t buf = 0;
  110. int bits = 0;
  111. /* This is https://infra.spec.whatwg.org/#forgiving-base64-decode
  112. * but even more relaxed. We allow any number of trailing '=' code
  113. * points and instead of returning failure on invalid characters, we
  114. * warn and truncate.
  115. */
  116. while (s < end && iswhite(*s))
  117. s++;
  118. while (s < end && iswhite(end[-1]))
  119. end--;
  120. while (s < end && end[-1] == '=')
  121. end--;
  122. fz_try(ctx)
  123. {
  124. while (s < end)
  125. {
  126. int c = *s++;
  127. if (c >= 'A' && c <= 'Z')
  128. c = c - 'A';
  129. else if (c >= 'a' && c <= 'z')
  130. c = c - 'a' + 26;
  131. else if (c >= '0' && c <= '9')
  132. c = c - '0' + 52;
  133. else if (c == '+')
  134. c = 62;
  135. else if (c == '/')
  136. c = 63;
  137. else if (iswhite(c))
  138. continue;
  139. else
  140. {
  141. fz_warn(ctx, "invalid character in base64");
  142. break;
  143. }
  144. buf <<= 6;
  145. buf |= c & 0x3f;
  146. bits += 6;
  147. if (bits == 24)
  148. {
  149. fz_append_byte(ctx, out, buf >> 16);
  150. fz_append_byte(ctx, out, buf >> 8);
  151. fz_append_byte(ctx, out, buf >> 0);
  152. bits = 0;
  153. }
  154. }
  155. if (bits == 18)
  156. {
  157. fz_append_byte(ctx, out, buf >> 10);
  158. fz_append_byte(ctx, out, buf >> 2);
  159. }
  160. else if (bits == 12)
  161. {
  162. fz_append_byte(ctx, out, buf >> 4);
  163. }
  164. }
  165. fz_catch(ctx)
  166. {
  167. fz_drop_buffer(ctx, out);
  168. fz_rethrow(ctx);
  169. }
  170. return out;
  171. }
  172. fz_buffer *
  173. fz_keep_buffer(fz_context *ctx, fz_buffer *buf)
  174. {
  175. return fz_keep_imp(ctx, buf, &buf->refs);
  176. }
  177. void
  178. fz_drop_buffer(fz_context *ctx, fz_buffer *buf)
  179. {
  180. if (fz_drop_imp(ctx, buf, &buf->refs))
  181. {
  182. if (!buf->shared)
  183. fz_free(ctx, buf->data);
  184. fz_free(ctx, buf);
  185. }
  186. }
  187. void
  188. fz_resize_buffer(fz_context *ctx, fz_buffer *buf, size_t size)
  189. {
  190. if (buf->shared)
  191. fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot resize a buffer with shared storage");
  192. buf->data = fz_realloc(ctx, buf->data, size);
  193. buf->cap = size;
  194. if (buf->len > buf->cap)
  195. buf->len = buf->cap;
  196. }
  197. void
  198. fz_grow_buffer(fz_context *ctx, fz_buffer *buf)
  199. {
  200. size_t newsize = (buf->cap * 3) / 2;
  201. if (newsize == 0)
  202. newsize = 256;
  203. fz_resize_buffer(ctx, buf, newsize);
  204. }
  205. static void
  206. fz_ensure_buffer(fz_context *ctx, fz_buffer *buf, size_t min)
  207. {
  208. size_t newsize = buf->cap;
  209. if (newsize < 16)
  210. newsize = 16;
  211. while (newsize < min)
  212. {
  213. newsize = (newsize * 3) / 2;
  214. }
  215. fz_resize_buffer(ctx, buf, newsize);
  216. }
  217. void
  218. fz_trim_buffer(fz_context *ctx, fz_buffer *buf)
  219. {
  220. if (buf->cap > buf->len+1)
  221. fz_resize_buffer(ctx, buf, buf->len);
  222. }
  223. void
  224. fz_clear_buffer(fz_context *ctx, fz_buffer *buf)
  225. {
  226. buf->len = 0;
  227. }
  228. void
  229. fz_terminate_buffer(fz_context *ctx, fz_buffer *buf)
  230. {
  231. /* ensure that there is a zero-byte after the end of the data */
  232. if (buf->len + 1 > buf->cap)
  233. fz_grow_buffer(ctx, buf);
  234. buf->data[buf->len] = 0;
  235. }
  236. size_t
  237. fz_buffer_storage(fz_context *ctx, fz_buffer *buf, unsigned char **datap)
  238. {
  239. if (datap)
  240. *datap = (buf ? buf->data : NULL);
  241. return (buf ? buf->len : 0);
  242. }
  243. const char *
  244. fz_string_from_buffer(fz_context *ctx, fz_buffer *buf)
  245. {
  246. if (!buf)
  247. return "";
  248. fz_terminate_buffer(ctx, buf);
  249. return (const char *)buf->data;
  250. }
  251. size_t
  252. fz_buffer_extract(fz_context *ctx, fz_buffer *buf, unsigned char **datap)
  253. {
  254. size_t len = buf ? buf->len : 0;
  255. *datap = (buf ? buf->data : NULL);
  256. if (buf)
  257. {
  258. buf->data = NULL;
  259. buf->len = 0;
  260. }
  261. return len;
  262. }
  263. fz_buffer *
  264. fz_slice_buffer(fz_context *ctx, fz_buffer *buf, int64_t start, int64_t end)
  265. {
  266. unsigned char *src = NULL;
  267. size_t size = fz_buffer_storage(ctx, buf, &src);
  268. size_t s, e;
  269. if (start < 0)
  270. start += size;
  271. if (end < 0)
  272. end += size;
  273. s = fz_clamp64(start, 0, size);
  274. e = fz_clamp64(end, 0, size);
  275. if (s == size || e <= s)
  276. return fz_new_buffer(ctx, 0);
  277. return fz_new_buffer_from_copied_data(ctx, &src[s], e - s);
  278. }
  279. void
  280. fz_append_buffer(fz_context *ctx, fz_buffer *buf, fz_buffer *extra)
  281. {
  282. if (buf->cap - buf->len < extra->len)
  283. {
  284. buf->data = fz_realloc(ctx, buf->data, buf->len + extra->len);
  285. buf->cap = buf->len + extra->len;
  286. }
  287. memcpy(buf->data + buf->len, extra->data, extra->len);
  288. buf->len += extra->len;
  289. }
  290. void
  291. fz_append_data(fz_context *ctx, fz_buffer *buf, const void *data, size_t len)
  292. {
  293. if (buf->len + len > buf->cap)
  294. fz_ensure_buffer(ctx, buf, buf->len + len);
  295. memcpy(buf->data + buf->len, data, len);
  296. buf->len += len;
  297. buf->unused_bits = 0;
  298. }
  299. void
  300. fz_append_string(fz_context *ctx, fz_buffer *buf, const char *data)
  301. {
  302. size_t len = strlen(data);
  303. if (buf->len + len > buf->cap)
  304. fz_ensure_buffer(ctx, buf, buf->len + len);
  305. memcpy(buf->data + buf->len, data, len);
  306. buf->len += len;
  307. buf->unused_bits = 0;
  308. }
  309. void
  310. fz_append_byte(fz_context *ctx, fz_buffer *buf, int val)
  311. {
  312. if (buf->len + 1 > buf->cap)
  313. fz_grow_buffer(ctx, buf);
  314. buf->data[buf->len++] = val;
  315. buf->unused_bits = 0;
  316. }
  317. void
  318. fz_append_rune(fz_context *ctx, fz_buffer *buf, int c)
  319. {
  320. char data[10];
  321. int len = fz_runetochar(data, c);
  322. if (buf->len + len > buf->cap)
  323. fz_ensure_buffer(ctx, buf, buf->len + len);
  324. memcpy(buf->data + buf->len, data, len);
  325. buf->len += len;
  326. buf->unused_bits = 0;
  327. }
  328. void
  329. fz_append_int32_be(fz_context *ctx, fz_buffer *buf, int x)
  330. {
  331. fz_append_byte(ctx, buf, (x >> 24) & 0xFF);
  332. fz_append_byte(ctx, buf, (x >> 16) & 0xFF);
  333. fz_append_byte(ctx, buf, (x >> 8) & 0xFF);
  334. fz_append_byte(ctx, buf, (x) & 0xFF);
  335. }
  336. void
  337. fz_append_int16_be(fz_context *ctx, fz_buffer *buf, int x)
  338. {
  339. fz_append_byte(ctx, buf, (x >> 8) & 0xFF);
  340. fz_append_byte(ctx, buf, (x) & 0xFF);
  341. }
  342. void
  343. fz_append_int32_le(fz_context *ctx, fz_buffer *buf, int x)
  344. {
  345. fz_append_byte(ctx, buf, (x)&0xFF);
  346. fz_append_byte(ctx, buf, (x>>8)&0xFF);
  347. fz_append_byte(ctx, buf, (x>>16)&0xFF);
  348. fz_append_byte(ctx, buf, (x>>24)&0xFF);
  349. }
  350. void
  351. fz_append_int16_le(fz_context *ctx, fz_buffer *buf, int x)
  352. {
  353. fz_append_byte(ctx, buf, (x)&0xFF);
  354. fz_append_byte(ctx, buf, (x>>8)&0xFF);
  355. }
  356. void
  357. fz_append_bits(fz_context *ctx, fz_buffer *buf, int val, int bits)
  358. {
  359. int shift;
  360. /* Throughout this code, the invariant is that we need to write the
  361. * bottom 'bits' bits of 'val' into the stream. On entry we assume
  362. * that val & ((1<<bits)-1) == val, but we do not rely on this after
  363. * having written the first partial byte. */
  364. if (bits == 0)
  365. return;
  366. /* buf->len always covers all the bits in the buffer, including
  367. * any unused ones in the last byte, which will always be 0.
  368. * buf->unused_bits = the number of unused bits in the last byte.
  369. */
  370. /* Find the amount we need to shift val up by so that it will be in
  371. * the correct position to be inserted into any existing data byte. */
  372. shift = (buf->unused_bits - bits);
  373. /* Extend the buffer as required before we start; that way we never
  374. * fail part way during writing. If shift < 0, then we'll need -shift
  375. * more bits. */
  376. if (shift < 0)
  377. {
  378. int extra = (7-shift)>>3; /* Round up to bytes */
  379. fz_ensure_buffer(ctx, buf, buf->len + extra);
  380. }
  381. /* Write any bits that will fit into the existing byte */
  382. if (buf->unused_bits)
  383. {
  384. buf->data[buf->len-1] |= (shift >= 0 ? (((unsigned int)val)<<shift) : (((unsigned int)val)>>-shift));
  385. if (shift >= 0)
  386. {
  387. /* If we were shifting up, we're done. */
  388. buf->unused_bits -= bits;
  389. return;
  390. }
  391. /* The number of bits left to write is the number that didn't
  392. * fit in this first byte. */
  393. bits = -shift;
  394. }
  395. /* Write any whole bytes */
  396. while (bits >= 8)
  397. {
  398. bits -= 8;
  399. buf->data[buf->len++] = val>>bits;
  400. }
  401. /* Write trailing bits (with 0's in unused bits) */
  402. if (bits > 0)
  403. {
  404. bits = 8-bits;
  405. buf->data[buf->len++] = val<<bits;
  406. }
  407. buf->unused_bits = bits;
  408. }
  409. void
  410. fz_append_bits_pad(fz_context *ctx, fz_buffer *buf)
  411. {
  412. buf->unused_bits = 0;
  413. }
  414. static void fz_append_emit(fz_context *ctx, void *buffer, int c)
  415. {
  416. fz_append_byte(ctx, buffer, c);
  417. }
  418. void
  419. fz_append_printf(fz_context *ctx, fz_buffer *buffer, const char *fmt, ...)
  420. {
  421. va_list args;
  422. va_start(args, fmt);
  423. fz_format_string(ctx, buffer, fz_append_emit, fmt, args);
  424. va_end(args);
  425. }
  426. void
  427. fz_append_vprintf(fz_context *ctx, fz_buffer *buffer, const char *fmt, va_list args)
  428. {
  429. fz_format_string(ctx, buffer, fz_append_emit, fmt, args);
  430. }
  431. void
  432. fz_append_pdf_string(fz_context *ctx, fz_buffer *buffer, const char *text)
  433. {
  434. size_t len = 2;
  435. const char *s = text;
  436. char *d;
  437. char c;
  438. while ((c = *s++) != 0)
  439. {
  440. switch (c)
  441. {
  442. case '\n':
  443. case '\r':
  444. case '\t':
  445. case '\b':
  446. case '\f':
  447. case '(':
  448. case ')':
  449. case '\\':
  450. len++;
  451. break;
  452. }
  453. len++;
  454. }
  455. while(buffer->cap - buffer->len < len)
  456. fz_grow_buffer(ctx, buffer);
  457. s = text;
  458. d = (char *)buffer->data + buffer->len;
  459. *d++ = '(';
  460. while ((c = *s++) != 0)
  461. {
  462. switch (c)
  463. {
  464. case '\n':
  465. *d++ = '\\';
  466. *d++ = 'n';
  467. break;
  468. case '\r':
  469. *d++ = '\\';
  470. *d++ = 'r';
  471. break;
  472. case '\t':
  473. *d++ = '\\';
  474. *d++ = 't';
  475. break;
  476. case '\b':
  477. *d++ = '\\';
  478. *d++ = 'b';
  479. break;
  480. case '\f':
  481. *d++ = '\\';
  482. *d++ = 'f';
  483. break;
  484. case '(':
  485. *d++ = '\\';
  486. *d++ = '(';
  487. break;
  488. case ')':
  489. *d++ = '\\';
  490. *d++ = ')';
  491. break;
  492. case '\\':
  493. *d++ = '\\';
  494. *d++ = '\\';
  495. break;
  496. default:
  497. *d++ = c;
  498. }
  499. }
  500. *d = ')';
  501. buffer->len += len;
  502. }
  503. void
  504. fz_md5_buffer(fz_context *ctx, fz_buffer *buffer, unsigned char digest[16])
  505. {
  506. fz_md5 state;
  507. fz_md5_init(&state);
  508. if (buffer)
  509. fz_md5_update(&state, buffer->data, buffer->len);
  510. fz_md5_final(&state, digest);
  511. }
  #ifdef TEST_BUFFER_WRITE

  #define TEST_LEN 1024

  /*
   * Self-test for fz_append_bits (only built when TEST_BUFFER_WRITE is
   * defined).
   *
   * A buffer of TEST_LEN random bytes is read back through a stream in
   * randomly sized chunks of 1..32 bits, each chunk being appended to a
   * second buffer with fz_append_bits. After every full pass the two
   * buffers are compared and a message is printed to stderr if they
   * differ. The pass is repeated 256 times, with the destination storage
   * pre-filled with a different byte value each time.
   */
  void
  fz_test_buffer_write(fz_context *ctx)
  {
      fz_buffer *master = fz_new_buffer(ctx, TEST_LEN);
      fz_buffer *copy = fz_new_buffer(ctx, TEST_LEN);
      fz_stream *stm;
      int pass, remaining, chunk;

      /* Make us a dummy buffer */
      for (pass = 0; pass < TEST_LEN; pass++)
          master->data[pass] = rand();
      master->len = TEST_LEN;

      /* Now copy that buffer several times, checking it for validity */
      stm = fz_open_buffer(ctx, master);
      for (pass = 0; pass < 256; pass++)
      {
          /* Pre-fill the destination so stale bytes cannot pass for
           * correct output. */
          memset(copy->data, pass, TEST_LEN);
          copy->len = 0;

          remaining = TEST_LEN * 8;
          while (remaining)
          {
              chunk = (rand() & 31)+1;
              if (chunk > remaining)
                  chunk = remaining;
              fz_append_bits(ctx, copy, fz_read_bits(ctx, stm, chunk), chunk);
              remaining -= chunk;
          }

          if (memcmp(copy->data, master->data, TEST_LEN) != 0)
              fprintf(stderr, "Copied buffer is different!\n");
          fz_seek(stm, 0, 0);
      }

      fz_drop_stream(stm);
      fz_drop_buffer(ctx, master);
      fz_drop_buffer(ctx, copy);
  }
  #endif