// stream-read.c
  1. // Copyright (C) 2004-2021 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include <string.h>
  24. #define MIN_BOMB (100 << 20)
  25. size_t
  26. fz_read(fz_context *ctx, fz_stream *stm, unsigned char *buf, size_t len)
  27. {
  28. size_t count, n;
  29. count = 0;
  30. do
  31. {
  32. n = fz_available(ctx, stm, len);
  33. if (n > len)
  34. n = len;
  35. if (n == 0)
  36. break;
  37. memcpy(buf, stm->rp, n);
  38. stm->rp += n;
  39. buf += n;
  40. count += n;
  41. len -= n;
  42. }
  43. while (len > 0);
  44. return count;
  45. }
  46. static unsigned char skip_buf[4096];
  47. size_t fz_skip(fz_context *ctx, fz_stream *stm, size_t len)
  48. {
  49. size_t count, l, total = 0;
  50. while (len)
  51. {
  52. l = len;
  53. if (l > sizeof(skip_buf))
  54. l = sizeof(skip_buf);
  55. count = fz_read(ctx, stm, skip_buf, l);
  56. total += count;
  57. if (count < l)
  58. break;
  59. len -= count;
  60. }
  61. return total;
  62. }
  63. fz_buffer *
  64. fz_read_all(fz_context *ctx, fz_stream *stm, size_t initial)
  65. {
  66. return fz_read_best(ctx, stm, initial, NULL, 0);
  67. }
  68. fz_buffer *
  69. fz_read_best(fz_context *ctx, fz_stream *stm, size_t initial, int *truncated, size_t worst_case)
  70. {
  71. fz_buffer *buf = NULL;
  72. int check_bomb = (initial > 0);
  73. size_t n;
  74. fz_var(buf);
  75. if (truncated)
  76. *truncated = 0;
  77. if (worst_case == 0)
  78. worst_case = initial * 200;
  79. if (worst_case < MIN_BOMB)
  80. worst_case = MIN_BOMB;
  81. fz_try(ctx)
  82. {
  83. if (initial < 1024)
  84. initial = 1024;
  85. buf = fz_new_buffer(ctx, initial+1);
  86. while (1)
  87. {
  88. if (buf->len == buf->cap)
  89. fz_grow_buffer(ctx, buf);
  90. if (check_bomb && buf->len > worst_case)
  91. fz_throw(ctx, FZ_ERROR_FORMAT, "compression bomb detected");
  92. n = fz_read(ctx, stm, buf->data + buf->len, buf->cap - buf->len);
  93. if (n == 0)
  94. break;
  95. buf->len += n;
  96. }
  97. }
  98. fz_catch(ctx)
  99. {
  100. if (fz_caught(ctx) == FZ_ERROR_TRYLATER || fz_caught(ctx) == FZ_ERROR_SYSTEM)
  101. {
  102. fz_drop_buffer(ctx, buf);
  103. fz_rethrow(ctx);
  104. }
  105. if (truncated)
  106. {
  107. *truncated = 1;
  108. fz_report_error(ctx);
  109. }
  110. else
  111. {
  112. fz_drop_buffer(ctx, buf);
  113. fz_rethrow(ctx);
  114. }
  115. }
  116. return buf;
  117. }
  118. char *
  119. fz_read_line(fz_context *ctx, fz_stream *stm, char *mem, size_t n)
  120. {
  121. char *s = mem;
  122. int c = EOF;
  123. while (n > 1)
  124. {
  125. c = fz_read_byte(ctx, stm);
  126. if (c == EOF)
  127. break;
  128. if (c == '\r') {
  129. c = fz_peek_byte(ctx, stm);
  130. if (c == '\n')
  131. fz_read_byte(ctx, stm);
  132. break;
  133. }
  134. if (c == '\n')
  135. break;
  136. *s++ = c;
  137. n--;
  138. }
  139. if (n)
  140. *s = '\0';
  141. return (s == mem && c == EOF) ? NULL : mem;
  142. }
  143. int64_t
  144. fz_tell(fz_context *ctx, fz_stream *stm)
  145. {
  146. return stm->pos - (stm->wp - stm->rp);
  147. }
  148. void
  149. fz_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence)
  150. {
  151. stm->avail = 0; /* Reset bit reading */
  152. if (stm->seek)
  153. {
  154. if (whence == 1)
  155. {
  156. offset += fz_tell(ctx, stm);
  157. whence = 0;
  158. }
  159. stm->seek(ctx, stm, offset, whence);
  160. stm->eof = 0;
  161. }
  162. else if (whence != 2)
  163. {
  164. if (whence == 0)
  165. offset -= fz_tell(ctx, stm);
  166. if (offset < 0)
  167. fz_warn(ctx, "cannot seek backwards");
  168. /* dog slow, but rare enough */
  169. while (offset-- > 0)
  170. {
  171. if (fz_read_byte(ctx, stm) == EOF)
  172. {
  173. fz_warn(ctx, "seek failed");
  174. break;
  175. }
  176. }
  177. }
  178. else
  179. fz_warn(ctx, "cannot seek");
  180. }
  181. fz_buffer *
  182. fz_read_file(fz_context *ctx, const char *filename)
  183. {
  184. fz_stream *stm;
  185. fz_buffer *buf = NULL;
  186. fz_var(buf);
  187. stm = fz_open_file(ctx, filename);
  188. fz_try(ctx)
  189. {
  190. buf = fz_read_all(ctx, stm, 0);
  191. }
  192. fz_always(ctx)
  193. {
  194. fz_drop_stream(ctx, stm);
  195. }
  196. fz_catch(ctx)
  197. {
  198. fz_rethrow(ctx);
  199. }
  200. return buf;
  201. }
  202. fz_buffer *
  203. fz_try_read_file(fz_context *ctx, const char *filename)
  204. {
  205. fz_stream *stm;
  206. fz_buffer *buf = NULL;
  207. fz_var(buf);
  208. stm = fz_try_open_file(ctx, filename);
  209. if (stm == NULL)
  210. return NULL;
  211. fz_try(ctx)
  212. {
  213. buf = fz_read_all(ctx, stm, 0);
  214. }
  215. fz_always(ctx)
  216. {
  217. fz_drop_stream(ctx, stm);
  218. }
  219. fz_catch(ctx)
  220. {
  221. fz_rethrow(ctx);
  222. }
  223. return buf;
  224. }
  225. uint16_t fz_read_uint16(fz_context *ctx, fz_stream *stm)
  226. {
  227. int a = fz_read_byte(ctx, stm);
  228. int b = fz_read_byte(ctx, stm);
  229. if (a == EOF || b == EOF)
  230. fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int16");
  231. return ((uint16_t)a<<8) | ((uint16_t)b);
  232. }
  233. uint32_t fz_read_uint24(fz_context *ctx, fz_stream *stm)
  234. {
  235. int a = fz_read_byte(ctx, stm);
  236. int b = fz_read_byte(ctx, stm);
  237. int c = fz_read_byte(ctx, stm);
  238. if (a == EOF || b == EOF || c == EOF)
  239. fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int24");
  240. return ((uint32_t)a<<16) | ((uint32_t)b<<8) | ((uint32_t)c);
  241. }
  242. uint32_t fz_read_uint32(fz_context *ctx, fz_stream *stm)
  243. {
  244. int a = fz_read_byte(ctx, stm);
  245. int b = fz_read_byte(ctx, stm);
  246. int c = fz_read_byte(ctx, stm);
  247. int d = fz_read_byte(ctx, stm);
  248. if (a == EOF || b == EOF || c == EOF || d == EOF)
  249. fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int32");
  250. return ((uint32_t)a<<24) | ((uint32_t)b<<16) | ((uint32_t)c<<8) | ((uint32_t)d);
  251. }
  252. uint64_t fz_read_uint64(fz_context *ctx, fz_stream *stm)
  253. {
  254. int a = fz_read_byte(ctx, stm);
  255. int b = fz_read_byte(ctx, stm);
  256. int c = fz_read_byte(ctx, stm);
  257. int d = fz_read_byte(ctx, stm);
  258. int e = fz_read_byte(ctx, stm);
  259. int f = fz_read_byte(ctx, stm);
  260. int g = fz_read_byte(ctx, stm);
  261. int h = fz_read_byte(ctx, stm);
  262. if (a == EOF || b == EOF || c == EOF || d == EOF || e == EOF || f == EOF || g == EOF || h == EOF)
  263. fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int64");
  264. return ((uint64_t)a<<56) | ((uint64_t)b<<48) | ((uint64_t)c<<40) | ((uint64_t)d<<32)
  265. | ((uint64_t)e<<24) | ((uint64_t)f<<16) | ((uint64_t)g<<8) | ((uint64_t)h);
  266. }
  267. uint16_t fz_read_uint16_le(fz_context *ctx, fz_stream *stm)
  268. {
  269. int a = fz_read_byte(ctx, stm);
  270. int b = fz_read_byte(ctx, stm);
  271. if (a == EOF || b == EOF)
  272. fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int16");
  273. return ((uint16_t)a) | ((uint16_t)b<<8);
  274. }
  275. uint32_t fz_read_uint24_le(fz_context *ctx, fz_stream *stm)
  276. {
  277. int a = fz_read_byte(ctx, stm);
  278. int b = fz_read_byte(ctx, stm);
  279. int c = fz_read_byte(ctx, stm);
  280. if (a == EOF || b == EOF || c == EOF)
  281. fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int24");
  282. return ((uint32_t)a) | ((uint32_t)b<<8) | ((uint32_t)c<<16);
  283. }
  284. uint32_t fz_read_uint32_le(fz_context *ctx, fz_stream *stm)
  285. {
  286. int a = fz_read_byte(ctx, stm);
  287. int b = fz_read_byte(ctx, stm);
  288. int c = fz_read_byte(ctx, stm);
  289. int d = fz_read_byte(ctx, stm);
  290. if (a == EOF || b == EOF || c == EOF || d == EOF)
  291. fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int32");
  292. return ((uint32_t)a) | ((uint32_t)b<<8) | ((uint32_t)c<<16) | ((uint32_t)d<<24);
  293. }
  294. uint64_t fz_read_uint64_le(fz_context *ctx, fz_stream *stm)
  295. {
  296. int a = fz_read_byte(ctx, stm);
  297. int b = fz_read_byte(ctx, stm);
  298. int c = fz_read_byte(ctx, stm);
  299. int d = fz_read_byte(ctx, stm);
  300. int e = fz_read_byte(ctx, stm);
  301. int f = fz_read_byte(ctx, stm);
  302. int g = fz_read_byte(ctx, stm);
  303. int h = fz_read_byte(ctx, stm);
  304. if (a == EOF || b == EOF || c == EOF || d == EOF || e == EOF || f == EOF || g == EOF || h == EOF)
  305. fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int64");
  306. return ((uint64_t)a) | ((uint64_t)b<<8) | ((uint64_t)c<<16) | ((uint64_t)d<<24)
  307. | ((uint64_t)e<<32) | ((uint64_t)f<<40) | ((uint64_t)g<<48) | ((uint64_t)h<<56);
  308. }
  309. int16_t fz_read_int16(fz_context *ctx, fz_stream *stm) { return (int16_t)fz_read_uint16(ctx, stm); }
  310. int32_t fz_read_int32(fz_context *ctx, fz_stream *stm) { return (int32_t)fz_read_uint32(ctx, stm); }
  311. int64_t fz_read_int64(fz_context *ctx, fz_stream *stm) { return (int64_t)fz_read_uint64(ctx, stm); }
  312. int16_t fz_read_int16_le(fz_context *ctx, fz_stream *stm) { return (int16_t)fz_read_uint16_le(ctx, stm); }
  313. int32_t fz_read_int32_le(fz_context *ctx, fz_stream *stm) { return (int32_t)fz_read_uint32_le(ctx, stm); }
  314. int64_t fz_read_int64_le(fz_context *ctx, fz_stream *stm) { return (int64_t)fz_read_uint64_le(ctx, stm); }
  315. float
  316. fz_read_float_le(fz_context *ctx, fz_stream *stm)
  317. {
  318. union {float f;int32_t i;} u;
  319. u.i = fz_read_int32_le(ctx, stm);
  320. return u.f;
  321. }
  322. float
  323. fz_read_float(fz_context *ctx, fz_stream *stm)
  324. {
  325. union {float f;int32_t i;} u;
  326. u.i = fz_read_int32(ctx, stm);
  327. return u.f;
  328. }
  329. void fz_read_string(fz_context *ctx, fz_stream *stm, char *buffer, int len)
  330. {
  331. int c;
  332. do
  333. {
  334. if (len <= 0)
  335. fz_throw(ctx, FZ_ERROR_FORMAT, "Buffer overrun reading null terminated string");
  336. c = fz_read_byte(ctx, stm);
  337. if (c == EOF)
  338. fz_throw(ctx, FZ_ERROR_FORMAT, "EOF reading null terminated string");
  339. *buffer++ = c;
  340. len--;
  341. }
  342. while (c != 0);
  343. }
  344. int fz_read_rune(fz_context *ctx, fz_stream *in)
  345. {
  346. uint8_t d, e, f;
  347. int x;
  348. int c = fz_read_byte(ctx, in);
  349. if (c == EOF)
  350. return EOF;
  351. if ((c & 0xF8) == 0xF0)
  352. {
  353. x = fz_read_byte(ctx, in);
  354. if (x == EOF)
  355. return 0xFFFD;
  356. d = (uint8_t)x;
  357. c = (c & 7)<<18;
  358. if ((d & 0xC0) == 0x80)
  359. {
  360. x = fz_read_byte(ctx, in);
  361. if (x == EOF)
  362. return 0xFFFD;
  363. e = (uint8_t)x;
  364. c += (d & 0x3f)<<12;
  365. if ((e & 0xC0) == 0x80)
  366. {
  367. x = fz_read_byte(ctx, in);
  368. if (x == EOF)
  369. return 0xFFFD;
  370. f = (uint8_t)x;
  371. c += (e & 0x3f)<<6;
  372. if ((f & 0xC0) == 0x80)
  373. {
  374. c += f & 0x3f;
  375. }
  376. else
  377. goto bad_byte;
  378. }
  379. else
  380. goto bad_byte;
  381. }
  382. else
  383. goto bad_byte;
  384. }
  385. else if ((c & 0xF0) == 0xE0)
  386. {
  387. x = fz_read_byte(ctx, in);
  388. if (x == EOF)
  389. return 0xFFFD;
  390. d = (uint8_t)x;
  391. c = (c & 15)<<12;
  392. if ((d & 0xC0) == 0x80)
  393. {
  394. x = fz_read_byte(ctx, in);
  395. if (x == EOF)
  396. return 0xFFFD;
  397. e = (uint8_t)x;
  398. c += (d & 0x3f)<<6;
  399. if ((e & 0xC0) == 0x80)
  400. {
  401. c += e & 0x3f;
  402. }
  403. else
  404. goto bad_byte;
  405. }
  406. else
  407. goto bad_byte;
  408. }
  409. else if ((c & 0xE0) == 0xC0)
  410. {
  411. x = fz_read_byte(ctx, in);
  412. if (x == EOF)
  413. return 0xFFFD;
  414. d = (uint8_t)x;
  415. c = (c & 31)<<6;
  416. if ((d & 0xC0) == 0x80)
  417. {
  418. c += d & 0x3f;
  419. }
  420. else
  421. fz_unread_byte(ctx, in);
  422. }
  423. else if ((c & 0xc0) == 0x80)
  424. {
  425. bad_byte:
  426. fz_unread_byte(ctx, in);
  427. return 0xFFFD;
  428. }
  429. return c;
  430. }
  431. int fz_read_utf16_le(fz_context *ctx, fz_stream *stm)
  432. {
  433. int c = fz_read_byte(ctx, stm);
  434. int d, e;
  435. if (c == EOF)
  436. return EOF;
  437. d = fz_read_byte(ctx, stm);
  438. if (d == EOF)
  439. return c; /* Might be wrong, but the best we can do. */
  440. c |= d<<8;
  441. /* If it's not a surrogate, we're done. */
  442. if (c < 0xd800 || c >= 0xe000)
  443. return c;
  444. /* It *ought* to be a leading (high) surrogate. If it's not,
  445. * then we're in trouble. */
  446. if (c >= 0xdc00)
  447. return 0x10000 + c - 0xdc00; /* Imagine the high surrogate was 0. */
  448. /* Our stream abstraction only enables us to peek 1 byte ahead, and we'd need
  449. * 2 to tell if it was a low surrogate. Just assume it is. */
  450. d = fz_read_byte(ctx, stm);
  451. if (d == EOF)
  452. {
  453. /* Failure! Imagine the trailing surrogate was 0. */
  454. return 0x10000 + ((c - 0xd800)<<10);
  455. }
  456. e = fz_read_byte(ctx, stm);
  457. if (e == EOF)
  458. {
  459. e = 0xDC; /* Fudge a low surrogate */
  460. }
  461. d |= e<<8;
  462. if (d < 0xdc00 || d >= 0xe000)
  463. {
  464. /* Bad encoding! This is nasty, because we've eaten 2 bytes from the
  465. * stream which ideally we would not have. Serves you right for
  466. * having a broken stream. */
  467. return 0x10000 + ((c - 0xd800)<<10); /* Imagine the high surrogate was 0. */
  468. }
  469. c -= 0xd800;
  470. d -= 0xdc00;
  471. return 0x10000 + (c<<10) + d;
  472. }
  473. int fz_read_utf16_be(fz_context *ctx, fz_stream *stm)
  474. {
  475. int c = fz_read_byte(ctx, stm);
  476. int d, e;
  477. if (c == EOF)
  478. return EOF;
  479. d = fz_read_byte(ctx, stm);
  480. if (d == EOF)
  481. return c; /* Might be wrong, but the best we can do. */
  482. c = (c<<8) | d;
  483. /* If it's not a surrogate, we're done. */
  484. if (c < 0xd800 || c >= 0xe000)
  485. return c;
  486. /* It *ought* to be a leading (high) surrogate. If it's not,
  487. * then we're in trouble. */
  488. if (c >= 0xdc00)
  489. return 0x10000 + c - 0xdc00; /* Imagine the high surrogate was 0. */
  490. /* Our stream abstraction only enables us to peek 1 byte ahead, and we'd need
  491. * 2 to tell if it was a low surrogate. Just assume it is. */
  492. d = fz_read_byte(ctx, stm);
  493. if (d == EOF)
  494. {
  495. /* Failure! Imagine the trailing surrogate was 0. */
  496. return 0x10000 + ((c - 0xd800)<<10);
  497. }
  498. /* The next byte ought to be the start of a trailing (low) surrogate. */
  499. if (d < 0xdc || d >= 0xe0)
  500. {
  501. /* It wasn't. Put the byte back. */
  502. fz_unread_byte(ctx, stm);
  503. d = 0xdc00; /* Pretend it was a 0 surrogate. */
  504. }
  505. else
  506. {
  507. e = fz_read_byte(ctx, stm);
  508. if (e == EOF)
  509. {
  510. e = 0;
  511. }
  512. d = (d<<8) | e;
  513. }
  514. c -= 0xd800;
  515. d -= 0xdc00;
  516. return 0x10000 + (c<<10) + d;
  517. }