| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591 |
- // Copyright (C) 2004-2021 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #include <string.h>
- #define MIN_BOMB (100 << 20)
- size_t
- fz_read(fz_context *ctx, fz_stream *stm, unsigned char *buf, size_t len)
- {
- size_t count, n;
- count = 0;
- do
- {
- n = fz_available(ctx, stm, len);
- if (n > len)
- n = len;
- if (n == 0)
- break;
- memcpy(buf, stm->rp, n);
- stm->rp += n;
- buf += n;
- count += n;
- len -= n;
- }
- while (len > 0);
- return count;
- }
- static unsigned char skip_buf[4096];
- size_t fz_skip(fz_context *ctx, fz_stream *stm, size_t len)
- {
- size_t count, l, total = 0;
- while (len)
- {
- l = len;
- if (l > sizeof(skip_buf))
- l = sizeof(skip_buf);
- count = fz_read(ctx, stm, skip_buf, l);
- total += count;
- if (count < l)
- break;
- len -= count;
- }
- return total;
- }
- fz_buffer *
- fz_read_all(fz_context *ctx, fz_stream *stm, size_t initial)
- {
- return fz_read_best(ctx, stm, initial, NULL, 0);
- }
- fz_buffer *
- fz_read_best(fz_context *ctx, fz_stream *stm, size_t initial, int *truncated, size_t worst_case)
- {
- fz_buffer *buf = NULL;
- int check_bomb = (initial > 0);
- size_t n;
- fz_var(buf);
- if (truncated)
- *truncated = 0;
- if (worst_case == 0)
- worst_case = initial * 200;
- if (worst_case < MIN_BOMB)
- worst_case = MIN_BOMB;
- fz_try(ctx)
- {
- if (initial < 1024)
- initial = 1024;
- buf = fz_new_buffer(ctx, initial+1);
- while (1)
- {
- if (buf->len == buf->cap)
- fz_grow_buffer(ctx, buf);
- if (check_bomb && buf->len > worst_case)
- fz_throw(ctx, FZ_ERROR_FORMAT, "compression bomb detected");
- n = fz_read(ctx, stm, buf->data + buf->len, buf->cap - buf->len);
- if (n == 0)
- break;
- buf->len += n;
- }
- }
- fz_catch(ctx)
- {
- if (fz_caught(ctx) == FZ_ERROR_TRYLATER || fz_caught(ctx) == FZ_ERROR_SYSTEM)
- {
- fz_drop_buffer(ctx, buf);
- fz_rethrow(ctx);
- }
- if (truncated)
- {
- *truncated = 1;
- fz_report_error(ctx);
- }
- else
- {
- fz_drop_buffer(ctx, buf);
- fz_rethrow(ctx);
- }
- }
- return buf;
- }
- char *
- fz_read_line(fz_context *ctx, fz_stream *stm, char *mem, size_t n)
- {
- char *s = mem;
- int c = EOF;
- while (n > 1)
- {
- c = fz_read_byte(ctx, stm);
- if (c == EOF)
- break;
- if (c == '\r') {
- c = fz_peek_byte(ctx, stm);
- if (c == '\n')
- fz_read_byte(ctx, stm);
- break;
- }
- if (c == '\n')
- break;
- *s++ = c;
- n--;
- }
- if (n)
- *s = '\0';
- return (s == mem && c == EOF) ? NULL : mem;
- }
- int64_t
- fz_tell(fz_context *ctx, fz_stream *stm)
- {
- return stm->pos - (stm->wp - stm->rp);
- }
- void
- fz_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence)
- {
- stm->avail = 0; /* Reset bit reading */
- if (stm->seek)
- {
- if (whence == 1)
- {
- offset += fz_tell(ctx, stm);
- whence = 0;
- }
- stm->seek(ctx, stm, offset, whence);
- stm->eof = 0;
- }
- else if (whence != 2)
- {
- if (whence == 0)
- offset -= fz_tell(ctx, stm);
- if (offset < 0)
- fz_warn(ctx, "cannot seek backwards");
- /* dog slow, but rare enough */
- while (offset-- > 0)
- {
- if (fz_read_byte(ctx, stm) == EOF)
- {
- fz_warn(ctx, "seek failed");
- break;
- }
- }
- }
- else
- fz_warn(ctx, "cannot seek");
- }
- fz_buffer *
- fz_read_file(fz_context *ctx, const char *filename)
- {
- fz_stream *stm;
- fz_buffer *buf = NULL;
- fz_var(buf);
- stm = fz_open_file(ctx, filename);
- fz_try(ctx)
- {
- buf = fz_read_all(ctx, stm, 0);
- }
- fz_always(ctx)
- {
- fz_drop_stream(ctx, stm);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- return buf;
- }
- fz_buffer *
- fz_try_read_file(fz_context *ctx, const char *filename)
- {
- fz_stream *stm;
- fz_buffer *buf = NULL;
- fz_var(buf);
- stm = fz_try_open_file(ctx, filename);
- if (stm == NULL)
- return NULL;
- fz_try(ctx)
- {
- buf = fz_read_all(ctx, stm, 0);
- }
- fz_always(ctx)
- {
- fz_drop_stream(ctx, stm);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- return buf;
- }
- uint16_t fz_read_uint16(fz_context *ctx, fz_stream *stm)
- {
- int a = fz_read_byte(ctx, stm);
- int b = fz_read_byte(ctx, stm);
- if (a == EOF || b == EOF)
- fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int16");
- return ((uint16_t)a<<8) | ((uint16_t)b);
- }
- uint32_t fz_read_uint24(fz_context *ctx, fz_stream *stm)
- {
- int a = fz_read_byte(ctx, stm);
- int b = fz_read_byte(ctx, stm);
- int c = fz_read_byte(ctx, stm);
- if (a == EOF || b == EOF || c == EOF)
- fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int24");
- return ((uint32_t)a<<16) | ((uint32_t)b<<8) | ((uint32_t)c);
- }
- uint32_t fz_read_uint32(fz_context *ctx, fz_stream *stm)
- {
- int a = fz_read_byte(ctx, stm);
- int b = fz_read_byte(ctx, stm);
- int c = fz_read_byte(ctx, stm);
- int d = fz_read_byte(ctx, stm);
- if (a == EOF || b == EOF || c == EOF || d == EOF)
- fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int32");
- return ((uint32_t)a<<24) | ((uint32_t)b<<16) | ((uint32_t)c<<8) | ((uint32_t)d);
- }
- uint64_t fz_read_uint64(fz_context *ctx, fz_stream *stm)
- {
- int a = fz_read_byte(ctx, stm);
- int b = fz_read_byte(ctx, stm);
- int c = fz_read_byte(ctx, stm);
- int d = fz_read_byte(ctx, stm);
- int e = fz_read_byte(ctx, stm);
- int f = fz_read_byte(ctx, stm);
- int g = fz_read_byte(ctx, stm);
- int h = fz_read_byte(ctx, stm);
- if (a == EOF || b == EOF || c == EOF || d == EOF || e == EOF || f == EOF || g == EOF || h == EOF)
- fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int64");
- return ((uint64_t)a<<56) | ((uint64_t)b<<48) | ((uint64_t)c<<40) | ((uint64_t)d<<32)
- | ((uint64_t)e<<24) | ((uint64_t)f<<16) | ((uint64_t)g<<8) | ((uint64_t)h);
- }
- uint16_t fz_read_uint16_le(fz_context *ctx, fz_stream *stm)
- {
- int a = fz_read_byte(ctx, stm);
- int b = fz_read_byte(ctx, stm);
- if (a == EOF || b == EOF)
- fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int16");
- return ((uint16_t)a) | ((uint16_t)b<<8);
- }
- uint32_t fz_read_uint24_le(fz_context *ctx, fz_stream *stm)
- {
- int a = fz_read_byte(ctx, stm);
- int b = fz_read_byte(ctx, stm);
- int c = fz_read_byte(ctx, stm);
- if (a == EOF || b == EOF || c == EOF)
- fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int24");
- return ((uint32_t)a) | ((uint32_t)b<<8) | ((uint32_t)c<<16);
- }
- uint32_t fz_read_uint32_le(fz_context *ctx, fz_stream *stm)
- {
- int a = fz_read_byte(ctx, stm);
- int b = fz_read_byte(ctx, stm);
- int c = fz_read_byte(ctx, stm);
- int d = fz_read_byte(ctx, stm);
- if (a == EOF || b == EOF || c == EOF || d == EOF)
- fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int32");
- return ((uint32_t)a) | ((uint32_t)b<<8) | ((uint32_t)c<<16) | ((uint32_t)d<<24);
- }
- uint64_t fz_read_uint64_le(fz_context *ctx, fz_stream *stm)
- {
- int a = fz_read_byte(ctx, stm);
- int b = fz_read_byte(ctx, stm);
- int c = fz_read_byte(ctx, stm);
- int d = fz_read_byte(ctx, stm);
- int e = fz_read_byte(ctx, stm);
- int f = fz_read_byte(ctx, stm);
- int g = fz_read_byte(ctx, stm);
- int h = fz_read_byte(ctx, stm);
- if (a == EOF || b == EOF || c == EOF || d == EOF || e == EOF || f == EOF || g == EOF || h == EOF)
- fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of file in int64");
- return ((uint64_t)a) | ((uint64_t)b<<8) | ((uint64_t)c<<16) | ((uint64_t)d<<24)
- | ((uint64_t)e<<32) | ((uint64_t)f<<40) | ((uint64_t)g<<48) | ((uint64_t)h<<56);
- }
- int16_t fz_read_int16(fz_context *ctx, fz_stream *stm) { return (int16_t)fz_read_uint16(ctx, stm); }
- int32_t fz_read_int32(fz_context *ctx, fz_stream *stm) { return (int32_t)fz_read_uint32(ctx, stm); }
- int64_t fz_read_int64(fz_context *ctx, fz_stream *stm) { return (int64_t)fz_read_uint64(ctx, stm); }
- int16_t fz_read_int16_le(fz_context *ctx, fz_stream *stm) { return (int16_t)fz_read_uint16_le(ctx, stm); }
- int32_t fz_read_int32_le(fz_context *ctx, fz_stream *stm) { return (int32_t)fz_read_uint32_le(ctx, stm); }
- int64_t fz_read_int64_le(fz_context *ctx, fz_stream *stm) { return (int64_t)fz_read_uint64_le(ctx, stm); }
- float
- fz_read_float_le(fz_context *ctx, fz_stream *stm)
- {
- union {float f;int32_t i;} u;
- u.i = fz_read_int32_le(ctx, stm);
- return u.f;
- }
- float
- fz_read_float(fz_context *ctx, fz_stream *stm)
- {
- union {float f;int32_t i;} u;
- u.i = fz_read_int32(ctx, stm);
- return u.f;
- }
- void fz_read_string(fz_context *ctx, fz_stream *stm, char *buffer, int len)
- {
- int c;
- do
- {
- if (len <= 0)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Buffer overrun reading null terminated string");
- c = fz_read_byte(ctx, stm);
- if (c == EOF)
- fz_throw(ctx, FZ_ERROR_FORMAT, "EOF reading null terminated string");
- *buffer++ = c;
- len--;
- }
- while (c != 0);
- }
- int fz_read_rune(fz_context *ctx, fz_stream *in)
- {
- uint8_t d, e, f;
- int x;
- int c = fz_read_byte(ctx, in);
- if (c == EOF)
- return EOF;
- if ((c & 0xF8) == 0xF0)
- {
- x = fz_read_byte(ctx, in);
- if (x == EOF)
- return 0xFFFD;
- d = (uint8_t)x;
- c = (c & 7)<<18;
- if ((d & 0xC0) == 0x80)
- {
- x = fz_read_byte(ctx, in);
- if (x == EOF)
- return 0xFFFD;
- e = (uint8_t)x;
- c += (d & 0x3f)<<12;
- if ((e & 0xC0) == 0x80)
- {
- x = fz_read_byte(ctx, in);
- if (x == EOF)
- return 0xFFFD;
- f = (uint8_t)x;
- c += (e & 0x3f)<<6;
- if ((f & 0xC0) == 0x80)
- {
- c += f & 0x3f;
- }
- else
- goto bad_byte;
- }
- else
- goto bad_byte;
- }
- else
- goto bad_byte;
- }
- else if ((c & 0xF0) == 0xE0)
- {
- x = fz_read_byte(ctx, in);
- if (x == EOF)
- return 0xFFFD;
- d = (uint8_t)x;
- c = (c & 15)<<12;
- if ((d & 0xC0) == 0x80)
- {
- x = fz_read_byte(ctx, in);
- if (x == EOF)
- return 0xFFFD;
- e = (uint8_t)x;
- c += (d & 0x3f)<<6;
- if ((e & 0xC0) == 0x80)
- {
- c += e & 0x3f;
- }
- else
- goto bad_byte;
- }
- else
- goto bad_byte;
- }
- else if ((c & 0xE0) == 0xC0)
- {
- x = fz_read_byte(ctx, in);
- if (x == EOF)
- return 0xFFFD;
- d = (uint8_t)x;
- c = (c & 31)<<6;
- if ((d & 0xC0) == 0x80)
- {
- c += d & 0x3f;
- }
- else
- fz_unread_byte(ctx, in);
- }
- else if ((c & 0xc0) == 0x80)
- {
- bad_byte:
- fz_unread_byte(ctx, in);
- return 0xFFFD;
- }
- return c;
- }
- int fz_read_utf16_le(fz_context *ctx, fz_stream *stm)
- {
- int c = fz_read_byte(ctx, stm);
- int d, e;
- if (c == EOF)
- return EOF;
- d = fz_read_byte(ctx, stm);
- if (d == EOF)
- return c; /* Might be wrong, but the best we can do. */
- c |= d<<8;
- /* If it's not a surrogate, we're done. */
- if (c < 0xd800 || c >= 0xe000)
- return c;
- /* It *ought* to be a leading (high) surrogate. If it's not,
- * then we're in trouble. */
- if (c >= 0xdc00)
- return 0x10000 + c - 0xdc00; /* Imagine the high surrogate was 0. */
- /* Our stream abstraction only enables us to peek 1 byte ahead, and we'd need
- * 2 to tell if it was a low surrogate. Just assume it is. */
- d = fz_read_byte(ctx, stm);
- if (d == EOF)
- {
- /* Failure! Imagine the trailing surrogate was 0. */
- return 0x10000 + ((c - 0xd800)<<10);
- }
- e = fz_read_byte(ctx, stm);
- if (e == EOF)
- {
- e = 0xDC; /* Fudge a low surrogate */
- }
- d |= e<<8;
- if (d < 0xdc00 || d >= 0xe000)
- {
- /* Bad encoding! This is nasty, because we've eaten 2 bytes from the
- * stream which ideally we would not have. Serves you right for
- * having a broken stream. */
- return 0x10000 + ((c - 0xd800)<<10); /* Imagine the high surrogate was 0. */
- }
- c -= 0xd800;
- d -= 0xdc00;
- return 0x10000 + (c<<10) + d;
- }
- int fz_read_utf16_be(fz_context *ctx, fz_stream *stm)
- {
- int c = fz_read_byte(ctx, stm);
- int d, e;
- if (c == EOF)
- return EOF;
- d = fz_read_byte(ctx, stm);
- if (d == EOF)
- return c; /* Might be wrong, but the best we can do. */
- c = (c<<8) | d;
- /* If it's not a surrogate, we're done. */
- if (c < 0xd800 || c >= 0xe000)
- return c;
- /* It *ought* to be a leading (high) surrogate. If it's not,
- * then we're in trouble. */
- if (c >= 0xdc00)
- return 0x10000 + c - 0xdc00; /* Imagine the high surrogate was 0. */
- /* Our stream abstraction only enables us to peek 1 byte ahead, and we'd need
- * 2 to tell if it was a low surrogate. Just assume it is. */
- d = fz_read_byte(ctx, stm);
- if (d == EOF)
- {
- /* Failure! Imagine the trailing surrogate was 0. */
- return 0x10000 + ((c - 0xd800)<<10);
- }
- /* The next byte ought to be the start of a trailing (low) surrogate. */
- if (d < 0xdc || d >= 0xe0)
- {
- /* It wasn't. Put the byte back. */
- fz_unread_byte(ctx, stm);
- d = 0xdc00; /* Pretend it was a 0 surrogate. */
- }
- else
- {
- e = fz_read_byte(ctx, stm);
- if (e == EOF)
- {
- e = 0;
- }
- d = (d<<8) | e;
- }
- c -= 0xd800;
- d -= 0xdc00;
- return 0x10000 + (c<<10) + d;
- }
|