| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979 |
- // Copyright (C) 2004-2021 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #include "mupdf/pdf.h"
- #include <string.h>
- #include <time.h>
- #ifdef _WIN32
- #define timegm _mkgmtime
- #endif
/* Locale-independent ASCII digit test; note that the argument is evaluated twice. */
#define isdigit(c) ((c) >= '0' && (c) <= '9')
- fz_rect
- pdf_to_rect(fz_context *ctx, pdf_obj *array)
- {
- if (!pdf_is_array(ctx, array))
- return fz_empty_rect;
- else
- {
- float a = pdf_array_get_real(ctx, array, 0);
- float b = pdf_array_get_real(ctx, array, 1);
- float c = pdf_array_get_real(ctx, array, 2);
- float d = pdf_array_get_real(ctx, array, 3);
- fz_rect r;
- r.x0 = fz_min(a, c);
- r.y0 = fz_min(b, d);
- r.x1 = fz_max(a, c);
- r.y1 = fz_max(b, d);
- return r;
- }
- }
- fz_quad
- pdf_to_quad(fz_context *ctx, pdf_obj *array, int offset)
- {
- fz_quad q;
- q.ul.x = pdf_array_get_real(ctx, array, offset+0);
- q.ul.y = pdf_array_get_real(ctx, array, offset+1);
- q.ur.x = pdf_array_get_real(ctx, array, offset+2);
- q.ur.y = pdf_array_get_real(ctx, array, offset+3);
- q.ll.x = pdf_array_get_real(ctx, array, offset+4);
- q.ll.y = pdf_array_get_real(ctx, array, offset+5);
- q.lr.x = pdf_array_get_real(ctx, array, offset+6);
- q.lr.y = pdf_array_get_real(ctx, array, offset+7);
- return q;
- }
- fz_point
- pdf_to_point(fz_context *ctx, pdf_obj *array, int offset)
- {
- fz_point p;
- p.x = pdf_array_get_real(ctx, array, offset+0);
- p.y = pdf_array_get_real(ctx, array, offset+1);
- return p;
- }
- fz_matrix
- pdf_to_matrix(fz_context *ctx, pdf_obj *array)
- {
- if (!pdf_is_array(ctx, array))
- return fz_identity;
- else
- {
- fz_matrix m;
- m.a = pdf_array_get_real(ctx, array, 0);
- m.b = pdf_array_get_real(ctx, array, 1);
- m.c = pdf_array_get_real(ctx, array, 2);
- m.d = pdf_array_get_real(ctx, array, 3);
- m.e = pdf_array_get_real(ctx, array, 4);
- m.f = pdf_array_get_real(ctx, array, 5);
- return m;
- }
- }
- char *
- pdf_format_date(fz_context *ctx, int64_t time, char *s, size_t n)
- {
- time_t secs = time;
- #ifdef _POSIX_SOURCE
- struct tm tmbuf, *tm = gmtime_r(&secs, &tmbuf);
- #else
- struct tm *tm = gmtime(&secs);
- #endif
- if (time < 0 || !tm || !strftime(s, n, "D:%Y%m%d%H%M%SZ", tm))
- return NULL;
- return s;
- }
- int64_t
- pdf_parse_date(fz_context *ctx, const char *s)
- {
- int tz_sign, tz_hour, tz_min, tz_adj;
- struct tm tm;
- time_t utc;
- if (!s[0])
- return -1;
- memset(&tm, 0, sizeof tm);
- tm.tm_mday = 1;
- tz_sign = 1;
- tz_hour = 0;
- tz_min = 0;
- if (s[0] == 'D' && s[1] == ':')
- s += 2;
- if (!isdigit(s[0]) || !isdigit(s[1]) || !isdigit(s[2]) || !isdigit(s[3]))
- {
- fz_warn(ctx, "invalid date format (missing year)");
- return -1;
- }
- tm.tm_year = (s[0]-'0')*1000 + (s[1]-'0')*100 + (s[2]-'0')*10 + (s[3]-'0') - 1900;
- s += 4;
- if (tm.tm_year < 70)
- {
- fz_warn(ctx, "invalid date (year out of range)");
- return -1;
- }
- if (isdigit(s[0]) && isdigit(s[1]))
- {
- tm.tm_mon = (s[0]-'0')*10 + (s[1]-'0') - 1; /* month is 0-11 in struct tm */
- s += 2;
- if (isdigit(s[0]) && isdigit(s[1]))
- {
- tm.tm_mday = (s[0]-'0')*10 + (s[1]-'0');
- s += 2;
- if (isdigit(s[0]) && isdigit(s[1]))
- {
- tm.tm_hour = (s[0]-'0')*10 + (s[1]-'0');
- s += 2;
- if (isdigit(s[0]) && isdigit(s[1]))
- {
- tm.tm_min = (s[0]-'0')*10 + (s[1]-'0');
- s += 2;
- if (isdigit(s[0]) && isdigit(s[1]))
- {
- tm.tm_sec = (s[0]-'0')*10 + (s[1]-'0');
- s += 2;
- }
- }
- }
- }
- }
- if (tm.tm_sec > 60 || tm.tm_min > 59 || tm.tm_hour > 23 || tm.tm_mday > 31 || tm.tm_mon > 11)
- {
- fz_warn(ctx, "invalid date (a field is out of range)");
- return -1;
- }
- if (s[0] == 'Z')
- {
- if (s[1] == '0' && s[2] == '0')
- {
- s += 3;
- if (s[0] == '\'' && s[1] == '0' && s[2] == '0')
- {
- s += 3;
- if (s[0] == '\'')
- s += 1;
- }
- }
- else
- {
- s += 1;
- }
- }
- else if ((s[0] == '-' || s[0] == '+') && isdigit(s[1]) && isdigit(s[2]))
- {
- tz_sign = (s[0] == '-') ? -1 : 1;
- tz_hour = (s[1]-'0')*10 + (s[2]-'0');
- s += 3;
- if (s[0] == '\'' && isdigit(s[1]) && isdigit(s[2]))
- {
- tz_min = (s[1]-'0')*10 + (s[2]-'0');
- s += 3;
- if (s[0] == '\'')
- s += 1;
- }
- }
- /* PDF is based on ISO/IEC 8824 which limits time zones from -15 to +16. */
- if (tz_sign < 0 && (tz_hour > 15 || (tz_hour == 15 && tz_min > 0)))
- {
- fz_warn(ctx, "invalid date format (time zone out of range)");
- return -1;
- }
- if (tz_sign > 0 && (tz_hour > 16 || (tz_hour == 16 && tz_min > 0)))
- {
- fz_warn(ctx, "invalid date format (time zone out of range)");
- return -1;
- }
- if (s[0] != 0)
- fz_warn(ctx, "invalid date format (garbage at end)");
- utc = timegm(&tm);
- if (utc == (time_t)-1)
- {
- fz_warn(ctx, "date overflow error");
- return -1;
- }
- tz_adj = tz_sign * (tz_hour * 3600 + tz_min * 60);
- return utc - tz_adj;
- }
- int64_t
- pdf_to_date(fz_context *ctx, pdf_obj *time)
- {
- return pdf_parse_date(ctx, pdf_to_str_buf(ctx, time));
- }
- static int
- rune_from_utf16be(int *out, const unsigned char *s, const unsigned char *end)
- {
- if (s + 2 <= end)
- {
- int a = s[0] << 8 | s[1];
- if (a >= 0xD800 && a <= 0xDFFF && s + 4 <= end)
- {
- int b = s[2] << 8 | s[3];
- *out = ((a - 0xD800) << 10) + (b - 0xDC00) + 0x10000;
- return 4;
- }
- *out = a;
- return 2;
- }
- *out = FZ_REPLACEMENT_CHARACTER;
- return 1;
- }
- static int
- rune_from_utf16le(int *out, const unsigned char *s, const unsigned char *end)
- {
- if (s + 2 <= end)
- {
- int a = s[1] << 8 | s[0];
- if (a >= 0xD800 && a <= 0xDFFF && s + 4 <= end)
- {
- int b = s[3] << 8 | s[2];
- *out = ((a - 0xD800) << 10) + (b - 0xDC00) + 0x10000;
- return 4;
- }
- *out = a;
- return 2;
- }
- *out = FZ_REPLACEMENT_CHARACTER;
- return 1;
- }
/*
	Detect a language escape sequence at byte offset i of the
	little-endian UTF-16 data s[0..n).

	A sequence opens with the code unit U+001B (bytes 1B 00) and closes
	with another U+001B either 4 or 6 bytes after the start.

	Returns the number of bytes to skip (6 or 8), or 0 if there is no
	escape sequence at this position.
*/
static size_t
skip_language_code_utf16le(const unsigned char *s, size_t n, size_t i)
{
	/* Even the short form needs six bytes. */
	if (i + 6 > n)
		return 0;

	/* Opening escape. */
	if (s[i+1] != 0 || s[i+0] != 27)
		return 0;

	/* Closing escape after a two byte code. */
	if (s[i+5] == 0 && s[i+4] == 27)
		return 6;

	/* Closing escape after a four byte code. */
	if (i + 8 <= n && s[i+7] == 0 && s[i+6] == 27)
		return 8;

	return 0;
}
/*
	Detect a language escape sequence at byte offset i of the
	big-endian UTF-16 data s[0..n).

	A sequence opens with the code unit U+001B (bytes 00 1B) and closes
	with another U+001B either 4 or 6 bytes after the start.

	Returns the number of bytes to skip (6 or 8), or 0 if there is no
	escape sequence at this position.
*/
static size_t
skip_language_code_utf16be(const unsigned char *s, size_t n, size_t i)
{
	/* Even the short form needs six bytes. */
	if (i + 6 > n)
		return 0;

	/* Opening escape. */
	if (s[i+0] != 0 || s[i+1] != 27)
		return 0;

	/* Closing escape after a two byte code. */
	if (s[i+4] == 0 && s[i+5] == 27)
		return 6;

	/* Closing escape after a four byte code. */
	if (i + 8 <= n && s[i+6] == 0 && s[i+7] == 27)
		return 8;

	return 0;
}
/*
	Detect a language escape sequence at byte offset i of the UTF-8
	data s[0..n).

	Mirrors the UTF-16 variants: a sequence is an ESC byte (27), a two
	or four byte code, and a closing ESC byte. The closing ESC is part
	of the sequence and is skipped too.

	Returns the number of bytes to skip (4 or 6), or 0 if there is no
	complete escape sequence at this position. No byte at or beyond
	index n is ever read.
*/
static size_t
skip_language_code_utf8(const unsigned char *s, size_t n, size_t i)
{
	/* ESC + 2 byte code + ESC */
	if (i + 4 <= n && s[i] == 27 && s[i+3] == 27)
		return 4;

	/* ESC + 4 byte code + ESC */
	if (i + 6 <= n && s[i] == 27 && s[i+5] == 27)
		return 6;

	return 0;
}
/*
	Structural check of whether the bytes in [s, end) form UTF-8
	sequences: every lead byte must be below 0x80 or in 0xC0-0xF4, and
	must be followed by the right number of continuation bytes (10xxxxxx).

	Only the lead/continuation structure is checked; the decoded values
	are not examined.

	Returns 1 if the whole range passes (including an empty range),
	0 otherwise.
*/
static int
is_valid_utf8(const unsigned char *s, const unsigned char *end)
{
	while (s < end)
	{
		unsigned char lead = *s++;
		int trail;

		if (lead < 0x80)
			trail = 0;
		else if (lead < 0xC0)
			return 0; /* stray continuation byte */
		else if (lead < 0xE0)
			trail = 1;
		else if (lead < 0xF0)
			trail = 2;
		else if (lead < 0xF5)
			trail = 3;
		else
			return 0; /* not a lead byte */

		for (; trail > 0; --trail)
		{
			if (s >= end || (*s & 0xC0) != 0x80)
				return 0;
			++s;
		}
	}
	return 1;
}
- char *
- pdf_new_utf8_from_pdf_string(fz_context *ctx, const char *ssrcptr, size_t srclen)
- {
- const unsigned char *srcptr = (const unsigned char*)ssrcptr;
- char *dstptr, *dst;
- size_t dstlen = 0;
- int ucs;
- size_t i, n;
- /* UTF-16BE */
- if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255)
- {
- i = 2;
- while (i + 2 <= srclen)
- {
- n = skip_language_code_utf16be(srcptr, srclen, i);
- if (n)
- i += n;
- else
- {
- i += rune_from_utf16be(&ucs, srcptr + i, srcptr + srclen);
- dstlen += fz_runelen(ucs);
- }
- }
- dstptr = dst = Memento_label(fz_malloc(ctx, dstlen + 1), "utf8_from_utf16be");
- i = 2;
- while (i + 2 <= srclen)
- {
- n = skip_language_code_utf16be(srcptr, srclen, i);
- if (n)
- i += n;
- else
- {
- i += rune_from_utf16be(&ucs, srcptr + i, srcptr + srclen);
- dstptr += fz_runetochar(dstptr, ucs);
- }
- }
- }
- /* UTF-16LE */
- else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254)
- {
- i = 2;
- while (i + 2 <= srclen)
- {
- n = skip_language_code_utf16le(srcptr, srclen, i);
- if (n)
- i += n;
- else
- {
- i += rune_from_utf16le(&ucs, srcptr + i, srcptr + srclen);
- dstlen += fz_runelen(ucs);
- }
- }
- dstptr = dst = Memento_label(fz_malloc(ctx, dstlen + 1), "utf8_from_utf16le");
- i = 2;
- while (i + 2 <= srclen)
- {
- n = skip_language_code_utf16le(srcptr, srclen, i);
- if (n)
- i += n;
- else
- {
- i += rune_from_utf16le(&ucs, srcptr + i, srcptr + srclen);
- dstptr += fz_runetochar(dstptr, ucs);
- }
- }
- }
- /* UTF-8 */
- else if (srclen >= 3 && srcptr[0] == 239 && srcptr[1] == 187 && srcptr[2] == 191)
- {
- i = 3;
- while (i < srclen)
- {
- n = skip_language_code_utf8(srcptr, srclen, i);
- if (n)
- i += n;
- else
- {
- i += 1;
- dstlen += 1;
- }
- }
- dstptr = dst = Memento_label(fz_malloc(ctx, dstlen + 1), "utf8_from_utf8");
- i = 3;
- while (i < srclen)
- {
- n = skip_language_code_utf8(srcptr, srclen, i);
- if (n)
- i += n;
- else
- *dstptr++ = srcptr[i++];
- }
- }
- /* Detect UTF-8 strings that aren't marked with a BOM */
- else if (is_valid_utf8(srcptr, srcptr + srclen))
- {
- dst = Memento_label(fz_malloc(ctx, srclen + 1), "utf8_from_guess");
- memcpy(dst, srcptr, srclen);
- dstptr = dst + srclen;
- }
- /* PDFDocEncoding */
- else
- {
- for (i = 0; i < srclen; i++)
- dstlen += fz_runelen(fz_unicode_from_pdf_doc_encoding[srcptr[i]]);
- dstptr = dst = Memento_label(fz_malloc(ctx, dstlen + 1), "utf8_from_pdfdocenc");
- for (i = 0; i < srclen; i++)
- {
- ucs = fz_unicode_from_pdf_doc_encoding[srcptr[i]];
- dstptr += fz_runetochar(dstptr, ucs);
- }
- }
- *dstptr = 0;
- return dst;
- }
- char *
- pdf_new_utf8_from_pdf_string_obj(fz_context *ctx, pdf_obj *src)
- {
- const char *srcptr;
- size_t srclen;
- srcptr = pdf_to_string(ctx, src, &srclen);
- return pdf_new_utf8_from_pdf_string(ctx, srcptr, srclen);
- }
- char *
- pdf_new_utf8_from_pdf_stream_obj(fz_context *ctx, pdf_obj *src)
- {
- fz_buffer *stmbuf;
- char *srcptr;
- size_t srclen;
- char *dst = NULL;
- stmbuf = pdf_load_stream(ctx, src);
- srclen = fz_buffer_storage(ctx, stmbuf, (unsigned char **)&srcptr);
- fz_try(ctx)
- dst = pdf_new_utf8_from_pdf_string(ctx, srcptr, srclen);
- fz_always(ctx)
- fz_drop_buffer(ctx, stmbuf);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return dst;
- }
- char *
- pdf_load_stream_or_string_as_utf8(fz_context *ctx, pdf_obj *src)
- {
- if (pdf_is_stream(ctx, src))
- return pdf_new_utf8_from_pdf_stream_obj(ctx, src);
- return pdf_new_utf8_from_pdf_string_obj(ctx, src);
- }
- static pdf_obj *
- pdf_new_text_string_utf16be(fz_context *ctx, const char *s)
- {
- const char *ss;
- int c, i, n, a, b;
- unsigned char *p;
- pdf_obj *obj;
- ss = s;
- n = 0;
- while (*ss)
- {
- ss += fz_chartorune(&c, ss);
- n += (c >= 0x10000) ? 2 : 1;
- }
- p = fz_malloc(ctx, n * 2 + 2);
- i = 0;
- p[i++] = 254;
- p[i++] = 255;
- while (*s)
- {
- s += fz_chartorune(&c, s);
- if (c >= 0x10000)
- {
- a = (((c - 0x10000) >> 10) & 0x3ff) + 0xD800;
- p[i++] = (a>>8) & 0xff;
- p[i++] = (a) & 0xff;
- b = (((c - 0x10000)) & 0x3ff) + 0xDC00;
- p[i++] = (b>>8) & 0xff;
- p[i++] = (b) & 0xff;
- }
- else
- {
- p[i++] = (c>>8) & 0xff;
- p[i++] = (c) & 0xff;
- }
- }
- fz_try(ctx)
- obj = pdf_new_string(ctx, (char*)p, i);
- fz_always(ctx)
- fz_free(ctx, p);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return obj;
- }
- pdf_obj *
- pdf_new_text_string(fz_context *ctx, const char *s)
- {
- int i = 0;
- while (s[i] != 0)
- {
- if (((unsigned char)s[i]) >= 128)
- return pdf_new_text_string_utf16be(ctx, s);
- ++i;
- }
- return pdf_new_string(ctx, s, i);
- }
- pdf_obj *
- pdf_parse_array(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
- {
- pdf_obj *ary = NULL;
- pdf_obj *obj = NULL;
- int64_t a = 0, b = 0, n = 0;
- pdf_token tok;
- pdf_obj *op = NULL;
- fz_var(obj);
- ary = pdf_new_array(ctx, doc, 4);
- fz_try(ctx)
- {
- while (1)
- {
- tok = pdf_lex(ctx, file, buf);
- if (tok != PDF_TOK_INT && tok != PDF_TOK_R)
- {
- if (n > 0)
- pdf_array_push_int(ctx, ary, a);
- if (n > 1)
- pdf_array_push_int(ctx, ary, b);
- n = 0;
- }
- if (tok == PDF_TOK_INT && n == 2)
- {
- pdf_array_push_int(ctx, ary, a);
- a = b;
- n --;
- }
- switch (tok)
- {
- case PDF_TOK_EOF:
- fz_throw(ctx, FZ_ERROR_SYNTAX, "array not closed before end of file");
- case PDF_TOK_CLOSE_ARRAY:
- op = ary;
- goto end;
- case PDF_TOK_INT:
- if (n == 0)
- a = buf->i;
- if (n == 1)
- b = buf->i;
- n ++;
- break;
- case PDF_TOK_R:
- if (n != 2)
- fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot parse indirect reference in array");
- pdf_array_push_drop(ctx, ary, pdf_new_indirect(ctx, doc, a, b));
- n = 0;
- break;
- case PDF_TOK_OPEN_ARRAY:
- obj = pdf_parse_array(ctx, doc, file, buf);
- pdf_array_push_drop(ctx, ary, obj);
- break;
- case PDF_TOK_OPEN_DICT:
- obj = pdf_parse_dict(ctx, doc, file, buf);
- pdf_array_push_drop(ctx, ary, obj);
- break;
- case PDF_TOK_NAME:
- pdf_array_push_name(ctx, ary, buf->scratch);
- break;
- case PDF_TOK_REAL:
- pdf_array_push_real(ctx, ary, buf->f);
- break;
- case PDF_TOK_STRING:
- pdf_array_push_string(ctx, ary, buf->scratch, buf->len);
- break;
- case PDF_TOK_TRUE:
- pdf_array_push_bool(ctx, ary, 1);
- break;
- case PDF_TOK_FALSE:
- pdf_array_push_bool(ctx, ary, 0);
- break;
- case PDF_TOK_NULL:
- pdf_array_push(ctx, ary, PDF_NULL);
- break;
- default:
- pdf_array_push(ctx, ary, PDF_NULL);
- break;
- }
- }
- end:
- {}
- }
- fz_catch(ctx)
- {
- pdf_drop_obj(ctx, ary);
- fz_rethrow(ctx);
- }
- return op;
- }
- pdf_obj *
- pdf_parse_dict(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
- {
- pdf_obj *dict;
- pdf_obj *key = NULL;
- pdf_obj *val = NULL;
- pdf_token tok;
- int64_t a, b;
- dict = pdf_new_dict(ctx, doc, 8);
- fz_var(key);
- fz_var(val);
- fz_try(ctx)
- {
- while (1)
- {
- tok = pdf_lex(ctx, file, buf);
- skip:
- if (tok == PDF_TOK_CLOSE_DICT)
- break;
- /* for BI .. ID .. EI in content streams */
- if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))
- break;
- if (tok != PDF_TOK_NAME)
- fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid key in dict");
- key = pdf_new_name(ctx, buf->scratch);
- tok = pdf_lex(ctx, file, buf);
- switch (tok)
- {
- case PDF_TOK_OPEN_ARRAY:
- val = pdf_parse_array(ctx, doc, file, buf);
- break;
- case PDF_TOK_OPEN_DICT:
- val = pdf_parse_dict(ctx, doc, file, buf);
- break;
- case PDF_TOK_NAME: val = pdf_new_name(ctx, buf->scratch); break;
- case PDF_TOK_REAL: val = pdf_new_real(ctx, buf->f); break;
- case PDF_TOK_STRING: val = pdf_new_string(ctx, buf->scratch, buf->len); break;
- case PDF_TOK_TRUE: val = PDF_TRUE; break;
- case PDF_TOK_FALSE: val = PDF_FALSE; break;
- case PDF_TOK_NULL: val = PDF_NULL; break;
- case PDF_TOK_INT:
- /* 64-bit to allow for numbers > INT_MAX and overflow */
- a = buf->i;
- tok = pdf_lex(ctx, file, buf);
- if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
- (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")))
- {
- pdf_dict_put_int(ctx, dict, key, a);
- pdf_drop_obj(ctx, key);
- key = NULL;
- goto skip;
- }
- if (tok == PDF_TOK_INT)
- {
- b = buf->i;
- tok = pdf_lex(ctx, file, buf);
- if (tok == PDF_TOK_R)
- {
- val = pdf_new_indirect(ctx, doc, a, b);
- break;
- }
- }
- fz_warn(ctx, "invalid indirect reference in dict");
- val = PDF_NULL;
- break;
- default:
- val = PDF_NULL;
- break;
- }
- pdf_dict_put(ctx, dict, key, val);
- pdf_drop_obj(ctx, val);
- val = NULL;
- pdf_drop_obj(ctx, key);
- key = NULL;
- }
- }
- fz_catch(ctx)
- {
- pdf_drop_obj(ctx, dict);
- pdf_drop_obj(ctx, key);
- pdf_drop_obj(ctx, val);
- fz_rethrow(ctx);
- }
- return dict;
- }
- pdf_obj *
- pdf_parse_stm_obj(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *buf)
- {
- pdf_token tok;
- tok = pdf_lex(ctx, file, buf);
- switch (tok)
- {
- case PDF_TOK_OPEN_ARRAY:
- return pdf_parse_array(ctx, doc, file, buf);
- case PDF_TOK_OPEN_DICT:
- return pdf_parse_dict(ctx, doc, file, buf);
- case PDF_TOK_NAME: return pdf_new_name(ctx, buf->scratch);
- case PDF_TOK_REAL: return pdf_new_real(ctx, buf->f);
- case PDF_TOK_STRING: return pdf_new_string(ctx, buf->scratch, buf->len);
- case PDF_TOK_TRUE: return PDF_TRUE;
- case PDF_TOK_FALSE: return PDF_FALSE;
- case PDF_TOK_NULL: return PDF_NULL;
- case PDF_TOK_INT: return pdf_new_int(ctx, buf->i);
- default: fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown token in object stream");
- }
- }
- pdf_obj *
- pdf_parse_ind_obj_or_newobj(fz_context *ctx, pdf_document *doc, fz_stream *file,
- int *onum, int *ogen, int64_t *ostmofs, int *try_repair, int *newobj)
- {
- pdf_obj *obj = NULL;
- int num = 0, gen = 0;
- int64_t stm_ofs;
- pdf_token tok;
- pdf_lexbuf *buf = &doc->lexbuf.base;
- int64_t a, b;
- int read_next_token = 1;
- fz_var(obj);
- tok = pdf_lex(ctx, file, buf);
- if (tok != PDF_TOK_INT)
- {
- if (try_repair)
- *try_repair = 1;
- fz_throw(ctx, FZ_ERROR_SYNTAX, "expected object number");
- }
- num = buf->i;
- if (num < 0 || num > PDF_MAX_OBJECT_NUMBER)
- fz_throw(ctx, FZ_ERROR_SYNTAX, "object number out of range");
- tok = pdf_lex(ctx, file, buf);
- if (tok != PDF_TOK_INT)
- {
- if (try_repair)
- *try_repair = 1;
- fz_throw(ctx, FZ_ERROR_SYNTAX, "expected generation number (%d ? obj)", num);
- }
- gen = buf->i;
- if (gen < 0 || gen >= 65536)
- {
- if (try_repair)
- *try_repair = 1;
- fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid generation number (%d)", gen);
- }
- tok = pdf_lex(ctx, file, buf);
- if (tok == PDF_TOK_NEWOBJ && newobj)
- {
- *newobj = 1;
- if (onum) *onum = num;
- if (ogen) *ogen = gen;
- if (ostmofs) *ostmofs = 0;
- return NULL;
- }
- if (tok != PDF_TOK_OBJ)
- {
- if (try_repair)
- *try_repair = 1;
- fz_throw(ctx, FZ_ERROR_SYNTAX, "expected 'obj' keyword (%d %d ?)", num, gen);
- }
- tok = pdf_lex(ctx, file, buf);
- switch (tok)
- {
- case PDF_TOK_OPEN_ARRAY:
- obj = pdf_parse_array(ctx, doc, file, buf);
- break;
- case PDF_TOK_OPEN_DICT:
- obj = pdf_parse_dict(ctx, doc, file, buf);
- break;
- case PDF_TOK_NAME: obj = pdf_new_name(ctx, buf->scratch); break;
- case PDF_TOK_REAL: obj = pdf_new_real(ctx, buf->f); break;
- case PDF_TOK_STRING: obj = pdf_new_string(ctx, buf->scratch, buf->len); break;
- case PDF_TOK_TRUE: obj = PDF_TRUE; break;
- case PDF_TOK_FALSE: obj = PDF_FALSE; break;
- case PDF_TOK_NULL: obj = PDF_NULL; break;
- case PDF_TOK_INT:
- a = buf->i;
- tok = pdf_lex(ctx, file, buf);
- if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
- {
- obj = pdf_new_int(ctx, a);
- read_next_token = 0;
- break;
- }
- else if (tok == PDF_TOK_INT)
- {
- b = buf->i;
- tok = pdf_lex(ctx, file, buf);
- if (tok == PDF_TOK_R)
- {
- obj = pdf_new_indirect(ctx, doc, a, b);
- break;
- }
- }
- fz_throw(ctx, FZ_ERROR_SYNTAX, "expected 'R' keyword (%d %d R)", num, gen);
- case PDF_TOK_ENDOBJ:
- obj = PDF_NULL;
- read_next_token = 0;
- break;
- default:
- fz_throw(ctx, FZ_ERROR_SYNTAX, "syntax error in object (%d %d R)", num, gen);
- }
- fz_try(ctx)
- {
- if (read_next_token)
- tok = pdf_lex(ctx, file, buf);
- if (tok == PDF_TOK_STREAM)
- {
- int c = fz_read_byte(ctx, file);
- while (c == ' ')
- c = fz_read_byte(ctx, file);
- if (c == '\r')
- {
- c = fz_peek_byte(ctx, file);
- if (c != '\n')
- fz_warn(ctx, "line feed missing after stream begin marker (%d %d R)", num, gen);
- else
- fz_read_byte(ctx, file);
- }
- stm_ofs = fz_tell(ctx, file);
- }
- else if (tok == PDF_TOK_ENDOBJ)
- {
- stm_ofs = 0;
- }
- else
- {
- fz_warn(ctx, "expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
- stm_ofs = 0;
- }
- }
- fz_catch(ctx)
- {
- pdf_drop_obj(ctx, obj);
- fz_rethrow(ctx);
- }
- if (onum) *onum = num;
- if (ogen) *ogen = gen;
- if (ostmofs) *ostmofs = stm_ofs;
- return obj;
- }
- pdf_obj *
- pdf_parse_ind_obj(fz_context *ctx, pdf_document *doc, fz_stream *file,
- int *onum, int *ogen, int64_t *ostmofs, int *try_repair)
- {
- return pdf_parse_ind_obj_or_newobj(ctx, doc, file, onum, ogen, ostmofs, try_repair, NULL);
- }
- pdf_obj *
- pdf_parse_journal_obj(fz_context *ctx, pdf_document *doc, fz_stream *stm,
- int *onum, fz_buffer **ostm, int *newobj)
- {
- pdf_obj *obj = NULL;
- pdf_token tok;
- pdf_lexbuf *buf = &doc->lexbuf.base;
- int64_t stmofs;
- *newobj = 0;
- obj = pdf_parse_ind_obj_or_newobj(ctx, doc, stm, onum, NULL, &stmofs, NULL, newobj);
- /* This will have consumed either the stream or the endobj keywords. */
- *ostm = NULL;
- if (stmofs)
- {
- fz_stream *stream = NULL;
- fz_var(stream);
- fz_try(ctx)
- {
- stream = fz_open_endstream_filter(ctx, stm, 0, stmofs);
- *ostm = fz_read_all(ctx, stream, 32);
- fz_drop_stream(ctx, stream);
- stream = NULL;
- fz_seek(ctx, stm, stmofs + (*ostm ? (*ostm)->len : 0), SEEK_SET);
- tok = pdf_lex(ctx, stm, buf);
- if (tok != PDF_TOK_ENDSTREAM)
- fz_throw(ctx, FZ_ERROR_SYNTAX, "expected 'endstream' keyword");
- tok = pdf_lex(ctx, stm, buf);
- if (tok != PDF_TOK_ENDOBJ)
- fz_throw(ctx, FZ_ERROR_SYNTAX, "expected 'endobj' keyword");
- }
- fz_always(ctx)
- fz_drop_stream(ctx, stream);
- fz_catch(ctx)
- {
- pdf_drop_obj(ctx, obj);
- fz_rethrow(ctx);
- }
- }
- return obj;
- }
|