| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764 |
- // Copyright (C) 2004-2025 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #include "mupdf/pdf.h"
- #include <string.h>
- int
- pdf_obj_num_is_stream(fz_context *ctx, pdf_document *doc, int num)
- {
- pdf_xref_entry *entry;
- if (num <= 0 || num >= pdf_xref_len(ctx, doc))
- return 0;
- fz_try(ctx)
- entry = pdf_cache_object(ctx, doc, num);
- fz_catch(ctx)
- {
- fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
- fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
- fz_report_error(ctx);
- return 0;
- }
- return entry->stm_ofs != 0 || entry->stm_buf;
- }
- int
- pdf_is_stream(fz_context *ctx, pdf_obj *ref)
- {
- pdf_document *doc = pdf_get_indirect_document(ctx, ref);
- if (doc)
- return pdf_obj_num_is_stream(ctx, doc, pdf_to_num(ctx, ref));
- return 0;
- }
- /*
- * Scan stream dictionary for an explicit /Crypt filter
- */
- static int
- pdf_stream_has_crypt(fz_context *ctx, pdf_obj *stm)
- {
- pdf_obj *filters;
- pdf_obj *obj;
- int i;
- filters = pdf_dict_geta(ctx, stm, PDF_NAME(Filter), PDF_NAME(F));
- if (filters)
- {
- if (pdf_name_eq(ctx, filters, PDF_NAME(Crypt)))
- return 1;
- if (pdf_is_array(ctx, filters))
- {
- int n = pdf_array_len(ctx, filters);
- for (i = 0; i < n; i++)
- {
- obj = pdf_array_get(ctx, filters, i);
- if (pdf_name_eq(ctx, obj, PDF_NAME(Crypt)))
- return 1;
- }
- }
- }
- return 0;
- }
- static fz_jbig2_globals *
- pdf_load_jbig2_globals(fz_context *ctx, pdf_obj *dict)
- {
- fz_jbig2_globals *globals;
- fz_buffer *buf = NULL;
- fz_var(buf);
- if ((globals = pdf_find_item(ctx, fz_drop_jbig2_globals_imp, dict)) != NULL)
- return globals;
- if (pdf_mark_obj(ctx, dict))
- fz_throw(ctx, FZ_ERROR_FORMAT, "cyclic reference when loading JBIG2 globals");
- fz_try(ctx)
- {
- buf = pdf_load_stream(ctx, dict);
- globals = fz_load_jbig2_globals(ctx, buf);
- if (globals)
- pdf_store_item(ctx, dict, globals, fz_buffer_storage(ctx, buf, NULL));
- }
- fz_always(ctx)
- {
- fz_drop_buffer(ctx, buf);
- pdf_unmark_obj(ctx, dict);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- return globals;
- }
- static void
- build_compression_params(fz_context *ctx, pdf_obj *f, pdf_obj *p, fz_compression_params *params)
- {
- params->type = FZ_IMAGE_RAW;
- if (pdf_name_eq(ctx, f, PDF_NAME(CCITTFaxDecode)) || pdf_name_eq(ctx, f, PDF_NAME(CCF)))
- {
- params->type = FZ_IMAGE_FAX;
- params->u.fax.k = pdf_dict_get_int_default(ctx, p, PDF_NAME(K), 0);
- params->u.fax.end_of_line = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EndOfLine), 0);
- params->u.fax.encoded_byte_align = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EncodedByteAlign), 0);
- params->u.fax.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1728);
- params->u.fax.rows = pdf_dict_get_int_default(ctx, p, PDF_NAME(Rows), 0);
- params->u.fax.end_of_block = pdf_dict_get_bool_default(ctx, p, PDF_NAME(EndOfBlock), 1);
- params->u.fax.black_is_1 = pdf_dict_get_bool_default(ctx, p, PDF_NAME(BlackIs1), 0);
- }
- else if (pdf_name_eq(ctx, f, PDF_NAME(DCTDecode)) || pdf_name_eq(ctx, f, PDF_NAME(DCT)))
- {
- params->type = FZ_IMAGE_JPEG;
- params->u.jpeg.color_transform = pdf_dict_get_int_default(ctx, p, PDF_NAME(ColorTransform), -1);
- params->u.jpeg.invert_cmyk = 0;
- }
- else if (pdf_name_eq(ctx, f, PDF_NAME(RunLengthDecode)) || pdf_name_eq(ctx, f, PDF_NAME(RL)))
- {
- params->type = FZ_IMAGE_RLD;
- }
- else if (pdf_name_eq(ctx, f, PDF_NAME(FlateDecode)) || pdf_name_eq(ctx, f, PDF_NAME(Fl)))
- {
- params->type = FZ_IMAGE_FLATE;
- params->u.flate.predictor = pdf_dict_get_int_default(ctx, p, PDF_NAME(Predictor), 1);
- params->u.flate.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1);
- params->u.flate.colors = pdf_dict_get_int_default(ctx, p, PDF_NAME(Colors), 1);
- params->u.flate.bpc = pdf_dict_get_int_default(ctx, p, PDF_NAME(BitsPerComponent), 8);
- }
- else if (pdf_name_eq(ctx, f, PDF_NAME(BrotliDecode)) || pdf_name_eq(ctx, f, PDF_NAME(Br)))
- {
- params->type = FZ_IMAGE_BROTLI;
- params->u.brotli.predictor = pdf_dict_get_int_default(ctx, p, PDF_NAME(Predictor), 1);
- params->u.brotli.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1);
- params->u.brotli.colors = pdf_dict_get_int_default(ctx, p, PDF_NAME(Colors), 1);
- params->u.brotli.bpc = pdf_dict_get_int_default(ctx, p, PDF_NAME(BitsPerComponent), 8);
- }
- else if (pdf_name_eq(ctx, f, PDF_NAME(LZWDecode)) || pdf_name_eq(ctx, f, PDF_NAME(LZW)))
- {
- params->type = FZ_IMAGE_LZW;
- params->u.lzw.predictor = pdf_dict_get_int_default(ctx, p, PDF_NAME(Predictor), 1);
- params->u.lzw.columns = pdf_dict_get_int_default(ctx, p, PDF_NAME(Columns), 1);
- params->u.lzw.colors = pdf_dict_get_int_default(ctx, p, PDF_NAME(Colors), 1);
- params->u.lzw.bpc = pdf_dict_get_int_default(ctx, p, PDF_NAME(BitsPerComponent), 8);
- params->u.lzw.early_change = pdf_dict_get_int_default(ctx, p, PDF_NAME(EarlyChange), 1);
- }
- else if (pdf_name_eq(ctx, f, PDF_NAME(JBIG2Decode)))
- {
- pdf_obj *g = pdf_dict_get(ctx, p, PDF_NAME(JBIG2Globals));
- params->type = FZ_IMAGE_JBIG2;
- params->u.jbig2.globals = NULL;
- params->u.jbig2.embedded = 1; /* jbig2 streams are always embedded without file headers */
- if (g)
- {
- if (!pdf_is_stream(ctx, g))
- fz_warn(ctx, "jbig2 globals is not a stream, skipping globals");
- else
- params->u.jbig2.globals = pdf_load_jbig2_globals(ctx, g);
- }
- }
- }
- /*
- * Create a filter given a name and param dictionary.
- */
- static fz_stream *
- build_filter(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params, int might_be_image)
- {
- fz_compression_params local_params;
- local_params.u.jbig2.globals = NULL;
- if (params == NULL)
- params = &local_params;
- if (!might_be_image &&
- (pdf_name_eq(ctx, f, PDF_NAME(CCITTFaxDecode)) ||
- pdf_name_eq(ctx, f, PDF_NAME(CCF)) ||
- pdf_name_eq(ctx, f, PDF_NAME(DCTDecode)) ||
- pdf_name_eq(ctx, f, PDF_NAME(DCT)) ||
- pdf_name_eq(ctx, f, PDF_NAME(JBIG2Decode)) ||
- pdf_name_eq(ctx, f, PDF_NAME(JPXDecode))))
- {
- fz_warn(ctx, "Can't open image only stream for non-image purposes");
- return fz_open_memory(ctx, (unsigned char *)"", 0);
- }
- build_compression_params(ctx, f, p, params);
- /* If we were using params we were passed in, and we successfully
- * recognised the image type, we can use the existing filter and
- * shortstop here. */
- if (params != &local_params && params->type != FZ_IMAGE_RAW)
- return fz_keep_stream(ctx, chain); /* nothing to do */
- else if (params->type == FZ_IMAGE_JBIG2)
- {
- fz_stream *stm;
- fz_try(ctx)
- stm = fz_open_image_decomp_stream(ctx, chain, params, NULL);
- fz_always(ctx)
- fz_drop_jbig2_globals(ctx, local_params.u.jbig2.globals);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return stm;
- }
- else if (params->type != FZ_IMAGE_RAW)
- return fz_open_image_decomp_stream(ctx, chain, params, NULL);
- else if (pdf_name_eq(ctx, f, PDF_NAME(ASCIIHexDecode)) || pdf_name_eq(ctx, f, PDF_NAME(AHx)))
- return fz_open_ahxd(ctx, chain);
- else if (pdf_name_eq(ctx, f, PDF_NAME(ASCII85Decode)) || pdf_name_eq(ctx, f, PDF_NAME(A85)))
- return fz_open_a85d(ctx, chain);
- else if (pdf_name_eq(ctx, f, PDF_NAME(JPXDecode)))
- return fz_keep_stream(ctx, chain); /* JPX decoding is special cased in the image loading code */
- else if (pdf_name_eq(ctx, f, PDF_NAME(Crypt)))
- {
- if (!doc->crypt)
- fz_warn(ctx, "crypt filter in unencrypted document");
- else
- {
- pdf_obj *name = pdf_dict_get(ctx, p, PDF_NAME(Name));
- if (pdf_is_name(ctx, name))
- return pdf_open_crypt_with_filter(ctx, chain, doc->crypt, name, num, gen);
- }
- }
- else
- fz_warn(ctx, "unknown filter name (%s)", pdf_to_name(ctx, f));
- return fz_keep_stream(ctx, chain);
- }
- /* Build filter, and assume ownership of chain */
- static fz_stream *
- build_filter_drop(fz_context *ctx, fz_stream *tail, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params, int might_be_image)
- {
- fz_stream *head;
- fz_try(ctx)
- head = build_filter(ctx, tail, doc, f, p, num, gen, params, might_be_image);
- fz_always(ctx)
- fz_drop_stream(ctx, tail);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return head;
- }
- /*
- * Build a chain of filters given filter names and param dicts.
- * If chain is given, start filter chain with it.
- * Assume ownership of chain.
- */
- static fz_stream *
- build_filter_chain_drop(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params, int might_be_image)
- {
- fz_var(chain);
- fz_try(ctx)
- {
- int i, n = pdf_array_len(ctx, fs);
- for (i = 0; i < n; i++)
- {
- pdf_obj *f = pdf_array_get(ctx, fs, i);
- pdf_obj *p = pdf_array_get(ctx, ps, i);
- chain = build_filter_drop(ctx, chain, doc, f, p, num, gen, (i == n-1 ? params : NULL), might_be_image);
- }
- }
- fz_catch(ctx)
- fz_rethrow(ctx);
- return chain;
- }
- static fz_stream *
- build_filter_chain(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *fs, pdf_obj *ps, int num, int gen, fz_compression_params *params, int might_be_image)
- {
- return build_filter_chain_drop(ctx, fz_keep_stream(ctx, chain), doc, fs, ps, num, gen, params, might_be_image);
- }
- /*
- * Build a filter for reading raw stream data.
- * This is a null filter to constrain reading to the stream length (and to
- * allow for other people accessing the file), followed by a decryption
- * filter.
- *
- * orig_num and orig_gen are used purely to seed the encryption.
- */
- static fz_stream *
- pdf_open_raw_filter(fz_context *ctx, fz_stream *file_stm, pdf_document *doc, pdf_obj *stmobj, int num, int *orig_num, int *orig_gen, int64_t offset)
- {
- pdf_xref_entry *x = NULL;
- fz_stream *null_stm, *crypt_stm;
- int hascrypt;
- int64_t len;
- if (num > 0 && num < pdf_xref_len(ctx, doc))
- {
- x = pdf_get_xref_entry(ctx, doc, num);
- }
- if (x == NULL)
- {
- /* We only end up here when called from pdf_open_stream_with_offset to parse new format XRef sections. */
- /* New style XRef sections must have generation number 0. */
- *orig_num = num;
- *orig_gen = 0;
- }
- else
- {
- *orig_num = x->num;
- *orig_gen = x->gen;
- if (x->stm_buf)
- return fz_open_buffer(ctx, x->stm_buf);
- }
- hascrypt = pdf_stream_has_crypt(ctx, stmobj);
- len = pdf_dict_get_int64(ctx, stmobj, PDF_NAME(Length));
- if (len < 0)
- len = 0;
- null_stm = fz_open_endstream_filter(ctx, file_stm, (uint64_t)len, offset);
- if (doc->crypt && !hascrypt)
- {
- fz_try(ctx)
- crypt_stm = pdf_open_crypt(ctx, null_stm, doc->crypt, *orig_num, *orig_gen);
- fz_always(ctx)
- fz_drop_stream(ctx, null_stm);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return crypt_stm;
- }
- return null_stm;
- }
- /*
- * Construct a filter to decode a stream, constraining
- * to stream length and decrypting.
- */
- static fz_stream *
- pdf_open_filter(fz_context *ctx, pdf_document *doc, fz_stream *file_stm, pdf_obj *stmobj, int num, int64_t offset, fz_compression_params *imparams, int might_be_image)
- {
- pdf_obj *filters = pdf_dict_geta(ctx, stmobj, PDF_NAME(Filter), PDF_NAME(F));
- pdf_obj *params = pdf_dict_geta(ctx, stmobj, PDF_NAME(DecodeParms), PDF_NAME(DP));
- int orig_num, orig_gen;
- fz_stream *rstm, *fstm;
- rstm = pdf_open_raw_filter(ctx, file_stm, doc, stmobj, num, &orig_num, &orig_gen, offset);
- fz_try(ctx)
- {
- if (pdf_is_name(ctx, filters))
- fstm = build_filter(ctx, rstm, doc, filters, params, orig_num, orig_gen, imparams, might_be_image);
- else if (pdf_array_len(ctx, filters) > 0)
- fstm = build_filter_chain(ctx, rstm, doc, filters, params, orig_num, orig_gen, imparams, might_be_image);
- else
- {
- if (imparams)
- imparams->type = FZ_IMAGE_RAW;
- fstm = fz_keep_stream(ctx, rstm);
- }
- }
- fz_always(ctx)
- fz_drop_stream(ctx, rstm);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return fstm;
- }
- fz_stream *
- pdf_open_inline_stream(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *file_stm, fz_compression_params *imparams)
- {
- pdf_obj *filters = pdf_dict_geta(ctx, stmobj, PDF_NAME(Filter), PDF_NAME(F));
- pdf_obj *params = pdf_dict_geta(ctx, stmobj, PDF_NAME(DecodeParms), PDF_NAME(DP));
- if (pdf_is_name(ctx, filters))
- return build_filter(ctx, file_stm, doc, filters, params, 0, 0, imparams, 1);
- else if (pdf_array_len(ctx, filters) > 0)
- return build_filter_chain(ctx, file_stm, doc, filters, params, 0, 0, imparams, 1);
- if (imparams)
- imparams->type = FZ_IMAGE_RAW;
- return fz_open_null_filter(ctx, file_stm, length, fz_tell(ctx, file_stm));
- }
- void
- pdf_load_compressed_inline_image(fz_context *ctx, pdf_document *doc, pdf_obj *dict, int length, fz_stream *file_stm, int indexed, fz_compressed_image *image)
- {
- fz_stream *istm = NULL, *leech = NULL, *decomp = NULL;
- fz_pixmap *pixmap = NULL;
- fz_compressed_buffer *bc;
- int dummy_l2factor = 0;
- fz_var(istm);
- fz_var(leech);
- fz_var(decomp);
- fz_var(pixmap);
- bc = fz_new_compressed_buffer(ctx);
- fz_try(ctx)
- {
- bc->buffer = fz_new_buffer(ctx, 1024);
- istm = pdf_open_inline_stream(ctx, doc, dict, length, file_stm, &bc->params);
- leech = fz_open_leecher(ctx, istm, bc->buffer);
- decomp = fz_open_image_decomp_stream(ctx, leech, &bc->params, &dummy_l2factor);
- pixmap = fz_decomp_image_from_stream(ctx, decomp, image, NULL, indexed, 0, NULL);
- fz_set_compressed_image_buffer(ctx, image, bc);
- }
- fz_always(ctx)
- {
- fz_drop_stream(ctx, istm);
- fz_drop_stream(ctx, leech);
- fz_drop_stream(ctx, decomp);
- fz_drop_pixmap(ctx, pixmap);
- }
- fz_catch(ctx)
- {
- fz_drop_compressed_buffer(ctx, bc);
- fz_rethrow(ctx);
- }
- }
- fz_stream *
- pdf_open_raw_stream_number(fz_context *ctx, pdf_document *doc, int num)
- {
- pdf_xref_entry *x;
- int orig_num, orig_gen;
- x = pdf_cache_object(ctx, doc, num);
- if (x->stm_ofs == 0)
- fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
- return pdf_open_raw_filter(ctx, doc->file, doc, x->obj, num, &orig_num, &orig_gen, x->stm_ofs);
- }
- static fz_stream *
- pdf_open_image_stream(fz_context *ctx, pdf_document *doc, int num, fz_compression_params *params, int might_be_image)
- {
- pdf_xref_entry *x;
- x = pdf_cache_object(ctx, doc, num);
- if (x->stm_ofs == 0 && x->stm_buf == NULL)
- fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
- return pdf_open_filter(ctx, doc, doc->file, x->obj, num, x->stm_ofs, params, might_be_image);
- }
- fz_stream *
- pdf_open_stream_number(fz_context *ctx, pdf_document *doc, int num)
- {
- return pdf_open_image_stream(ctx, doc, num, NULL, 1);
- }
- fz_stream *
- pdf_open_stream_with_offset(fz_context *ctx, pdf_document *doc, int num, pdf_obj *dict, int64_t stm_ofs)
- {
- if (stm_ofs == 0)
- fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
- return pdf_open_filter(ctx, doc, doc->file, dict, num, stm_ofs, NULL, 1);
- }
- fz_buffer *
- pdf_load_raw_stream_number(fz_context *ctx, pdf_document *doc, int num)
- {
- fz_stream *stm;
- pdf_obj *dict;
- int64_t len;
- fz_buffer *buf = NULL;
- pdf_xref_entry *x;
- if (num > 0 && num < pdf_xref_len(ctx, doc))
- {
- x = pdf_get_xref_entry_no_null(ctx, doc, num);
- if (x->stm_buf)
- return fz_keep_buffer(ctx, x->stm_buf);
- }
- dict = pdf_load_object(ctx, doc, num);
- fz_try(ctx)
- len = pdf_dict_get_int64(ctx, dict, PDF_NAME(Length));
- fz_always(ctx)
- pdf_drop_obj(ctx, dict);
- fz_catch(ctx)
- fz_rethrow(ctx);
- stm = pdf_open_raw_stream_number(ctx, doc, num);
- if (len < 0)
- len = 1024;
- fz_try(ctx)
- buf = fz_read_all(ctx, stm, (size_t)len);
- fz_always(ctx)
- fz_drop_stream(ctx, stm);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return buf;
- }
/*
 * Estimate the decoded size of 'len' bytes after passing through the
 * filter called 'filter' (full filter names only).
 *
 * ASCIIHexDecode and ASCII85Decode shrink the data; FlateDecode,
 * BrotliDecode, RunLengthDecode and LZWDecode are assumed to expand it
 * by a fixed factor. Any other name leaves 'len' unchanged. If the
 * expanded size would not fit in a size_t, 'len' itself is returned:
 * the value is only an allocation hint, and the buffer grows on demand.
 */
static size_t
pdf_guess_filter_length(size_t len, const char *filter)
{
	const size_t size_limit = (size_t)-1;
	size_t factor;

	if (!strcmp(filter, "ASCIIHexDecode"))
		return len / 2;

	/* floor(len * 4 / 5), computed without forming len * 4, which
	 * could wrap around for very large lengths. */
	if (!strcmp(filter, "ASCII85Decode"))
		return (len / 5) * 4 + (len % 5) * 4 / 5;

	if (!strcmp(filter, "FlateDecode"))
		factor = 3;
	else if (!strcmp(filter, "BrotliDecode"))
		factor = 4;
	else if (!strcmp(filter, "RunLengthDecode"))
		factor = 3;
	else if (!strcmp(filter, "LZWDecode"))
		factor = 2;
	else
		return len;

	/* Live with a bad estimate rather than a wrapped one - we'll
	 * malloc up as we go, but it's probably destined to fail anyway. */
	if (len > size_limit / factor)
		return len;

	return len * factor;
}
- /* Check if an entry has a cached stream and return whether it is directly
- * reusable. A buffer is directly reusable only if the stream is
- * uncompressed, or if it is compressed purely a compression method we can
- * return details of in fz_compression_params.
- *
- * If the stream is reusable return 1, and set params as required, otherwise
- * return 0. */
- static int
- can_reuse_buffer(fz_context *ctx, pdf_xref_entry *entry, fz_compression_params *params)
- {
- pdf_obj *f;
- pdf_obj *p;
- if (!entry || !entry->obj || !entry->stm_buf)
- return 0;
- if (params)
- params->type = FZ_IMAGE_RAW;
- f = pdf_dict_geta(ctx, entry->obj, PDF_NAME(Filter), PDF_NAME(F));
- /* If there are no filters, it's uncompressed, and we can use it */
- if (!f)
- return 1;
- p = pdf_dict_geta(ctx, entry->obj, PDF_NAME(DecodeParms), PDF_NAME(DP));
- if (pdf_is_array(ctx, f))
- {
- int len = pdf_array_len(ctx, f);
- /* Empty array of filters. Its uncompressed. We can cope. */
- if (len == 0)
- return 1;
- /* 1 filter is the most we can hope to cope with - if more,*/
- if (len != 1)
- return 0;
- p = pdf_array_get(ctx, p, 0);
- }
- if (pdf_is_null(ctx, f))
- return 1; /* Null filter is uncompressed */
- if (!pdf_is_name(ctx, f))
- return 0;
- /* There are filters, so unless we have the option of shortstopping,
- * we can't use the existing buffer. */
- if (!params)
- return 0;
- build_compression_params(ctx, f, p, params);
- return (params->type == FZ_IMAGE_RAW) ? 0 : 1;
- }
- static fz_buffer *
- pdf_load_image_stream(fz_context *ctx, pdf_document *doc, int num, fz_compression_params *params, int *truncated, size_t worst_case)
- {
- fz_stream *stm = NULL;
- pdf_obj *dict, *obj;
- int i, n;
- size_t len;
- fz_buffer *buf;
- fz_var(buf);
- if (num > 0 && num < pdf_xref_len(ctx, doc))
- {
- pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num);
- /* Return ref to existing buffer, but only if uncompressed,
- * or shortstoppable */
- if (can_reuse_buffer(ctx, entry, params))
- return fz_keep_buffer(ctx, entry->stm_buf);
- }
- dict = pdf_load_object(ctx, doc, num);
- fz_try(ctx)
- {
- int64_t ilen = pdf_dict_get_int64(ctx, dict, PDF_NAME(Length));
- if (ilen < 0)
- ilen = 0;
- len = (size_t)ilen;
- /* In 32 bit builds, we might find a length being too
- * large for a size_t. */
- if ((int64_t)len != ilen)
- fz_throw(ctx, FZ_ERROR_LIMIT, "Stream too large");
- obj = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
- len = pdf_guess_filter_length(len, pdf_to_name(ctx, obj));
- n = pdf_array_len(ctx, obj);
- for (i = 0; i < n; i++)
- len = pdf_guess_filter_length(len, pdf_array_get_name(ctx, obj, i));
- }
- fz_always(ctx)
- {
- pdf_drop_obj(ctx, dict);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- stm = pdf_open_image_stream(ctx, doc, num, params, 1);
- fz_try(ctx)
- {
- buf = fz_read_best(ctx, stm, len, truncated, worst_case);
- }
- fz_always(ctx)
- {
- fz_drop_stream(ctx, stm);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- return buf;
- }
- fz_buffer *
- pdf_load_stream_number(fz_context *ctx, pdf_document *doc, int num)
- {
- return pdf_load_image_stream(ctx, doc, num, NULL, NULL, 0);
- }
- fz_compressed_buffer *
- pdf_load_compressed_stream(fz_context *ctx, pdf_document *doc, int num, size_t worst_case)
- {
- fz_compressed_buffer *bc = fz_new_compressed_buffer(ctx);
- fz_try(ctx)
- {
- bc->buffer = pdf_load_image_stream(ctx, doc, num, &bc->params, NULL, worst_case);
- }
- fz_catch(ctx)
- {
- fz_free(ctx, bc);
- fz_rethrow(ctx);
- }
- return bc;
- }
- static fz_stream *
- pdf_open_object_array(fz_context *ctx, pdf_document *doc, pdf_obj *list)
- {
- fz_stream *stm;
- int i, n;
- n = pdf_array_len(ctx, list);
- stm = fz_open_concat(ctx, n, 1);
- for (i = 0; i < n; i++)
- {
- pdf_obj *obj = pdf_array_get(ctx, list, i);
- fz_try(ctx)
- fz_concat_push_drop(ctx, stm, pdf_open_stream(ctx, obj));
- fz_catch(ctx)
- {
- if (fz_caught(ctx) == FZ_ERROR_TRYLATER || fz_caught(ctx) == FZ_ERROR_SYSTEM)
- {
- fz_drop_stream(ctx, stm);
- fz_rethrow(ctx);
- }
- fz_report_error(ctx);
- fz_warn(ctx, "cannot load content stream part %d/%d", i + 1, n);
- }
- }
- return stm;
- }
- fz_stream *
- pdf_open_contents_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
- {
- int num;
- if (pdf_is_array(ctx, obj))
- return pdf_open_object_array(ctx, doc, obj);
- num = pdf_to_num(ctx, obj);
- if (pdf_is_stream(ctx, obj))
- return pdf_open_image_stream(ctx, doc, num, NULL, 0);
- fz_warn(ctx, "content stream is not a stream (%d 0 R)", num);
- return fz_open_memory(ctx, (unsigned char *)"", 0);
- }
- fz_buffer *pdf_load_raw_stream(fz_context *ctx, pdf_obj *ref)
- {
- if (pdf_is_stream(ctx, ref))
- return pdf_load_raw_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
- fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
- }
- fz_buffer *pdf_load_stream(fz_context *ctx, pdf_obj *ref)
- {
- if (pdf_is_stream(ctx, ref))
- return pdf_load_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
- fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
- }
- fz_stream *pdf_open_raw_stream(fz_context *ctx, pdf_obj *ref)
- {
- if (pdf_is_stream(ctx, ref))
- return pdf_open_raw_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
- fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
- }
- fz_stream *pdf_open_stream(fz_context *ctx, pdf_obj *ref)
- {
- if (pdf_is_stream(ctx, ref))
- return pdf_open_stream_number(ctx, pdf_get_indirect_document(ctx, ref), pdf_to_num(ctx, ref));
- fz_throw(ctx, FZ_ERROR_FORMAT, "object is not a stream");
- }
|