| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727 |
- // Copyright (C) 2023-2025 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #ifdef HAVE_LIBARCHIVE
- #ifdef _WIN32
- #include "libarchive/archive.h"
- #include "libarchive/archive_entry.h"
- #else
- #include <archive.h>
- #include <archive_entry.h>
- #endif
/* One archive member, recorded while the archive is first scanned.
 *
 * Records are heap-allocated by fz_open_libarchive_archive_with_stream()
 * with room for the complete NUL-terminated path, so the text stored in
 * `name` may run past the declared 32 bytes; the declared bound only
 * fixes the layout used in that size calculation. */
typedef struct
{
	/* Entry size, as reported by archive_entry_size(). */
	size_t len;
	/* NUL-terminated entry path. */
	uint8_t name[32];
} entry_t;
- typedef struct
- {
- fz_archive super;
- struct archive *archive;
- int current_entry_idx;
- int entries_max;
- int entries_len;
- entry_t **entries;
- fz_context *ctx; /* safe! */
- uint8_t block[4096];
- } fz_libarchive_archive;
- static la_ssize_t
- libarchive_read(struct archive *a, void *client_data, const void **buf)
- {
- fz_libarchive_archive *arch = (fz_libarchive_archive *)client_data;
- size_t z;
- uint8_t *p;
- size_t left;
- fz_context *ctx = arch->ctx;
- la_ssize_t ret = 0;
- fz_try(ctx)
- {
- z = fz_available(arch->ctx, arch->super.file, 1024);
- /* If we're at the EOF, can't read anything! */
- if (z == 0)
- break;
- /* If we have at least 1K, then just return the pointer to that
- * directly. */
- if (z >= 1024)
- {
- *buf = arch->super.file->rp;
- arch->super.file->rp += z;
- ret = (la_ssize_t)z;
- break;
- }
- /* If not, let's pull a large enough lump out. */
- left = sizeof(arch->block);
- p = arch->block;
- do
- {
- memcpy(p, arch->super.file->rp, z);
- p += z;
- arch->super.file->rp += z;
- left -= z;
- if (left)
- {
- z = fz_available(arch->ctx, arch->super.file, left);
- if (z > left)
- z = left;
- if (z == 0)
- break;
- }
- }
- while (left != 0);
- ret = p - arch->block;
- *buf = arch->block;
- }
- fz_catch(ctx)
- {
- /* Ignore error */
- archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
- return -1;
- }
- return ret;
- }
- static la_int64_t
- libarchive_skip(struct archive *a, void *client_data, la_int64_t skip)
- {
- fz_libarchive_archive *arch = (fz_libarchive_archive *)client_data;
- int64_t pos;
- fz_context *ctx = arch->ctx;
- fz_try(ctx)
- {
- pos = fz_tell(arch->ctx, arch->super.file);
- fz_seek(arch->ctx, arch->super.file, pos + skip, SEEK_SET);
- pos = fz_tell(arch->ctx, arch->super.file) - pos;
- }
- fz_catch(ctx)
- {
- /* Ignore error */
- archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
- return -1;
- }
- return pos;
- }
- static la_int64_t
- libarchive_seek(struct archive *a, void *client_data, la_int64_t offset, int whence)
- {
- fz_libarchive_archive *arch = (fz_libarchive_archive *)client_data;
- fz_context *ctx = arch->ctx;
- int64_t pos;
- fz_try(ctx)
- {
- fz_seek(arch->ctx, arch->super.file, offset, whence);
- pos = fz_tell(arch->ctx, arch->super.file);
- }
- fz_catch(ctx)
- {
- /* Ignore error */
- archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
- return -1;
- }
- return pos;
- }
- static int
- libarchive_close(struct archive *a, void *client_data)
- {
- /* Nothing to do. Stream is dropped when the fz_archive is closed. */
- return ARCHIVE_OK;
- }
- static int
- libarchive_open(fz_context *ctx, fz_libarchive_archive *arch)
- {
- int r;
- arch->archive = archive_read_new();
- archive_read_support_filter_all(arch->archive);
- archive_read_support_format_all(arch->archive);
- arch->ctx = ctx;
- r = archive_read_set_seek_callback(arch->archive, libarchive_seek);
- if (r == ARCHIVE_OK)
- r = archive_read_open2(arch->archive, arch, NULL, libarchive_read, libarchive_skip, libarchive_close);
- arch->ctx = NULL;
- if (r != ARCHIVE_OK)
- {
- archive_read_free(arch->archive);
- arch->archive = NULL;
- }
- return r != ARCHIVE_OK;
- }
- static void
- libarchive_reset(fz_context *ctx, fz_libarchive_archive *arch)
- {
- if (arch->archive)
- {
- archive_read_free(arch->archive);
- arch->archive = NULL;
- }
- fz_seek(ctx, arch->super.file, 0, SEEK_SET);
- if (libarchive_open(ctx, arch))
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to restart archive traversal!");
- arch->current_entry_idx = 0;
- }
- static void
- drop_libarchive_archive(fz_context *ctx, fz_archive *arch_)
- {
- fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
- int i;
- archive_read_free(arch->archive);
- for (i = 0; i < arch->entries_len; ++i)
- fz_free(ctx, arch->entries[i]);
- fz_free(ctx, arch->entries);
- arch->archive = NULL;
- }
- int
- fz_is_libarchive_archive(fz_context *ctx, fz_stream *file)
- {
- fz_libarchive_archive arch;
- struct archive_entry *entry;
- int ret;
- arch.super.file = file;
- fz_seek(ctx, file, 0, SEEK_SET);
- /* Annoyingly, libarchive can say "sure, I can open this" only to
- * then fail when we try to read from it. We therefore need to
- * try to read at least 1 entry out to be sure. */
- ret = libarchive_open(ctx, &arch);
- if (ret == ARCHIVE_OK)
- {
- fz_var(ret);
- fz_try(ctx)
- {
- arch.ctx = ctx; /* safe */
- ret = archive_read_next_header(arch.archive, &entry);
- }
- fz_catch(ctx)
- {
- archive_read_free(arch.archive);
- fz_rethrow(ctx);
- }
- }
- archive_read_free(arch.archive);
- /* Do NOT return true if we get ARCHIVE_EOF. We will fail to recognise empty
- * archives, but the alternative is false positives. */
- return ret == ARCHIVE_OK;
- }
- static int
- lookup_archive_entry(fz_context *ctx, fz_libarchive_archive *arch, const char *name)
- {
- int idx;
- for (idx = 0; idx < arch->entries_len; idx++)
- {
- if (!strcmp(name, (const char *)arch->entries[idx]->name))
- return idx;
- }
- return -1;
- }
- static int has_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name)
- {
- fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
- return lookup_archive_entry(ctx, arch, name) != -1;
- }
- static const char *list_libarchive_entry(fz_context *ctx, fz_archive *arch_, int idx)
- {
- fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
- if (idx < 0 || idx >= arch->entries_len)
- return NULL;
- return (const char *)arch->entries[idx]->name;
- }
- static int count_libarchive_entries(fz_context *ctx, fz_archive *arch_)
- {
- fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
- return arch->entries_len;
- }
- static fz_buffer *
- read_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name)
- {
- fz_libarchive_archive *arch = (fz_libarchive_archive *)arch_;
- fz_buffer *ubuf = NULL;
- int idx;
- struct archive_entry *entry;
- la_ssize_t ret;
- size_t size;
- idx = lookup_archive_entry(ctx, arch, name);
- if (idx < 0)
- return NULL;
- if (arch->current_entry_idx > idx)
- libarchive_reset(ctx, arch);
- fz_var(ubuf);
- arch->ctx = ctx;
- fz_try(ctx)
- {
- while (arch->current_entry_idx < idx)
- {
- int r = archive_read_next_header(arch->archive, &entry);
- if (r == ARCHIVE_OK)
- r = archive_read_data_skip(arch->archive);
- if (r != ARCHIVE_OK)
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to skip over archive entry");
- arch->current_entry_idx++;
- }
- /* This is the one we want. */
- if (archive_read_next_header(arch->archive, &entry) != ARCHIVE_OK)
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to read archive entry header");
- arch->current_entry_idx++;
- size = arch->entries[idx]->len;
- ubuf = fz_new_buffer(ctx, size);
- ubuf->len = size;
- ret = archive_read_data(arch->archive, ubuf->data, size);
- if (ret < 0)
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to read archive data");
- if ((size_t)ret != size)
- fz_warn(ctx, "Premature end of data reading archive entry data (%zu vs %zu)", (size_t)ubuf->len, (size_t)size);
- }
- fz_always(ctx)
- arch->ctx = NULL;
- fz_catch(ctx)
- {
- fz_drop_buffer(ctx, ubuf);
- fz_rethrow(ctx);
- }
- return ubuf;
- }
- static fz_stream *
- open_libarchive_entry(fz_context *ctx, fz_archive *arch_, const char *name)
- {
- fz_buffer *buf = read_libarchive_entry(ctx, arch_, name);
- fz_stream *stm = NULL;
- fz_try(ctx)
- stm = fz_open_buffer(ctx, buf);
- fz_always(ctx)
- fz_drop_buffer(ctx, buf);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return stm;
- }
- fz_archive *
- fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file)
- {
- fz_libarchive_archive *arch = fz_new_derived_archive(ctx, file, fz_libarchive_archive);
- int r;
- const char *path = NULL;
- char *free_path = NULL;
- fz_seek(ctx, file, 0, SEEK_SET);
- if (libarchive_open(ctx, arch) != ARCHIVE_OK)
- {
- fz_drop_archive(ctx, &arch->super);
- fz_throw(ctx, FZ_ERROR_LIBRARY, "cannot recognize libarchive archive");
- }
- arch->super.format = "libarchive";
- arch->super.count_entries = count_libarchive_entries;
- arch->super.list_entry = list_libarchive_entry;
- arch->super.has_entry = has_libarchive_entry;
- arch->super.read_entry = read_libarchive_entry;
- arch->super.open_entry = open_libarchive_entry;
- arch->super.drop_archive = drop_libarchive_archive;
- fz_var(free_path);
- fz_try(ctx)
- {
- arch->ctx = ctx;
- /* Count the archive entries */
- do
- {
- struct archive_entry *entry;
- size_t z;
- r = archive_read_next_header(arch->archive, &entry);
- if (r == ARCHIVE_EOF)
- break;
- if (r != ARCHIVE_OK)
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Corrupt archive");
- free_path = NULL;
- path = archive_entry_pathname_utf8(entry);
- if (!path)
- {
- path = free_path = fz_utf8_from_wchar(ctx, archive_entry_pathname_w(entry));
- }
- if (!path)
- continue;
- if (arch->entries_len == arch->entries_max)
- {
- int new_max = arch->entries_max * 2;
- if (new_max == 0)
- new_max = 32;
- arch->entries = fz_realloc(ctx, arch->entries, sizeof(arch->entries[0]) * new_max);
- arch->entries_max = new_max;
- }
- z = strlen(path);
- arch->entries[arch->entries_len] = fz_malloc(ctx, sizeof(entry_t) - 32 + z + 1);
- memcpy(&arch->entries[arch->entries_len]->name[0], path, z+1);
- if (free_path)
- {
- fz_free(ctx, free_path);
- free_path = NULL;
- }
- arch->entries[arch->entries_len]->len = archive_entry_size(entry);
- arch->entries_len++;
- }
- while (r != ARCHIVE_EOF && r != ARCHIVE_FATAL);
- libarchive_reset(ctx, arch);
- }
- fz_always(ctx)
- {
- if (free_path)
- fz_free(ctx, free_path);
- }
- fz_catch(ctx)
- {
- arch->ctx = NULL;
- fz_drop_archive(ctx, &arch->super);
- fz_rethrow(ctx);
- }
- return &arch->super;
- }
- fz_archive *
- fz_open_libarchive_archive(fz_context *ctx, const char *filename)
- {
- fz_archive *tar = NULL;
- fz_stream *file;
- file = fz_open_file(ctx, filename);
- fz_try(ctx)
- tar = fz_open_libarchive_archive_with_stream(ctx, file);
- fz_always(ctx)
- fz_drop_stream(ctx, file);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return tar;
- }
/* Universal decompression stream */
- typedef struct
- {
- fz_stream *chain;
- fz_context *ctx; /* Safe as not persistent. */
- struct archive *archive;
- struct archive_entry *entry;
- uint8_t block[4096];
- } fz_libarchived_state;
- static la_ssize_t
- libarchived_read(struct archive *a, void *client_data, const void **buf)
- {
- fz_libarchived_state *state = (fz_libarchived_state *)client_data;
- size_t z;
- uint8_t *p;
- size_t left;
- fz_context *ctx = state->ctx;
- la_ssize_t ret = 0;
- fz_try(ctx)
- {
- z = fz_available(ctx, state->chain, 1024);
- /* If we're at the EOF, can't read anything! */
- if (z == 0)
- break;
- /* If we have at least 1K, then just return the pointer to that
- * directly. */
- if (z >= 1024)
- {
- *buf = state->chain->rp;
- state->chain->rp += z;
- ret = (la_ssize_t)z;
- break;
- }
- /* If not, let's pull a large enough lump out. */
- left = sizeof(state->block);
- p = state->block;
- do
- {
- memcpy(p, state->chain->rp, z);
- p += z;
- state->chain->rp += z;
- left -= z;
- if (left)
- {
- z = fz_available(ctx, state->chain, left);
- if (z > left)
- z = left;
- if (z == 0)
- break;
- }
- }
- while (left != 0);
- ret = p - state->block;
- *buf = state->block;
- }
- fz_catch(ctx)
- {
- /* Ignore error */
- archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
- return -1;
- }
- return ret;
- }
- static la_int64_t
- libarchived_skip(struct archive *a, void *client_data, la_int64_t skip)
- {
- fz_libarchived_state *state = (fz_libarchived_state *)client_data;
- int64_t pos;
- fz_context *ctx = state->ctx;
- fz_try(ctx)
- {
- pos = fz_tell(state->ctx, state->chain);
- fz_seek(state->ctx, state->chain, pos + skip, SEEK_SET);
- pos = fz_tell(state->ctx, state->chain) - pos;
- }
- fz_catch(ctx)
- {
- /* Ignore error */
- archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
- return -1;
- }
- return pos;
- }
- static la_int64_t
- libarchived_seek(struct archive *a, void *client_data, la_int64_t offset, int whence)
- {
- fz_libarchived_state *state = (fz_libarchived_state *)client_data;
- fz_context *ctx = state->ctx;
- int64_t pos;
- fz_try(ctx)
- {
- fz_seek(ctx, state->chain, offset, whence);
- pos = fz_tell(ctx, state->chain);
- }
- fz_catch(ctx)
- {
- /* Ignore error */
- archive_set_error(a, ARCHIVE_FATAL, "%s", fz_convert_error(ctx, NULL));
- return -1;
- }
- return pos;
- }
- static int
- libarchived_close(struct archive *a, void *client_data)
- {
- /* Nothing to do. Stream is dropped when the fz_stream is dropped. */
- return ARCHIVE_OK;
- }
- static int
- next_libarchived(fz_context *ctx, fz_stream *stm, size_t required)
- {
- fz_libarchived_state *state = stm->state;
- la_ssize_t z;
- if (stm->eof)
- return EOF;
- z = archive_read_data(state->archive, state->block, sizeof(state->block));
- if (z < 0)
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to read compressed data");
- if (z == 0)
- {
- stm->eof = 1;
- return EOF;
- }
- stm->rp = state->block;
- stm->wp = state->block + z;
- return *stm->rp++;
- }
- static void
- close_libarchived(fz_context *ctx, void *state_)
- {
- fz_libarchived_state *state = (fz_libarchived_state *)state_;
- int code;
- state->ctx = ctx;
- code = archive_read_free(state->archive);
- state->ctx = NULL;
- if (code != ARCHIVE_OK)
- fz_warn(ctx, "libarchive error: archive_read_free: %d", code);
- fz_drop_stream(ctx, state->chain);
- fz_free(ctx, state);
- }
- fz_stream *
- fz_open_libarchived(fz_context *ctx, fz_stream *chain)
- {
- fz_libarchived_state *state;
- int r;
- state = fz_malloc_struct(ctx, fz_libarchived_state);
- state->chain = fz_keep_stream(ctx, chain);
- state->archive = archive_read_new();
- archive_read_support_filter_all(state->archive);
- archive_read_support_format_raw(state->archive);
- state->ctx = ctx;
- r = archive_read_set_seek_callback(state->archive, libarchived_seek);
- if (r == ARCHIVE_OK)
- r = archive_read_open2(state->archive, state, NULL, libarchived_read, libarchived_skip, libarchived_close);
- if (r != ARCHIVE_OK)
- {
- archive_read_free(state->archive);
- state->ctx = NULL;
- fz_drop_stream(ctx, state->chain);
- fz_free(ctx, state);
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to open archive");
- }
- r = archive_filter_code(state->archive, 0);
- if (r == ARCHIVE_FILTER_NONE)
- {
- archive_read_free(state->archive);
- state->ctx = NULL;
- fz_drop_stream(ctx, state->chain);
- fz_free(ctx, state);
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to open archive");
- }
- /* This is the one we want. */
- r = archive_read_next_header(state->archive, &state->entry);
- if (r != ARCHIVE_OK)
- {
- archive_read_free(state->archive);
- state->ctx = NULL;
- fz_drop_stream(ctx, state->chain);
- fz_free(ctx, state);
- fz_throw(ctx, FZ_ERROR_LIBRARY, "Failed to open archive");
- }
- return fz_new_stream(ctx, state, next_libarchived, close_libarchived);
- }
- #else
- int
- fz_is_libarchive_archive(fz_context *ctx, fz_stream *file)
- {
- static int warned = 0;
- if (!warned)
- {
- warned = 1;
- fz_warn(ctx, "libarchive support not included");
- }
- return 0;
- }
- fz_archive *
- fz_open_libarchive_archive_with_stream(fz_context *ctx, fz_stream *file)
- {
- fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "libarchive support not included");
- }
- fz_archive *
- fz_open_libarchive_archive(fz_context *ctx, const char *filename)
- {
- fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "libarchive support not included");
- }
- fz_stream *
- fz_open_libarchived(fz_context *ctx, fz_stream *chain)
- {
- fz_throw(ctx, FZ_ERROR_UNSUPPORTED, "libarchive support not included");
- }
- #endif
|