| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861 |
- // Copyright (C) 2023-2025 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #include <string.h>
- #include <limits.h>
/* Stream IDs (indices into the directory): the largest regular ID and the
 * marker meaning "no entry". */
#define MAXREGSID 0xFFFFFFFA
#define NOSTREAM 0xFFFFFFFF

/* Sector numbers: the largest regular sector number, followed by the
 * reserved (non-regular) values. */
#define MAXREGSECT 0xFFFFFFFA
#define DIRSECT 0xFFFFFFFC
#define FATSECT 0xFFFFFFFD
#define ENDOFCHAIN 0xFFFFFFFE
#define FREESECT 0xFFFFFFFF

/* Define DEBUG_DIRENTRIES to dump the directory entries as they are read. */
#undef DEBUG_DIRENTRIES
/* One entry read from the CFB directory. */
typedef struct
{
	/* UTF-8 name. Rewritten to a '/' separated path once the directory
	 * tree has been walked. */
	char *name;
	/* First sector of the entry's data (a mini sector number when the
	 * entry is small enough to live in the mini stream). */
	uint32_t sector;
	/* Length of the entry's data in bytes. */
	uint64_t size;
	/* Directory indices of the left sibling, right sibling and child. */
	uint32_t l;
	uint32_t r;
	uint32_t d;
	/* Flag word used for various different things: initially the object
	 * type, then a marker saying whether the tree walk reached the entry,
	 * and finally the original node number (for debugging). */
	uint32_t t;
} cfb_entry;
- typedef struct
- {
- fz_archive super;
- int max;
- int count;
- cfb_entry *entries;
- /* Header information from the file */
- uint16_t major;
- uint16_t sector_shift;
- uint32_t num_dir_sectors;
- uint32_t num_fat_sectors;
- uint32_t dir_sector0;
- uint32_t mini_fat_sector0;
- uint32_t num_mini_fat_sectors;
- uint32_t difat_sector0;
- uint32_t num_difat_sectors;
- uint32_t mini_stream_sector0;
- uint64_t mini_stream_len;
- uint32_t difat[109];
- uint32_t fatcache_sector;
- uint8_t fatcache[4096];
- uint32_t minifatcache_real_sector;
- uint32_t minifatcache_sector;
- uint8_t minifatcache[4096];
- } fz_cfb_archive;
- static void
- read(fz_context *ctx, fz_stream *stm, uint8_t *buf, size_t size)
- {
- size_t n = fz_read(ctx, stm, buf, size);
- if (n != size)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Short read in CFB handling");
- }
/* Decode a little-endian 16 bit value from the 2 bytes at 'b'. */
static uint16_t
get16(const uint8_t *b)
{
	unsigned int lo = b[0];
	unsigned int hi = b[1];

	return (uint16_t)(lo | (hi << 8));
}
/* Decode a little-endian 32 bit value from the 4 bytes at 'b'.
 *
 * Each byte is widened to uint32_t before shifting: shifting the promoted
 * (signed) int left by 24 would be undefined for bytes >= 0x80. */
static uint32_t
get32(const uint8_t *b)
{
	return (uint32_t)b[0] |
		((uint32_t)b[1] << 8) |
		((uint32_t)b[2] << 16) |
		((uint32_t)b[3] << 24);
}
/* Decode a little-endian 64 bit value from the 8 bytes at 'b'. */
static uint64_t
get64(const uint8_t *b)
{
	uint64_t v = 0;
	int i;

	/* Fold the bytes in from most significant to least significant. */
	for (i = 7; i >= 0; i--)
		v = (v << 8) | b[i];

	return v;
}
- static uint64_t
- get_len(fz_context *ctx, fz_cfb_archive *cfb, const uint8_t *b)
- {
- uint64_t len = get64(b);
- /* In v3 files the top 32bits *should* be zero, but may not be. The
- * top bit of the lower 32bits should not be set though. */
- if (cfb->major == 3)
- {
- if (len & 0x80000000)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Illegal length in CFB");
- len &= 0xFFFFFFFFU;
- }
- return len;
- }
- static void
- sector_seek(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector, uint32_t offset)
- {
- fz_seek(ctx, cfb->super.file, ((sector + (uint64_t)1)<<cfb->sector_shift)+offset, SEEK_SET);
- }
- static uint32_t
- read_difat(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector)
- {
- uint32_t entries_per_sector;
- uint32_t sect;
- if (sector < 109)
- {
- return cfb->difat[sector];
- }
- sector -= 109;
- /* Run down the difat chain until we find the right sector. */
- entries_per_sector = (1<<(cfb->sector_shift-2)) - 1;
- sect = cfb->difat_sector0;
- while (sector > entries_per_sector)
- {
- sector_seek(ctx, cfb, sect, entries_per_sector * 4);
- sect = fz_read_uint32_le(ctx, cfb->super.file);
- sector -= entries_per_sector;
- }
- /* Now get the actual entry. */
- sector_seek(ctx, cfb, sect, sector * 4);
- return fz_read_uint32_le(ctx, cfb->super.file);
- }
- static uint32_t
- read_fat(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector)
- {
- uint32_t sector_size = 1<<cfb->sector_shift;
- /* We want to read the entry for sector 'sector' from the FAT. This
- * will be in FAT sector 'fatsect'. */
- uint32_t fatsect = sector>>(cfb->sector_shift-2);
- /* FAT sector fatsect will be physical sector real_sect. */
- uint32_t real_sect = read_difat(ctx, cfb, fatsect);
- if (real_sect > MAXREGSECT)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt FAT");
- if (real_sect != cfb->fatcache_sector)
- {
- sector_seek(ctx, cfb, real_sect, 0);
- read(ctx, cfb->super.file, &cfb->fatcache[0], sector_size);
- cfb->fatcache_sector = real_sect;
- }
- sector &= (sector_size>>2)-1;
- return get32(&cfb->fatcache[sector*4]);
- }
- static uint32_t
- read_mini_fat(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector)
- {
- uint32_t sector_size = 1<<cfb->sector_shift;
- /* A mini fat sector has lots of mini sector numbers in (each 4 bytes) */
- uint32_t mini_sectors_in_mini_fat_sector = (1<<(cfb->sector_shift-2));
- /* We want to read the entry for sector 'sector' from the mini FAT. This
- * will be in mini FAT sector 'minifatsect'. */
- uint32_t minifatsect = sector / mini_sectors_in_mini_fat_sector;
- uint32_t index_within_minifatsect = sector - minifatsect * mini_sectors_in_mini_fat_sector;
- int cache_valid = 1;
- /* minifatsect is a count of how many sectors we are into the mini fat stream.
- * minifatsect_real_sector is the physical section that that corresponds to. */
- /* If we're behind our cache position, start from scratch. */
- if (minifatsect < cfb->minifatcache_sector)
- {
- cfb->minifatcache_real_sector = cfb->mini_fat_sector0;
- cfb->minifatcache_sector = 0;
- cache_valid = 0;
- }
- /* Skip forward until we are at the right position. */
- while (minifatsect != cfb->minifatcache_sector)
- {
- cfb->minifatcache_real_sector = read_fat(ctx, cfb, cfb->minifatcache_real_sector);
- cfb->minifatcache_sector++;
- cache_valid = 0;
- }
- /* Prime the cache if we just moved */
- if (!cache_valid)
- {
- sector_seek(ctx, cfb, cfb->minifatcache_real_sector, 0);
- read(ctx, cfb->super.file, cfb->minifatcache, sector_size);
- }
- return get32(&cfb->minifatcache[index_within_minifatsect*4]);
- }
- static void drop_cfb_archive(fz_context *ctx, fz_archive *arch)
- {
- fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
- int i;
- for (i = 0; i < cfb->count; ++i)
- fz_free(ctx, cfb->entries[i].name);
- fz_free(ctx, cfb->entries);
- }
- static cfb_entry *lookup_cfb_entry(fz_context *ctx, fz_cfb_archive *cfb, const char *name)
- {
- int i;
- for (i = 0; i < cfb->count; i++)
- if (!fz_strcasecmp(name, cfb->entries[i].name))
- return &cfb->entries[i];
- return NULL;
- }
- typedef struct
- {
- fz_cfb_archive *archive;
- uint32_t first_sector;
- uint32_t next_sector;
- uint32_t next_sector_slow;
- uint32_t next_sector_slow_flag;
- uint64_t pos_at_next_sector;
- uint64_t size;
- fz_stream *mini_stream;
- uint8_t buffer[4096];
- } cfb_state;
- static void
- cfb_close(fz_context *ctx, void *state_)
- {
- cfb_state *state = (cfb_state *)state_;
- fz_drop_archive(ctx, &state->archive->super);
- fz_drop_stream(ctx, state->mini_stream);
- fz_free(ctx, state);
- }
- static int
- cfb_next(fz_context *ctx, fz_stream *stm, size_t required)
- {
- cfb_state *state = stm->state;
- fz_cfb_archive *cfb = state->archive;
- uint64_t sector_size = ((uint64_t)1)<<cfb->sector_shift;
- uint64_t desired_sector_pos;
- uint32_t pos_in_sector;
- uint32_t this_sector;
- if ((uint64_t)stm->pos >= state->size)
- stm->eof = 1;
- if (stm->eof)
- {
- stm->rp = stm->wp = state->buffer;
- return EOF;
- }
- pos_in_sector = stm->pos & (sector_size-1);
- desired_sector_pos = stm->pos & ~(sector_size-1);
- if (desired_sector_pos != state->pos_at_next_sector)
- {
- state->pos_at_next_sector = 0;
- state->next_sector = state->first_sector;
- state->next_sector_slow = state->first_sector;
- state->next_sector_slow_flag = 0;
- }
- this_sector = state->next_sector;
- while (desired_sector_pos >= state->pos_at_next_sector)
- {
- this_sector = state->next_sector;
- state->next_sector = read_fat(ctx, cfb, state->next_sector);
- state->pos_at_next_sector += sector_size;
- if (state->next_sector > MAXREGSECT)
- break;
- state->next_sector_slow_flag = !state->next_sector_slow_flag;
- if (state->next_sector_slow_flag == 0)
- state->next_sector_slow = read_fat(ctx, cfb, state->next_sector_slow);
- if (state->next_sector_slow == state->next_sector)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Loop in FAT chain");
- }
- if (state->next_sector > MAXREGSECT && state->next_sector != ENDOFCHAIN)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected entry in FAT chain");
- if (this_sector > MAXREGSECT)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected end of FAT chain");
- sector_seek(ctx, cfb, this_sector, 0);
- read(ctx, cfb->super.file, state->buffer, sector_size);
- stm->rp = state->buffer;
- stm->wp = stm->rp + sector_size;
- stm->pos = state->pos_at_next_sector;
- if ((uint64_t)stm->pos >= state->size)
- {
- stm->wp -= (stm->pos - state->size);
- stm->pos = state->size;
- }
- stm->rp += pos_in_sector;
- return *stm->rp++;
- }
- #define MINI_SECTOR_SHIFT 6
- #define MINI_SECTOR_SIZE (1<<MINI_SECTOR_SHIFT)
- static int
- cfb_next_mini(fz_context *ctx, fz_stream *stm, size_t required)
- {
- cfb_state *state = stm->state;
- fz_cfb_archive *cfb = state->archive;
- uint64_t desired_sector_pos;
- uint32_t pos_in_sector;
- uint32_t this_sector;
- if ((uint64_t)stm->pos >= state->size)
- stm->eof = 1;
- if (stm->eof)
- {
- stm->rp = stm->wp = state->buffer;
- return EOF;
- }
- /* Whenever we say 'sector' here, we mean 'mini sector'. */
- pos_in_sector = stm->pos & (MINI_SECTOR_SIZE-1);
- desired_sector_pos = stm->pos & ~(MINI_SECTOR_SIZE-1);
- if (desired_sector_pos != state->pos_at_next_sector)
- {
- state->pos_at_next_sector = 0;
- state->next_sector = state->first_sector;
- state->next_sector_slow = state->first_sector;
- state->next_sector_slow_flag = 0;
- }
- this_sector = state->next_sector;
- while (desired_sector_pos >= state->pos_at_next_sector)
- {
- this_sector = state->next_sector;
- state->next_sector = read_mini_fat(ctx, cfb, state->next_sector);
- state->pos_at_next_sector += MINI_SECTOR_SIZE;
- if (state->next_sector > MAXREGSECT)
- break;
- state->next_sector_slow_flag = !state->next_sector_slow_flag;
- if (state->next_sector_slow_flag == 0)
- state->next_sector_slow = read_mini_fat(ctx, cfb, state->next_sector_slow);
- if (state->next_sector_slow == state->next_sector)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Loop in FAT chain");
- }
- if (state->next_sector > MAXREGSECT && state->next_sector != ENDOFCHAIN)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected entry in FAT chain");
- if (this_sector > MAXREGSECT)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected end of FAT chain");
- fz_seek(ctx, state->mini_stream, ((uint64_t)this_sector) * MINI_SECTOR_SIZE, SEEK_SET);
- read(ctx, state->mini_stream, state->buffer, MINI_SECTOR_SIZE);
- stm->rp = state->buffer;
- stm->wp = stm->rp + MINI_SECTOR_SIZE;
- stm->pos += MINI_SECTOR_SIZE;
- if ((uint64_t)stm->pos >= state->size)
- {
- stm->wp -= (stm->pos - state->size);
- stm->pos = state->size;
- }
- stm->rp += pos_in_sector;
- return *stm->rp++;
- }
- static void cfb_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence)
- {
- cfb_state *state = stm->state;
- int64_t pos = stm->pos - (stm->wp - stm->rp);
- /* Convert to absolute pos */
- if (whence == 1)
- {
- offset += pos; /* Was relative to current pos */
- }
- else if (whence == 2)
- {
- offset += stm->pos; /* Was relative to end */
- }
- if (offset < 0)
- offset = 0;
- if ((uint64_t)offset > state->size)
- offset = (int64_t)state->size;
- stm->pos = offset;
- stm->rp = stm->wp = state->buffer;
- }
- static fz_stream *sector_stream(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector, uint64_t size)
- {
- fz_stream *stm;
- cfb_state *state = fz_malloc_struct(ctx, cfb_state);
- state->archive = (fz_cfb_archive *)fz_keep_archive(ctx, &cfb->super);
- state->pos_at_next_sector = 0;
- state->size = size;
- state->first_sector = sector;
- state->next_sector = state->first_sector;
- state->next_sector_slow = state->first_sector;
- state->next_sector_slow_flag = 0;
- stm = fz_new_stream(ctx, state, cfb_next, cfb_close);
- stm->seek = cfb_seek;
- return stm;
- }
- static fz_stream *open_cfb_entry(fz_context *ctx, fz_archive *arch, const char *name)
- {
- fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
- cfb_entry *ent;
- fz_stream *stm;
- cfb_state *state;
- ent = lookup_cfb_entry(ctx, cfb, name);
- if (!ent)
- return NULL;
- if (ent->size >= 0x1000)
- {
- /* Working from entire sectors */
- return sector_stream(ctx, cfb, ent->sector, ent->size);
- }
- /* We're working from the mini stream. */
- state = fz_malloc_struct(ctx, cfb_state);
- fz_try(ctx)
- {
- /* Let's get a stream that gets us the mini stream, and then work from that. */
- state->mini_stream = sector_stream(ctx, cfb, cfb->mini_stream_sector0, cfb->mini_stream_len);
- state->first_sector = ent->sector;
- state->pos_at_next_sector = 0;
- state->size = ent->size;
- state->next_sector = state->first_sector;
- state->next_sector_slow = state->first_sector;
- state->next_sector_slow_flag = 0;
- state->archive = (fz_cfb_archive *)fz_keep_archive(ctx, &cfb->super);
- }
- fz_catch(ctx)
- {
- fz_free(ctx, state);
- fz_rethrow(ctx);
- }
- stm = fz_new_stream(ctx, state, cfb_next_mini, cfb_close);
- stm->seek = cfb_seek;
- return stm;
- }
- static fz_buffer *read_cfb_entry(fz_context *ctx, fz_archive *arch, const char *name)
- {
- fz_stream *stm;
- fz_buffer *buf = NULL;
- stm = open_cfb_entry(ctx, arch, name);
- if (!stm)
- return NULL;
- fz_try(ctx)
- buf = fz_read_all(ctx, stm, 1024);
- fz_always(ctx)
- fz_drop_stream(ctx, stm);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return buf;
- }
- static int has_cfb_entry(fz_context *ctx, fz_archive *arch, const char *name)
- {
- fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
- cfb_entry *ent = lookup_cfb_entry(ctx, cfb, name);
- return ent != NULL;
- }
- static const char *list_cfb_entry(fz_context *ctx, fz_archive *arch, int idx)
- {
- fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
- if (idx < 0 || idx >= cfb->count)
- return NULL;
- return cfb->entries[idx].name;
- }
- static int count_cfb_entries(fz_context *ctx, fz_archive *arch)
- {
- fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
- return cfb->count;
- }
- static const uint8_t sig[8] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
- static const uint8_t zeros[16] = { 0 };
- int
- fz_is_cfb_archive(fz_context *ctx, fz_stream *file)
- {
- uint8_t data[nelem(sig)];
- size_t n;
- fz_seek(ctx, file, 0, SEEK_SET);
- n = fz_read(ctx, file, data, nelem(data));
- if (n != nelem(data))
- return 0;
- if (!memcmp(data, sig, nelem(sig)))
- return 1;
- return 0;
- }
- static void
- expect(fz_context *ctx, fz_stream *file, const uint8_t *pattern, size_t n, const char *msg)
- {
- uint8_t buffer[64];
- assert(sizeof(buffer) >= n);
- read(ctx, file, buffer, n);
- if (memcmp(buffer, pattern, n) != 0)
- fz_throw(ctx, FZ_ERROR_FORMAT, "%s in CFB", msg);
- }
- static void
- expect16(fz_context *ctx, fz_stream *file, uint16_t v, const char *msg)
- {
- uint16_t u;
- u = fz_read_uint16_le(ctx, file);
- if (u != v)
- fz_throw(ctx, FZ_ERROR_FORMAT, "%s in CFB: 0x%04x != 0x%04x", msg, u, v);
- }
- static void
- expect32(fz_context *ctx, fz_stream *file, uint32_t v, const char *msg)
- {
- uint32_t u;
- u = fz_read_uint32_le(ctx, file);
- if (u != v)
- fz_throw(ctx, FZ_ERROR_FORMAT, "%s in CFB: 0x%08x != 0x%08x", msg, u, v);
- }
- #define REACHED 0xFFFFFFFF
- #define REACHED_KEEP 0xFFFFFFFE
- static void
- make_absolute(fz_context *ctx, fz_cfb_archive *cfb, char *prefix, int node, int depth)
- {
- uint32_t type;
- /* To avoid recursion where possible. */
- while (1)
- {
- if (node == (int)NOSTREAM)
- return;
- if (node < 0 || node >= cfb->count)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Invalid tree");
- if (depth >= 32)
- fz_throw(ctx, FZ_ERROR_FORMAT, "CBF Tree too deep");
- type = cfb->entries[node].t;
- if (type == REACHED || type == REACHED_KEEP)
- fz_throw(ctx, FZ_ERROR_FORMAT, "CBF Tree has cycles");
- cfb->entries[node].t = (type == 2) ? REACHED_KEEP : REACHED;
- if (prefix)
- {
- size_t z0 = strlen(prefix);
- size_t z1 = strlen(cfb->entries[node].name);
- char *newname = fz_malloc(ctx, z0+z1+2);
- memcpy(newname, prefix, z0);
- newname[z0] = '/';
- memcpy(newname+z0+1, cfb->entries[node].name, z1+1);
- fz_free(ctx, cfb->entries[node].name);
- cfb->entries[node].name = newname;
- }
- if (cfb->entries[node].d == NOSTREAM && cfb->entries[node].r == NOSTREAM)
- {
- /* Handle 'l' without recursion, because there is no 'r' or 'd'. */
- node = cfb->entries[node].l;
- continue;
- }
- make_absolute(ctx, cfb, prefix, cfb->entries[node].l, depth+1);
- if (cfb->entries[node].d == NOSTREAM)
- {
- /* Handle 'r' without recursion, because there is no 'd'. */
- node = cfb->entries[node].r;
- continue;
- }
- make_absolute(ctx, cfb, prefix, cfb->entries[node].r, depth+1);
- /* Rather than recursing:
- * make_absolute(ctx, cfb, node == 0 ? NULL : cfb->entries[node].name, cfb->entries[node].d, depth+1);
- * instead just loop. */
- prefix = node == 0 ? NULL : cfb->entries[node].name;
- node = cfb->entries[node].d;
- }
- }
- static void
- absolutise_names(fz_context *ctx, fz_cfb_archive *cfb)
- {
- make_absolute(ctx, cfb, NULL, 0, 0);
- }
- static void
- strip_unused_names(fz_context *ctx, fz_cfb_archive *cfb)
- {
- int i, j;
- int n = cfb->count;
- /* Init i and j so that we always delete the root node. */
- fz_free(ctx, cfb->entries[0].name);
- for (i = 1, j = 0; i < n; i++)
- {
- if (cfb->entries[i].t == REACHED_KEEP)
- {
- if (i != j)
- cfb->entries[j] = cfb->entries[i];
- cfb->entries[j].t = i;
- j++;
- }
- else
- fz_free(ctx, cfb->entries[i].name);
- }
- cfb->count = j;
- }
- fz_archive *
- fz_open_cfb_archive_with_stream(fz_context *ctx, fz_stream *file)
- {
- fz_cfb_archive *cfb;
- uint8_t buffer[4096];
- uint32_t sector, slow_sector, slow_sector_flag;
- int i;
- if (!fz_is_cfb_archive(ctx, file))
- fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize cfb archive");
- cfb = fz_new_derived_archive(ctx, file, fz_cfb_archive);
- cfb->super.format = "cfb";
- cfb->super.count_entries = count_cfb_entries;
- cfb->super.list_entry = list_cfb_entry;
- cfb->super.has_entry = has_cfb_entry;
- cfb->super.read_entry = read_cfb_entry;
- cfb->super.open_entry = open_cfb_entry;
- cfb->super.drop_archive = drop_cfb_archive;
- fz_try(ctx)
- {
- fz_seek(ctx, file, 0, SEEK_SET);
- /* Read the header */
- expect(ctx, file, sig, 8, "Bad signature");
- expect(ctx, file, zeros, 16, "Bad CLSID");
- /* The minor version is SUPPOSED to be 0x3e, but we don't seem to be
- * able to rely on this. So just skip it. */
- (void)fz_read_uint16_le(ctx, file);
- cfb->major = fz_read_uint16_le(ctx, file);
- if (cfb->major != 3 && cfb->major != 4)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Bad major version of CFB: %d", cfb->major);
- expect16(ctx, file, 0xfffe, "Bad byte order");
- cfb->sector_shift = fz_read_uint16_le(ctx, file);
- if ((cfb->major == 3 && cfb->sector_shift != 9) ||
- (cfb->major == 4 && cfb->sector_shift != 12))
- fz_throw(ctx, FZ_ERROR_FORMAT, "Bad sector shift: %d", cfb->sector_shift);
- expect16(ctx, file, 6, "Bad mini section shift");
- expect(ctx, file, zeros, 6, "Bad padding");
- cfb->num_dir_sectors = fz_read_uint32_le(ctx, file);
- cfb->num_fat_sectors = fz_read_uint32_le(ctx, file);
- cfb->dir_sector0 = fz_read_uint32_le(ctx, file);
- (void)fz_read_uint32_le(ctx, file); /* Transaction signature number */
- expect32(ctx, file, 0x1000, "Bad mini stream cutoff size");
- cfb->mini_fat_sector0 = fz_read_uint32_le(ctx, file);
- cfb->num_mini_fat_sectors = fz_read_uint32_le(ctx, file);
- cfb->difat_sector0 = fz_read_uint32_le(ctx, file);
- cfb->num_difat_sectors = fz_read_uint32_le(ctx, file);
- for (i = 0; i < 109; i++)
- cfb->difat[i] = fz_read_uint32_le(ctx, file);
- cfb->fatcache_sector = (uint32_t)-1;
- cfb->minifatcache_sector = (uint32_t)-1;
- /* Read the directory entries. */
- /* On our first pass through, EVERYTHING goes into the entries. */
- sector = cfb->dir_sector0;
- slow_sector = sector;
- slow_sector_flag = 0;
- do
- {
- size_t z = ((size_t)1)<<cfb->sector_shift;
- size_t off;
- /* Fetch the sector. */
- fz_seek(ctx, file, ((int64_t)sector+1)<<cfb->sector_shift, SEEK_SET);
- read(ctx, file, buffer, z);
- for (off = 0; off < z; off += 128)
- {
- int count = 0;
- int type;
- int namelen = get16(buffer+off+64);
- if (namelen == 0)
- break;
- /* What flavour of object is this? */
- type = buffer[off+64+2];
- /* Ensure our entries list is long enough. */
- if (cfb->max == cfb->count)
- {
- int newmax = cfb->max * 2;
- if (newmax == 0)
- newmax = 32;
- cfb->entries = fz_realloc_array(ctx, cfb->entries, newmax, cfb_entry);
- cfb->max = newmax;
- }
- /* Count the name length in utf8 encoded bytes, including terminator. */
- for (i = 0; i < 64; i += 2)
- {
- int ucs = get16(buffer+off+i);
- if (ucs == 0)
- break;
- count += fz_runelen(ucs);
- }
- if (i+2 != namelen || i == 64)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed name in CFB directory");
- /* Copy the name. */
- cfb->entries[cfb->count++].name = fz_malloc(ctx, count + 1);
- count = 0;
- for (i = 0; i < 64; i += 2)
- {
- int ucs = buffer[off+i] + (buffer[off+i+1]<<8);
- if (ucs == 0)
- break;
- count += fz_runetochar(&cfb->entries[cfb->count-1].name[count], ucs);
- }
- cfb->entries[cfb->count-1].name[count] = 0;
- cfb->entries[cfb->count-1].sector = get32(buffer+off+128-12);
- cfb->entries[cfb->count-1].size = get_len(ctx, cfb, buffer+off+128-8);
- cfb->entries[cfb->count-1].l = get32(buffer+off+68);
- cfb->entries[cfb->count-1].r = get32(buffer+off+72);
- cfb->entries[cfb->count-1].d = get32(buffer+off+76);
- cfb->entries[cfb->count-1].t = type;
- #ifdef DEBUG_DIRENTRIES
- fz_write_printf(ctx, fz_stddbg(ctx), "%d: ", cfb->count-1);
- if (type == 1)
- fz_write_printf(ctx, fz_stddbg(ctx), "(storage) ");
- else if (type == 2)
- fz_write_printf(ctx, fz_stddbg(ctx), "(file) ");
- else if (type == 5)
- fz_write_printf(ctx, fz_stddbg(ctx), "(root) ");
- else
- fz_write_printf(ctx, fz_stddbg(ctx), "(%d?) ", type);
- fz_write_printf(ctx, fz_stddbg(ctx), "%q", cfb->entries[cfb->count-1].name);
- fz_write_printf(ctx, fz_stddbg(ctx), " @%x+%x\n", cfb->entries[cfb->count-1].sector, cfb->entries[cfb->count-1].size );
- if (cfb->entries[cfb->count-1].l <= MAXREGSID)
- fz_write_printf(ctx, fz_stddbg(ctx), "\tleft=%d\n", cfb->entries[cfb->count-1].l);
- if (cfb->entries[cfb->count-1].r <= MAXREGSID)
- fz_write_printf(ctx, fz_stddbg(ctx), "\tright=%d\n", cfb->entries[cfb->count-1].r);
- if (cfb->entries[cfb->count-1].d <= MAXREGSID)
- fz_write_printf(ctx, fz_stddbg(ctx), "\tchild=%d\n", cfb->entries[cfb->count-1].d);
- #endif
- /* Type 5 is just for the root. */
- if (type == 5)
- {
- cfb->mini_stream_sector0 = get32(buffer+off+128-12);
- cfb->mini_stream_len = get_len(ctx, cfb, buffer+off+128-8);
- }
- }
- /* To get the next sector, we need to read it from the FAT. */
- sector = read_fat(ctx, cfb, sector);
- slow_sector_flag = !slow_sector_flag;
- if (slow_sector_flag == 0)
- slow_sector = read_fat(ctx, cfb, slow_sector);
- if (slow_sector == sector)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Loop in FAT");
- }
- while (sector <= MAXREGSECT);
- absolutise_names(ctx, cfb);
- strip_unused_names(ctx, cfb);
- #ifdef DEBUG_DIRENTRIES
- for (i = 0; i < cfb->count; i++)
- fz_write_printf(ctx, fz_stddbg(ctx), "%d: %s (was %d)\n", i, cfb->entries[i].name, cfb->entries[i].t);
- #endif
- }
- fz_catch(ctx)
- {
- fz_drop_archive(ctx, &cfb->super);
- fz_rethrow(ctx);
- }
- return &cfb->super;
- }
- fz_archive *
- fz_open_cfb_archive(fz_context *ctx, const char *filename)
- {
- fz_archive *cfb = NULL;
- fz_stream *file;
- file = fz_open_file(ctx, filename);
- fz_try(ctx)
- cfb = fz_open_cfb_archive_with_stream(ctx, file);
- fz_always(ctx)
- fz_drop_stream(ctx, file);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return cfb;
- }
|