// Copyright (C) 2023-2025 Artifex Software, Inc.
//
// This file is part of MuPDF.
//
// MuPDF is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
//
// You should have received a copy of the GNU Affero General Public License
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
//
// Alternative licensing terms are available from the licensor.
// For commercial licensing, see <https://www.artifex.com/> or contact
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
// CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include <string.h>
  24. #include <limits.h>
  /* Special stream IDs found in the left/right/child links of a
   * directory entry. */
  #define MAXREGSID   0xfffffffa  /* Largest value that is a real stream ID */
  #define NOSTREAM    0xffffffff  /* Link is absent */

  /* Special values found in FAT / DIFAT entries. The names follow the
   * MS-CFB specification (which calls DIRSECT "DIFSECT"). */
  #define MAXREGSECT  0xfffffffa  /* Largest value that is a real sector number */
  #define DIRSECT     0xfffffffc
  #define FATSECT     0xfffffffd
  #define ENDOFCHAIN  0xfffffffe  /* Terminates a sector chain */
  #define FREESECT    0xffffffff

  /* Change this to a #define to dump the directory entries as they are read. */
  #undef DEBUG_DIRENTRIES
  /* One entry of the compound file directory. */
  typedef struct
  {
      /* Allocated, NUL terminated UTF-8 name. */
      char *name;
      /* Start sector and length (in bytes) as read from the directory. */
      uint32_t sector;
      uint64_t size;
      /* Directory indices of the left, right and child nodes. */
      uint32_t l;
      uint32_t r;
      uint32_t d;
      /* Flag word used for various different things:
       * initially the object type, then a marker recording whether the
       * depth first search reached the entry, and finally the original
       * node number (for debugging). */
      uint32_t t;
  } cfb_entry;
  44. typedef struct
  45. {
  46. fz_archive super;
  47. int max;
  48. int count;
  49. cfb_entry *entries;
  50. /* Header information from the file */
  51. uint16_t major;
  52. uint16_t sector_shift;
  53. uint32_t num_dir_sectors;
  54. uint32_t num_fat_sectors;
  55. uint32_t dir_sector0;
  56. uint32_t mini_fat_sector0;
  57. uint32_t num_mini_fat_sectors;
  58. uint32_t difat_sector0;
  59. uint32_t num_difat_sectors;
  60. uint32_t mini_stream_sector0;
  61. uint64_t mini_stream_len;
  62. uint32_t difat[109];
  63. uint32_t fatcache_sector;
  64. uint8_t fatcache[4096];
  65. uint32_t minifatcache_real_sector;
  66. uint32_t minifatcache_sector;
  67. uint8_t minifatcache[4096];
  68. } fz_cfb_archive;
  69. static void
  70. read(fz_context *ctx, fz_stream *stm, uint8_t *buf, size_t size)
  71. {
  72. size_t n = fz_read(ctx, stm, buf, size);
  73. if (n != size)
  74. fz_throw(ctx, FZ_ERROR_FORMAT, "Short read in CFB handling");
  75. }
  /* Decode a little endian 16 bit value from the 2 bytes at b. */
  static uint16_t
  get16(const uint8_t *b)
  {
      unsigned int lo = b[0];
      unsigned int hi = b[1];

      return (uint16_t)(lo | (hi << 8));
  }
  /* Decode a little endian 32 bit value from the 4 bytes at b.
   *
   * Each byte is widened to uint32_t before shifting: a plain uint8_t
   * promotes to (signed) int, and shifting a value of 0x80 or more left by
   * 24 would then overflow into the sign bit, which is undefined. */
  static uint32_t
  get32(const uint8_t *b)
  {
      return (uint32_t)b[0] |
          ((uint32_t)b[1] << 8) |
          ((uint32_t)b[2] << 16) |
          ((uint32_t)b[3] << 24);
  }
  /* Decode a little endian 64 bit value from the 8 bytes at b. */
  static uint64_t
  get64(const uint8_t *b)
  {
      uint64_t value = 0;
      int i;

      /* Fold the bytes in from most significant to least significant. */
      for (i = 7; i >= 0; i--)
          value = (value << 8) | b[i];
      return value;
  }
  98. static uint64_t
  99. get_len(fz_context *ctx, fz_cfb_archive *cfb, const uint8_t *b)
  100. {
  101. uint64_t len = get64(b);
  102. /* In v3 files the top 32bits *should* be zero, but may not be. The
  103. * top bit of the lower 32bits should not be set though. */
  104. if (cfb->major == 3)
  105. {
  106. if (len & 0x80000000)
  107. fz_throw(ctx, FZ_ERROR_FORMAT, "Illegal length in CFB");
  108. len &= 0xFFFFFFFFU;
  109. }
  110. return len;
  111. }
  112. static void
  113. sector_seek(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector, uint32_t offset)
  114. {
  115. fz_seek(ctx, cfb->super.file, ((sector + (uint64_t)1)<<cfb->sector_shift)+offset, SEEK_SET);
  116. }
  117. static uint32_t
  118. read_difat(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector)
  119. {
  120. uint32_t entries_per_sector;
  121. uint32_t sect;
  122. if (sector < 109)
  123. {
  124. return cfb->difat[sector];
  125. }
  126. sector -= 109;
  127. /* Run down the difat chain until we find the right sector. */
  128. entries_per_sector = (1<<(cfb->sector_shift-2)) - 1;
  129. sect = cfb->difat_sector0;
  130. while (sector > entries_per_sector)
  131. {
  132. sector_seek(ctx, cfb, sect, entries_per_sector * 4);
  133. sect = fz_read_uint32_le(ctx, cfb->super.file);
  134. sector -= entries_per_sector;
  135. }
  136. /* Now get the actual entry. */
  137. sector_seek(ctx, cfb, sect, sector * 4);
  138. return fz_read_uint32_le(ctx, cfb->super.file);
  139. }
  140. static uint32_t
  141. read_fat(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector)
  142. {
  143. uint32_t sector_size = 1<<cfb->sector_shift;
  144. /* We want to read the entry for sector 'sector' from the FAT. This
  145. * will be in FAT sector 'fatsect'. */
  146. uint32_t fatsect = sector>>(cfb->sector_shift-2);
  147. /* FAT sector fatsect will be physical sector real_sect. */
  148. uint32_t real_sect = read_difat(ctx, cfb, fatsect);
  149. if (real_sect > MAXREGSECT)
  150. fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt FAT");
  151. if (real_sect != cfb->fatcache_sector)
  152. {
  153. sector_seek(ctx, cfb, real_sect, 0);
  154. read(ctx, cfb->super.file, &cfb->fatcache[0], sector_size);
  155. cfb->fatcache_sector = real_sect;
  156. }
  157. sector &= (sector_size>>2)-1;
  158. return get32(&cfb->fatcache[sector*4]);
  159. }
  160. static uint32_t
  161. read_mini_fat(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector)
  162. {
  163. uint32_t sector_size = 1<<cfb->sector_shift;
  164. /* A mini fat sector has lots of mini sector numbers in (each 4 bytes) */
  165. uint32_t mini_sectors_in_mini_fat_sector = (1<<(cfb->sector_shift-2));
  166. /* We want to read the entry for sector 'sector' from the mini FAT. This
  167. * will be in mini FAT sector 'minifatsect'. */
  168. uint32_t minifatsect = sector / mini_sectors_in_mini_fat_sector;
  169. uint32_t index_within_minifatsect = sector - minifatsect * mini_sectors_in_mini_fat_sector;
  170. int cache_valid = 1;
  171. /* minifatsect is a count of how many sectors we are into the mini fat stream.
  172. * minifatsect_real_sector is the physical section that that corresponds to. */
  173. /* If we're behind our cache position, start from scratch. */
  174. if (minifatsect < cfb->minifatcache_sector)
  175. {
  176. cfb->minifatcache_real_sector = cfb->mini_fat_sector0;
  177. cfb->minifatcache_sector = 0;
  178. cache_valid = 0;
  179. }
  180. /* Skip forward until we are at the right position. */
  181. while (minifatsect != cfb->minifatcache_sector)
  182. {
  183. cfb->minifatcache_real_sector = read_fat(ctx, cfb, cfb->minifatcache_real_sector);
  184. cfb->minifatcache_sector++;
  185. cache_valid = 0;
  186. }
  187. /* Prime the cache if we just moved */
  188. if (!cache_valid)
  189. {
  190. sector_seek(ctx, cfb, cfb->minifatcache_real_sector, 0);
  191. read(ctx, cfb->super.file, cfb->minifatcache, sector_size);
  192. }
  193. return get32(&cfb->minifatcache[index_within_minifatsect*4]);
  194. }
  195. static void drop_cfb_archive(fz_context *ctx, fz_archive *arch)
  196. {
  197. fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
  198. int i;
  199. for (i = 0; i < cfb->count; ++i)
  200. fz_free(ctx, cfb->entries[i].name);
  201. fz_free(ctx, cfb->entries);
  202. }
  203. static cfb_entry *lookup_cfb_entry(fz_context *ctx, fz_cfb_archive *cfb, const char *name)
  204. {
  205. int i;
  206. for (i = 0; i < cfb->count; i++)
  207. if (!fz_strcasecmp(name, cfb->entries[i].name))
  208. return &cfb->entries[i];
  209. return NULL;
  210. }
  211. typedef struct
  212. {
  213. fz_cfb_archive *archive;
  214. uint32_t first_sector;
  215. uint32_t next_sector;
  216. uint32_t next_sector_slow;
  217. uint32_t next_sector_slow_flag;
  218. uint64_t pos_at_next_sector;
  219. uint64_t size;
  220. fz_stream *mini_stream;
  221. uint8_t buffer[4096];
  222. } cfb_state;
  223. static void
  224. cfb_close(fz_context *ctx, void *state_)
  225. {
  226. cfb_state *state = (cfb_state *)state_;
  227. fz_drop_archive(ctx, &state->archive->super);
  228. fz_drop_stream(ctx, state->mini_stream);
  229. fz_free(ctx, state);
  230. }
  231. static int
  232. cfb_next(fz_context *ctx, fz_stream *stm, size_t required)
  233. {
  234. cfb_state *state = stm->state;
  235. fz_cfb_archive *cfb = state->archive;
  236. uint64_t sector_size = ((uint64_t)1)<<cfb->sector_shift;
  237. uint64_t desired_sector_pos;
  238. uint32_t pos_in_sector;
  239. uint32_t this_sector;
  240. if ((uint64_t)stm->pos >= state->size)
  241. stm->eof = 1;
  242. if (stm->eof)
  243. {
  244. stm->rp = stm->wp = state->buffer;
  245. return EOF;
  246. }
  247. pos_in_sector = stm->pos & (sector_size-1);
  248. desired_sector_pos = stm->pos & ~(sector_size-1);
  249. if (desired_sector_pos != state->pos_at_next_sector)
  250. {
  251. state->pos_at_next_sector = 0;
  252. state->next_sector = state->first_sector;
  253. state->next_sector_slow = state->first_sector;
  254. state->next_sector_slow_flag = 0;
  255. }
  256. this_sector = state->next_sector;
  257. while (desired_sector_pos >= state->pos_at_next_sector)
  258. {
  259. this_sector = state->next_sector;
  260. state->next_sector = read_fat(ctx, cfb, state->next_sector);
  261. state->pos_at_next_sector += sector_size;
  262. if (state->next_sector > MAXREGSECT)
  263. break;
  264. state->next_sector_slow_flag = !state->next_sector_slow_flag;
  265. if (state->next_sector_slow_flag == 0)
  266. state->next_sector_slow = read_fat(ctx, cfb, state->next_sector_slow);
  267. if (state->next_sector_slow == state->next_sector)
  268. fz_throw(ctx, FZ_ERROR_FORMAT, "Loop in FAT chain");
  269. }
  270. if (state->next_sector > MAXREGSECT && state->next_sector != ENDOFCHAIN)
  271. fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected entry in FAT chain");
  272. if (this_sector > MAXREGSECT)
  273. fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected end of FAT chain");
  274. sector_seek(ctx, cfb, this_sector, 0);
  275. read(ctx, cfb->super.file, state->buffer, sector_size);
  276. stm->rp = state->buffer;
  277. stm->wp = stm->rp + sector_size;
  278. stm->pos = state->pos_at_next_sector;
  279. if ((uint64_t)stm->pos >= state->size)
  280. {
  281. stm->wp -= (stm->pos - state->size);
  282. stm->pos = state->size;
  283. }
  284. stm->rp += pos_in_sector;
  285. return *stm->rp++;
  286. }
  287. #define MINI_SECTOR_SHIFT 6
  288. #define MINI_SECTOR_SIZE (1<<MINI_SECTOR_SHIFT)
  289. static int
  290. cfb_next_mini(fz_context *ctx, fz_stream *stm, size_t required)
  291. {
  292. cfb_state *state = stm->state;
  293. fz_cfb_archive *cfb = state->archive;
  294. uint64_t desired_sector_pos;
  295. uint32_t pos_in_sector;
  296. uint32_t this_sector;
  297. if ((uint64_t)stm->pos >= state->size)
  298. stm->eof = 1;
  299. if (stm->eof)
  300. {
  301. stm->rp = stm->wp = state->buffer;
  302. return EOF;
  303. }
  304. /* Whenever we say 'sector' here, we mean 'mini sector'. */
  305. pos_in_sector = stm->pos & (MINI_SECTOR_SIZE-1);
  306. desired_sector_pos = stm->pos & ~(MINI_SECTOR_SIZE-1);
  307. if (desired_sector_pos != state->pos_at_next_sector)
  308. {
  309. state->pos_at_next_sector = 0;
  310. state->next_sector = state->first_sector;
  311. state->next_sector_slow = state->first_sector;
  312. state->next_sector_slow_flag = 0;
  313. }
  314. this_sector = state->next_sector;
  315. while (desired_sector_pos >= state->pos_at_next_sector)
  316. {
  317. this_sector = state->next_sector;
  318. state->next_sector = read_mini_fat(ctx, cfb, state->next_sector);
  319. state->pos_at_next_sector += MINI_SECTOR_SIZE;
  320. if (state->next_sector > MAXREGSECT)
  321. break;
  322. state->next_sector_slow_flag = !state->next_sector_slow_flag;
  323. if (state->next_sector_slow_flag == 0)
  324. state->next_sector_slow = read_mini_fat(ctx, cfb, state->next_sector_slow);
  325. if (state->next_sector_slow == state->next_sector)
  326. fz_throw(ctx, FZ_ERROR_FORMAT, "Loop in FAT chain");
  327. }
  328. if (state->next_sector > MAXREGSECT && state->next_sector != ENDOFCHAIN)
  329. fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected entry in FAT chain");
  330. if (this_sector > MAXREGSECT)
  331. fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected end of FAT chain");
  332. fz_seek(ctx, state->mini_stream, ((uint64_t)this_sector) * MINI_SECTOR_SIZE, SEEK_SET);
  333. read(ctx, state->mini_stream, state->buffer, MINI_SECTOR_SIZE);
  334. stm->rp = state->buffer;
  335. stm->wp = stm->rp + MINI_SECTOR_SIZE;
  336. stm->pos += MINI_SECTOR_SIZE;
  337. if ((uint64_t)stm->pos >= state->size)
  338. {
  339. stm->wp -= (stm->pos - state->size);
  340. stm->pos = state->size;
  341. }
  342. stm->rp += pos_in_sector;
  343. return *stm->rp++;
  344. }
  345. static void cfb_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence)
  346. {
  347. cfb_state *state = stm->state;
  348. int64_t pos = stm->pos - (stm->wp - stm->rp);
  349. /* Convert to absolute pos */
  350. if (whence == 1)
  351. {
  352. offset += pos; /* Was relative to current pos */
  353. }
  354. else if (whence == 2)
  355. {
  356. offset += stm->pos; /* Was relative to end */
  357. }
  358. if (offset < 0)
  359. offset = 0;
  360. if ((uint64_t)offset > state->size)
  361. offset = (int64_t)state->size;
  362. stm->pos = offset;
  363. stm->rp = stm->wp = state->buffer;
  364. }
  365. static fz_stream *sector_stream(fz_context *ctx, fz_cfb_archive *cfb, uint32_t sector, uint64_t size)
  366. {
  367. fz_stream *stm;
  368. cfb_state *state = fz_malloc_struct(ctx, cfb_state);
  369. state->archive = (fz_cfb_archive *)fz_keep_archive(ctx, &cfb->super);
  370. state->pos_at_next_sector = 0;
  371. state->size = size;
  372. state->first_sector = sector;
  373. state->next_sector = state->first_sector;
  374. state->next_sector_slow = state->first_sector;
  375. state->next_sector_slow_flag = 0;
  376. stm = fz_new_stream(ctx, state, cfb_next, cfb_close);
  377. stm->seek = cfb_seek;
  378. return stm;
  379. }
  380. static fz_stream *open_cfb_entry(fz_context *ctx, fz_archive *arch, const char *name)
  381. {
  382. fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
  383. cfb_entry *ent;
  384. fz_stream *stm;
  385. cfb_state *state;
  386. ent = lookup_cfb_entry(ctx, cfb, name);
  387. if (!ent)
  388. return NULL;
  389. if (ent->size >= 0x1000)
  390. {
  391. /* Working from entire sectors */
  392. return sector_stream(ctx, cfb, ent->sector, ent->size);
  393. }
  394. /* We're working from the mini stream. */
  395. state = fz_malloc_struct(ctx, cfb_state);
  396. fz_try(ctx)
  397. {
  398. /* Let's get a stream that gets us the mini stream, and then work from that. */
  399. state->mini_stream = sector_stream(ctx, cfb, cfb->mini_stream_sector0, cfb->mini_stream_len);
  400. state->first_sector = ent->sector;
  401. state->pos_at_next_sector = 0;
  402. state->size = ent->size;
  403. state->next_sector = state->first_sector;
  404. state->next_sector_slow = state->first_sector;
  405. state->next_sector_slow_flag = 0;
  406. state->archive = (fz_cfb_archive *)fz_keep_archive(ctx, &cfb->super);
  407. }
  408. fz_catch(ctx)
  409. {
  410. fz_free(ctx, state);
  411. fz_rethrow(ctx);
  412. }
  413. stm = fz_new_stream(ctx, state, cfb_next_mini, cfb_close);
  414. stm->seek = cfb_seek;
  415. return stm;
  416. }
  417. static fz_buffer *read_cfb_entry(fz_context *ctx, fz_archive *arch, const char *name)
  418. {
  419. fz_stream *stm;
  420. fz_buffer *buf = NULL;
  421. stm = open_cfb_entry(ctx, arch, name);
  422. if (!stm)
  423. return NULL;
  424. fz_try(ctx)
  425. buf = fz_read_all(ctx, stm, 1024);
  426. fz_always(ctx)
  427. fz_drop_stream(ctx, stm);
  428. fz_catch(ctx)
  429. fz_rethrow(ctx);
  430. return buf;
  431. }
  432. static int has_cfb_entry(fz_context *ctx, fz_archive *arch, const char *name)
  433. {
  434. fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
  435. cfb_entry *ent = lookup_cfb_entry(ctx, cfb, name);
  436. return ent != NULL;
  437. }
  438. static const char *list_cfb_entry(fz_context *ctx, fz_archive *arch, int idx)
  439. {
  440. fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
  441. if (idx < 0 || idx >= cfb->count)
  442. return NULL;
  443. return cfb->entries[idx].name;
  444. }
  445. static int count_cfb_entries(fz_context *ctx, fz_archive *arch)
  446. {
  447. fz_cfb_archive *cfb = (fz_cfb_archive *) arch;
  448. return cfb->count;
  449. }
  450. static const uint8_t sig[8] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
  451. static const uint8_t zeros[16] = { 0 };
  452. int
  453. fz_is_cfb_archive(fz_context *ctx, fz_stream *file)
  454. {
  455. uint8_t data[nelem(sig)];
  456. size_t n;
  457. fz_seek(ctx, file, 0, SEEK_SET);
  458. n = fz_read(ctx, file, data, nelem(data));
  459. if (n != nelem(data))
  460. return 0;
  461. if (!memcmp(data, sig, nelem(sig)))
  462. return 1;
  463. return 0;
  464. }
  465. static void
  466. expect(fz_context *ctx, fz_stream *file, const uint8_t *pattern, size_t n, const char *msg)
  467. {
  468. uint8_t buffer[64];
  469. assert(sizeof(buffer) >= n);
  470. read(ctx, file, buffer, n);
  471. if (memcmp(buffer, pattern, n) != 0)
  472. fz_throw(ctx, FZ_ERROR_FORMAT, "%s in CFB", msg);
  473. }
  474. static void
  475. expect16(fz_context *ctx, fz_stream *file, uint16_t v, const char *msg)
  476. {
  477. uint16_t u;
  478. u = fz_read_uint16_le(ctx, file);
  479. if (u != v)
  480. fz_throw(ctx, FZ_ERROR_FORMAT, "%s in CFB: 0x%04x != 0x%04x", msg, u, v);
  481. }
  482. static void
  483. expect32(fz_context *ctx, fz_stream *file, uint32_t v, const char *msg)
  484. {
  485. uint32_t u;
  486. u = fz_read_uint32_le(ctx, file);
  487. if (u != v)
  488. fz_throw(ctx, FZ_ERROR_FORMAT, "%s in CFB: 0x%08x != 0x%08x", msg, u, v);
  489. }
  490. #define REACHED 0xFFFFFFFF
  491. #define REACHED_KEEP 0xFFFFFFFE
  492. static void
  493. make_absolute(fz_context *ctx, fz_cfb_archive *cfb, char *prefix, int node, int depth)
  494. {
  495. uint32_t type;
  496. /* To avoid recursion where possible. */
  497. while (1)
  498. {
  499. if (node == (int)NOSTREAM)
  500. return;
  501. if (node < 0 || node >= cfb->count)
  502. fz_throw(ctx, FZ_ERROR_FORMAT, "Invalid tree");
  503. if (depth >= 32)
  504. fz_throw(ctx, FZ_ERROR_FORMAT, "CBF Tree too deep");
  505. type = cfb->entries[node].t;
  506. if (type == REACHED || type == REACHED_KEEP)
  507. fz_throw(ctx, FZ_ERROR_FORMAT, "CBF Tree has cycles");
  508. cfb->entries[node].t = (type == 2) ? REACHED_KEEP : REACHED;
  509. if (prefix)
  510. {
  511. size_t z0 = strlen(prefix);
  512. size_t z1 = strlen(cfb->entries[node].name);
  513. char *newname = fz_malloc(ctx, z0+z1+2);
  514. memcpy(newname, prefix, z0);
  515. newname[z0] = '/';
  516. memcpy(newname+z0+1, cfb->entries[node].name, z1+1);
  517. fz_free(ctx, cfb->entries[node].name);
  518. cfb->entries[node].name = newname;
  519. }
  520. if (cfb->entries[node].d == NOSTREAM && cfb->entries[node].r == NOSTREAM)
  521. {
  522. /* Handle 'l' without recursion, because there is no 'r' or 'd'. */
  523. node = cfb->entries[node].l;
  524. continue;
  525. }
  526. make_absolute(ctx, cfb, prefix, cfb->entries[node].l, depth+1);
  527. if (cfb->entries[node].d == NOSTREAM)
  528. {
  529. /* Handle 'r' without recursion, because there is no 'd'. */
  530. node = cfb->entries[node].r;
  531. continue;
  532. }
  533. make_absolute(ctx, cfb, prefix, cfb->entries[node].r, depth+1);
  534. /* Rather than recursing:
  535. * make_absolute(ctx, cfb, node == 0 ? NULL : cfb->entries[node].name, cfb->entries[node].d, depth+1);
  536. * instead just loop. */
  537. prefix = node == 0 ? NULL : cfb->entries[node].name;
  538. node = cfb->entries[node].d;
  539. }
  540. }
  541. static void
  542. absolutise_names(fz_context *ctx, fz_cfb_archive *cfb)
  543. {
  544. make_absolute(ctx, cfb, NULL, 0, 0);
  545. }
  546. static void
  547. strip_unused_names(fz_context *ctx, fz_cfb_archive *cfb)
  548. {
  549. int i, j;
  550. int n = cfb->count;
  551. /* Init i and j so that we always delete the root node. */
  552. fz_free(ctx, cfb->entries[0].name);
  553. for (i = 1, j = 0; i < n; i++)
  554. {
  555. if (cfb->entries[i].t == REACHED_KEEP)
  556. {
  557. if (i != j)
  558. cfb->entries[j] = cfb->entries[i];
  559. cfb->entries[j].t = i;
  560. j++;
  561. }
  562. else
  563. fz_free(ctx, cfb->entries[i].name);
  564. }
  565. cfb->count = j;
  566. }
  567. fz_archive *
  568. fz_open_cfb_archive_with_stream(fz_context *ctx, fz_stream *file)
  569. {
  570. fz_cfb_archive *cfb;
  571. uint8_t buffer[4096];
  572. uint32_t sector, slow_sector, slow_sector_flag;
  573. int i;
  574. if (!fz_is_cfb_archive(ctx, file))
  575. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize cfb archive");
  576. cfb = fz_new_derived_archive(ctx, file, fz_cfb_archive);
  577. cfb->super.format = "cfb";
  578. cfb->super.count_entries = count_cfb_entries;
  579. cfb->super.list_entry = list_cfb_entry;
  580. cfb->super.has_entry = has_cfb_entry;
  581. cfb->super.read_entry = read_cfb_entry;
  582. cfb->super.open_entry = open_cfb_entry;
  583. cfb->super.drop_archive = drop_cfb_archive;
  584. fz_try(ctx)
  585. {
  586. fz_seek(ctx, file, 0, SEEK_SET);
  587. /* Read the header */
  588. expect(ctx, file, sig, 8, "Bad signature");
  589. expect(ctx, file, zeros, 16, "Bad CLSID");
  590. /* The minor version is SUPPOSED to be 0x3e, but we don't seem to be
  591. * able to rely on this. So just skip it. */
  592. (void)fz_read_uint16_le(ctx, file);
  593. cfb->major = fz_read_uint16_le(ctx, file);
  594. if (cfb->major != 3 && cfb->major != 4)
  595. fz_throw(ctx, FZ_ERROR_FORMAT, "Bad major version of CFB: %d", cfb->major);
  596. expect16(ctx, file, 0xfffe, "Bad byte order");
  597. cfb->sector_shift = fz_read_uint16_le(ctx, file);
  598. if ((cfb->major == 3 && cfb->sector_shift != 9) ||
  599. (cfb->major == 4 && cfb->sector_shift != 12))
  600. fz_throw(ctx, FZ_ERROR_FORMAT, "Bad sector shift: %d", cfb->sector_shift);
  601. expect16(ctx, file, 6, "Bad mini section shift");
  602. expect(ctx, file, zeros, 6, "Bad padding");
  603. cfb->num_dir_sectors = fz_read_uint32_le(ctx, file);
  604. cfb->num_fat_sectors = fz_read_uint32_le(ctx, file);
  605. cfb->dir_sector0 = fz_read_uint32_le(ctx, file);
  606. (void)fz_read_uint32_le(ctx, file); /* Transaction signature number */
  607. expect32(ctx, file, 0x1000, "Bad mini stream cutoff size");
  608. cfb->mini_fat_sector0 = fz_read_uint32_le(ctx, file);
  609. cfb->num_mini_fat_sectors = fz_read_uint32_le(ctx, file);
  610. cfb->difat_sector0 = fz_read_uint32_le(ctx, file);
  611. cfb->num_difat_sectors = fz_read_uint32_le(ctx, file);
  612. for (i = 0; i < 109; i++)
  613. cfb->difat[i] = fz_read_uint32_le(ctx, file);
  614. cfb->fatcache_sector = (uint32_t)-1;
  615. cfb->minifatcache_sector = (uint32_t)-1;
  616. /* Read the directory entries. */
  617. /* On our first pass through, EVERYTHING goes into the entries. */
  618. sector = cfb->dir_sector0;
  619. slow_sector = sector;
  620. slow_sector_flag = 0;
  621. do
  622. {
  623. size_t z = ((size_t)1)<<cfb->sector_shift;
  624. size_t off;
  625. /* Fetch the sector. */
  626. fz_seek(ctx, file, ((int64_t)sector+1)<<cfb->sector_shift, SEEK_SET);
  627. read(ctx, file, buffer, z);
  628. for (off = 0; off < z; off += 128)
  629. {
  630. int count = 0;
  631. int type;
  632. int namelen = get16(buffer+off+64);
  633. if (namelen == 0)
  634. break;
  635. /* What flavour of object is this? */
  636. type = buffer[off+64+2];
  637. /* Ensure our entries list is long enough. */
  638. if (cfb->max == cfb->count)
  639. {
  640. int newmax = cfb->max * 2;
  641. if (newmax == 0)
  642. newmax = 32;
  643. cfb->entries = fz_realloc_array(ctx, cfb->entries, newmax, cfb_entry);
  644. cfb->max = newmax;
  645. }
  646. /* Count the name length in utf8 encoded bytes, including terminator. */
  647. for (i = 0; i < 64; i += 2)
  648. {
  649. int ucs = get16(buffer+off+i);
  650. if (ucs == 0)
  651. break;
  652. count += fz_runelen(ucs);
  653. }
  654. if (i+2 != namelen || i == 64)
  655. fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed name in CFB directory");
  656. /* Copy the name. */
  657. cfb->entries[cfb->count++].name = fz_malloc(ctx, count + 1);
  658. count = 0;
  659. for (i = 0; i < 64; i += 2)
  660. {
  661. int ucs = buffer[off+i] + (buffer[off+i+1]<<8);
  662. if (ucs == 0)
  663. break;
  664. count += fz_runetochar(&cfb->entries[cfb->count-1].name[count], ucs);
  665. }
  666. cfb->entries[cfb->count-1].name[count] = 0;
  667. cfb->entries[cfb->count-1].sector = get32(buffer+off+128-12);
  668. cfb->entries[cfb->count-1].size = get_len(ctx, cfb, buffer+off+128-8);
  669. cfb->entries[cfb->count-1].l = get32(buffer+off+68);
  670. cfb->entries[cfb->count-1].r = get32(buffer+off+72);
  671. cfb->entries[cfb->count-1].d = get32(buffer+off+76);
  672. cfb->entries[cfb->count-1].t = type;
  673. #ifdef DEBUG_DIRENTRIES
  674. fz_write_printf(ctx, fz_stddbg(ctx), "%d: ", cfb->count-1);
  675. if (type == 1)
  676. fz_write_printf(ctx, fz_stddbg(ctx), "(storage) ");
  677. else if (type == 2)
  678. fz_write_printf(ctx, fz_stddbg(ctx), "(file) ");
  679. else if (type == 5)
  680. fz_write_printf(ctx, fz_stddbg(ctx), "(root) ");
  681. else
  682. fz_write_printf(ctx, fz_stddbg(ctx), "(%d?) ", type);
  683. fz_write_printf(ctx, fz_stddbg(ctx), "%q", cfb->entries[cfb->count-1].name);
  684. fz_write_printf(ctx, fz_stddbg(ctx), " @%x+%x\n", cfb->entries[cfb->count-1].sector, cfb->entries[cfb->count-1].size );
  685. if (cfb->entries[cfb->count-1].l <= MAXREGSID)
  686. fz_write_printf(ctx, fz_stddbg(ctx), "\tleft=%d\n", cfb->entries[cfb->count-1].l);
  687. if (cfb->entries[cfb->count-1].r <= MAXREGSID)
  688. fz_write_printf(ctx, fz_stddbg(ctx), "\tright=%d\n", cfb->entries[cfb->count-1].r);
  689. if (cfb->entries[cfb->count-1].d <= MAXREGSID)
  690. fz_write_printf(ctx, fz_stddbg(ctx), "\tchild=%d\n", cfb->entries[cfb->count-1].d);
  691. #endif
  692. /* Type 5 is just for the root. */
  693. if (type == 5)
  694. {
  695. cfb->mini_stream_sector0 = get32(buffer+off+128-12);
  696. cfb->mini_stream_len = get_len(ctx, cfb, buffer+off+128-8);
  697. }
  698. }
  699. /* To get the next sector, we need to read it from the FAT. */
  700. sector = read_fat(ctx, cfb, sector);
  701. slow_sector_flag = !slow_sector_flag;
  702. if (slow_sector_flag == 0)
  703. slow_sector = read_fat(ctx, cfb, slow_sector);
  704. if (slow_sector == sector)
  705. fz_throw(ctx, FZ_ERROR_FORMAT, "Loop in FAT");
  706. }
  707. while (sector <= MAXREGSECT);
  708. absolutise_names(ctx, cfb);
  709. strip_unused_names(ctx, cfb);
  710. #ifdef DEBUG_DIRENTRIES
  711. for (i = 0; i < cfb->count; i++)
  712. fz_write_printf(ctx, fz_stddbg(ctx), "%d: %s (was %d)\n", i, cfb->entries[i].name, cfb->entries[i].t);
  713. #endif
  714. }
  715. fz_catch(ctx)
  716. {
  717. fz_drop_archive(ctx, &cfb->super);
  718. fz_rethrow(ctx);
  719. }
  720. return &cfb->super;
  721. }
  722. fz_archive *
  723. fz_open_cfb_archive(fz_context *ctx, const char *filename)
  724. {
  725. fz_archive *cfb = NULL;
  726. fz_stream *file;
  727. file = fz_open_file(ctx, filename);
  728. fz_try(ctx)
  729. cfb = fz_open_cfb_archive_with_stream(ctx, file);
  730. fz_always(ctx)
  731. fz_drop_stream(ctx, file);
  732. fz_catch(ctx)
  733. fz_rethrow(ctx);
  734. return cfb;
  735. }