archive.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580
  1. // Copyright (C) 2004-2024 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include <string.h>
  24. enum
  25. {
  26. FZ_ARCHIVE_HANDLER_MAX = 32
  27. };
  28. struct fz_archive_handler_context
  29. {
  30. int refs;
  31. int count;
  32. const fz_archive_handler *handler[FZ_ARCHIVE_HANDLER_MAX];
  33. };
  34. fz_stream *
  35. fz_open_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
  36. {
  37. fz_stream *stream = fz_try_open_archive_entry(ctx, arch, name);
  38. if (stream == NULL)
  39. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find entry %s", name);
  40. return stream;
  41. }
  42. fz_stream *
  43. fz_try_open_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
  44. {
  45. char *local_name;
  46. fz_stream *stream = NULL;
  47. if (arch == NULL || !arch->open_entry)
  48. return NULL;
  49. local_name = fz_cleanname_strdup(ctx, name);
  50. fz_var(stream);
  51. fz_try(ctx)
  52. stream = arch->open_entry(ctx, arch, local_name);
  53. fz_always(ctx)
  54. fz_free(ctx, local_name);
  55. fz_catch(ctx)
  56. fz_rethrow(ctx);
  57. return stream;
  58. }
  59. fz_buffer *
  60. fz_read_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
  61. {
  62. fz_buffer *buf = fz_try_read_archive_entry(ctx, arch, name);
  63. if (buf == NULL)
  64. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find entry %s", name);
  65. return buf;
  66. }
  67. fz_buffer *
  68. fz_try_read_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
  69. {
  70. char *local_name;
  71. fz_buffer *buf = NULL;
  72. if (arch == NULL || !arch->read_entry || !arch->has_entry || name == NULL)
  73. return NULL;
  74. local_name = fz_cleanname_strdup(ctx, name);
  75. fz_var(buf);
  76. fz_try(ctx)
  77. {
  78. if (!arch->has_entry(ctx, arch, local_name))
  79. break;
  80. buf = arch->read_entry(ctx, arch, local_name);
  81. }
  82. fz_always(ctx)
  83. fz_free(ctx, local_name);
  84. fz_catch(ctx)
  85. fz_rethrow(ctx);
  86. return buf;
  87. }
  88. int
  89. fz_has_archive_entry(fz_context *ctx, fz_archive *arch, const char *name)
  90. {
  91. char *local_name;
  92. int res = 0;
  93. if (arch == NULL)
  94. return 0;
  95. if (!arch->has_entry)
  96. return 0;
  97. local_name = fz_cleanname_strdup(ctx, name);
  98. fz_var(res);
  99. fz_try(ctx)
  100. res = arch->has_entry(ctx, arch, local_name);
  101. fz_always(ctx)
  102. fz_free(ctx, local_name);
  103. fz_catch(ctx)
  104. fz_rethrow(ctx);
  105. return res;
  106. }
  107. const char *
  108. fz_list_archive_entry(fz_context *ctx, fz_archive *arch, int idx)
  109. {
  110. if (arch == 0)
  111. return NULL;
  112. if (!arch->list_entry)
  113. return NULL;
  114. return arch->list_entry(ctx, arch, idx);
  115. }
  116. int
  117. fz_count_archive_entries(fz_context *ctx, fz_archive *arch)
  118. {
  119. if (arch == NULL)
  120. return 0;
  121. if (!arch->count_entries)
  122. return 0;
  123. return arch->count_entries(ctx, arch);
  124. }
  125. const char *
  126. fz_archive_format(fz_context *ctx, fz_archive *arch)
  127. {
  128. if (arch == NULL)
  129. return "undefined";
  130. return arch->format;
  131. }
  132. fz_archive *
  133. fz_new_archive_of_size(fz_context *ctx, fz_stream *file, int size)
  134. {
  135. fz_archive *arch;
  136. arch = Memento_label(fz_calloc(ctx, 1, size), "fz_archive");
  137. arch->refs = 1;
  138. arch->file = fz_keep_stream(ctx, file);
  139. return arch;
  140. }
  141. fz_archive *
  142. fz_try_open_archive_with_stream(fz_context *ctx, fz_stream *file)
  143. {
  144. fz_archive *arch = NULL;
  145. int i;
  146. if (file == NULL)
  147. return NULL;
  148. for (i = 0; i < ctx->archive->count; i++)
  149. {
  150. fz_seek(ctx, file, 0, SEEK_SET);
  151. if (ctx->archive->handler[i]->recognize(ctx, file))
  152. {
  153. arch = ctx->archive->handler[i]->open(ctx, file);
  154. if (arch)
  155. return arch;
  156. }
  157. }
  158. return NULL;
  159. }
  160. fz_archive *
  161. fz_open_archive_with_stream(fz_context *ctx, fz_stream *file)
  162. {
  163. fz_archive *arch = fz_try_open_archive_with_stream(ctx, file);
  164. if (arch == NULL)
  165. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize archive");
  166. return arch;
  167. }
  168. fz_archive *
  169. fz_open_archive(fz_context *ctx, const char *filename)
  170. {
  171. fz_stream *file;
  172. fz_archive *arch = NULL;
  173. file = fz_open_file(ctx, filename);
  174. fz_try(ctx)
  175. arch = fz_open_archive_with_stream(ctx, file);
  176. fz_always(ctx)
  177. fz_drop_stream(ctx, file);
  178. fz_catch(ctx)
  179. fz_rethrow(ctx);
  180. return arch;
  181. }
  182. fz_archive *
  183. fz_keep_archive(fz_context *ctx, fz_archive *arch)
  184. {
  185. return (fz_archive *)fz_keep_imp(ctx, arch, &arch->refs);
  186. }
  187. void
  188. fz_drop_archive(fz_context *ctx, fz_archive *arch)
  189. {
  190. if (fz_drop_imp(ctx, arch, &arch->refs))
  191. {
  192. if (arch->drop_archive)
  193. arch->drop_archive(ctx, arch);
  194. fz_drop_stream(ctx, arch->file);
  195. fz_free(ctx, arch);
  196. }
  197. }
  198. /* In-memory archive using a fz_tree holding fz_buffers */
  199. typedef struct
  200. {
  201. fz_archive super;
  202. fz_tree *tree;
  203. } fz_tree_archive;
  204. static int has_tree_entry(fz_context *ctx, fz_archive *arch, const char *name)
  205. {
  206. fz_tree *tree = ((fz_tree_archive*)arch)->tree;
  207. fz_buffer *ent = fz_tree_lookup(ctx, tree, name);
  208. return ent != NULL;
  209. }
  210. static fz_buffer *read_tree_entry(fz_context *ctx, fz_archive *arch, const char *name)
  211. {
  212. fz_tree *tree = ((fz_tree_archive*)arch)->tree;
  213. fz_buffer *ent = fz_tree_lookup(ctx, tree, name);
  214. return fz_keep_buffer(ctx, ent);
  215. }
  216. static fz_stream *open_tree_entry(fz_context *ctx, fz_archive *arch, const char *name)
  217. {
  218. fz_tree *tree = ((fz_tree_archive*)arch)->tree;
  219. fz_buffer *ent = fz_tree_lookup(ctx, tree, name);
  220. return fz_open_buffer(ctx, ent);
  221. }
  222. static void drop_tree_archive_entry(fz_context *ctx, void *ent)
  223. {
  224. fz_drop_buffer(ctx, ent);
  225. }
  226. static void drop_tree_archive(fz_context *ctx, fz_archive *arch)
  227. {
  228. fz_tree *tree = ((fz_tree_archive*)arch)->tree;
  229. fz_drop_tree(ctx, tree, drop_tree_archive_entry);
  230. }
  231. fz_archive *
  232. fz_new_tree_archive(fz_context *ctx, fz_tree *tree)
  233. {
  234. fz_tree_archive *arch;
  235. arch = fz_new_derived_archive(ctx, NULL, fz_tree_archive);
  236. arch->super.format = "tree";
  237. arch->super.has_entry = has_tree_entry;
  238. arch->super.read_entry = read_tree_entry;
  239. arch->super.open_entry = open_tree_entry;
  240. arch->super.drop_archive = drop_tree_archive;
  241. arch->tree = tree;
  242. return &arch->super;
  243. }
  244. void
  245. fz_tree_archive_add_buffer(fz_context *ctx, fz_archive *arch_, const char *name, fz_buffer *buf)
  246. {
  247. fz_tree_archive *arch = (fz_tree_archive *)arch_;
  248. if (arch == NULL || arch->super.has_entry != has_tree_entry)
  249. fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot insert into a non-tree archive");
  250. buf = fz_keep_buffer(ctx, buf);
  251. fz_try(ctx)
  252. arch->tree = fz_tree_insert(ctx, arch->tree, name, buf);
  253. fz_catch(ctx)
  254. {
  255. fz_drop_buffer(ctx, buf);
  256. fz_rethrow(ctx);
  257. }
  258. }
  259. void
  260. fz_tree_archive_add_data(fz_context *ctx, fz_archive *arch_, const char *name, const void *data, size_t size)
  261. {
  262. fz_tree_archive *arch = (fz_tree_archive *)arch_;
  263. fz_buffer *buf;
  264. if (arch == NULL || arch->super.has_entry != has_tree_entry)
  265. fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot insert into a non-tree archive");
  266. buf = fz_new_buffer_from_copied_data(ctx, data, size);
  267. fz_try(ctx)
  268. arch->tree = fz_tree_insert(ctx, arch->tree, name, buf);
  269. fz_catch(ctx)
  270. {
  271. fz_drop_buffer(ctx, buf);
  272. fz_rethrow(ctx);
  273. }
  274. }
  275. typedef struct
  276. {
  277. fz_archive *arch;
  278. char *dir;
  279. } multi_archive_entry;
  280. typedef struct
  281. {
  282. fz_archive super;
  283. int len;
  284. int max;
  285. multi_archive_entry *sub;
  286. } fz_multi_archive;
  287. static int has_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name)
  288. {
  289. fz_multi_archive *arch = (fz_multi_archive *)arch_;
  290. int i;
  291. for (i = arch->len-1; i >= 0; i--)
  292. {
  293. multi_archive_entry *e = &arch->sub[i];
  294. const char *subname = name;
  295. if (e->dir)
  296. {
  297. size_t n = strlen(e->dir);
  298. if (strncmp(e->dir, name, n) != 0)
  299. continue;
  300. subname += n;
  301. }
  302. if (fz_has_archive_entry(ctx, arch->sub[i].arch, subname))
  303. return 1;
  304. }
  305. return 0;
  306. }
  307. static fz_buffer *read_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name)
  308. {
  309. fz_multi_archive *arch = (fz_multi_archive *)arch_;
  310. int i;
  311. fz_buffer *res = NULL;
  312. for (i = arch->len-1; i >= 0; i--)
  313. {
  314. multi_archive_entry *e = &arch->sub[i];
  315. const char *subname = name;
  316. if (e->dir)
  317. {
  318. size_t n = strlen(e->dir);
  319. if (strncmp(e->dir, name, n) != 0)
  320. continue;
  321. subname += n;
  322. }
  323. res = fz_try_read_archive_entry(ctx, arch->sub[i].arch, subname);
  324. if (res)
  325. break;
  326. }
  327. return res;
  328. }
  329. static fz_stream *open_multi_entry(fz_context *ctx, fz_archive *arch_, const char *name)
  330. {
  331. fz_multi_archive *arch = (fz_multi_archive *)arch_;
  332. int i;
  333. fz_stream *res = NULL;
  334. for (i = arch->len-1; i >= 0; i--)
  335. {
  336. multi_archive_entry *e = &arch->sub[i];
  337. const char *subname = name;
  338. if (e->dir)
  339. {
  340. size_t n = strlen(e->dir);
  341. if (strncmp(e->dir, name, n) != 0)
  342. continue;
  343. subname += n;
  344. }
  345. res = fz_open_archive_entry(ctx, arch->sub[i].arch, subname);
  346. if (res)
  347. break;
  348. }
  349. return res;
  350. }
  351. static void drop_multi_archive(fz_context *ctx, fz_archive *arch_)
  352. {
  353. fz_multi_archive *arch = (fz_multi_archive *)arch_;
  354. int i;
  355. for (i = arch->len-1; i >= 0; i--)
  356. {
  357. multi_archive_entry *e = &arch->sub[i];
  358. fz_free(ctx, e->dir);
  359. fz_drop_archive(ctx, e->arch);
  360. }
  361. fz_free(ctx, arch->sub);
  362. }
  363. fz_archive *
  364. fz_new_multi_archive(fz_context *ctx)
  365. {
  366. fz_multi_archive *arch;
  367. arch = fz_new_derived_archive(ctx, NULL, fz_multi_archive);
  368. arch->super.format = "multi";
  369. arch->super.has_entry = has_multi_entry;
  370. arch->super.read_entry = read_multi_entry;
  371. arch->super.open_entry = open_multi_entry;
  372. arch->super.drop_archive = drop_multi_archive;
  373. arch->max = 0;
  374. arch->len = 0;
  375. arch->sub = NULL;
  376. return &arch->super;
  377. }
  378. void
  379. fz_mount_multi_archive(fz_context *ctx, fz_archive *arch_, fz_archive *sub, const char *path)
  380. {
  381. fz_multi_archive *arch = (fz_multi_archive *)arch_;
  382. char *clean_path = NULL;
  383. if (arch->super.has_entry != has_multi_entry)
  384. fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot mount within a non-multi archive");
  385. if (arch->len == arch->max)
  386. {
  387. int n = arch->max ? arch->max * 2 : 8;
  388. arch->sub = fz_realloc(ctx, arch->sub, sizeof(*arch->sub) * n);
  389. arch->max = n;
  390. }
  391. /* If we have a path, then strip any trailing slashes, and add just one. */
  392. if (path)
  393. {
  394. clean_path = fz_cleanname_strdup(ctx, path);
  395. if (clean_path[0] == '.' && clean_path[1] == 0)
  396. {
  397. fz_free(ctx, clean_path);
  398. clean_path = NULL;
  399. }
  400. else
  401. {
  402. /* Do a strcat without doing a strcat to avoid the compiler
  403. * complaining at us. We know that n here will be <= n above
  404. * so this is safe. */
  405. size_t n = strlen(clean_path);
  406. clean_path[n] = '/';
  407. clean_path[n + 1] = 0;
  408. }
  409. }
  410. arch->sub[arch->len].arch = fz_keep_archive(ctx, sub);
  411. arch->sub[arch->len].dir = clean_path;
  412. arch->len++;
  413. }
  414. static const fz_archive_handler fz_zip_archive_handler =
  415. {
  416. fz_is_zip_archive,
  417. fz_open_zip_archive_with_stream
  418. };
  419. static const fz_archive_handler fz_tar_archive_handler =
  420. {
  421. fz_is_tar_archive,
  422. fz_open_tar_archive_with_stream
  423. };
  424. const fz_archive_handler fz_libarchive_archive_handler =
  425. {
  426. fz_is_libarchive_archive,
  427. fz_open_libarchive_archive_with_stream
  428. };
  429. const fz_archive_handler fz_cfb_archive_handler =
  430. {
  431. fz_is_cfb_archive,
  432. fz_open_cfb_archive_with_stream
  433. };
  434. void fz_new_archive_handler_context(fz_context *ctx)
  435. {
  436. ctx->archive = fz_malloc_struct(ctx, fz_archive_handler_context);
  437. ctx->archive->refs = 1;
  438. fz_register_archive_handler(ctx, &fz_zip_archive_handler);
  439. fz_register_archive_handler(ctx, &fz_tar_archive_handler);
  440. #ifdef HAVE_LIBARCHIVE
  441. fz_register_archive_handler(ctx, &fz_libarchive_archive_handler);
  442. #endif
  443. fz_register_archive_handler(ctx, &fz_cfb_archive_handler);
  444. }
  445. fz_archive_handler_context *fz_keep_archive_handler_context(fz_context *ctx)
  446. {
  447. if (!ctx || !ctx->archive)
  448. return NULL;
  449. return fz_keep_imp(ctx, ctx->archive, &ctx->archive->refs);
  450. }
  451. void fz_drop_archive_handler_context(fz_context *ctx)
  452. {
  453. if (!ctx)
  454. return;
  455. if (fz_drop_imp(ctx, ctx->archive, &ctx->archive->refs))
  456. {
  457. fz_free(ctx, ctx->archive);
  458. ctx->archive = NULL;
  459. }
  460. }
  461. void fz_register_archive_handler(fz_context *ctx, const fz_archive_handler *handler)
  462. {
  463. fz_archive_handler_context *ac;
  464. int i;
  465. if (!handler)
  466. return;
  467. ac = ctx->archive;
  468. if (ac == NULL)
  469. fz_throw(ctx, FZ_ERROR_ARGUMENT, "archive handler list not found");
  470. for (i = 0; i < ac->count; i++)
  471. if (ac->handler[i] == handler)
  472. return;
  473. if (ac->count >= FZ_ARCHIVE_HANDLER_MAX)
  474. fz_throw(ctx, FZ_ERROR_LIMIT, "Too many archive handlers");
  475. ac->handler[ac->count++] = handler;
  476. }