untar.c 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. // Copyright (C) 2004-2024 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include <string.h>
  24. #include <limits.h>
  /*
   * Header typeflag values (read from byte 156 of a 512-byte record) that the
   * archive scanner treats specially.
   */
  #define TYPE_NORMAL_OLD  '\0'  /* NUL typeflag, accepted alongside TYPE_NORMAL */
  #define TYPE_NORMAL      '0'
  #define TYPE_CONTIGUOUS  '7'
  #define TYPE_LONG_NAME   'L'   /* record whose data holds a long entry name */
  /* One member of the archive, as recorded while scanning the header records. */
  typedef struct
  {
      /* Heap-allocated entry name; released with fz_free when the archive is dropped. */
      char *name;
      /* Stream position at which the entry's 512-byte header record was read. */
      int64_t offset;
      /* Value parsed from the header's octal size field (checked against INT_MAX). */
      int size;
  } tar_entry;
  35. typedef struct
  36. {
  37. fz_archive super;
  38. int count;
  39. tar_entry *entries;
  40. } fz_tar_archive;
  /* Return 1 when c is one of the characters '0'..'7', otherwise 0. */
  static inline int isoctdigit(char c)
  {
      return !(c < '0' || c > '7');
  }
  /*
   * Parse the leading run of octal digits in s and return its value.
   * Parsing stops at the first character that is not '0'..'7' (including the
   * terminating NUL); a string with no leading octal digit yields 0.
   */
  static inline int64_t otoi(const char *s)
  {
      int64_t acc = 0;
      const char *p;

      for (p = s; *p != '\0' && isoctdigit(*p); ++p)
          acc = acc * 8 + (*p - '0');

      return acc;
  }
  56. static void drop_tar_archive(fz_context *ctx, fz_archive *arch)
  57. {
  58. fz_tar_archive *tar = (fz_tar_archive *) arch;
  59. int i;
  60. for (i = 0; i < tar->count; ++i)
  61. fz_free(ctx, tar->entries[i].name);
  62. fz_free(ctx, tar->entries);
  63. }
  64. static int is_zeroed(fz_context *ctx, unsigned char *buf, size_t size)
  65. {
  66. size_t off;
  67. for (off = 0; off < size; off++)
  68. if (buf[off] != 0)
  69. return 0;
  70. return 1;
  71. }
  72. static void ensure_tar_entries(fz_context *ctx, fz_tar_archive *tar)
  73. {
  74. fz_stream *file = tar->super.file;
  75. unsigned char record[512];
  76. char *longname = NULL;
  77. char name[101];
  78. char octsize[13];
  79. char typeflag;
  80. int64_t offset, blocks, size;
  81. size_t n;
  82. tar->count = 0;
  83. fz_seek(ctx, file, 0, SEEK_SET);
  84. while (1)
  85. {
  86. offset = fz_tell(ctx, file);
  87. n = fz_read(ctx, file, record, nelem(record));
  88. if (n == 0)
  89. break;
  90. if (n < nelem(record))
  91. fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of data in tar record");
  92. if (is_zeroed(ctx, record, nelem(record)))
  93. continue;
  94. memcpy(name, record + 0, nelem(name) - 1);
  95. name[nelem(name) - 1] = '\0';
  96. memcpy(octsize, record + 124, nelem(octsize) - 1);
  97. octsize[nelem(octsize) - 1] = '\0';
  98. size = otoi(octsize);
  99. if (size > INT_MAX)
  100. fz_throw(ctx, FZ_ERROR_FORMAT, "tar archive entry too large");
  101. typeflag = (char) record[156];
  102. if (typeflag == TYPE_LONG_NAME)
  103. {
  104. longname = fz_malloc(ctx, size + 1);
  105. fz_try(ctx)
  106. {
  107. n = fz_read(ctx, file, (unsigned char *) longname, size);
  108. if (n < (size_t) size)
  109. fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of data in tar long name entry name");
  110. longname[size] = '\0';
  111. }
  112. fz_catch(ctx)
  113. {
  114. fz_free(ctx, longname);
  115. fz_rethrow(ctx);
  116. }
  117. fz_seek(ctx, file, 512 - (size % 512), 1);
  118. }
  119. if (typeflag != TYPE_NORMAL_OLD && typeflag != TYPE_NORMAL &&
  120. typeflag != TYPE_CONTIGUOUS && typeflag != TYPE_LONG_NAME)
  121. continue;
  122. blocks = (size + 511) / 512;
  123. fz_seek(ctx, file, blocks * 512, 1);
  124. tar->entries = fz_realloc_array(ctx, tar->entries, tar->count + 1, tar_entry);
  125. tar->entries[tar->count].offset = offset;
  126. tar->entries[tar->count].size = size;
  127. if (longname != NULL)
  128. {
  129. tar->entries[tar->count].name = longname;
  130. longname = NULL;
  131. }
  132. else
  133. tar->entries[tar->count].name = fz_strdup(ctx, name);
  134. tar->count++;
  135. }
  136. }
  137. static tar_entry *lookup_tar_entry(fz_context *ctx, fz_tar_archive *tar, const char *name)
  138. {
  139. int i;
  140. for (i = 0; i < tar->count; i++)
  141. if (!fz_strcasecmp(name, tar->entries[i].name))
  142. return &tar->entries[i];
  143. return NULL;
  144. }
  145. static fz_stream *open_tar_entry(fz_context *ctx, fz_archive *arch, const char *name)
  146. {
  147. fz_tar_archive *tar = (fz_tar_archive *) arch;
  148. fz_stream *file = tar->super.file;
  149. tar_entry *ent;
  150. ent = lookup_tar_entry(ctx, tar, name);
  151. if (!ent)
  152. return NULL;
  153. fz_seek(ctx, file, ent->offset + 512, 0);
  154. return fz_open_null_filter(ctx, file, ent->size, fz_tell(ctx, file));
  155. }
  156. static fz_buffer *read_tar_entry(fz_context *ctx, fz_archive *arch, const char *name)
  157. {
  158. fz_tar_archive *tar = (fz_tar_archive *) arch;
  159. fz_stream *file = tar->super.file;
  160. fz_buffer *ubuf;
  161. tar_entry *ent;
  162. ent = lookup_tar_entry(ctx, tar, name);
  163. if (!ent)
  164. return NULL;
  165. ubuf = fz_new_buffer(ctx, ent->size);
  166. fz_try(ctx)
  167. {
  168. fz_seek(ctx, file, ent->offset + 512, 0);
  169. ubuf->len = fz_read(ctx, file, ubuf->data, ent->size);
  170. if (ubuf->len != (size_t)ent->size)
  171. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot read entire archive entry");
  172. }
  173. fz_catch(ctx)
  174. {
  175. fz_drop_buffer(ctx, ubuf);
  176. fz_rethrow(ctx);
  177. }
  178. return ubuf;
  179. }
  180. static int has_tar_entry(fz_context *ctx, fz_archive *arch, const char *name)
  181. {
  182. fz_tar_archive *tar = (fz_tar_archive *) arch;
  183. tar_entry *ent = lookup_tar_entry(ctx, tar, name);
  184. return ent != NULL;
  185. }
  186. static const char *list_tar_entry(fz_context *ctx, fz_archive *arch, int idx)
  187. {
  188. fz_tar_archive *tar = (fz_tar_archive *) arch;
  189. if (idx < 0 || idx >= tar->count)
  190. return NULL;
  191. return tar->entries[idx].name;
  192. }
  193. static int count_tar_entries(fz_context *ctx, fz_archive *arch)
  194. {
  195. fz_tar_archive *tar = (fz_tar_archive *) arch;
  196. return tar->count;
  197. }
  /*
   * Check an n-byte field: return 1 when its first n-1 bytes are all octal
   * digits ('0'..'7') and the byte after them is NUL, otherwise 0.
   */
  static int isoct(unsigned char *d, int n)
  {
      int i;

      for (i = 0; i < n - 1; i++)
      {
          if (d[i] < '0' || d[i] > '7')
              return 0;
      }

      /* d[i] is the last byte of the field: it must be the terminator. */
      return d[i] == 0;
  }
  208. static int
  209. check_v7(fz_context *ctx, fz_stream *file)
  210. {
  211. unsigned char data[512];
  212. size_t n;
  213. int i;
  214. fz_seek(ctx, file, 0, SEEK_SET);
  215. n = fz_read(ctx, file, data, nelem(data));
  216. if (n != nelem(data))
  217. return 0;
  218. /* Skip over name. */
  219. for (i = 0; i < 100 && data[i] != 0; i++);
  220. /* We want at least 1 byte of name, and a zero terminator. */
  221. if (i == 0 || i == 100)
  222. return 0;
  223. /* Skip over a run of zero terminators. */
  224. for (; i < 100 && data[i] == 0; i++);
  225. if (i != 100)
  226. return 0;
  227. return (isoct(data+100, 8) &&
  228. isoct(data+108, 8) &&
  229. isoct(data+116, 8) &&
  230. isoct(data+124, 12) &&
  231. isoct(data+136, 12) &&
  232. isoct(data+148, 8));
  233. }
  234. int
  235. fz_is_tar_archive(fz_context *ctx, fz_stream *file)
  236. {
  237. const unsigned char gnusignature[6] = { 'u', 's', 't', 'a', 'r', ' ' };
  238. const unsigned char paxsignature[6] = { 'u', 's', 't', 'a', 'r', '\0' };
  239. const unsigned char v7signature[6] = { '\0', '\0', '\0', '\0', '\0', '\0' };
  240. unsigned char data[6];
  241. size_t n;
  242. fz_seek(ctx, file, 257, 0);
  243. n = fz_read(ctx, file, data, nelem(data));
  244. if (n != nelem(data))
  245. return 0;
  246. if (!memcmp(data, gnusignature, nelem(gnusignature)))
  247. return 1;
  248. if (!memcmp(data, paxsignature, nelem(paxsignature)))
  249. return 1;
  250. if (!memcmp(data, v7signature, nelem(v7signature)))
  251. return check_v7(ctx, file);
  252. return 0;
  253. }
  254. fz_archive *
  255. fz_open_tar_archive_with_stream(fz_context *ctx, fz_stream *file)
  256. {
  257. fz_tar_archive *tar;
  258. if (!fz_is_tar_archive(ctx, file))
  259. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize tar archive");
  260. tar = fz_new_derived_archive(ctx, file, fz_tar_archive);
  261. tar->super.format = "tar";
  262. tar->super.count_entries = count_tar_entries;
  263. tar->super.list_entry = list_tar_entry;
  264. tar->super.has_entry = has_tar_entry;
  265. tar->super.read_entry = read_tar_entry;
  266. tar->super.open_entry = open_tar_entry;
  267. tar->super.drop_archive = drop_tar_archive;
  268. fz_try(ctx)
  269. {
  270. ensure_tar_entries(ctx, tar);
  271. }
  272. fz_catch(ctx)
  273. {
  274. fz_drop_archive(ctx, &tar->super);
  275. fz_rethrow(ctx);
  276. }
  277. return &tar->super;
  278. }
  279. fz_archive *
  280. fz_open_tar_archive(fz_context *ctx, const char *filename)
  281. {
  282. fz_archive *tar = NULL;
  283. fz_stream *file;
  284. file = fz_open_file(ctx, filename);
  285. fz_try(ctx)
  286. tar = fz_open_tar_archive_with_stream(ctx, file);
  287. fz_always(ctx)
  288. fz_drop_stream(ctx, file);
  289. fz_catch(ctx)
  290. fz_rethrow(ctx);
  291. return tar;
  292. }