pdfinfo.c 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095
  1. // Copyright (C) 2004-2021 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. /*
  23. * Information tool.
  24. * Print information about the input pdf.
  25. */
  26. #include "mupdf/fitz.h"
  27. #include "mupdf/pdf.h"
  28. #include <string.h>
  29. #include <stdlib.h>
  30. #include <stdio.h>
  31. enum
  32. {
  33. DIMENSIONS = 0x01,
  34. FONTS = 0x02,
  35. IMAGES = 0x04,
  36. SHADINGS = 0x08,
  37. PATTERNS = 0x10,
  38. XOBJS = 0x20,
  39. ZUGFERD = 0x40,
  40. ALL = DIMENSIONS | FONTS | IMAGES | SHADINGS | PATTERNS | XOBJS | ZUGFERD
  41. };
  42. struct info
  43. {
  44. int page;
  45. pdf_obj *pageref;
  46. union {
  47. struct {
  48. pdf_obj *obj;
  49. } info;
  50. struct {
  51. pdf_obj *obj;
  52. } crypt;
  53. struct {
  54. pdf_obj *obj;
  55. fz_rect *bbox;
  56. } dim;
  57. struct {
  58. pdf_obj *obj;
  59. pdf_obj *subtype;
  60. pdf_obj *name;
  61. pdf_obj *encoding;
  62. } font;
  63. struct {
  64. pdf_obj *obj;
  65. pdf_obj *width;
  66. pdf_obj *height;
  67. pdf_obj *bpc;
  68. pdf_obj *filter;
  69. pdf_obj *cs;
  70. pdf_obj *altcs;
  71. } image;
  72. struct {
  73. pdf_obj *obj;
  74. pdf_obj *type;
  75. } shading;
  76. struct {
  77. pdf_obj *obj;
  78. pdf_obj *type;
  79. pdf_obj *paint;
  80. pdf_obj *tiling;
  81. pdf_obj *shading;
  82. } pattern;
  83. struct {
  84. pdf_obj *obj;
  85. pdf_obj *groupsubtype;
  86. pdf_obj *reference;
  87. } form;
  88. } u;
  89. };
  90. typedef struct
  91. {
  92. pdf_document *doc;
  93. fz_context *ctx;
  94. fz_output *out;
  95. int pagecount;
  96. struct info *dim;
  97. int dims;
  98. struct info *font;
  99. int fonts;
  100. struct info *image;
  101. int images;
  102. struct info *shading;
  103. int shadings;
  104. struct info *pattern;
  105. int patterns;
  106. struct info *form;
  107. int forms;
  108. struct info *psobj;
  109. int psobjs;
  110. } globals;
  111. static void clearinfo(fz_context *ctx, globals *glo)
  112. {
  113. int i;
  114. if (glo->dim)
  115. {
  116. for (i = 0; i < glo->dims; i++)
  117. fz_free(ctx, glo->dim[i].u.dim.bbox);
  118. fz_free(ctx, glo->dim);
  119. glo->dim = NULL;
  120. glo->dims = 0;
  121. }
  122. if (glo->font)
  123. {
  124. fz_free(ctx, glo->font);
  125. glo->font = NULL;
  126. glo->fonts = 0;
  127. }
  128. if (glo->image)
  129. {
  130. fz_free(ctx, glo->image);
  131. glo->image = NULL;
  132. glo->images = 0;
  133. }
  134. if (glo->shading)
  135. {
  136. fz_free(ctx, glo->shading);
  137. glo->shading = NULL;
  138. glo->shadings = 0;
  139. }
  140. if (glo->pattern)
  141. {
  142. fz_free(ctx, glo->pattern);
  143. glo->pattern = NULL;
  144. glo->patterns = 0;
  145. }
  146. if (glo->form)
  147. {
  148. fz_free(ctx, glo->form);
  149. glo->form = NULL;
  150. glo->forms = 0;
  151. }
  152. if (glo->psobj)
  153. {
  154. fz_free(ctx, glo->psobj);
  155. glo->psobj = NULL;
  156. glo->psobjs = 0;
  157. }
  158. }
  159. static void closexref(fz_context *ctx, globals *glo)
  160. {
  161. if (glo->doc)
  162. {
  163. pdf_drop_document(ctx, glo->doc);
  164. glo->doc = NULL;
  165. }
  166. clearinfo(ctx, glo);
  167. }
  168. static void
  169. infousage(void)
  170. {
  171. fprintf(stderr,
  172. "usage: mutool info [options] file.pdf [pages]\n"
  173. "\t-p -\tpassword for decryption\n"
  174. "\t-F\tlist fonts\n"
  175. "\t-I\tlist images\n"
  176. "\t-M\tlist dimensions\n"
  177. "\t-P\tlist patterns\n"
  178. "\t-S\tlist shadings\n"
  179. "\t-X\tlist form and postscript xobjects\n"
  180. "\t-Z\tlist ZUGFeRD info\n"
  181. "\tpages\tcomma separated list of page numbers and ranges\n"
  182. );
  183. }
  184. static void
  185. showglobalinfo(fz_context *ctx, globals *glo)
  186. {
  187. pdf_obj *obj;
  188. fz_output *out = glo->out;
  189. pdf_document *doc = glo->doc;
  190. int version = pdf_version(ctx, doc);
  191. fz_write_printf(ctx, out, "\nPDF-%d.%d\n", version / 10, version % 10);
  192. obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
  193. if (obj)
  194. {
  195. fz_write_printf(ctx, out, "Info object (%d 0 R):\n", pdf_to_num(ctx, obj));
  196. pdf_print_obj(ctx, out, pdf_resolve_indirect(ctx, obj), 1, 1);
  197. }
  198. obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
  199. if (obj)
  200. {
  201. fz_write_printf(ctx, out, "\nEncryption object (%d 0 R):\n", pdf_to_num(ctx, obj));
  202. pdf_print_obj(ctx, out, pdf_resolve_indirect(ctx, obj), 1, 1);
  203. }
  204. fz_write_printf(ctx, out, "\nPages: %d\n\n", glo->pagecount);
  205. }
  206. static void
  207. gatherdimensions(fz_context *ctx, globals *glo, int page, pdf_obj *pageref)
  208. {
  209. fz_rect bbox;
  210. pdf_obj *obj;
  211. float unit;
  212. int j;
  213. obj = pdf_dict_get(ctx, pageref, PDF_NAME(MediaBox));
  214. if (!pdf_is_array(ctx, obj))
  215. return;
  216. bbox = pdf_to_rect(ctx, obj);
  217. unit = pdf_dict_get_real_default(ctx, pageref, PDF_NAME(UserUnit), 1);
  218. bbox.x0 *= unit;
  219. bbox.y0 *= unit;
  220. bbox.x1 *= unit;
  221. bbox.y1 *= unit;
  222. for (j = 0; j < glo->dims; j++)
  223. if (!memcmp(glo->dim[j].u.dim.bbox, &bbox, sizeof (fz_rect)))
  224. break;
  225. if (j < glo->dims)
  226. return;
  227. glo->dim = fz_realloc_array(ctx, glo->dim, glo->dims+1, struct info);
  228. glo->dims++;
  229. glo->dim[glo->dims - 1].page = page;
  230. glo->dim[glo->dims - 1].pageref = pageref;
  231. glo->dim[glo->dims - 1].u.dim.bbox = NULL;
  232. glo->dim[glo->dims - 1].u.dim.bbox = fz_malloc(ctx, sizeof(fz_rect));
  233. memcpy(glo->dim[glo->dims - 1].u.dim.bbox, &bbox, sizeof (fz_rect));
  234. return;
  235. }
  236. static void
  237. gatherfonts(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
  238. {
  239. int i, n;
  240. n = pdf_dict_len(ctx, dict);
  241. for (i = 0; i < n; i++)
  242. {
  243. pdf_obj *fontdict = NULL;
  244. pdf_obj *subtype = NULL;
  245. pdf_obj *basefont = NULL;
  246. pdf_obj *name = NULL;
  247. pdf_obj *encoding = NULL;
  248. int k;
  249. fontdict = pdf_dict_get_val(ctx, dict, i);
  250. if (!pdf_is_dict(ctx, fontdict))
  251. {
  252. fz_warn(ctx, "not a font dict (%d 0 R)", pdf_to_num(ctx, fontdict));
  253. continue;
  254. }
  255. subtype = pdf_dict_get(ctx, fontdict, PDF_NAME(Subtype));
  256. basefont = pdf_dict_get(ctx, fontdict, PDF_NAME(BaseFont));
  257. if (!basefont || pdf_is_null(ctx, basefont))
  258. name = pdf_dict_get(ctx, fontdict, PDF_NAME(Name));
  259. encoding = pdf_dict_get(ctx, fontdict, PDF_NAME(Encoding));
  260. if (pdf_is_dict(ctx, encoding))
  261. encoding = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding));
  262. for (k = 0; k < glo->fonts; k++)
  263. if (!pdf_objcmp(ctx, glo->font[k].u.font.obj, fontdict))
  264. break;
  265. if (k < glo->fonts)
  266. continue;
  267. glo->font = fz_realloc_array(ctx, glo->font, glo->fonts+1, struct info);
  268. glo->fonts++;
  269. glo->font[glo->fonts - 1].page = page;
  270. glo->font[glo->fonts - 1].pageref = pageref;
  271. glo->font[glo->fonts - 1].u.font.obj = fontdict;
  272. glo->font[glo->fonts - 1].u.font.subtype = subtype;
  273. glo->font[glo->fonts - 1].u.font.name = basefont ? basefont : name;
  274. glo->font[glo->fonts - 1].u.font.encoding = encoding;
  275. }
  276. }
  277. static void
  278. gatherimages(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
  279. {
  280. int i, n;
  281. n = pdf_dict_len(ctx, dict);
  282. for (i = 0; i < n; i++)
  283. {
  284. pdf_obj *imagedict;
  285. pdf_obj *type;
  286. pdf_obj *width;
  287. pdf_obj *height;
  288. pdf_obj *bpc = NULL;
  289. pdf_obj *filter = NULL;
  290. pdf_obj *cs = NULL;
  291. pdf_obj *altcs;
  292. int k;
  293. imagedict = pdf_dict_get_val(ctx, dict, i);
  294. if (!pdf_is_dict(ctx, imagedict))
  295. {
  296. fz_warn(ctx, "not an image dict (%d 0 R)", pdf_to_num(ctx, imagedict));
  297. continue;
  298. }
  299. type = pdf_dict_get(ctx, imagedict, PDF_NAME(Subtype));
  300. if (!pdf_name_eq(ctx, type, PDF_NAME(Image)))
  301. continue;
  302. filter = pdf_dict_get(ctx, imagedict, PDF_NAME(Filter));
  303. altcs = NULL;
  304. cs = pdf_dict_get(ctx, imagedict, PDF_NAME(ColorSpace));
  305. if (pdf_is_array(ctx, cs))
  306. {
  307. pdf_obj *cses = cs;
  308. cs = pdf_array_get(ctx, cses, 0);
  309. if (pdf_name_eq(ctx, cs, PDF_NAME(DeviceN)) || pdf_name_eq(ctx, cs, PDF_NAME(Separation)))
  310. {
  311. altcs = pdf_array_get(ctx, cses, 2);
  312. if (pdf_is_array(ctx, altcs))
  313. altcs = pdf_array_get(ctx, altcs, 0);
  314. }
  315. }
  316. width = pdf_dict_get(ctx, imagedict, PDF_NAME(Width));
  317. height = pdf_dict_get(ctx, imagedict, PDF_NAME(Height));
  318. bpc = pdf_dict_get(ctx, imagedict, PDF_NAME(BitsPerComponent));
  319. for (k = 0; k < glo->images; k++)
  320. if (!pdf_objcmp(ctx, glo->image[k].u.image.obj, imagedict))
  321. break;
  322. if (k < glo->images)
  323. continue;
  324. glo->image = fz_realloc_array(ctx, glo->image, glo->images+1, struct info);
  325. glo->images++;
  326. glo->image[glo->images - 1].page = page;
  327. glo->image[glo->images - 1].pageref = pageref;
  328. glo->image[glo->images - 1].u.image.obj = imagedict;
  329. glo->image[glo->images - 1].u.image.width = width;
  330. glo->image[glo->images - 1].u.image.height = height;
  331. glo->image[glo->images - 1].u.image.bpc = bpc;
  332. glo->image[glo->images - 1].u.image.filter = filter;
  333. glo->image[glo->images - 1].u.image.cs = cs;
  334. glo->image[glo->images - 1].u.image.altcs = altcs;
  335. }
  336. }
  337. static void
  338. gatherforms(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
  339. {
  340. int i, n;
  341. n = pdf_dict_len(ctx, dict);
  342. for (i = 0; i < n; i++)
  343. {
  344. pdf_obj *xobjdict;
  345. pdf_obj *type;
  346. pdf_obj *subtype;
  347. pdf_obj *group;
  348. pdf_obj *groupsubtype;
  349. pdf_obj *reference;
  350. int k;
  351. xobjdict = pdf_dict_get_val(ctx, dict, i);
  352. if (!pdf_is_dict(ctx, xobjdict))
  353. {
  354. fz_warn(ctx, "not a xobject dict (%d 0 R)", pdf_to_num(ctx, xobjdict));
  355. continue;
  356. }
  357. type = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype));
  358. if (!pdf_name_eq(ctx, type, PDF_NAME(Form)))
  359. continue;
  360. subtype = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype2));
  361. if (!pdf_name_eq(ctx, subtype, PDF_NAME(PS)))
  362. continue;
  363. group = pdf_dict_get(ctx, xobjdict, PDF_NAME(Group));
  364. groupsubtype = pdf_dict_get(ctx, group, PDF_NAME(S));
  365. reference = pdf_dict_get(ctx, xobjdict, PDF_NAME(Ref));
  366. for (k = 0; k < glo->forms; k++)
  367. if (!pdf_objcmp(ctx, glo->form[k].u.form.obj, xobjdict))
  368. break;
  369. if (k < glo->forms)
  370. continue;
  371. glo->form = fz_realloc_array(ctx, glo->form, glo->forms+1, struct info);
  372. glo->forms++;
  373. glo->form[glo->forms - 1].page = page;
  374. glo->form[glo->forms - 1].pageref = pageref;
  375. glo->form[glo->forms - 1].u.form.obj = xobjdict;
  376. glo->form[glo->forms - 1].u.form.groupsubtype = groupsubtype;
  377. glo->form[glo->forms - 1].u.form.reference = reference;
  378. }
  379. }
  380. static void
  381. gatherpsobjs(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
  382. {
  383. int i, n;
  384. n = pdf_dict_len(ctx, dict);
  385. for (i = 0; i < n; i++)
  386. {
  387. pdf_obj *xobjdict;
  388. pdf_obj *type;
  389. pdf_obj *subtype;
  390. int k;
  391. xobjdict = pdf_dict_get_val(ctx, dict, i);
  392. if (!pdf_is_dict(ctx, xobjdict))
  393. {
  394. fz_warn(ctx, "not a xobject dict (%d 0 R)", pdf_to_num(ctx, xobjdict));
  395. continue;
  396. }
  397. type = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype));
  398. subtype = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype2));
  399. if (!pdf_name_eq(ctx, type, PDF_NAME(PS)) &&
  400. (!pdf_name_eq(ctx, type, PDF_NAME(Form)) || !pdf_name_eq(ctx, subtype, PDF_NAME(PS))))
  401. continue;
  402. for (k = 0; k < glo->psobjs; k++)
  403. if (!pdf_objcmp(ctx, glo->psobj[k].u.form.obj, xobjdict))
  404. break;
  405. if (k < glo->psobjs)
  406. continue;
  407. glo->psobj = fz_realloc_array(ctx, glo->psobj, glo->psobjs+1, struct info);
  408. glo->psobjs++;
  409. glo->psobj[glo->psobjs - 1].page = page;
  410. glo->psobj[glo->psobjs - 1].pageref = pageref;
  411. glo->psobj[glo->psobjs - 1].u.form.obj = xobjdict;
  412. }
  413. }
  414. static void
  415. gathershadings(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
  416. {
  417. int i, n;
  418. n = pdf_dict_len(ctx, dict);
  419. for (i = 0; i < n; i++)
  420. {
  421. pdf_obj *shade;
  422. pdf_obj *type;
  423. int k;
  424. shade = pdf_dict_get_val(ctx, dict, i);
  425. if (!pdf_is_dict(ctx, shade))
  426. {
  427. fz_warn(ctx, "not a shading dict (%d 0 R)", pdf_to_num(ctx, shade));
  428. continue;
  429. }
  430. type = pdf_dict_get(ctx, shade, PDF_NAME(ShadingType));
  431. if (!pdf_is_int(ctx, type) || pdf_to_int(ctx, type) < 1 || pdf_to_int(ctx, type) > 7)
  432. {
  433. fz_warn(ctx, "not a shading type (%d 0 R)", pdf_to_num(ctx, shade));
  434. type = NULL;
  435. }
  436. for (k = 0; k < glo->shadings; k++)
  437. if (!pdf_objcmp(ctx, glo->shading[k].u.shading.obj, shade))
  438. break;
  439. if (k < glo->shadings)
  440. continue;
  441. glo->shading = fz_realloc_array(ctx, glo->shading, glo->shadings+1, struct info);
  442. glo->shadings++;
  443. glo->shading[glo->shadings - 1].page = page;
  444. glo->shading[glo->shadings - 1].pageref = pageref;
  445. glo->shading[glo->shadings - 1].u.shading.obj = shade;
  446. glo->shading[glo->shadings - 1].u.shading.type = type;
  447. }
  448. }
  449. static void
  450. gatherpatterns(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
  451. {
  452. int i, n;
  453. n = pdf_dict_len(ctx, dict);
  454. for (i = 0; i < n; i++)
  455. {
  456. pdf_obj *patterndict;
  457. pdf_obj *type;
  458. pdf_obj *paint = NULL;
  459. pdf_obj *tiling = NULL;
  460. pdf_obj *shading = NULL;
  461. int k;
  462. patterndict = pdf_dict_get_val(ctx, dict, i);
  463. if (!pdf_is_dict(ctx, patterndict))
  464. {
  465. fz_warn(ctx, "not a pattern dict (%d 0 R)", pdf_to_num(ctx, patterndict));
  466. continue;
  467. }
  468. type = pdf_dict_get(ctx, patterndict, PDF_NAME(PatternType));
  469. if (!pdf_is_int(ctx, type) || pdf_to_int(ctx, type) < 1 || pdf_to_int(ctx, type) > 2)
  470. {
  471. fz_warn(ctx, "not a pattern type (%d 0 R)", pdf_to_num(ctx, patterndict));
  472. type = NULL;
  473. }
  474. if (pdf_to_int(ctx, type) == 1)
  475. {
  476. paint = pdf_dict_get(ctx, patterndict, PDF_NAME(PaintType));
  477. if (!pdf_is_int(ctx, paint) || pdf_to_int(ctx, paint) < 1 || pdf_to_int(ctx, paint) > 2)
  478. {
  479. fz_warn(ctx, "not a pattern paint type (%d 0 R)", pdf_to_num(ctx, patterndict));
  480. paint = NULL;
  481. }
  482. tiling = pdf_dict_get(ctx, patterndict, PDF_NAME(TilingType));
  483. if (!pdf_is_int(ctx, tiling) || pdf_to_int(ctx, tiling) < 1 || pdf_to_int(ctx, tiling) > 3)
  484. {
  485. fz_warn(ctx, "not a pattern tiling type (%d 0 R)", pdf_to_num(ctx, patterndict));
  486. tiling = NULL;
  487. }
  488. }
  489. else
  490. {
  491. shading = pdf_dict_get(ctx, patterndict, PDF_NAME(Shading));
  492. }
  493. for (k = 0; k < glo->patterns; k++)
  494. if (!pdf_objcmp(ctx, glo->pattern[k].u.pattern.obj, patterndict))
  495. break;
  496. if (k < glo->patterns)
  497. continue;
  498. glo->pattern = fz_realloc_array(ctx, glo->pattern, glo->patterns+1, struct info);
  499. glo->patterns++;
  500. glo->pattern[glo->patterns - 1].page = page;
  501. glo->pattern[glo->patterns - 1].pageref = pageref;
  502. glo->pattern[glo->patterns - 1].u.pattern.obj = patterndict;
  503. glo->pattern[glo->patterns - 1].u.pattern.type = type;
  504. glo->pattern[glo->patterns - 1].u.pattern.paint = paint;
  505. glo->pattern[glo->patterns - 1].u.pattern.tiling = tiling;
  506. glo->pattern[glo->patterns - 1].u.pattern.shading = shading;
  507. }
  508. }
  509. static void
  510. gatherresourceinfo(fz_context *ctx, pdf_mark_list *mark_list, globals *glo, int page, pdf_obj *obj, int show)
  511. {
  512. pdf_obj *rsrc;
  513. pdf_obj *pageref;
  514. pdf_obj *font;
  515. pdf_obj *xobj;
  516. pdf_obj *shade;
  517. pdf_obj *pattern;
  518. int i;
  519. /* stop on cyclic resource dependencies */
  520. if (pdf_mark_list_push(ctx, mark_list, obj))
  521. return;
  522. rsrc = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
  523. pageref = pdf_lookup_page_obj(ctx, glo->doc, page-1);
  524. if (!pageref)
  525. fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);
  526. font = pdf_dict_get(ctx, rsrc, PDF_NAME(Font));
  527. if (show & FONTS && font && !pdf_mark_list_push(ctx, mark_list, font))
  528. {
  529. int n;
  530. gatherfonts(ctx, glo, page, pageref, font);
  531. n = pdf_dict_len(ctx, font);
  532. for (i = 0; i < n; i++)
  533. {
  534. gatherresourceinfo(ctx, mark_list, glo, page, pdf_dict_get_val(ctx, font, i), show);
  535. }
  536. }
  537. xobj = pdf_dict_get(ctx, rsrc, PDF_NAME(XObject));
  538. if (show & (IMAGES|XOBJS) && xobj && !pdf_mark_list_push(ctx, mark_list, xobj))
  539. {
  540. int n;
  541. if (show & IMAGES)
  542. gatherimages(ctx, glo, page, pageref, xobj);
  543. if (show & XOBJS)
  544. {
  545. gatherforms(ctx, glo, page, pageref, xobj);
  546. gatherpsobjs(ctx, glo, page, pageref, xobj);
  547. }
  548. n = pdf_dict_len(ctx, xobj);
  549. for (i = 0; i < n; i++)
  550. {
  551. gatherresourceinfo(ctx, mark_list, glo, page, pdf_dict_get_val(ctx, xobj, i), show);
  552. }
  553. }
  554. shade = pdf_dict_get(ctx, rsrc, PDF_NAME(Shading));
  555. if (show & SHADINGS && shade && !pdf_mark_list_push(ctx, mark_list, shade))
  556. gathershadings(ctx, glo, page, pageref, shade);
  557. pattern = pdf_dict_get(ctx, rsrc, PDF_NAME(Pattern));
  558. if (show & PATTERNS && pattern && !pdf_mark_list_push(ctx, mark_list, pattern))
  559. {
  560. int n;
  561. gatherpatterns(ctx, glo, page, pageref, pattern);
  562. n = pdf_dict_len(ctx, pattern);
  563. for (i = 0; i < n; i++)
  564. {
  565. gatherresourceinfo(ctx, mark_list, glo, page, pdf_dict_get_val(ctx, pattern, i), show);
  566. }
  567. }
  568. }
  569. static void
  570. gatherpageinfo(fz_context *ctx, globals *glo, int page, int show)
  571. {
  572. pdf_mark_list mark_list;
  573. pdf_obj *pageref;
  574. pageref = pdf_lookup_page_obj(ctx, glo->doc, page-1);
  575. if (!pageref)
  576. fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);
  577. gatherdimensions(ctx, glo, page, pageref);
  578. pdf_mark_list_init(ctx, &mark_list);
  579. fz_try(ctx)
  580. gatherresourceinfo(ctx, &mark_list, glo, page, pageref, show);
  581. fz_always(ctx)
  582. pdf_mark_list_free(ctx, &mark_list);
  583. fz_catch(ctx)
  584. fz_rethrow(ctx);
  585. }
  586. static void
  587. printinfo(fz_context *ctx, globals *glo, char *filename, int show, int page)
  588. {
  589. int i;
  590. int j;
  591. fz_output *out = glo->out;
  592. #define PAGE_FMT_zu "\t%d\t(%d 0 R):\t"
  593. if (show & DIMENSIONS && glo->dims > 0)
  594. {
  595. fz_write_printf(ctx, out, "Mediaboxes (%d):\n", glo->dims);
  596. for (i = 0; i < glo->dims; i++)
  597. {
  598. fz_write_printf(ctx, out, PAGE_FMT_zu "[ %g %g %g %g ]\n",
  599. glo->dim[i].page,
  600. pdf_to_num(ctx, glo->dim[i].pageref),
  601. glo->dim[i].u.dim.bbox->x0,
  602. glo->dim[i].u.dim.bbox->y0,
  603. glo->dim[i].u.dim.bbox->x1,
  604. glo->dim[i].u.dim.bbox->y1);
  605. }
  606. fz_write_printf(ctx, out, "\n");
  607. }
  608. if (show & FONTS && glo->fonts > 0)
  609. {
  610. fz_write_printf(ctx, out, "Fonts (%d):\n", glo->fonts);
  611. for (i = 0; i < glo->fonts; i++)
  612. {
  613. fz_write_printf(ctx, out, PAGE_FMT_zu "%s '%s' %s%s(%d 0 R)\n",
  614. glo->font[i].page,
  615. pdf_to_num(ctx, glo->font[i].pageref),
  616. pdf_to_name(ctx, glo->font[i].u.font.subtype),
  617. pdf_to_name(ctx, glo->font[i].u.font.name),
  618. glo->font[i].u.font.encoding ? pdf_to_name(ctx, glo->font[i].u.font.encoding) : "",
  619. glo->font[i].u.font.encoding ? " " : "",
  620. pdf_to_num(ctx, glo->font[i].u.font.obj));
  621. }
  622. fz_write_printf(ctx, out, "\n");
  623. }
  624. if (show & IMAGES && glo->images > 0)
  625. {
  626. fz_write_printf(ctx, out, "Images (%d):\n", glo->images);
  627. for (i = 0; i < glo->images; i++)
  628. {
  629. char *cs = NULL;
  630. char *altcs = NULL;
  631. fz_write_printf(ctx, out, PAGE_FMT_zu "[ ",
  632. glo->image[i].page,
  633. pdf_to_num(ctx, glo->image[i].pageref));
  634. if (pdf_is_array(ctx, glo->image[i].u.image.filter))
  635. {
  636. int n = pdf_array_len(ctx, glo->image[i].u.image.filter);
  637. for (j = 0; j < n; j++)
  638. {
  639. pdf_obj *obj = pdf_array_get(ctx, glo->image[i].u.image.filter, j);
  640. char *filter = fz_strdup(ctx, pdf_to_name(ctx, obj));
  641. if (strstr(filter, "Decode"))
  642. *(strstr(filter, "Decode")) = '\0';
  643. fz_write_printf(ctx, out, "%s%s",
  644. filter,
  645. j == pdf_array_len(ctx, glo->image[i].u.image.filter) - 1 ? "" : " ");
  646. fz_free(ctx, filter);
  647. }
  648. }
  649. else if (glo->image[i].u.image.filter)
  650. {
  651. pdf_obj *obj = glo->image[i].u.image.filter;
  652. char *filter = fz_strdup(ctx, pdf_to_name(ctx, obj));
  653. if (strstr(filter, "Decode"))
  654. *(strstr(filter, "Decode")) = '\0';
  655. fz_write_printf(ctx, out, "%s", filter);
  656. fz_free(ctx, filter);
  657. }
  658. else
  659. fz_write_printf(ctx, out, "Raw");
  660. if (glo->image[i].u.image.cs)
  661. {
  662. cs = fz_strdup(ctx, pdf_to_name(ctx, glo->image[i].u.image.cs));
  663. if (!strncmp(cs, "Device", 6))
  664. {
  665. size_t len = strlen(cs + 6);
  666. memmove(cs + 3, cs + 6, len + 1);
  667. cs[3 + len + 1] = '\0';
  668. }
  669. if (strstr(cs, "ICC"))
  670. fz_strlcpy(cs, "ICC", 4);
  671. if (strstr(cs, "Indexed"))
  672. fz_strlcpy(cs, "Idx", 4);
  673. if (strstr(cs, "Pattern"))
  674. fz_strlcpy(cs, "Pat", 4);
  675. if (strstr(cs, "Separation"))
  676. fz_strlcpy(cs, "Sep", 4);
  677. }
  678. if (glo->image[i].u.image.altcs)
  679. {
  680. altcs = fz_strdup(ctx, pdf_to_name(ctx, glo->image[i].u.image.altcs));
  681. if (!strncmp(altcs, "Device", 6))
  682. {
  683. size_t len = strlen(altcs + 6);
  684. memmove(altcs + 3, altcs + 6, len + 1);
  685. altcs[3 + len + 1] = '\0';
  686. }
  687. if (strstr(altcs, "ICC"))
  688. fz_strlcpy(altcs, "ICC", 4);
  689. if (strstr(altcs, "Indexed"))
  690. fz_strlcpy(altcs, "Idx", 4);
  691. if (strstr(altcs, "Pattern"))
  692. fz_strlcpy(altcs, "Pat", 4);
  693. if (strstr(altcs, "Separation"))
  694. fz_strlcpy(altcs, "Sep", 4);
  695. }
  696. fz_write_printf(ctx, out, " ] %dx%d %dbpc %s%s%s (%d 0 R)\n",
  697. pdf_to_int(ctx, glo->image[i].u.image.width),
  698. pdf_to_int(ctx, glo->image[i].u.image.height),
  699. glo->image[i].u.image.bpc ? pdf_to_int(ctx, glo->image[i].u.image.bpc) : 1,
  700. glo->image[i].u.image.cs ? cs : "ImageMask",
  701. glo->image[i].u.image.altcs ? " " : "",
  702. glo->image[i].u.image.altcs ? altcs : "",
  703. pdf_to_num(ctx, glo->image[i].u.image.obj));
  704. fz_free(ctx, cs);
  705. fz_free(ctx, altcs);
  706. }
  707. fz_write_printf(ctx, out, "\n");
  708. }
  709. if (show & SHADINGS && glo->shadings > 0)
  710. {
  711. fz_write_printf(ctx, out, "Shading patterns (%d):\n", glo->shadings);
  712. for (i = 0; i < glo->shadings; i++)
  713. {
  714. char *shadingtype[] =
  715. {
  716. "",
  717. "Function",
  718. "Axial",
  719. "Radial",
  720. "Triangle mesh",
  721. "Lattice",
  722. "Coons patch",
  723. "Tensor patch",
  724. };
  725. fz_write_printf(ctx, out, PAGE_FMT_zu "%s (%d 0 R)\n",
  726. glo->shading[i].page,
  727. pdf_to_num(ctx, glo->shading[i].pageref),
  728. shadingtype[pdf_to_int(ctx, glo->shading[i].u.shading.type)],
  729. pdf_to_num(ctx, glo->shading[i].u.shading.obj));
  730. }
  731. fz_write_printf(ctx, out, "\n");
  732. }
  733. if (show & PATTERNS && glo->patterns > 0)
  734. {
  735. fz_write_printf(ctx, out, "Patterns (%d):\n", glo->patterns);
  736. for (i = 0; i < glo->patterns; i++)
  737. {
  738. if (pdf_to_int(ctx, glo->pattern[i].u.pattern.type) == 1)
  739. {
  740. char *painttype[] =
  741. {
  742. "",
  743. "Colored",
  744. "Uncolored",
  745. };
  746. char *tilingtype[] =
  747. {
  748. "",
  749. "Constant",
  750. "No distortion",
  751. "Constant/fast tiling",
  752. };
  753. fz_write_printf(ctx, out, PAGE_FMT_zu "Tiling %s %s (%d 0 R)\n",
  754. glo->pattern[i].page,
  755. pdf_to_num(ctx, glo->pattern[i].pageref),
  756. painttype[pdf_to_int(ctx, glo->pattern[i].u.pattern.paint)],
  757. tilingtype[pdf_to_int(ctx, glo->pattern[i].u.pattern.tiling)],
  758. pdf_to_num(ctx, glo->pattern[i].u.pattern.obj));
  759. }
  760. else
  761. {
  762. fz_write_printf(ctx, out, PAGE_FMT_zu "Shading %d 0 R (%d 0 R)\n",
  763. glo->pattern[i].page,
  764. pdf_to_num(ctx, glo->pattern[i].pageref),
  765. pdf_to_num(ctx, glo->pattern[i].u.pattern.shading),
  766. pdf_to_num(ctx, glo->pattern[i].u.pattern.obj));
  767. }
  768. }
  769. fz_write_printf(ctx, out, "\n");
  770. }
  771. if (show & XOBJS && glo->forms > 0)
  772. {
  773. fz_write_printf(ctx, out, "Form xobjects (%d):\n", glo->forms);
  774. for (i = 0; i < glo->forms; i++)
  775. {
  776. fz_write_printf(ctx, out, PAGE_FMT_zu "Form%s%s%s%s (%d 0 R)\n",
  777. glo->form[i].page,
  778. pdf_to_num(ctx, glo->form[i].pageref),
  779. glo->form[i].u.form.groupsubtype ? " " : "",
  780. glo->form[i].u.form.groupsubtype ? pdf_to_name(ctx, glo->form[i].u.form.groupsubtype) : "",
  781. glo->form[i].u.form.groupsubtype ? " Group" : "",
  782. glo->form[i].u.form.reference ? " Reference" : "",
  783. pdf_to_num(ctx, glo->form[i].u.form.obj));
  784. }
  785. fz_write_printf(ctx, out, "\n");
  786. }
  787. if (show & XOBJS && glo->psobjs > 0)
  788. {
  789. fz_write_printf(ctx, out, "Postscript xobjects (%d):\n", glo->psobjs);
  790. for (i = 0; i < glo->psobjs; i++)
  791. {
  792. fz_write_printf(ctx, out, PAGE_FMT_zu "(%d 0 R)\n",
  793. glo->psobj[i].page,
  794. pdf_to_num(ctx, glo->psobj[i].pageref),
  795. pdf_to_num(ctx, glo->psobj[i].u.form.obj));
  796. }
  797. fz_write_printf(ctx, out, "\n");
  798. }
  799. }
  800. static void
  801. showinfo(fz_context *ctx, globals *glo, char *filename, int show, const char *pagelist)
  802. {
  803. int page, spage, epage;
  804. int allpages;
  805. int pagecount;
  806. fz_output *out = glo->out;
  807. if (!glo->doc)
  808. {
  809. infousage();
  810. fz_throw(ctx, FZ_ERROR_GENERIC, "Cannot show info without document");
  811. }
  812. allpages = !strcmp(pagelist, "1-N");
  813. pagecount = pdf_count_pages(ctx, glo->doc);
  814. while ((pagelist = fz_parse_page_range(ctx, pagelist, &spage, &epage, pagecount)))
  815. {
  816. if (allpages)
  817. fz_write_printf(ctx, out, "Retrieving info from pages %d-%d...\n", spage, epage);
  818. for (page = spage; page <= epage; page++)
  819. {
  820. gatherpageinfo(ctx, glo, page, show);
  821. if (!allpages)
  822. {
  823. fz_write_printf(ctx, out, "Page %d:\n", page);
  824. printinfo(ctx, glo, filename, show, page);
  825. fz_write_printf(ctx, out, "\n");
  826. clearinfo(ctx, glo);
  827. }
  828. }
  829. }
  830. if (allpages)
  831. printinfo(ctx, glo, filename, show, -1);
  832. }
  833. static void
  834. showzugferd(fz_context *ctx, globals *glo)
  835. {
  836. float version;
  837. fz_output *out = glo->out;
  838. enum pdf_zugferd_profile profile = pdf_zugferd_profile(ctx, glo->doc, &version);
  839. fz_buffer *buf;
  840. if (profile == PDF_NOT_ZUGFERD)
  841. {
  842. fz_write_printf(ctx, out, "Not a ZUGFeRD file.\n");
  843. return;
  844. }
  845. fz_write_printf(ctx, out, "ZUGFeRD version %g\n", version);
  846. fz_write_printf(ctx, out, "%s profile\n", pdf_zugferd_profile_to_string(ctx, profile));
  847. fz_write_printf(ctx, out, "Embedded XML:\n");
  848. buf = pdf_zugferd_xml(ctx, glo->doc);
  849. fz_write_buffer(ctx, out, buf);
  850. fz_drop_buffer(ctx, buf);
  851. fz_write_printf(ctx, out, "\n\n");
  852. }
  853. static void
  854. pdfinfo_info(fz_context *ctx, fz_output *out, char *filename, char *password, int show, char *argv[], int argc)
  855. {
  856. enum { NO_FILE_OPENED, NO_INFO_GATHERED, INFO_SHOWN } state;
  857. int argidx = 0;
  858. globals glo = { 0 };
  859. glo.out = out;
  860. glo.ctx = ctx;
  861. state = NO_FILE_OPENED;
  862. fz_try(ctx)
  863. {
  864. while (argidx < argc)
  865. {
  866. if (state == NO_FILE_OPENED || !fz_is_page_range(ctx, argv[argidx]))
  867. {
  868. if (state == NO_INFO_GATHERED)
  869. {
  870. showinfo(ctx, &glo, filename, show, "1-N");
  871. }
  872. closexref(ctx, &glo);
  873. filename = argv[argidx];
  874. fz_write_printf(ctx, out, "%s:\n", filename);
  875. glo.doc = pdf_open_document(glo.ctx, filename);
  876. if (pdf_needs_password(ctx, glo.doc))
  877. if (!pdf_authenticate_password(ctx, glo.doc, password))
  878. fz_throw(glo.ctx, FZ_ERROR_ARGUMENT, "cannot authenticate password: %s", filename);
  879. glo.pagecount = pdf_count_pages(ctx, glo.doc);
  880. showglobalinfo(ctx, &glo);
  881. state = NO_INFO_GATHERED;
  882. if (show & ZUGFERD)
  883. showzugferd(ctx, &glo);
  884. }
  885. else
  886. {
  887. showinfo(ctx, &glo, filename, show, argv[argidx]);
  888. state = INFO_SHOWN;
  889. }
  890. argidx++;
  891. }
  892. if (state == NO_INFO_GATHERED)
  893. showinfo(ctx, &glo, filename, show, "1-N");
  894. }
  895. fz_always(ctx)
  896. closexref(ctx, &glo);
  897. fz_catch(ctx)
  898. fz_rethrow(ctx);
  899. }
  900. int pdfinfo_main(int argc, char **argv)
  901. {
  902. char *filename = "";
  903. char *password = "";
  904. int show = ALL;
  905. int c;
  906. int ret;
  907. fz_context *ctx;
  908. while ((c = fz_getopt(argc, argv, "FISPXMZp:")) != -1)
  909. {
  910. switch (c)
  911. {
  912. case 'F': if (show == ALL) show = FONTS; else show |= FONTS; break;
  913. case 'I': if (show == ALL) show = IMAGES; else show |= IMAGES; break;
  914. case 'S': if (show == ALL) show = SHADINGS; else show |= SHADINGS; break;
  915. case 'P': if (show == ALL) show = PATTERNS; else show |= PATTERNS; break;
  916. case 'X': if (show == ALL) show = XOBJS; else show |= XOBJS; break;
  917. case 'M': if (show == ALL) show = DIMENSIONS; else show |= DIMENSIONS; break;
  918. case 'Z': if (show == ALL) show = ZUGFERD; else show |= ZUGFERD; break;
  919. case 'p': password = fz_optarg; break;
  920. default:
  921. infousage();
  922. return 1;
  923. }
  924. }
  925. if (fz_optind == argc)
  926. {
  927. infousage();
  928. return 1;
  929. }
  930. ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
  931. if (!ctx)
  932. {
  933. fprintf(stderr, "cannot initialise context\n");
  934. exit(1);
  935. }
  936. ret = 0;
  937. fz_try(ctx)
  938. pdfinfo_info(ctx, fz_stdout(ctx), filename, password, show, &argv[fz_optind], argc-fz_optind);
  939. fz_catch(ctx)
  940. {
  941. fz_report_error(ctx);
  942. ret = 1;
  943. }
  944. fz_drop_context(ctx);
  945. return ret;
  946. }