pdftrim.c 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. // Copyright (C) 2004-2023 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. /* PDF content trimming tool. */
  23. #include "mupdf/fitz.h"
  24. #include "mupdf/pdf.h"
  25. #include <stdio.h>
  26. #include <stdlib.h>
  27. #include <string.h>
  28. #include <ctype.h>
  29. typedef struct
  30. {
  31. fz_rect cullbox;
  32. int exclude;
  33. } culler_data_t;
  34. static int
  35. culler(fz_context *ctx, void *opaque, fz_rect r, fz_cull_type type)
  36. {
  37. culler_data_t *cd = (culler_data_t *)opaque;
  38. r = fz_intersect_rect(r, cd->cullbox);
  39. if (cd->exclude)
  40. {
  41. if (!fz_is_empty_rect(r))
  42. return 1;
  43. }
  44. else
  45. {
  46. if (fz_is_empty_rect(r))
  47. return 1;
  48. }
  49. return 0;
  50. }
  51. static void
  52. rewrite_page_streams(fz_context *ctx, pdf_document *doc, int page_num, fz_box_type box, float *margins, int exclude, int fallback)
  53. {
  54. pdf_page *page = pdf_load_page(ctx, doc, page_num);
  55. pdf_filter_options options = { 0 };
  56. pdf_filter_factory list[2] = { 0 };
  57. pdf_sanitize_filter_options sopts = { 0 };
  58. pdf_annot *annot;
  59. culler_data_t cd;
  60. cd.exclude = exclude;
  61. sopts.opaque = &cd;
  62. sopts.culler = culler;
  63. options.filters = list;
  64. options.recurse = 1;
  65. list[0].filter = pdf_new_sanitize_filter;
  66. list[0].options = &sopts;
  67. fz_try(ctx)
  68. {
  69. switch (box)
  70. {
  71. default:
  72. case FZ_MEDIA_BOX:
  73. cd.cullbox = pdf_dict_get_rect(ctx, page->obj, PDF_NAME(MediaBox));
  74. break;
  75. case FZ_BLEED_BOX:
  76. cd.cullbox = pdf_dict_get_rect(ctx, page->obj, PDF_NAME(BleedBox));
  77. break;
  78. case FZ_CROP_BOX:
  79. cd.cullbox = pdf_dict_get_rect(ctx, page->obj, PDF_NAME(CropBox));
  80. break;
  81. case FZ_TRIM_BOX:
  82. cd.cullbox = pdf_dict_get_rect(ctx, page->obj, PDF_NAME(TrimBox));
  83. break;
  84. case FZ_ART_BOX:
  85. cd.cullbox = pdf_dict_get_rect(ctx, page->obj, PDF_NAME(ArtBox));
  86. break;
  87. }
  88. cd.cullbox.x0 += margins[3];
  89. cd.cullbox.y0 += margins[2];
  90. cd.cullbox.x1 -= margins[1];
  91. cd.cullbox.y1 -= margins[0];
  92. if (fz_is_empty_rect(cd.cullbox) && fallback && box != FZ_MEDIA_BOX)
  93. {
  94. fprintf(stderr, "Falling back to Mediabox for page %d\n", page_num);
  95. cd.cullbox = pdf_dict_get_rect(ctx, page->obj, PDF_NAME(MediaBox));
  96. }
  97. if (fz_is_empty_rect(cd.cullbox))
  98. {
  99. fprintf(stderr, "No box found for page %d\n", page_num);
  100. break;
  101. }
  102. pdf_filter_page_contents(ctx, doc, page, &options);
  103. for (annot = pdf_first_annot(ctx, page); annot != NULL; annot = pdf_next_annot(ctx, annot))
  104. pdf_filter_annot_contents(ctx, doc, annot, &options);
  105. }
  106. fz_always(ctx)
  107. fz_drop_page(ctx, &page->super);
  108. fz_catch(ctx)
  109. fz_rethrow(ctx);
  110. }
  /*
   * Advance past an optional separator: any run of whitespace, then at most
   * one ',', then any further run of whitespace.
   *
   * s must point at a NUL-terminated string. Returns a pointer to the first
   * character that was not consumed (possibly the terminating NUL).
   */
  static char *
  skip_comma(char *s)
  {
      /* The <ctype.h> classifiers are only defined for values representable
       * as unsigned char (or EOF); plain char may be negative for bytes
       * >= 0x80, so convert before classifying. */
      while (isspace((unsigned char)*s))
          s++;
      if (*s == ',')
          s++;
      while (isspace((unsigned char)*s))
          s++;
      return s;
  }
  /*
   * Parse the argument of the margin option into margin[0..3], which hold
   * top, right, bottom and left in that order.
   *
   * Accepted forms (values separated by an optional comma and whitespace):
   *   <All>            every margin gets the same value
   *   <V>,<H>          top/bottom = V, right/left = H
   *   <T>,<R>,<B>      left is set to 0
   *   <T>,<R>,<B>,<L>  all four given explicitly
   * Anything after the fourth value is ignored.
   */
  static void
  read_margins(float *margin, char *arg)
  {
      char *cursor;
      float value;

      /* One value: applies to all four sides. */
      value = fz_strtof(arg, &cursor);
      margin[0] = value;
      margin[1] = value;
      margin[2] = value;
      margin[3] = value;
      cursor = skip_comma(cursor);
      if (*cursor == '\0')
          return;

      /* Two values: the second one is the horizontal (right/left) margin. */
      value = fz_strtof(cursor, &cursor);
      margin[1] = value;
      margin[3] = value;
      cursor = skip_comma(cursor);
      if (*cursor == '\0')
          return;

      /* Three or four values: bottom is explicit, left is 0 unless a fourth
       * value follows. */
      margin[2] = fz_strtof(cursor, &cursor);
      margin[3] = 0;
      cursor = skip_comma(cursor);
      if (*cursor == '\0')
          return;

      margin[3] = fz_strtof(cursor, &cursor);
  }
  /*
   * Print the command-line help for "mutool trim" to stderr.
   *
   * Always returns 1 so that callers can write "return usage();".
   */
  static int
  usage(void)
  {
      /* The default box named here must match the initial value of "box" in
       * pdftrim_main(), which is the CropBox. */
      fputs(
          "usage: mutool trim [options] <input filename>\n"
          "\t-b -\tWhich box to trim to (MediaBox, CropBox(default), BleedBox, TrimBox, ArtBox)\n"
          "\t-m -\tAdd margins to box (+ve for inwards, -ve outwards).\n"
          "\t\t\t<All> or <V>,<H> or <T>,<R>,<B>,<L>\n"
          "\t-e\tExclude contents of box, rather than include them\n"
          "\t-f\tFallback to mediabox if specified box not available\n"
          "\t-o -\tOutput file\n",
          stderr);
      return 1;
  }
  158. int pdftrim_main(int argc, char **argv)
  159. {
  160. fz_context *ctx = NULL;
  161. pdf_document *pdf = NULL;
  162. fz_document *doc = NULL;
  163. pdf_write_options opts = pdf_default_write_options;
  164. int n, i;
  165. char *infile = NULL;
  166. char *outputfile = NULL;
  167. int code = EXIT_SUCCESS;
  168. int exclude = 0;
  169. const char *boxname = NULL;
  170. fz_box_type box = FZ_CROP_BOX;
  171. int fallback = 0;
  172. float margins[4] = { 0 };
  173. int c;
  174. while ((c = fz_getopt(argc, argv, "b:o:efm:")) != -1)
  175. {
  176. switch (c)
  177. {
  178. default: return usage();
  179. case 'b': boxname = fz_optarg; break;
  180. case 'o': outputfile = fz_optarg; break;
  181. case 'e': exclude = 1; break;
  182. case 'f': fallback = 1; break;
  183. case 'm': read_margins(margins, fz_optarg); break;
  184. }
  185. }
  186. if (fz_optind == argc)
  187. return usage();
  188. infile = argv[fz_optind];
  189. if (boxname)
  190. {
  191. box = fz_box_type_from_string(boxname);
  192. if (box == FZ_UNKNOWN_BOX)
  193. {
  194. fprintf(stderr, "Unknown box %s specified!\n", boxname);
  195. return 1;
  196. }
  197. }
  198. /* Set up the options for the file saving. */
  199. #if 1
  200. opts.do_compress = 1;
  201. opts.do_compress_images = 1;
  202. opts.do_compress_fonts = 1;
  203. opts.do_garbage = 3;
  204. #else
  205. opts.do_compress = 0;
  206. opts.do_pretty = 1;
  207. opts.do_compress = 0;
  208. opts.do_compress_images = 1;
  209. opts.do_compress_fonts = 0;
  210. opts.do_garbage = 0;
  211. opts.do_clean = 1;
  212. #endif
  213. /* Create a MuPDF library context. */
  214. ctx = fz_new_context(NULL, NULL, FZ_STORE_DEFAULT);
  215. if (!ctx)
  216. {
  217. fprintf(stderr, "Could not create global context.\n");
  218. return EXIT_FAILURE;
  219. }
  220. /* Register the document handlers (only really need PDF, but this is
  221. * the simplest way. */
  222. fz_register_document_handlers(ctx);
  223. fz_try(ctx)
  224. {
  225. /* Load the input document. */
  226. doc = fz_open_document(ctx, infile);
  227. /* Get a PDF specific pointer, and count the pages. */
  228. pdf = pdf_document_from_fz_document(ctx, doc);
  229. n = fz_count_pages(ctx, doc);
  230. for (i = 0; i < n; i++)
  231. rewrite_page_streams(ctx, pdf, i, box, margins, exclude, fallback);
  232. pdf_save_document(ctx, pdf, outputfile, &opts);
  233. }
  234. fz_always(ctx)
  235. {
  236. fz_drop_document(ctx, doc);
  237. }
  238. fz_catch(ctx)
  239. {
  240. fz_report_error(ctx);
  241. code = EXIT_FAILURE;
  242. }
  243. fz_drop_context(ctx);
  244. return code;
  245. }