pdfmerge.c 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. // Copyright (C) 2004-2021 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. /*
  23. * PDF merge tool: Tool for merging pdf content.
  24. *
  25. * Simple test bed to work with merging pages from multiple PDFs into a single PDF.
  26. */
  27. #include "mupdf/fitz.h"
  28. #include "mupdf/pdf.h"
  29. #include <stdlib.h>
  30. #include <stdio.h>
  31. #include <string.h>
  32. static int usage(void)
  33. {
  34. fprintf(stderr,
  35. "usage: mutool merge [-o output.pdf] [-O options] input.pdf [pages] [input2.pdf] [pages2] ...\n"
  36. "\t-o -\tname of PDF file to create\n"
  37. "\t-O -\tcomma separated list of output options\n"
  38. "\tinput.pdf\tname of input file from which to copy pages\n"
  39. "\tpages\tcomma separated list of page numbers and ranges\n\n"
  40. );
  41. fputs(fz_pdf_write_options_usage, stderr);
  42. return 1;
  43. }
  44. static pdf_document *doc_des = NULL;
  45. static pdf_document *doc_src = NULL;
  46. int output_page_count = 0;
  47. static void page_merge(fz_context *ctx, int page_from, int page_to, pdf_graft_map *graft_map)
  48. {
  49. pdf_graft_mapped_page(ctx, graft_map, page_to - 1, doc_src, page_from - 1);
  50. }
  51. /*
  52. While we are processing, it_src tracks the current position we are copying from.
  53. items is the list of things we have stepped through to get to the current position.
  54. A prefix of these items may have already been copied across. copied_to_depth is
  55. the length of that prefix. 0 <= copied_to_depth <= len.
  56. */
  57. typedef struct
  58. {
  59. fz_context *ctx;
  60. fz_outline_iterator *it_dst;
  61. fz_outline_iterator *it_src;
  62. const char *range;
  63. int page_count;
  64. int max;
  65. int len;
  66. fz_outline_item *items;
  67. int copied_to_depth;
  68. int page_output_base;
  69. } cor_state;
  70. /* Given a range, and a page in the range 1 to count, return the position
  71. * which the page occupies in the output range (or 0 for not in range).
  72. * So page 12 within 10-20 would return 3.
  73. */
  74. static int
  75. position_in_range(fz_context *ctx, const char *range, int count, int page)
  76. {
  77. int start, end;
  78. int n = 0;
  79. while ((range = fz_parse_page_range(ctx, range, &start, &end, count)))
  80. {
  81. if (start < end)
  82. {
  83. if (start <= page && page <= end)
  84. return n + page - start + 1;
  85. n += end - start + 1;
  86. }
  87. else
  88. {
  89. if (end <= page && page <= start)
  90. return n + page - end + 1;
  91. n += start - end + 1;
  92. }
  93. }
  94. return 0;
  95. }
  96. static void
  97. copy_item(cor_state *cor)
  98. {
  99. fz_context *ctx = cor->ctx;
  100. while (cor->copied_to_depth < cor->len)
  101. {
  102. /* All items copied in a run get the same uri - that of the last one. */
  103. fz_outline_item item = cor->items[cor->copied_to_depth];
  104. item.uri = cor->items[cor->len-1].uri;
  105. fz_outline_iterator_insert(ctx, cor->it_dst, &item);
  106. cor->copied_to_depth++;
  107. fz_outline_iterator_prev(ctx, cor->it_dst);
  108. fz_outline_iterator_down(ctx, cor->it_dst);
  109. }
  110. }
  111. static char *
  112. rewrite_page(fz_context *ctx, const char *uri, int n)
  113. {
  114. const char *p;
  115. if (uri == NULL)
  116. return NULL;
  117. if (strncmp(uri, "#page=", 6) != 0)
  118. return fz_strdup(ctx, uri);
  119. p = strchr(uri+6, '&');
  120. if (p == NULL)
  121. return fz_asprintf(ctx, "#page=%d", n);
  122. return fz_asprintf(ctx, "#page=%d%s", n, p);
  123. }
  124. static void
  125. do_copy_outline_range(cor_state *cor)
  126. {
  127. fz_context *ctx = cor->ctx;
  128. do
  129. {
  130. int has_children;
  131. float x, y;
  132. fz_outline_item *item = fz_outline_iterator_item(ctx, cor->it_src);
  133. int page_num = fz_page_number_from_location(ctx, (fz_document *)doc_src, fz_resolve_link(ctx, (fz_document *)doc_src, item->uri, &x, &y));
  134. int page_in_range = position_in_range(ctx, cor->range, cor->page_count, page_num+1);
  135. int new_page_number = page_in_range + cor->page_output_base;
  136. if (cor->len == cor->max)
  137. {
  138. int newmax = cor->max ? cor->max * 2 : 8;
  139. cor->items = fz_realloc_array(ctx, cor->items, newmax, fz_outline_item);
  140. cor->max = newmax;
  141. }
  142. cor->len++;
  143. cor->items[cor->len-1].title = NULL;
  144. cor->items[cor->len-1].uri = NULL;
  145. cor->items[cor->len-1].is_open = item->is_open;
  146. cor->items[cor->len-1].title = item->title ? fz_strdup(ctx, item->title) : NULL;
  147. cor->items[cor->len-1].uri = rewrite_page(ctx, item->uri, new_page_number);
  148. if (page_in_range != 0)
  149. copy_item(cor);
  150. has_children = fz_outline_iterator_down(ctx, cor->it_src);
  151. if (has_children == 0)
  152. do_copy_outline_range(cor);
  153. if (has_children >= 0)
  154. fz_outline_iterator_up(ctx, cor->it_src);
  155. cor->len--;
  156. if (cor->copied_to_depth > cor->len)
  157. {
  158. cor->copied_to_depth = cor->len;
  159. fz_outline_iterator_up(ctx, cor->it_dst);
  160. }
  161. fz_outline_iterator_next(ctx, cor->it_dst);
  162. fz_free(ctx, cor->items[cor->len].title);
  163. fz_free(ctx, cor->items[cor->len].uri);
  164. }
  165. while (fz_outline_iterator_next(ctx, cor->it_src) == 0);
  166. }
  167. static void
  168. copy_outline_range(fz_context *ctx, fz_outline_iterator *it_dst, fz_outline_iterator *it_src, const char *range, int page_count, int page_output_base)
  169. {
  170. cor_state cor;
  171. cor.ctx = ctx;
  172. cor.it_dst = it_dst;
  173. cor.it_src = it_src;
  174. cor.max = 0;
  175. cor.len = 0;
  176. cor.copied_to_depth = 0;
  177. cor.range = range;
  178. cor.items = NULL;
  179. cor.page_count = page_count;
  180. cor.page_output_base = page_output_base;
  181. fz_try(ctx)
  182. do_copy_outline_range(&cor);
  183. fz_always(ctx)
  184. {
  185. int i;
  186. for (i = 0; i < cor.len; i++)
  187. {
  188. fz_free(ctx, cor.items[i].title);
  189. fz_free(ctx, cor.items[i].uri);
  190. }
  191. fz_free(ctx, cor.items);
  192. }
  193. fz_catch(ctx)
  194. fz_rethrow(ctx);
  195. }
  196. static void merge_range(fz_context *ctx, const char *range)
  197. {
  198. int start, end, i, count;
  199. pdf_graft_map *graft_map;
  200. const char *r;
  201. fz_outline_iterator *it_src = NULL;
  202. fz_outline_iterator *it_dst = NULL;
  203. int pages_merged = 0;
  204. count = pdf_count_pages(ctx, doc_src);
  205. graft_map = pdf_new_graft_map(ctx, doc_des);
  206. fz_var(it_src);
  207. fz_var(it_dst);
  208. fz_try(ctx)
  209. {
  210. r = range;
  211. while ((r = fz_parse_page_range(ctx, r, &start, &end, count)))
  212. {
  213. if (start < end)
  214. for (i = start; i <= end; ++i)
  215. {
  216. page_merge(ctx, i, 0, graft_map);
  217. pages_merged++;
  218. }
  219. else
  220. for (i = start; i >= end; --i)
  221. {
  222. page_merge(ctx, i, 0, graft_map);
  223. pages_merged++;
  224. }
  225. }
  226. it_src = fz_new_outline_iterator(ctx, (fz_document *)doc_src);
  227. if (it_src == NULL)
  228. break; /* Should never happen */
  229. it_dst = fz_new_outline_iterator(ctx, (fz_document *)doc_des);
  230. if (it_dst == NULL)
  231. break; /* Should never happen */
  232. /* Run to the end of it_dst. */
  233. if (fz_outline_iterator_item(ctx, it_dst) != NULL)
  234. {
  235. while (fz_outline_iterator_next(ctx, it_dst) == 0);
  236. }
  237. if (fz_outline_iterator_item(ctx, it_src) != NULL)
  238. copy_outline_range(ctx, it_dst, it_src, range, count, output_page_count);
  239. output_page_count += pages_merged;
  240. }
  241. fz_always(ctx)
  242. {
  243. fz_drop_outline_iterator(ctx, it_src);
  244. fz_drop_outline_iterator(ctx, it_dst);
  245. pdf_drop_graft_map(ctx, graft_map);
  246. }
  247. fz_catch(ctx)
  248. {
  249. fz_rethrow(ctx);
  250. }
  251. }
  252. int pdfmerge_main(int argc, char **argv)
  253. {
  254. pdf_write_options opts = pdf_default_write_options;
  255. char *output = "out.pdf";
  256. char *flags = "";
  257. char *input;
  258. int c;
  259. fz_context *ctx;
  260. while ((c = fz_getopt(argc, argv, "o:O:")) != -1)
  261. {
  262. switch (c)
  263. {
  264. case 'o': output = fz_optarg; break;
  265. case 'O': flags = fz_optarg; break;
  266. default: return usage();
  267. }
  268. }
  269. if (fz_optind == argc)
  270. return usage();
  271. ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
  272. if (!ctx)
  273. {
  274. fprintf(stderr, "error: Cannot initialize MuPDF context.\n");
  275. exit(1);
  276. }
  277. pdf_parse_write_options(ctx, &opts, flags);
  278. fz_try(ctx)
  279. {
  280. doc_des = pdf_create_document(ctx);
  281. }
  282. fz_catch(ctx)
  283. {
  284. fz_report_error(ctx);
  285. fz_log_error(ctx, "Cannot create destination document.");
  286. fz_flush_warnings(ctx);
  287. fz_drop_context(ctx);
  288. exit(1);
  289. }
  290. /* Step through the source files */
  291. while (fz_optind < argc)
  292. {
  293. doc_src = NULL;
  294. input = argv[fz_optind++];
  295. fz_try(ctx)
  296. {
  297. doc_src = pdf_open_document(ctx, input);
  298. if (fz_optind == argc || !fz_is_page_range(ctx, argv[fz_optind]))
  299. merge_range(ctx, "1-N");
  300. else
  301. merge_range(ctx, argv[fz_optind++]);
  302. }
  303. fz_always(ctx)
  304. pdf_drop_document(ctx, doc_src);
  305. fz_catch(ctx)
  306. {
  307. fz_report_error(ctx);
  308. fz_log_error_printf(ctx, "Cannot merge document '%s'.", input);
  309. }
  310. }
  311. if (fz_optind == argc)
  312. {
  313. fz_try(ctx)
  314. pdf_save_document(ctx, doc_des, output, &opts);
  315. fz_catch(ctx)
  316. {
  317. fz_report_error(ctx);
  318. fz_log_error_printf(ctx, "Cannot save output file: '%s'.", output);
  319. }
  320. }
  321. pdf_drop_document(ctx, doc_des);
  322. fz_flush_warnings(ctx);
  323. fz_drop_context(ctx);
  324. return 0;
  325. }