// pdf-clean.c
  1. // Copyright (C) 2004-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include "pdf-annot-imp.h"
  24. #include <string.h>
  25. #include <assert.h>
  26. static void
  27. pdf_filter_xobject(fz_context *ctx, pdf_document *doc, pdf_obj *xobj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up);
  28. static void
  29. pdf_filter_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up);
  30. static void
  31. pdf_filter_resources(fz_context *ctx, pdf_document *doc, pdf_obj *in_res, pdf_obj *res, pdf_filter_options *options, pdf_cycle_list *cycle_up)
  32. {
  33. pdf_obj *obj;
  34. int i, n;
  35. if (!options->recurse)
  36. return;
  37. /* ExtGState */
  38. obj = pdf_dict_get(ctx, res, PDF_NAME(ExtGState));
  39. if (obj)
  40. {
  41. n = pdf_dict_len(ctx, obj);
  42. for (i = 0; i < n; i++)
  43. {
  44. pdf_obj *smask = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME(SMask));
  45. if (smask)
  46. {
  47. pdf_obj *g = pdf_dict_get(ctx, smask, PDF_NAME(G));
  48. if (g)
  49. {
  50. /* Transparency group XObject */
  51. pdf_filter_xobject(ctx, doc, g, in_res, options, cycle_up);
  52. }
  53. }
  54. }
  55. }
  56. /* Pattern */
  57. obj = pdf_dict_get(ctx, res, PDF_NAME(Pattern));
  58. if (obj)
  59. {
  60. n = pdf_dict_len(ctx, obj);
  61. for (i = 0; i < n; i++)
  62. {
  63. pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);
  64. if (pat && pdf_dict_get_int(ctx, pat, PDF_NAME(PatternType)) == 1)
  65. {
  66. pdf_filter_xobject(ctx, doc, pat, in_res, options, cycle_up);
  67. }
  68. }
  69. }
  70. /* XObject */
  71. if (!options->instance_forms)
  72. {
  73. obj = pdf_dict_get(ctx, res, PDF_NAME(XObject));
  74. if (obj)
  75. {
  76. n = pdf_dict_len(ctx, obj);
  77. for (i = 0; i < n; i++)
  78. {
  79. pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);
  80. if (xobj && pdf_dict_get(ctx, xobj, PDF_NAME(Subtype)) == PDF_NAME(Form))
  81. {
  82. pdf_filter_xobject(ctx, doc, xobj, in_res, options, cycle_up);
  83. }
  84. }
  85. }
  86. }
  87. /* Font */
  88. obj = pdf_dict_get(ctx, res, PDF_NAME(Font));
  89. if (obj)
  90. {
  91. n = pdf_dict_len(ctx, obj);
  92. for (i = 0; i < n; i++)
  93. {
  94. pdf_obj *font = pdf_dict_get_val(ctx, obj, i);
  95. if (font && pdf_dict_get(ctx, font, PDF_NAME(Subtype)) == PDF_NAME(Type3))
  96. {
  97. pdf_filter_type3(ctx, doc, font, in_res, options, cycle_up);
  98. }
  99. }
  100. }
  101. }
  102. /*
  103. Clean a content stream's rendering operations, with an optional post
  104. processing step.
  105. Firstly, this filters the PDF operators used to avoid (some cases of)
  106. repetition, and leaves the content stream in a balanced state with an
  107. unchanged top level matrix etc. At the same time, the resources actually
  108. used are collected into a new resource dictionary.
  109. Next, the resources themselves are recursively cleaned (as appropriate)
  110. in the same way, if the 'recurse' flag is set.
  111. */
  112. static void
  113. pdf_filter_content_stream(
  114. fz_context *ctx,
  115. pdf_document *doc,
  116. pdf_obj *in_stm,
  117. pdf_obj *in_res,
  118. fz_matrix transform,
  119. pdf_filter_options *options,
  120. int struct_parents,
  121. fz_buffer **out_buf,
  122. pdf_obj **out_res,
  123. pdf_cycle_list *cycle_up)
  124. {
  125. pdf_processor *proc_buffer = NULL;
  126. pdf_processor *top = NULL;
  127. pdf_processor **list = NULL;
  128. int num_filters = 0;
  129. int i;
  130. fz_var(proc_buffer);
  131. *out_buf = NULL;
  132. *out_res = NULL;
  133. if (options->filters)
  134. for (; options->filters[num_filters].filter != NULL; num_filters++);
  135. if (num_filters > 0)
  136. list = fz_calloc(ctx, num_filters, sizeof(pdf_processor *));
  137. fz_try(ctx)
  138. {
  139. *out_buf = fz_new_buffer(ctx, 1024);
  140. top = proc_buffer = pdf_new_buffer_processor(ctx, *out_buf, options->ascii, options->newlines);
  141. if (num_filters > 0)
  142. {
  143. for (i = num_filters - 1; i >= 0; i--)
  144. top = list[i] = options->filters[i].filter(ctx, doc, top, struct_parents, transform, options, options->filters[i].options);
  145. }
  146. pdf_process_contents(ctx, top, doc, in_res, in_stm, NULL, out_res);
  147. pdf_close_processor(ctx, top);
  148. pdf_filter_resources(ctx, doc, in_res, *out_res, options, cycle_up);
  149. }
  150. fz_always(ctx)
  151. {
  152. for (i = 0; i < num_filters; i++)
  153. pdf_drop_processor(ctx, list[i]);
  154. pdf_drop_processor(ctx, proc_buffer);
  155. fz_free(ctx, list);
  156. }
  157. fz_catch(ctx)
  158. {
  159. fz_drop_buffer(ctx, *out_buf);
  160. *out_buf = NULL;
  161. pdf_drop_obj(ctx, *out_res);
  162. *out_res = NULL;
  163. fz_rethrow(ctx);
  164. }
  165. }
  166. /*
  167. Clean a Type 3 font's CharProcs content streams. This works almost
  168. exactly like pdf_filter_content_stream, but the resource dictionary is
  169. shared between all off the CharProcs.
  170. */
  171. static void
  172. pdf_filter_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up)
  173. {
  174. pdf_cycle_list cycle;
  175. pdf_processor *proc_buffer = NULL;
  176. pdf_processor *proc_filter = NULL;
  177. pdf_obj *in_res;
  178. pdf_obj *out_res = NULL;
  179. pdf_obj *charprocs;
  180. int i, n;
  181. int num_filters = 0;
  182. pdf_processor **list = NULL;
  183. fz_buffer *buffer = NULL;
  184. pdf_processor *top = NULL;
  185. pdf_obj *res = NULL;
  186. fz_buffer *new_buf = NULL;
  187. fz_var(out_res);
  188. fz_var(proc_buffer);
  189. fz_var(proc_filter);
  190. fz_var(buffer);
  191. fz_var(res);
  192. fz_var(new_buf);
  193. /* We cannot combine instancing with type3 fonts. The new names for
  194. * instanced form/image resources would clash, since they start over for
  195. * each content stream. This is not a problem for now, because we only
  196. * use instancing with redaction, and redaction doesn't clean type3
  197. * fonts.
  198. */
  199. assert(!options->instance_forms);
  200. /* Avoid recursive cycles! */
  201. if (pdf_cycle(ctx, &cycle, cycle_up, obj))
  202. return;
  203. if (options->filters)
  204. for (; options->filters[num_filters].filter != NULL; num_filters++);
  205. if (num_filters > 0)
  206. list = fz_calloc(ctx, num_filters, sizeof(pdf_processor *));
  207. fz_try(ctx)
  208. {
  209. in_res = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
  210. if (!in_res)
  211. in_res = page_res;
  212. buffer = fz_new_buffer(ctx, 1024);
  213. top = proc_buffer = pdf_new_buffer_processor(ctx, buffer, options->ascii, options->newlines);
  214. if (num_filters > 0)
  215. {
  216. for (i = num_filters - 1; i >= 0; i--)
  217. top = list[i] = options->filters[i].filter(ctx, doc, top, -1, fz_identity, options, options->filters[i].options);
  218. }
  219. pdf_processor_push_resources(ctx, top, in_res);
  220. charprocs = pdf_dict_get(ctx, obj, PDF_NAME(CharProcs));
  221. n = pdf_dict_len(ctx, charprocs);
  222. for (i = 0; i < n; i++)
  223. {
  224. pdf_obj *val = pdf_dict_get_val(ctx, charprocs, i);
  225. if (i > 0)
  226. {
  227. pdf_reset_processor(ctx, top);
  228. fz_clear_buffer(ctx, buffer);
  229. }
  230. pdf_process_raw_contents(ctx, top, doc, in_res, val, NULL);
  231. pdf_close_processor(ctx, top);
  232. if (!options->no_update)
  233. {
  234. new_buf = fz_clone_buffer(ctx, buffer);
  235. pdf_update_stream(ctx, doc, val, new_buf, 0);
  236. fz_drop_buffer(ctx, new_buf);
  237. new_buf = NULL;
  238. }
  239. }
  240. }
  241. fz_always(ctx)
  242. {
  243. res = pdf_processor_pop_resources(ctx, top);
  244. for (i = 0; i < num_filters; i++)
  245. pdf_drop_processor(ctx, list[i]);
  246. pdf_drop_processor(ctx, proc_buffer);
  247. fz_free(ctx, list);
  248. fz_drop_buffer(ctx, new_buf);
  249. fz_drop_buffer(ctx, buffer);
  250. }
  251. fz_catch(ctx)
  252. {
  253. pdf_drop_obj(ctx, res);
  254. fz_rethrow(ctx);
  255. }
  256. pdf_dict_put_drop(ctx, obj, PDF_NAME(Resources), res);
  257. }
  258. static void
  259. pdf_filter_xobject(fz_context *ctx, pdf_document *doc, pdf_obj *stm, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up)
  260. {
  261. pdf_cycle_list cycle;
  262. int struct_parents;
  263. pdf_obj *new_res = NULL;
  264. fz_buffer *new_buf = NULL;
  265. pdf_obj *old_res;
  266. fz_var(new_buf);
  267. fz_var(new_res);
  268. // TODO for RJW: XObject can also be a StructParent; how do we handle that case?
  269. struct_parents = pdf_dict_get_int_default(ctx, stm, PDF_NAME(StructParents), -1);
  270. old_res = pdf_dict_get(ctx, stm, PDF_NAME(Resources));
  271. if (!old_res)
  272. old_res = page_res;
  273. // TODO: don't clean objects more than once.
  274. /* Avoid recursive cycles! */
  275. if (pdf_cycle(ctx, &cycle, cycle_up, stm))
  276. return;
  277. fz_try(ctx)
  278. {
  279. pdf_filter_content_stream(ctx, doc, stm, old_res, fz_identity, options, struct_parents, &new_buf, &new_res, &cycle);
  280. if (!options->no_update)
  281. {
  282. pdf_update_stream(ctx, doc, stm, new_buf, 0);
  283. pdf_dict_put(ctx, stm, PDF_NAME(Resources), new_res);
  284. }
  285. }
  286. fz_always(ctx)
  287. {
  288. fz_drop_buffer(ctx, new_buf);
  289. pdf_drop_obj(ctx, new_res);
  290. }
  291. fz_catch(ctx)
  292. fz_rethrow(ctx);
  293. }
  294. pdf_obj *
  295. pdf_filter_xobject_instance(fz_context *ctx, pdf_obj *old_xobj, pdf_obj *page_res, fz_matrix transform, pdf_filter_options *options, pdf_cycle_list *cycle_up)
  296. {
  297. pdf_cycle_list cycle;
  298. pdf_document *doc = pdf_get_bound_document(ctx, old_xobj);
  299. pdf_obj *new_xobj;
  300. pdf_obj *new_res, *old_res;
  301. fz_buffer *new_buf;
  302. int struct_parents;
  303. fz_matrix matrix;
  304. fz_var(new_xobj);
  305. fz_var(new_buf);
  306. fz_var(new_res);
  307. // TODO for RJW: XObject can also be a StructParent; how do we handle that case?
  308. // TODO for RJW: will we run into trouble by duplicating StructParents stuff?
  309. struct_parents = pdf_dict_get_int_default(ctx, old_xobj, PDF_NAME(StructParents), -1);
  310. old_res = pdf_dict_get(ctx, old_xobj, PDF_NAME(Resources));
  311. if (!old_res)
  312. old_res = page_res;
  313. if (pdf_cycle(ctx, &cycle, cycle_up, old_xobj))
  314. return pdf_keep_obj(ctx, old_xobj);
  315. matrix = pdf_dict_get_matrix(ctx, old_xobj, PDF_NAME(Matrix));
  316. transform = fz_concat(matrix, transform);
  317. fz_try(ctx)
  318. {
  319. new_xobj = pdf_add_object_drop(ctx, doc, pdf_copy_dict(ctx, old_xobj));
  320. pdf_filter_content_stream(ctx, doc, old_xobj, old_res, transform, options, struct_parents, &new_buf, &new_res, &cycle);
  321. if (!options->no_update)
  322. {
  323. pdf_update_stream(ctx, doc, new_xobj, new_buf, 0);
  324. pdf_dict_put(ctx, new_xobj, PDF_NAME(Resources), new_res);
  325. }
  326. }
  327. fz_always(ctx)
  328. {
  329. fz_drop_buffer(ctx, new_buf);
  330. pdf_drop_obj(ctx, new_res);
  331. }
  332. fz_catch(ctx)
  333. {
  334. pdf_drop_obj(ctx, new_xobj);
  335. fz_rethrow(ctx);
  336. }
  337. return new_xobj;
  338. }
  339. void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_filter_options *options)
  340. {
  341. pdf_obj *contents, *old_res;
  342. pdf_obj *new_res;
  343. fz_buffer *buffer;
  344. int struct_parents;
  345. struct_parents = pdf_dict_get_int_default(ctx, page->obj, PDF_NAME(StructParents), -1);
  346. contents = pdf_page_contents(ctx, page);
  347. old_res = pdf_page_resources(ctx, page);
  348. pdf_filter_content_stream(ctx, doc, contents, old_res, fz_identity, options, struct_parents, &buffer, &new_res, NULL);
  349. fz_try(ctx)
  350. {
  351. if (options->complete)
  352. options->complete(ctx, buffer, options->opaque);
  353. if (!options->no_update)
  354. {
  355. /* Always create a new stream object to replace the page contents. This is useful
  356. both if the contents is an array of streams, is entirely missing or if the contents
  357. are shared between pages. */
  358. contents = pdf_add_object_drop(ctx, doc, pdf_new_dict(ctx, doc, 1));
  359. pdf_dict_put_drop(ctx, page->obj, PDF_NAME(Contents), contents);
  360. pdf_update_stream(ctx, doc, contents, buffer, 0);
  361. pdf_dict_put(ctx, page->obj, PDF_NAME(Resources), new_res);
  362. }
  363. }
  364. fz_always(ctx)
  365. {
  366. fz_drop_buffer(ctx, buffer);
  367. pdf_drop_obj(ctx, new_res);
  368. }
  369. fz_catch(ctx)
  370. fz_rethrow(ctx);
  371. }
  372. void pdf_filter_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *annot, pdf_filter_options *options)
  373. {
  374. pdf_obj *ap = pdf_dict_get(ctx, annot->obj, PDF_NAME(AP));
  375. if (pdf_is_dict(ctx, ap))
  376. {
  377. int i, n = pdf_dict_len(ctx, ap);
  378. for (i = 0; i < n; i++)
  379. {
  380. pdf_obj *stm = pdf_dict_get_val(ctx, ap, i);
  381. if (pdf_is_stream(ctx, stm))
  382. {
  383. pdf_filter_xobject(ctx, doc, stm, NULL, options, NULL);
  384. }
  385. }
  386. }
  387. }
  388. /* REDACTIONS */
  389. struct redact_filter_state {
  390. pdf_filter_options filter_opts;
  391. pdf_sanitize_filter_options sanitize_opts;
  392. pdf_filter_factory filter_list[2];
  393. pdf_page *page;
  394. pdf_annot *target; // NULL if all
  395. int line_art;
  396. int text;
  397. };
  398. static void
  399. pdf_redact_end_page(fz_context *ctx, fz_buffer *buf, void *opaque)
  400. {
  401. struct redact_filter_state *red = opaque;
  402. pdf_page *page = red->page;
  403. pdf_annot *annot;
  404. pdf_obj *qp;
  405. int i, n;
  406. fz_append_string(ctx, buf, " 0 g\n");
  407. for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
  408. {
  409. if (red->target != NULL && red->target != annot)
  410. continue;
  411. if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
  412. {
  413. qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
  414. n = pdf_array_len(ctx, qp);
  415. if (n > 0)
  416. {
  417. for (i = 0; i < n; i += 8)
  418. {
  419. fz_quad q = pdf_to_quad(ctx, qp, i);
  420. fz_append_printf(ctx, buf, "%g %g m\n", q.ll.x, q.ll.y);
  421. fz_append_printf(ctx, buf, "%g %g l\n", q.lr.x, q.lr.y);
  422. fz_append_printf(ctx, buf, "%g %g l\n", q.ur.x, q.ur.y);
  423. fz_append_printf(ctx, buf, "%g %g l\n", q.ul.x, q.ul.y);
  424. fz_append_string(ctx, buf, "f\n");
  425. }
  426. }
  427. else
  428. {
  429. fz_rect r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
  430. fz_append_printf(ctx, buf, "%g %g m\n", r.x0, r.y0);
  431. fz_append_printf(ctx, buf, "%g %g l\n", r.x1, r.y0);
  432. fz_append_printf(ctx, buf, "%g %g l\n", r.x1, r.y1);
  433. fz_append_printf(ctx, buf, "%g %g l\n", r.x0, r.y1);
  434. fz_append_string(ctx, buf, "f\n");
  435. }
  436. }
  437. }
  438. }
  439. static int
  440. pdf_redact_text_filter(fz_context *ctx, void *opaque, int *ucsbuf, int ucslen, fz_matrix trm, fz_matrix ctm, fz_rect bbox)
  441. {
  442. struct redact_filter_state *red = opaque;
  443. pdf_page *page = red->page;
  444. pdf_annot *annot;
  445. pdf_obj *qp;
  446. fz_rect r;
  447. fz_quad q;
  448. int i, n;
  449. float w, h;
  450. trm = fz_concat(trm, ctm);
  451. bbox = fz_transform_rect(bbox, trm);
  452. /* Shrink character bbox a bit */
  453. w = bbox.x1 - bbox.x0;
  454. h = bbox.y1 - bbox.y0;
  455. bbox.x0 += w / 10;
  456. bbox.x1 -= w / 10;
  457. bbox.y0 += h / 10;
  458. bbox.y1 -= h / 10;
  459. for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
  460. {
  461. if (red->target != NULL && red->target != annot)
  462. continue;
  463. if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
  464. {
  465. qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
  466. n = pdf_array_len(ctx, qp);
  467. /* Note, we test for the intersection being a valid rectangle, NOT
  468. * a non-empty one. This is because we can have 'empty' character
  469. * boxes (say for diacritics), that while 0 width, do have a defined
  470. * position on the plane, and hence inclusion makes sense. */
  471. if (n > 0)
  472. {
  473. for (i = 0; i < n; i += 8)
  474. {
  475. q = pdf_to_quad(ctx, qp, i);
  476. r = fz_rect_from_quad(q);
  477. if (fz_is_valid_rect(fz_intersect_rect(bbox, r)))
  478. return 1;
  479. }
  480. }
  481. else
  482. {
  483. r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
  484. if (fz_is_valid_rect(fz_intersect_rect(bbox, r)))
  485. return 1;
  486. }
  487. }
  488. }
  489. return 0;
  490. }
  491. static fz_pixmap *
  492. pdf_redact_image_imp(fz_context *ctx, fz_matrix ctm, fz_image *image, fz_pixmap *pixmap, fz_pixmap **pmask, fz_quad q)
  493. {
  494. fz_matrix inv_ctm;
  495. fz_irect r;
  496. int x, y, k, n, bpp;
  497. unsigned char white;
  498. fz_pixmap *mask = *pmask;
  499. int pixmap_cloned = 0;
  500. if (!pixmap)
  501. {
  502. fz_pixmap *original = fz_get_pixmap_from_image(ctx, image, NULL, NULL, NULL, NULL);
  503. int imagemask = image->imagemask;
  504. fz_try(ctx)
  505. {
  506. pixmap = fz_clone_pixmap(ctx, original);
  507. if (imagemask)
  508. fz_invert_pixmap_alpha(ctx, pixmap);
  509. }
  510. fz_always(ctx)
  511. fz_drop_pixmap(ctx, original);
  512. fz_catch(ctx)
  513. fz_rethrow(ctx);
  514. pixmap_cloned = 1;
  515. }
  516. if (!mask && image->mask)
  517. {
  518. fz_pixmap *original = fz_get_pixmap_from_image(ctx, image->mask, NULL, NULL, NULL, NULL);
  519. fz_try(ctx)
  520. {
  521. mask = fz_clone_pixmap(ctx, original);
  522. *pmask = mask;
  523. }
  524. fz_always(ctx)
  525. {
  526. fz_drop_pixmap(ctx, original);
  527. }
  528. fz_catch(ctx)
  529. {
  530. if (pixmap_cloned)
  531. fz_drop_pixmap(ctx, pixmap);
  532. fz_rethrow(ctx);
  533. }
  534. }
  535. /* If we have a 1x1 image, to which a mask is being applied
  536. * then it's the mask we really want to change, not the
  537. * image. We might have just a small section of the image
  538. * being covered, and setting the whole thing to white
  539. * will blank stuff outside the desired area. */
  540. if (!mask || pixmap->w > 1 || pixmap->h > 1)
  541. {
  542. n = pixmap->n - pixmap->alpha;
  543. bpp = pixmap->n;
  544. if (fz_colorspace_is_subtractive(ctx, pixmap->colorspace))
  545. white = 0;
  546. else
  547. white = 255;
  548. inv_ctm = fz_post_scale(fz_invert_matrix(ctm), pixmap->w, pixmap->h);
  549. r = fz_round_rect(fz_transform_rect(fz_rect_from_quad(q), inv_ctm));
  550. r.x0 = fz_clampi(r.x0, 0, pixmap->w);
  551. r.x1 = fz_clampi(r.x1, 0, pixmap->w);
  552. r.y1 = fz_clampi(pixmap->h - r.y1, 0, pixmap->h);
  553. r.y0 = fz_clampi(pixmap->h - r.y0, 0, pixmap->h);
  554. for (y = r.y1; y < r.y0; ++y)
  555. {
  556. for (x = r.x0; x < r.x1; ++x)
  557. {
  558. unsigned char *s = &pixmap->samples[(size_t)y * pixmap->stride + (size_t)x * bpp];
  559. for (k = 0; k < n; ++k)
  560. s[k] = white;
  561. if (pixmap->alpha)
  562. s[k] = 255;
  563. }
  564. }
  565. }
  566. if (mask)
  567. {
  568. inv_ctm = fz_post_scale(fz_invert_matrix(ctm), mask->w, mask->h);
  569. r = fz_round_rect(fz_transform_rect(fz_rect_from_quad(q), inv_ctm));
  570. r.x0 = fz_clampi(r.x0, 0, mask->w);
  571. r.x1 = fz_clampi(r.x1, 0, mask->w);
  572. r.y1 = fz_clampi(mask->h - r.y1, 0, mask->h);
  573. r.y0 = fz_clampi(mask->h - r.y0, 0, mask->h);
  574. for (y = r.y1; y < r.y0; ++y)
  575. {
  576. unsigned char *s = &mask->samples[(size_t)y * mask->stride + (size_t)r.x0];
  577. memset(s, 0xff, r.x1-r.x0);
  578. }
  579. }
  580. return pixmap;
  581. }
  582. static fz_image *
  583. pdf_redact_image_filter_remove(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip)
  584. {
  585. fz_pixmap *redacted = NULL;
  586. struct redact_filter_state *red = opaque;
  587. pdf_page *page = red->page;
  588. pdf_annot *annot;
  589. pdf_obj *qp;
  590. fz_rect area;
  591. fz_rect r;
  592. int i, n;
  593. fz_var(redacted);
  594. area = fz_transform_rect(fz_unit_rect, ctm);
  595. for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
  596. {
  597. if (red->target != NULL && red->target != annot)
  598. continue;
  599. if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
  600. {
  601. qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
  602. n = pdf_array_len(ctx, qp);
  603. if (n > 0)
  604. {
  605. for (i = 0; i < n; i += 8)
  606. {
  607. r = fz_rect_from_quad(pdf_to_quad(ctx, qp, i));
  608. r = fz_intersect_rect(r, area);
  609. if (!fz_is_empty_rect(r))
  610. return NULL;
  611. }
  612. }
  613. else
  614. {
  615. r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
  616. r = fz_intersect_rect(r, area);
  617. if (!fz_is_empty_rect(r))
  618. return NULL;
  619. }
  620. }
  621. }
  622. return fz_keep_image(ctx, image);
  623. }
  624. static fz_image *
  625. pdf_redact_image_filter_remove_invisible(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip)
  626. {
  627. fz_pixmap *redacted = NULL;
  628. struct redact_filter_state *red = opaque;
  629. pdf_page *page = red->page;
  630. pdf_annot *annot;
  631. pdf_obj *qp;
  632. fz_rect area;
  633. fz_rect r;
  634. int i, n;
  635. fz_var(redacted);
  636. area = fz_transform_rect(fz_unit_rect, ctm);
  637. /* Restrict the are of the image to that which can actually be seen. */
  638. area = fz_intersect_rect(area, clip);
  639. for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
  640. {
  641. if (red->target != NULL && red->target != annot)
  642. continue;
  643. if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
  644. {
  645. qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
  646. n = pdf_array_len(ctx, qp);
  647. if (n > 0)
  648. {
  649. for (i = 0; i < n; i += 8)
  650. {
  651. r = fz_rect_from_quad(pdf_to_quad(ctx, qp, i));
  652. r = fz_intersect_rect(r, area);
  653. if (!fz_is_empty_rect(r))
  654. return NULL;
  655. }
  656. }
  657. else
  658. {
  659. r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
  660. r = fz_intersect_rect(r, area);
  661. if (!fz_is_empty_rect(r))
  662. return NULL;
  663. }
  664. }
  665. }
  666. return fz_keep_image(ctx, image);
  667. }
  668. static fz_image *
  669. pdf_redact_image_filter_pixels(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip)
  670. {
  671. fz_pixmap *redacted = NULL;
  672. fz_pixmap *mask = NULL;
  673. struct redact_filter_state *red = opaque;
  674. pdf_page *page = red->page;
  675. pdf_annot *annot;
  676. pdf_obj *qp;
  677. fz_quad area, q;
  678. fz_rect r;
  679. int i, n;
  680. fz_var(redacted);
  681. fz_var(mask);
  682. area = fz_transform_quad(fz_quad_from_rect(fz_unit_rect), ctm);
  683. /* First see if we can redact the image completely */
  684. for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
  685. {
  686. if (red->target != NULL && red->target != annot)
  687. continue;
  688. if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
  689. {
  690. qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
  691. n = pdf_array_len(ctx, qp);
  692. if (n > 0)
  693. {
  694. for (i = 0; i < n; i += 8)
  695. {
  696. q = pdf_to_quad(ctx, qp, i);
  697. if (fz_is_quad_inside_quad(area, q))
  698. return NULL;
  699. }
  700. }
  701. else
  702. {
  703. r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
  704. q = fz_quad_from_rect(r);
  705. if (fz_is_quad_inside_quad(area, q))
  706. return NULL;
  707. }
  708. }
  709. }
  710. /* Blank out redacted parts of the image if necessary */
  711. fz_try(ctx)
  712. {
  713. for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
  714. {
  715. if (red->target != NULL && red->target != annot)
  716. continue;
  717. if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
  718. {
  719. qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
  720. n = pdf_array_len(ctx, qp);
  721. if (n > 0)
  722. {
  723. for (i = 0; i < n; i += 8)
  724. {
  725. q = pdf_to_quad(ctx, qp, i);
  726. if (fz_is_quad_intersecting_quad(area, q))
  727. redacted = pdf_redact_image_imp(ctx, ctm, image, redacted, &mask, q);
  728. }
  729. }
  730. else
  731. {
  732. r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
  733. q = fz_quad_from_rect(r);
  734. if (fz_is_quad_intersecting_quad(area, q))
  735. redacted = pdf_redact_image_imp(ctx, ctm, image, redacted, &mask, q);
  736. }
  737. }
  738. }
  739. }
  740. fz_catch(ctx)
  741. {
  742. fz_drop_pixmap(ctx, redacted);
  743. fz_drop_pixmap(ctx, mask);
  744. fz_rethrow(ctx);
  745. }
  746. if (redacted)
  747. {
  748. int imagemask = image->imagemask;
  749. fz_image *imask = fz_keep_image(ctx, image->mask);
  750. fz_var(imask);
  751. fz_try(ctx)
  752. {
  753. if (mask)
  754. {
  755. fz_drop_image(ctx, imask);
  756. imask = NULL;
  757. imask = fz_new_image_from_pixmap(ctx, mask, NULL);
  758. }
  759. image = fz_new_image_from_pixmap(ctx, redacted, NULL);
  760. image->imagemask = imagemask;
  761. image->mask = imask;
  762. imask = NULL;
  763. }
  764. fz_always(ctx)
  765. {
  766. fz_drop_pixmap(ctx, redacted);
  767. fz_drop_pixmap(ctx, mask);
  768. fz_drop_image(ctx, imask);
  769. }
  770. fz_catch(ctx)
  771. fz_rethrow(ctx);
  772. return image;
  773. }
  774. return fz_keep_image(ctx, image);
  775. }
  776. /* Returns 0 if area does not intersect with any of our redactions.
  777. * Returns 2 if area is completely included within one of our redactions.
  778. * Returns 1 otherwise. */
  779. static int
  780. rect_touches_redactions(fz_context *ctx, fz_rect area, struct redact_filter_state *red)
  781. {
  782. pdf_annot *annot;
  783. pdf_obj *qp;
  784. fz_quad q;
  785. fz_rect r, s;
  786. int i, n;
  787. pdf_page *page = red->page;
  788. for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
  789. {
  790. if (red->target != NULL && red->target != annot)
  791. continue;
  792. if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
  793. {
  794. qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
  795. n = pdf_array_len(ctx, qp);
  796. if (n > 0)
  797. {
  798. for (i = 0; i < n; i += 8)
  799. {
  800. q = pdf_to_quad(ctx, qp, i);
  801. r = fz_rect_from_quad(q);
  802. s = fz_intersect_rect(r, area);
  803. if (!fz_is_empty_rect(s))
  804. {
  805. if (fz_contains_rect(r, area))
  806. return 2;
  807. return 1;
  808. }
  809. }
  810. }
  811. else
  812. {
  813. r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
  814. s = fz_intersect_rect(r, area);
  815. if (!fz_is_empty_rect(s))
  816. {
  817. if (fz_contains_rect(r, area))
  818. return 2;
  819. return 1;
  820. }
  821. }
  822. }
  823. }
  824. return 0;
  825. }
  826. static void
  827. pdf_redact_page_links(fz_context *ctx, struct redact_filter_state *red)
  828. {
  829. pdf_obj *annots;
  830. pdf_obj *link;
  831. fz_rect area;
  832. int k;
  833. annots = pdf_dict_get(ctx, red->page->obj, PDF_NAME(Annots));
  834. k = 0;
  835. while (k < pdf_array_len(ctx, annots))
  836. {
  837. link = pdf_array_get(ctx, annots, k);
  838. if (pdf_dict_get(ctx, link, PDF_NAME(Subtype)) == PDF_NAME(Link))
  839. {
  840. area = pdf_dict_get_rect(ctx, link, PDF_NAME(Rect));
  841. if (rect_touches_redactions(ctx, area, red))
  842. {
  843. pdf_array_delete(ctx, annots, k);
  844. continue;
  845. }
  846. }
  847. ++k;
  848. }
  849. }
  850. static void
  851. pdf_redact_page_annotations(fz_context *ctx, struct redact_filter_state *red)
  852. {
  853. pdf_annot *annot;
  854. fz_rect area;
  855. restart:
  856. for (annot = pdf_first_annot(ctx, red->page); annot; annot = pdf_next_annot(ctx, annot))
  857. {
  858. if (pdf_annot_type(ctx, annot) == PDF_ANNOT_FREE_TEXT)
  859. {
  860. area = pdf_dict_get_rect(ctx, pdf_annot_obj(ctx, annot), PDF_NAME(Rect));
  861. if (rect_touches_redactions(ctx, area, red))
  862. {
  863. pdf_delete_annot(ctx, red->page, annot);
  864. goto restart;
  865. }
  866. }
  867. }
  868. }
  869. static int culler(fz_context *ctx, void *opaque, fz_rect bbox, fz_cull_type type)
  870. {
  871. struct redact_filter_state *red = opaque;
  872. switch (type)
  873. {
  874. case FZ_CULL_PATH_FILL:
  875. case FZ_CULL_PATH_STROKE:
  876. case FZ_CULL_PATH_FILL_STROKE:
  877. case FZ_CULL_CLIP_PATH_FILL:
  878. case FZ_CULL_CLIP_PATH_STROKE:
  879. case FZ_CULL_CLIP_PATH_FILL_STROKE:
  880. if (red->line_art == PDF_REDACT_LINE_ART_REMOVE_IF_COVERED)
  881. return (rect_touches_redactions(ctx, bbox, red) == 2);
  882. else if (red->line_art == PDF_REDACT_LINE_ART_REMOVE_IF_TOUCHED)
  883. return (rect_touches_redactions(ctx, bbox, red) != 0);
  884. return 0;
  885. default:
  886. return 0;
  887. }
  888. }
  889. static
  890. void init_redact_filter(fz_context *ctx, pdf_redact_options *redact_opts, struct redact_filter_state *red, pdf_page *page, pdf_annot *target)
  891. {
  892. int black_boxes = redact_opts ? redact_opts->black_boxes : 0;
  893. int image_method = redact_opts ? redact_opts->image_method : PDF_REDACT_IMAGE_PIXELS;
  894. int line_art = redact_opts ? redact_opts->line_art : PDF_REDACT_LINE_ART_NONE;
  895. int text = redact_opts ? redact_opts->text : PDF_REDACT_TEXT_REMOVE;
  896. memset(&red->filter_opts, 0, sizeof red->filter_opts);
  897. memset(&red->sanitize_opts, 0, sizeof red->sanitize_opts);
  898. red->filter_opts.recurse = 0; /* don't redact patterns, softmasks, and type3 fonts */
  899. red->filter_opts.instance_forms = 1; /* redact xobjects with instancing */
  900. red->filter_opts.ascii = 1;
  901. red->filter_opts.opaque = red;
  902. red->filter_opts.filters = red->filter_list;
  903. if (black_boxes)
  904. red->filter_opts.complete = pdf_redact_end_page;
  905. red->line_art = line_art;
  906. red->text = text;
  907. red->sanitize_opts.opaque = red;
  908. if (text == PDF_REDACT_TEXT_REMOVE)
  909. red->sanitize_opts.text_filter = pdf_redact_text_filter;
  910. if (image_method == PDF_REDACT_IMAGE_PIXELS)
  911. red->sanitize_opts.image_filter = pdf_redact_image_filter_pixels;
  912. if (image_method == PDF_REDACT_IMAGE_REMOVE)
  913. red->sanitize_opts.image_filter = pdf_redact_image_filter_remove;
  914. if (image_method == PDF_REDACT_IMAGE_REMOVE_UNLESS_INVISIBLE)
  915. red->sanitize_opts.image_filter = pdf_redact_image_filter_remove_invisible;
  916. red->sanitize_opts.culler = culler;
  917. red->filter_list[0].filter = pdf_new_sanitize_filter;
  918. red->filter_list[0].options = &red->sanitize_opts;
  919. red->filter_list[1].filter = NULL;
  920. red->filter_list[1].options = NULL;
  921. red->page = page;
  922. red->target = target;
  923. }
  924. static int
  925. pdf_apply_redaction_imp(fz_context *ctx, pdf_page *page, pdf_annot *target, pdf_redact_options *redact_opts)
  926. {
  927. pdf_annot *annot;
  928. int has_redactions = 0;
  929. struct redact_filter_state red;
  930. pdf_document *doc = page->doc;
  931. for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) {
  932. if (target != NULL && target != annot)
  933. continue;
  934. if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
  935. has_redactions = 1;
  936. }
  937. if (!has_redactions)
  938. return 0;
  939. init_redact_filter(ctx, redact_opts, &red, page, target);
  940. if (target)
  941. pdf_begin_operation(ctx, doc, "Apply redaction");
  942. else
  943. pdf_begin_operation(ctx, doc, "Apply redactions on page");
  944. fz_try(ctx)
  945. {
  946. pdf_filter_page_contents(ctx, doc, page, &red.filter_opts);
  947. pdf_redact_page_links(ctx, &red);
  948. pdf_redact_page_annotations(ctx, &red);
  949. annot = pdf_first_annot(ctx, page);
  950. while (annot)
  951. {
  952. if (target == NULL || annot == target)
  953. {
  954. if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
  955. {
  956. pdf_delete_annot(ctx, page, annot);
  957. annot = pdf_first_annot(ctx, page);
  958. continue;
  959. }
  960. }
  961. annot = pdf_next_annot(ctx, annot);
  962. }
  963. doc->redacted = 1;
  964. pdf_end_operation(ctx, doc);
  965. }
  966. fz_catch(ctx)
  967. {
  968. pdf_abandon_operation(ctx, doc);
  969. fz_rethrow(ctx);
  970. }
  971. return 1;
  972. }
  973. int
  974. pdf_redact_page(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_redact_options *redact_opts)
  975. {
  976. if (page == NULL || page->doc != doc)
  977. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't redact a page not from the doc");
  978. return pdf_apply_redaction_imp(ctx, page, NULL, redact_opts);
  979. }
  980. int
  981. pdf_apply_redaction(fz_context *ctx, pdf_annot *annot, pdf_redact_options *redact_opts)
  982. {
  983. return pdf_apply_redaction_imp(ctx, annot->page, annot, redact_opts);
  984. }
  985. /* Hard clipping of pages */
  986. struct clip_filter_state {
  987. pdf_filter_options filter_opts;
  988. pdf_sanitize_filter_options sanitize_opts;
  989. pdf_filter_factory filter_list[2];
  990. pdf_page *page;
  991. fz_rect clip;
  992. };
  993. static int clip_culler(fz_context *ctx, void *opaque, fz_rect bbox, fz_cull_type type)
  994. {
  995. struct clip_filter_state *hc = opaque;
  996. switch (type)
  997. {
  998. case FZ_CULL_PATH_FILL:
  999. case FZ_CULL_PATH_STROKE:
  1000. case FZ_CULL_PATH_FILL_STROKE:
  1001. case FZ_CULL_CLIP_PATH_FILL:
  1002. case FZ_CULL_CLIP_PATH_STROKE:
  1003. case FZ_CULL_CLIP_PATH_FILL_STROKE:
  1004. case FZ_CULL_GLYPH:
  1005. case FZ_CULL_IMAGE:
  1006. case FZ_CULL_SHADING:
  1007. return (fz_is_empty_rect(fz_intersect_rect(bbox, hc->clip)));
  1008. default:
  1009. return 0;
  1010. }
  1011. }
  1012. static
  1013. void init_clip_filter(fz_context *ctx, struct clip_filter_state *hc, pdf_page *page, fz_rect *clip)
  1014. {
  1015. memset(&hc->filter_opts, 0, sizeof hc->filter_opts);
  1016. memset(&hc->sanitize_opts, 0, sizeof hc->sanitize_opts);
  1017. hc->filter_opts.recurse = 0; /* don't redact patterns, softmasks, and type3 fonts */
  1018. hc->filter_opts.instance_forms = 1; /* redact xobjects with instancing */
  1019. hc->filter_opts.ascii = 0;
  1020. hc->filter_opts.opaque = hc;
  1021. hc->filter_opts.filters = hc->filter_list;
  1022. hc->clip = *clip;
  1023. hc->sanitize_opts.opaque = hc;
  1024. hc->sanitize_opts.culler = clip_culler;
  1025. hc->filter_list[0].filter = pdf_new_sanitize_filter;
  1026. hc->filter_list[0].options = &hc->sanitize_opts;
  1027. hc->filter_list[1].filter = NULL;
  1028. hc->filter_list[1].options = NULL;
  1029. hc->page = page;
  1030. }
  1031. static void
  1032. pdf_clip_page_links(fz_context *ctx, struct clip_filter_state *hc)
  1033. {
  1034. pdf_obj *annots;
  1035. pdf_obj *link;
  1036. fz_rect area;
  1037. int k;
  1038. annots = pdf_dict_get(ctx, hc->page->obj, PDF_NAME(Annots));
  1039. k = 0;
  1040. while (k < pdf_array_len(ctx, annots))
  1041. {
  1042. link = pdf_array_get(ctx, annots, k);
  1043. if (pdf_dict_get(ctx, link, PDF_NAME(Subtype)) == PDF_NAME(Link))
  1044. {
  1045. area = pdf_dict_get_rect(ctx, link, PDF_NAME(Rect));
  1046. if (fz_is_empty_rect(fz_intersect_rect(area, hc->clip)))
  1047. {
  1048. pdf_array_delete(ctx, annots, k);
  1049. continue;
  1050. }
  1051. }
  1052. ++k;
  1053. }
  1054. }
  1055. static void
  1056. pdf_clip_page_annotations(fz_context *ctx, struct clip_filter_state *hc)
  1057. {
  1058. pdf_annot *annot;
  1059. fz_rect area;
  1060. restart:
  1061. for (annot = pdf_first_annot(ctx, hc->page); annot; annot = pdf_next_annot(ctx, annot))
  1062. {
  1063. if (pdf_annot_type(ctx, annot) == PDF_ANNOT_FREE_TEXT)
  1064. {
  1065. area = pdf_dict_get_rect(ctx, pdf_annot_obj(ctx, annot), PDF_NAME(Rect));
  1066. if (fz_is_empty_rect(fz_intersect_rect(area, hc->clip)))
  1067. {
  1068. pdf_delete_annot(ctx, hc->page, annot);
  1069. goto restart;
  1070. }
  1071. }
  1072. }
  1073. }
  1074. void
  1075. pdf_clip_page(fz_context *ctx, pdf_page *page, fz_rect *clip)
  1076. {
  1077. pdf_document *doc;
  1078. struct clip_filter_state hc;
  1079. if (page == NULL)
  1080. return;
  1081. doc = page->doc;
  1082. init_clip_filter(ctx, &hc, page, clip);
  1083. pdf_begin_operation(ctx, doc, "Apply hard clip to page");
  1084. fz_try(ctx)
  1085. {
  1086. pdf_filter_page_contents(ctx, doc, page, &hc.filter_opts);
  1087. pdf_clip_page_links(ctx, &hc);
  1088. pdf_clip_page_annotations(ctx, &hc);
  1089. pdf_end_operation(ctx, doc);
  1090. }
  1091. fz_catch(ctx)
  1092. {
  1093. pdf_abandon_operation(ctx, doc);
  1094. fz_rethrow(ctx);
  1095. }
  1096. }