| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233 |
- // Copyright (C) 2004-2025 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #include "pdf-annot-imp.h"
- #include <string.h>
- #include <assert.h>
- static void
- pdf_filter_xobject(fz_context *ctx, pdf_document *doc, pdf_obj *xobj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up);
- static void
- pdf_filter_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up);
- static void
- pdf_filter_resources(fz_context *ctx, pdf_document *doc, pdf_obj *in_res, pdf_obj *res, pdf_filter_options *options, pdf_cycle_list *cycle_up)
- {
- pdf_obj *obj;
- int i, n;
- if (!options->recurse)
- return;
- /* ExtGState */
- obj = pdf_dict_get(ctx, res, PDF_NAME(ExtGState));
- if (obj)
- {
- n = pdf_dict_len(ctx, obj);
- for (i = 0; i < n; i++)
- {
- pdf_obj *smask = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME(SMask));
- if (smask)
- {
- pdf_obj *g = pdf_dict_get(ctx, smask, PDF_NAME(G));
- if (g)
- {
- /* Transparency group XObject */
- pdf_filter_xobject(ctx, doc, g, in_res, options, cycle_up);
- }
- }
- }
- }
- /* Pattern */
- obj = pdf_dict_get(ctx, res, PDF_NAME(Pattern));
- if (obj)
- {
- n = pdf_dict_len(ctx, obj);
- for (i = 0; i < n; i++)
- {
- pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);
- if (pat && pdf_dict_get_int(ctx, pat, PDF_NAME(PatternType)) == 1)
- {
- pdf_filter_xobject(ctx, doc, pat, in_res, options, cycle_up);
- }
- }
- }
- /* XObject */
- if (!options->instance_forms)
- {
- obj = pdf_dict_get(ctx, res, PDF_NAME(XObject));
- if (obj)
- {
- n = pdf_dict_len(ctx, obj);
- for (i = 0; i < n; i++)
- {
- pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);
- if (xobj && pdf_dict_get(ctx, xobj, PDF_NAME(Subtype)) == PDF_NAME(Form))
- {
- pdf_filter_xobject(ctx, doc, xobj, in_res, options, cycle_up);
- }
- }
- }
- }
- /* Font */
- obj = pdf_dict_get(ctx, res, PDF_NAME(Font));
- if (obj)
- {
- n = pdf_dict_len(ctx, obj);
- for (i = 0; i < n; i++)
- {
- pdf_obj *font = pdf_dict_get_val(ctx, obj, i);
- if (font && pdf_dict_get(ctx, font, PDF_NAME(Subtype)) == PDF_NAME(Type3))
- {
- pdf_filter_type3(ctx, doc, font, in_res, options, cycle_up);
- }
- }
- }
- }
- /*
- Clean a content stream's rendering operations, with an optional post
- processing step.
- Firstly, this filters the PDF operators used to avoid (some cases of)
- repetition, and leaves the content stream in a balanced state with an
- unchanged top level matrix etc. At the same time, the resources actually
- used are collected into a new resource dictionary.
- Next, the resources themselves are recursively cleaned (as appropriate)
- in the same way, if the 'recurse' flag is set.
- */
- static void
- pdf_filter_content_stream(
- fz_context *ctx,
- pdf_document *doc,
- pdf_obj *in_stm,
- pdf_obj *in_res,
- fz_matrix transform,
- pdf_filter_options *options,
- int struct_parents,
- fz_buffer **out_buf,
- pdf_obj **out_res,
- pdf_cycle_list *cycle_up)
- {
- pdf_processor *proc_buffer = NULL;
- pdf_processor *top = NULL;
- pdf_processor **list = NULL;
- int num_filters = 0;
- int i;
- fz_var(proc_buffer);
- *out_buf = NULL;
- *out_res = NULL;
- if (options->filters)
- for (; options->filters[num_filters].filter != NULL; num_filters++);
- if (num_filters > 0)
- list = fz_calloc(ctx, num_filters, sizeof(pdf_processor *));
- fz_try(ctx)
- {
- *out_buf = fz_new_buffer(ctx, 1024);
- top = proc_buffer = pdf_new_buffer_processor(ctx, *out_buf, options->ascii, options->newlines);
- if (num_filters > 0)
- {
- for (i = num_filters - 1; i >= 0; i--)
- top = list[i] = options->filters[i].filter(ctx, doc, top, struct_parents, transform, options, options->filters[i].options);
- }
- pdf_process_contents(ctx, top, doc, in_res, in_stm, NULL, out_res);
- pdf_close_processor(ctx, top);
- pdf_filter_resources(ctx, doc, in_res, *out_res, options, cycle_up);
- }
- fz_always(ctx)
- {
- for (i = 0; i < num_filters; i++)
- pdf_drop_processor(ctx, list[i]);
- pdf_drop_processor(ctx, proc_buffer);
- fz_free(ctx, list);
- }
- fz_catch(ctx)
- {
- fz_drop_buffer(ctx, *out_buf);
- *out_buf = NULL;
- pdf_drop_obj(ctx, *out_res);
- *out_res = NULL;
- fz_rethrow(ctx);
- }
- }
- /*
- Clean a Type 3 font's CharProcs content streams. This works almost
- exactly like pdf_filter_content_stream, but the resource dictionary is
- shared between all off the CharProcs.
- */
- static void
- pdf_filter_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up)
- {
- pdf_cycle_list cycle;
- pdf_processor *proc_buffer = NULL;
- pdf_processor *proc_filter = NULL;
- pdf_obj *in_res;
- pdf_obj *out_res = NULL;
- pdf_obj *charprocs;
- int i, n;
- int num_filters = 0;
- pdf_processor **list = NULL;
- fz_buffer *buffer = NULL;
- pdf_processor *top = NULL;
- pdf_obj *res = NULL;
- fz_buffer *new_buf = NULL;
- fz_var(out_res);
- fz_var(proc_buffer);
- fz_var(proc_filter);
- fz_var(buffer);
- fz_var(res);
- fz_var(new_buf);
- /* We cannot combine instancing with type3 fonts. The new names for
- * instanced form/image resources would clash, since they start over for
- * each content stream. This is not a problem for now, because we only
- * use instancing with redaction, and redaction doesn't clean type3
- * fonts.
- */
- assert(!options->instance_forms);
- /* Avoid recursive cycles! */
- if (pdf_cycle(ctx, &cycle, cycle_up, obj))
- return;
- if (options->filters)
- for (; options->filters[num_filters].filter != NULL; num_filters++);
- if (num_filters > 0)
- list = fz_calloc(ctx, num_filters, sizeof(pdf_processor *));
- fz_try(ctx)
- {
- in_res = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
- if (!in_res)
- in_res = page_res;
- buffer = fz_new_buffer(ctx, 1024);
- top = proc_buffer = pdf_new_buffer_processor(ctx, buffer, options->ascii, options->newlines);
- if (num_filters > 0)
- {
- for (i = num_filters - 1; i >= 0; i--)
- top = list[i] = options->filters[i].filter(ctx, doc, top, -1, fz_identity, options, options->filters[i].options);
- }
- pdf_processor_push_resources(ctx, top, in_res);
- charprocs = pdf_dict_get(ctx, obj, PDF_NAME(CharProcs));
- n = pdf_dict_len(ctx, charprocs);
- for (i = 0; i < n; i++)
- {
- pdf_obj *val = pdf_dict_get_val(ctx, charprocs, i);
- if (i > 0)
- {
- pdf_reset_processor(ctx, top);
- fz_clear_buffer(ctx, buffer);
- }
- pdf_process_raw_contents(ctx, top, doc, in_res, val, NULL);
- pdf_close_processor(ctx, top);
- if (!options->no_update)
- {
- new_buf = fz_clone_buffer(ctx, buffer);
- pdf_update_stream(ctx, doc, val, new_buf, 0);
- fz_drop_buffer(ctx, new_buf);
- new_buf = NULL;
- }
- }
- }
- fz_always(ctx)
- {
- res = pdf_processor_pop_resources(ctx, top);
- for (i = 0; i < num_filters; i++)
- pdf_drop_processor(ctx, list[i]);
- pdf_drop_processor(ctx, proc_buffer);
- fz_free(ctx, list);
- fz_drop_buffer(ctx, new_buf);
- fz_drop_buffer(ctx, buffer);
- }
- fz_catch(ctx)
- {
- pdf_drop_obj(ctx, res);
- fz_rethrow(ctx);
- }
- pdf_dict_put_drop(ctx, obj, PDF_NAME(Resources), res);
- }
- static void
- pdf_filter_xobject(fz_context *ctx, pdf_document *doc, pdf_obj *stm, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up)
- {
- pdf_cycle_list cycle;
- int struct_parents;
- pdf_obj *new_res = NULL;
- fz_buffer *new_buf = NULL;
- pdf_obj *old_res;
- fz_var(new_buf);
- fz_var(new_res);
- // TODO for RJW: XObject can also be a StructParent; how do we handle that case?
- struct_parents = pdf_dict_get_int_default(ctx, stm, PDF_NAME(StructParents), -1);
- old_res = pdf_dict_get(ctx, stm, PDF_NAME(Resources));
- if (!old_res)
- old_res = page_res;
- // TODO: don't clean objects more than once.
- /* Avoid recursive cycles! */
- if (pdf_cycle(ctx, &cycle, cycle_up, stm))
- return;
- fz_try(ctx)
- {
- pdf_filter_content_stream(ctx, doc, stm, old_res, fz_identity, options, struct_parents, &new_buf, &new_res, &cycle);
- if (!options->no_update)
- {
- pdf_update_stream(ctx, doc, stm, new_buf, 0);
- pdf_dict_put(ctx, stm, PDF_NAME(Resources), new_res);
- }
- }
- fz_always(ctx)
- {
- fz_drop_buffer(ctx, new_buf);
- pdf_drop_obj(ctx, new_res);
- }
- fz_catch(ctx)
- fz_rethrow(ctx);
- }
- pdf_obj *
- pdf_filter_xobject_instance(fz_context *ctx, pdf_obj *old_xobj, pdf_obj *page_res, fz_matrix transform, pdf_filter_options *options, pdf_cycle_list *cycle_up)
- {
- pdf_cycle_list cycle;
- pdf_document *doc = pdf_get_bound_document(ctx, old_xobj);
- pdf_obj *new_xobj;
- pdf_obj *new_res, *old_res;
- fz_buffer *new_buf;
- int struct_parents;
- fz_matrix matrix;
- fz_var(new_xobj);
- fz_var(new_buf);
- fz_var(new_res);
- // TODO for RJW: XObject can also be a StructParent; how do we handle that case?
- // TODO for RJW: will we run into trouble by duplicating StructParents stuff?
- struct_parents = pdf_dict_get_int_default(ctx, old_xobj, PDF_NAME(StructParents), -1);
- old_res = pdf_dict_get(ctx, old_xobj, PDF_NAME(Resources));
- if (!old_res)
- old_res = page_res;
- if (pdf_cycle(ctx, &cycle, cycle_up, old_xobj))
- return pdf_keep_obj(ctx, old_xobj);
- matrix = pdf_dict_get_matrix(ctx, old_xobj, PDF_NAME(Matrix));
- transform = fz_concat(matrix, transform);
- fz_try(ctx)
- {
- new_xobj = pdf_add_object_drop(ctx, doc, pdf_copy_dict(ctx, old_xobj));
- pdf_filter_content_stream(ctx, doc, old_xobj, old_res, transform, options, struct_parents, &new_buf, &new_res, &cycle);
- if (!options->no_update)
- {
- pdf_update_stream(ctx, doc, new_xobj, new_buf, 0);
- pdf_dict_put(ctx, new_xobj, PDF_NAME(Resources), new_res);
- }
- }
- fz_always(ctx)
- {
- fz_drop_buffer(ctx, new_buf);
- pdf_drop_obj(ctx, new_res);
- }
- fz_catch(ctx)
- {
- pdf_drop_obj(ctx, new_xobj);
- fz_rethrow(ctx);
- }
- return new_xobj;
- }
- void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_filter_options *options)
- {
- pdf_obj *contents, *old_res;
- pdf_obj *new_res;
- fz_buffer *buffer;
- int struct_parents;
- struct_parents = pdf_dict_get_int_default(ctx, page->obj, PDF_NAME(StructParents), -1);
- contents = pdf_page_contents(ctx, page);
- old_res = pdf_page_resources(ctx, page);
- pdf_filter_content_stream(ctx, doc, contents, old_res, fz_identity, options, struct_parents, &buffer, &new_res, NULL);
- fz_try(ctx)
- {
- if (options->complete)
- options->complete(ctx, buffer, options->opaque);
- if (!options->no_update)
- {
- /* Always create a new stream object to replace the page contents. This is useful
- both if the contents is an array of streams, is entirely missing or if the contents
- are shared between pages. */
- contents = pdf_add_object_drop(ctx, doc, pdf_new_dict(ctx, doc, 1));
- pdf_dict_put_drop(ctx, page->obj, PDF_NAME(Contents), contents);
- pdf_update_stream(ctx, doc, contents, buffer, 0);
- pdf_dict_put(ctx, page->obj, PDF_NAME(Resources), new_res);
- }
- }
- fz_always(ctx)
- {
- fz_drop_buffer(ctx, buffer);
- pdf_drop_obj(ctx, new_res);
- }
- fz_catch(ctx)
- fz_rethrow(ctx);
- }
- void pdf_filter_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *annot, pdf_filter_options *options)
- {
- pdf_obj *ap = pdf_dict_get(ctx, annot->obj, PDF_NAME(AP));
- if (pdf_is_dict(ctx, ap))
- {
- int i, n = pdf_dict_len(ctx, ap);
- for (i = 0; i < n; i++)
- {
- pdf_obj *stm = pdf_dict_get_val(ctx, ap, i);
- if (pdf_is_stream(ctx, stm))
- {
- pdf_filter_xobject(ctx, doc, stm, NULL, options, NULL);
- }
- }
- }
- }
- /* REDACTIONS */
- struct redact_filter_state {
- pdf_filter_options filter_opts;
- pdf_sanitize_filter_options sanitize_opts;
- pdf_filter_factory filter_list[2];
- pdf_page *page;
- pdf_annot *target; // NULL if all
- int line_art;
- int text;
- };
- static void
- pdf_redact_end_page(fz_context *ctx, fz_buffer *buf, void *opaque)
- {
- struct redact_filter_state *red = opaque;
- pdf_page *page = red->page;
- pdf_annot *annot;
- pdf_obj *qp;
- int i, n;
- fz_append_string(ctx, buf, " 0 g\n");
- for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
- {
- if (red->target != NULL && red->target != annot)
- continue;
- if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
- {
- qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
- n = pdf_array_len(ctx, qp);
- if (n > 0)
- {
- for (i = 0; i < n; i += 8)
- {
- fz_quad q = pdf_to_quad(ctx, qp, i);
- fz_append_printf(ctx, buf, "%g %g m\n", q.ll.x, q.ll.y);
- fz_append_printf(ctx, buf, "%g %g l\n", q.lr.x, q.lr.y);
- fz_append_printf(ctx, buf, "%g %g l\n", q.ur.x, q.ur.y);
- fz_append_printf(ctx, buf, "%g %g l\n", q.ul.x, q.ul.y);
- fz_append_string(ctx, buf, "f\n");
- }
- }
- else
- {
- fz_rect r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
- fz_append_printf(ctx, buf, "%g %g m\n", r.x0, r.y0);
- fz_append_printf(ctx, buf, "%g %g l\n", r.x1, r.y0);
- fz_append_printf(ctx, buf, "%g %g l\n", r.x1, r.y1);
- fz_append_printf(ctx, buf, "%g %g l\n", r.x0, r.y1);
- fz_append_string(ctx, buf, "f\n");
- }
- }
- }
- }
- static int
- pdf_redact_text_filter(fz_context *ctx, void *opaque, int *ucsbuf, int ucslen, fz_matrix trm, fz_matrix ctm, fz_rect bbox)
- {
- struct redact_filter_state *red = opaque;
- pdf_page *page = red->page;
- pdf_annot *annot;
- pdf_obj *qp;
- fz_rect r;
- fz_quad q;
- int i, n;
- float w, h;
- trm = fz_concat(trm, ctm);
- bbox = fz_transform_rect(bbox, trm);
- /* Shrink character bbox a bit */
- w = bbox.x1 - bbox.x0;
- h = bbox.y1 - bbox.y0;
- bbox.x0 += w / 10;
- bbox.x1 -= w / 10;
- bbox.y0 += h / 10;
- bbox.y1 -= h / 10;
- for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
- {
- if (red->target != NULL && red->target != annot)
- continue;
- if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
- {
- qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
- n = pdf_array_len(ctx, qp);
- /* Note, we test for the intersection being a valid rectangle, NOT
- * a non-empty one. This is because we can have 'empty' character
- * boxes (say for diacritics), that while 0 width, do have a defined
- * position on the plane, and hence inclusion makes sense. */
- if (n > 0)
- {
- for (i = 0; i < n; i += 8)
- {
- q = pdf_to_quad(ctx, qp, i);
- r = fz_rect_from_quad(q);
- if (fz_is_valid_rect(fz_intersect_rect(bbox, r)))
- return 1;
- }
- }
- else
- {
- r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
- if (fz_is_valid_rect(fz_intersect_rect(bbox, r)))
- return 1;
- }
- }
- }
- return 0;
- }
- static fz_pixmap *
- pdf_redact_image_imp(fz_context *ctx, fz_matrix ctm, fz_image *image, fz_pixmap *pixmap, fz_pixmap **pmask, fz_quad q)
- {
- fz_matrix inv_ctm;
- fz_irect r;
- int x, y, k, n, bpp;
- unsigned char white;
- fz_pixmap *mask = *pmask;
- int pixmap_cloned = 0;
- if (!pixmap)
- {
- fz_pixmap *original = fz_get_pixmap_from_image(ctx, image, NULL, NULL, NULL, NULL);
- int imagemask = image->imagemask;
- fz_try(ctx)
- {
- pixmap = fz_clone_pixmap(ctx, original);
- if (imagemask)
- fz_invert_pixmap_alpha(ctx, pixmap);
- }
- fz_always(ctx)
- fz_drop_pixmap(ctx, original);
- fz_catch(ctx)
- fz_rethrow(ctx);
- pixmap_cloned = 1;
- }
- if (!mask && image->mask)
- {
- fz_pixmap *original = fz_get_pixmap_from_image(ctx, image->mask, NULL, NULL, NULL, NULL);
- fz_try(ctx)
- {
- mask = fz_clone_pixmap(ctx, original);
- *pmask = mask;
- }
- fz_always(ctx)
- {
- fz_drop_pixmap(ctx, original);
- }
- fz_catch(ctx)
- {
- if (pixmap_cloned)
- fz_drop_pixmap(ctx, pixmap);
- fz_rethrow(ctx);
- }
- }
- /* If we have a 1x1 image, to which a mask is being applied
- * then it's the mask we really want to change, not the
- * image. We might have just a small section of the image
- * being covered, and setting the whole thing to white
- * will blank stuff outside the desired area. */
- if (!mask || pixmap->w > 1 || pixmap->h > 1)
- {
- n = pixmap->n - pixmap->alpha;
- bpp = pixmap->n;
- if (fz_colorspace_is_subtractive(ctx, pixmap->colorspace))
- white = 0;
- else
- white = 255;
- inv_ctm = fz_post_scale(fz_invert_matrix(ctm), pixmap->w, pixmap->h);
- r = fz_round_rect(fz_transform_rect(fz_rect_from_quad(q), inv_ctm));
- r.x0 = fz_clampi(r.x0, 0, pixmap->w);
- r.x1 = fz_clampi(r.x1, 0, pixmap->w);
- r.y1 = fz_clampi(pixmap->h - r.y1, 0, pixmap->h);
- r.y0 = fz_clampi(pixmap->h - r.y0, 0, pixmap->h);
- for (y = r.y1; y < r.y0; ++y)
- {
- for (x = r.x0; x < r.x1; ++x)
- {
- unsigned char *s = &pixmap->samples[(size_t)y * pixmap->stride + (size_t)x * bpp];
- for (k = 0; k < n; ++k)
- s[k] = white;
- if (pixmap->alpha)
- s[k] = 255;
- }
- }
- }
- if (mask)
- {
- inv_ctm = fz_post_scale(fz_invert_matrix(ctm), mask->w, mask->h);
- r = fz_round_rect(fz_transform_rect(fz_rect_from_quad(q), inv_ctm));
- r.x0 = fz_clampi(r.x0, 0, mask->w);
- r.x1 = fz_clampi(r.x1, 0, mask->w);
- r.y1 = fz_clampi(mask->h - r.y1, 0, mask->h);
- r.y0 = fz_clampi(mask->h - r.y0, 0, mask->h);
- for (y = r.y1; y < r.y0; ++y)
- {
- unsigned char *s = &mask->samples[(size_t)y * mask->stride + (size_t)r.x0];
- memset(s, 0xff, r.x1-r.x0);
- }
- }
- return pixmap;
- }
- static fz_image *
- pdf_redact_image_filter_remove(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip)
- {
- fz_pixmap *redacted = NULL;
- struct redact_filter_state *red = opaque;
- pdf_page *page = red->page;
- pdf_annot *annot;
- pdf_obj *qp;
- fz_rect area;
- fz_rect r;
- int i, n;
- fz_var(redacted);
- area = fz_transform_rect(fz_unit_rect, ctm);
- for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
- {
- if (red->target != NULL && red->target != annot)
- continue;
- if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
- {
- qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
- n = pdf_array_len(ctx, qp);
- if (n > 0)
- {
- for (i = 0; i < n; i += 8)
- {
- r = fz_rect_from_quad(pdf_to_quad(ctx, qp, i));
- r = fz_intersect_rect(r, area);
- if (!fz_is_empty_rect(r))
- return NULL;
- }
- }
- else
- {
- r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
- r = fz_intersect_rect(r, area);
- if (!fz_is_empty_rect(r))
- return NULL;
- }
- }
- }
- return fz_keep_image(ctx, image);
- }
- static fz_image *
- pdf_redact_image_filter_remove_invisible(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip)
- {
- fz_pixmap *redacted = NULL;
- struct redact_filter_state *red = opaque;
- pdf_page *page = red->page;
- pdf_annot *annot;
- pdf_obj *qp;
- fz_rect area;
- fz_rect r;
- int i, n;
- fz_var(redacted);
- area = fz_transform_rect(fz_unit_rect, ctm);
- /* Restrict the are of the image to that which can actually be seen. */
- area = fz_intersect_rect(area, clip);
- for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
- {
- if (red->target != NULL && red->target != annot)
- continue;
- if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
- {
- qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
- n = pdf_array_len(ctx, qp);
- if (n > 0)
- {
- for (i = 0; i < n; i += 8)
- {
- r = fz_rect_from_quad(pdf_to_quad(ctx, qp, i));
- r = fz_intersect_rect(r, area);
- if (!fz_is_empty_rect(r))
- return NULL;
- }
- }
- else
- {
- r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
- r = fz_intersect_rect(r, area);
- if (!fz_is_empty_rect(r))
- return NULL;
- }
- }
- }
- return fz_keep_image(ctx, image);
- }
- static fz_image *
- pdf_redact_image_filter_pixels(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip)
- {
- fz_pixmap *redacted = NULL;
- fz_pixmap *mask = NULL;
- struct redact_filter_state *red = opaque;
- pdf_page *page = red->page;
- pdf_annot *annot;
- pdf_obj *qp;
- fz_quad area, q;
- fz_rect r;
- int i, n;
- fz_var(redacted);
- fz_var(mask);
- area = fz_transform_quad(fz_quad_from_rect(fz_unit_rect), ctm);
- /* First see if we can redact the image completely */
- for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
- {
- if (red->target != NULL && red->target != annot)
- continue;
- if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
- {
- qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
- n = pdf_array_len(ctx, qp);
- if (n > 0)
- {
- for (i = 0; i < n; i += 8)
- {
- q = pdf_to_quad(ctx, qp, i);
- if (fz_is_quad_inside_quad(area, q))
- return NULL;
- }
- }
- else
- {
- r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
- q = fz_quad_from_rect(r);
- if (fz_is_quad_inside_quad(area, q))
- return NULL;
- }
- }
- }
- /* Blank out redacted parts of the image if necessary */
- fz_try(ctx)
- {
- for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
- {
- if (red->target != NULL && red->target != annot)
- continue;
- if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
- {
- qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
- n = pdf_array_len(ctx, qp);
- if (n > 0)
- {
- for (i = 0; i < n; i += 8)
- {
- q = pdf_to_quad(ctx, qp, i);
- if (fz_is_quad_intersecting_quad(area, q))
- redacted = pdf_redact_image_imp(ctx, ctm, image, redacted, &mask, q);
- }
- }
- else
- {
- r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
- q = fz_quad_from_rect(r);
- if (fz_is_quad_intersecting_quad(area, q))
- redacted = pdf_redact_image_imp(ctx, ctm, image, redacted, &mask, q);
- }
- }
- }
- }
- fz_catch(ctx)
- {
- fz_drop_pixmap(ctx, redacted);
- fz_drop_pixmap(ctx, mask);
- fz_rethrow(ctx);
- }
- if (redacted)
- {
- int imagemask = image->imagemask;
- fz_image *imask = fz_keep_image(ctx, image->mask);
- fz_var(imask);
- fz_try(ctx)
- {
- if (mask)
- {
- fz_drop_image(ctx, imask);
- imask = NULL;
- imask = fz_new_image_from_pixmap(ctx, mask, NULL);
- }
- image = fz_new_image_from_pixmap(ctx, redacted, NULL);
- image->imagemask = imagemask;
- image->mask = imask;
- imask = NULL;
- }
- fz_always(ctx)
- {
- fz_drop_pixmap(ctx, redacted);
- fz_drop_pixmap(ctx, mask);
- fz_drop_image(ctx, imask);
- }
- fz_catch(ctx)
- fz_rethrow(ctx);
- return image;
- }
- return fz_keep_image(ctx, image);
- }
- /* Returns 0 if area does not intersect with any of our redactions.
- * Returns 2 if area is completely included within one of our redactions.
- * Returns 1 otherwise. */
- static int
- rect_touches_redactions(fz_context *ctx, fz_rect area, struct redact_filter_state *red)
- {
- pdf_annot *annot;
- pdf_obj *qp;
- fz_quad q;
- fz_rect r, s;
- int i, n;
- pdf_page *page = red->page;
- for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
- {
- if (red->target != NULL && red->target != annot)
- continue;
- if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
- {
- qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
- n = pdf_array_len(ctx, qp);
- if (n > 0)
- {
- for (i = 0; i < n; i += 8)
- {
- q = pdf_to_quad(ctx, qp, i);
- r = fz_rect_from_quad(q);
- s = fz_intersect_rect(r, area);
- if (!fz_is_empty_rect(s))
- {
- if (fz_contains_rect(r, area))
- return 2;
- return 1;
- }
- }
- }
- else
- {
- r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
- s = fz_intersect_rect(r, area);
- if (!fz_is_empty_rect(s))
- {
- if (fz_contains_rect(r, area))
- return 2;
- return 1;
- }
- }
- }
- }
- return 0;
- }
- static void
- pdf_redact_page_links(fz_context *ctx, struct redact_filter_state *red)
- {
- pdf_obj *annots;
- pdf_obj *link;
- fz_rect area;
- int k;
- annots = pdf_dict_get(ctx, red->page->obj, PDF_NAME(Annots));
- k = 0;
- while (k < pdf_array_len(ctx, annots))
- {
- link = pdf_array_get(ctx, annots, k);
- if (pdf_dict_get(ctx, link, PDF_NAME(Subtype)) == PDF_NAME(Link))
- {
- area = pdf_dict_get_rect(ctx, link, PDF_NAME(Rect));
- if (rect_touches_redactions(ctx, area, red))
- {
- pdf_array_delete(ctx, annots, k);
- continue;
- }
- }
- ++k;
- }
- }
- static void
- pdf_redact_page_annotations(fz_context *ctx, struct redact_filter_state *red)
- {
- pdf_annot *annot;
- fz_rect area;
- restart:
- for (annot = pdf_first_annot(ctx, red->page); annot; annot = pdf_next_annot(ctx, annot))
- {
- if (pdf_annot_type(ctx, annot) == PDF_ANNOT_FREE_TEXT)
- {
- area = pdf_dict_get_rect(ctx, pdf_annot_obj(ctx, annot), PDF_NAME(Rect));
- if (rect_touches_redactions(ctx, area, red))
- {
- pdf_delete_annot(ctx, red->page, annot);
- goto restart;
- }
- }
- }
- }
- static int culler(fz_context *ctx, void *opaque, fz_rect bbox, fz_cull_type type)
- {
- struct redact_filter_state *red = opaque;
- switch (type)
- {
- case FZ_CULL_PATH_FILL:
- case FZ_CULL_PATH_STROKE:
- case FZ_CULL_PATH_FILL_STROKE:
- case FZ_CULL_CLIP_PATH_FILL:
- case FZ_CULL_CLIP_PATH_STROKE:
- case FZ_CULL_CLIP_PATH_FILL_STROKE:
- if (red->line_art == PDF_REDACT_LINE_ART_REMOVE_IF_COVERED)
- return (rect_touches_redactions(ctx, bbox, red) == 2);
- else if (red->line_art == PDF_REDACT_LINE_ART_REMOVE_IF_TOUCHED)
- return (rect_touches_redactions(ctx, bbox, red) != 0);
- return 0;
- default:
- return 0;
- }
- }
- static
- void init_redact_filter(fz_context *ctx, pdf_redact_options *redact_opts, struct redact_filter_state *red, pdf_page *page, pdf_annot *target)
- {
- int black_boxes = redact_opts ? redact_opts->black_boxes : 0;
- int image_method = redact_opts ? redact_opts->image_method : PDF_REDACT_IMAGE_PIXELS;
- int line_art = redact_opts ? redact_opts->line_art : PDF_REDACT_LINE_ART_NONE;
- int text = redact_opts ? redact_opts->text : PDF_REDACT_TEXT_REMOVE;
- memset(&red->filter_opts, 0, sizeof red->filter_opts);
- memset(&red->sanitize_opts, 0, sizeof red->sanitize_opts);
- red->filter_opts.recurse = 0; /* don't redact patterns, softmasks, and type3 fonts */
- red->filter_opts.instance_forms = 1; /* redact xobjects with instancing */
- red->filter_opts.ascii = 1;
- red->filter_opts.opaque = red;
- red->filter_opts.filters = red->filter_list;
- if (black_boxes)
- red->filter_opts.complete = pdf_redact_end_page;
- red->line_art = line_art;
- red->text = text;
- red->sanitize_opts.opaque = red;
- if (text == PDF_REDACT_TEXT_REMOVE)
- red->sanitize_opts.text_filter = pdf_redact_text_filter;
- if (image_method == PDF_REDACT_IMAGE_PIXELS)
- red->sanitize_opts.image_filter = pdf_redact_image_filter_pixels;
- if (image_method == PDF_REDACT_IMAGE_REMOVE)
- red->sanitize_opts.image_filter = pdf_redact_image_filter_remove;
- if (image_method == PDF_REDACT_IMAGE_REMOVE_UNLESS_INVISIBLE)
- red->sanitize_opts.image_filter = pdf_redact_image_filter_remove_invisible;
- red->sanitize_opts.culler = culler;
- red->filter_list[0].filter = pdf_new_sanitize_filter;
- red->filter_list[0].options = &red->sanitize_opts;
- red->filter_list[1].filter = NULL;
- red->filter_list[1].options = NULL;
- red->page = page;
- red->target = target;
- }
- static int
- pdf_apply_redaction_imp(fz_context *ctx, pdf_page *page, pdf_annot *target, pdf_redact_options *redact_opts)
- {
- pdf_annot *annot;
- int has_redactions = 0;
- struct redact_filter_state red;
- pdf_document *doc = page->doc;
- for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) {
- if (target != NULL && target != annot)
- continue;
- if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
- has_redactions = 1;
- }
- if (!has_redactions)
- return 0;
- init_redact_filter(ctx, redact_opts, &red, page, target);
- if (target)
- pdf_begin_operation(ctx, doc, "Apply redaction");
- else
- pdf_begin_operation(ctx, doc, "Apply redactions on page");
- fz_try(ctx)
- {
- pdf_filter_page_contents(ctx, doc, page, &red.filter_opts);
- pdf_redact_page_links(ctx, &red);
- pdf_redact_page_annotations(ctx, &red);
- annot = pdf_first_annot(ctx, page);
- while (annot)
- {
- if (target == NULL || annot == target)
- {
- if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
- {
- pdf_delete_annot(ctx, page, annot);
- annot = pdf_first_annot(ctx, page);
- continue;
- }
- }
- annot = pdf_next_annot(ctx, annot);
- }
- doc->redacted = 1;
- pdf_end_operation(ctx, doc);
- }
- fz_catch(ctx)
- {
- pdf_abandon_operation(ctx, doc);
- fz_rethrow(ctx);
- }
- return 1;
- }
- int
- pdf_redact_page(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_redact_options *redact_opts)
- {
- if (page == NULL || page->doc != doc)
- fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't redact a page not from the doc");
- return pdf_apply_redaction_imp(ctx, page, NULL, redact_opts);
- }
- int
- pdf_apply_redaction(fz_context *ctx, pdf_annot *annot, pdf_redact_options *redact_opts)
- {
- return pdf_apply_redaction_imp(ctx, annot->page, annot, redact_opts);
- }
- /* Hard clipping of pages */
- struct clip_filter_state {
- pdf_filter_options filter_opts;
- pdf_sanitize_filter_options sanitize_opts;
- pdf_filter_factory filter_list[2];
- pdf_page *page;
- fz_rect clip;
- };
- static int clip_culler(fz_context *ctx, void *opaque, fz_rect bbox, fz_cull_type type)
- {
- struct clip_filter_state *hc = opaque;
- switch (type)
- {
- case FZ_CULL_PATH_FILL:
- case FZ_CULL_PATH_STROKE:
- case FZ_CULL_PATH_FILL_STROKE:
- case FZ_CULL_CLIP_PATH_FILL:
- case FZ_CULL_CLIP_PATH_STROKE:
- case FZ_CULL_CLIP_PATH_FILL_STROKE:
- case FZ_CULL_GLYPH:
- case FZ_CULL_IMAGE:
- case FZ_CULL_SHADING:
- return (fz_is_empty_rect(fz_intersect_rect(bbox, hc->clip)));
- default:
- return 0;
- }
- }
- static
- void init_clip_filter(fz_context *ctx, struct clip_filter_state *hc, pdf_page *page, fz_rect *clip)
- {
- memset(&hc->filter_opts, 0, sizeof hc->filter_opts);
- memset(&hc->sanitize_opts, 0, sizeof hc->sanitize_opts);
- hc->filter_opts.recurse = 0; /* don't redact patterns, softmasks, and type3 fonts */
- hc->filter_opts.instance_forms = 1; /* redact xobjects with instancing */
- hc->filter_opts.ascii = 0;
- hc->filter_opts.opaque = hc;
- hc->filter_opts.filters = hc->filter_list;
- hc->clip = *clip;
- hc->sanitize_opts.opaque = hc;
- hc->sanitize_opts.culler = clip_culler;
- hc->filter_list[0].filter = pdf_new_sanitize_filter;
- hc->filter_list[0].options = &hc->sanitize_opts;
- hc->filter_list[1].filter = NULL;
- hc->filter_list[1].options = NULL;
- hc->page = page;
- }
- static void
- pdf_clip_page_links(fz_context *ctx, struct clip_filter_state *hc)
- {
- pdf_obj *annots;
- pdf_obj *link;
- fz_rect area;
- int k;
- annots = pdf_dict_get(ctx, hc->page->obj, PDF_NAME(Annots));
- k = 0;
- while (k < pdf_array_len(ctx, annots))
- {
- link = pdf_array_get(ctx, annots, k);
- if (pdf_dict_get(ctx, link, PDF_NAME(Subtype)) == PDF_NAME(Link))
- {
- area = pdf_dict_get_rect(ctx, link, PDF_NAME(Rect));
- if (fz_is_empty_rect(fz_intersect_rect(area, hc->clip)))
- {
- pdf_array_delete(ctx, annots, k);
- continue;
- }
- }
- ++k;
- }
- }
- static void
- pdf_clip_page_annotations(fz_context *ctx, struct clip_filter_state *hc)
- {
- pdf_annot *annot;
- fz_rect area;
- restart:
- for (annot = pdf_first_annot(ctx, hc->page); annot; annot = pdf_next_annot(ctx, annot))
- {
- if (pdf_annot_type(ctx, annot) == PDF_ANNOT_FREE_TEXT)
- {
- area = pdf_dict_get_rect(ctx, pdf_annot_obj(ctx, annot), PDF_NAME(Rect));
- if (fz_is_empty_rect(fz_intersect_rect(area, hc->clip)))
- {
- pdf_delete_annot(ctx, hc->page, annot);
- goto restart;
- }
- }
- }
- }
- void
- pdf_clip_page(fz_context *ctx, pdf_page *page, fz_rect *clip)
- {
- pdf_document *doc;
- struct clip_filter_state hc;
- if (page == NULL)
- return;
- doc = page->doc;
- init_clip_filter(ctx, &hc, page, clip);
- pdf_begin_operation(ctx, doc, "Apply hard clip to page");
- fz_try(ctx)
- {
- pdf_filter_page_contents(ctx, doc, page, &hc.filter_opts);
- pdf_clip_page_links(ctx, &hc);
- pdf_clip_page_annotations(ctx, &hc);
- pdf_end_operation(ctx, doc);
- }
- fz_catch(ctx)
- {
- pdf_abandon_operation(ctx, doc);
- fz_rethrow(ctx);
- }
- }
|