| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842 |
- // Copyright (C) 2004-2025 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #include "mupdf/pdf.h"
- /* Define the following for some debugging output. */
- #undef DEBUG_SUBSETTING
- typedef struct gstate
- {
- struct gstate *next;
- int current_font;
- pdf_font_desc *font;
- } gstate;
- typedef struct resources_stack
- {
- struct resources_stack *next;
- pdf_obj *res;
- } resources_stack;
- typedef struct
- {
- int num;
- int gen;
- int is_ttf;
- int is_cidfont;
- pdf_obj *fontfile;
- unsigned char digest[16];
- fz_int_heap gids;
- fz_int_heap cids;
- /* Pointers back to the top level fonts that refer to this. */
- int max;
- int len;
- pdf_obj **font;
- } font_usage_t;
- typedef struct
- {
- int max;
- int len;
- font_usage_t *font;
- } fonts_usage_t;
- typedef struct
- {
- pdf_processor super;
- resources_stack *rstack;
- fonts_usage_t *usage;
- gstate *gs;
- } pdf_font_analysis_processor;
- static void
- pop_gstate(fz_context *ctx, pdf_font_analysis_processor *p)
- {
- gstate *gs = p->gs;
- gstate *old;
- if (gs == NULL)
- return;
- old = gs->next;
- pdf_drop_font(ctx, gs->font);
- fz_free(ctx, gs);
- p->gs = old;
- }
- static void
- drop_processor(fz_context *ctx, pdf_processor *proc)
- {
- pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
- while (p->rstack)
- {
- resources_stack *stk = p->rstack;
- p->rstack = stk->next;
- pdf_drop_obj(ctx, stk->res);
- fz_free(ctx, stk);
- }
- while (p->gs)
- pop_gstate(ctx, p);
- }
- static void
- push_resources(fz_context *ctx, pdf_processor *proc, pdf_obj *res)
- {
- pdf_font_analysis_processor *p = (pdf_font_analysis_processor *)proc;
- resources_stack *stk = fz_malloc_struct(ctx, resources_stack);
- stk->next = p->rstack;
- p->rstack = stk;
- fz_try(ctx)
- {
- stk->res = pdf_keep_obj(ctx, res);
- }
- fz_catch(ctx)
- {
- pdf_drop_obj(ctx, stk->res);
- p->rstack = stk->next;
- fz_free(ctx, stk);
- fz_rethrow(ctx);
- }
- }
- static pdf_obj *
- pop_resources(fz_context *ctx, pdf_processor *proc)
- {
- pdf_font_analysis_processor *p = (pdf_font_analysis_processor *)proc;
- resources_stack *stk = p->rstack;
- pdf_obj *res = p->rstack->res;
- p->rstack = stk->next;
- fz_free(ctx, stk);
- return res;
- }
- static void
- font_analysis_Q(fz_context *ctx, pdf_processor *proc)
- {
- pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
- pop_gstate(ctx, p);
- }
- static void
- font_analysis_q(fz_context *ctx, pdf_processor *proc)
- {
- pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
- gstate *gs = p->gs;
- gstate *new_gs = fz_malloc_struct(ctx, gstate);
- p->gs = new_gs;
- if (gs)
- {
- *new_gs = *gs;
- new_gs->next = gs;
- }
- pdf_keep_font(ctx, new_gs->font);
- }
- static void
- font_analysis_Tf(fz_context *ctx, pdf_processor *proc, const char *name, pdf_font_desc *font, float size)
- {
- pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
- pdf_obj *dict = pdf_dict_gets(ctx, pdf_dict_get(ctx, p->rstack->res, PDF_NAME(Font)), name);
- pdf_obj *subtype, *fontdesc;
- pdf_obj *fontfile = NULL;
- pdf_obj *key;
- int num, gen, i;
- int is_cidfont = 0;
- int is_ttf = 0;
- unsigned char digest[16];
- p->gs->current_font = -1; /* unknown font! */
- if (dict == NULL)
- return;
- /* We can have multiple fonts that rely on the same underlying fontfile
- * object. Therefore, resolve down to that. */
- subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
- if (subtype == PDF_NAME(Type1) || subtype == PDF_NAME(MMType1))
- {
- // fontfile subtype should be Type1C for us to be able to subset it
- key = PDF_NAME(FontFile);
- fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
- fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile));
- is_cidfont = 0;
- is_ttf = 0;
- }
- else if (subtype == PDF_NAME(TrueType))
- {
- key = PDF_NAME(FontFile2);
- fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
- fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile2));
- is_cidfont = 0;
- is_ttf = 1;
- }
- else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type0)))
- {
- dict = pdf_array_get(ctx, pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts)), 0);
- subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
- fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
- if (subtype == PDF_NAME(CIDFontType0))
- {
- // fontfile subtype is either CIDFontType0C or OpenType
- key = PDF_NAME(FontFile3);
- fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile3));
- subtype = pdf_dict_get(ctx, fontfile, PDF_NAME(Subtype));
- if (subtype == PDF_NAME(CIDFontType0C))
- {
- is_cidfont = 1;
- is_ttf = 0;
- }
- else if (subtype == PDF_NAME(OpenType))
- {
- is_cidfont = 1;
- is_ttf = 1;
- }
- else
- {
- fontfile = NULL;
- }
- }
- else if (subtype == PDF_NAME(CIDFontType2))
- {
- key = PDF_NAME(FontFile2);
- fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile2));
- is_cidfont = 1;
- is_ttf = 1;
- }
- }
- if (!fontfile)
- {
- #ifdef DEBUG_SUBSETTING
- fz_write_printf(ctx, fz_stddbg(ctx), "No embedded file found for font of subtype %s\n", pdf_to_name(ctx, subtype));
- #endif
- return;
- }
- num = pdf_to_num(ctx, fontfile);
- gen = pdf_to_gen(ctx, fontfile);
- for (i = 0; i < p->usage->len; i++)
- {
- if (p->usage->font[i].num == num &&
- p->usage->font[i].gen == gen)
- break;
- }
- fz_font_digest(ctx, font->font, digest);
- /* Check for duplicate fonts. (Fonts in the document that have
- * the font stream included multiple times as different objects).
- * This can happen with naive insertion routines. */
- if (i == p->usage->len)
- {
- for (i = 0; i < p->usage->len; i++)
- {
- if (memcmp(digest, p->usage->font[i].digest, 16) == 0)
- {
- pdf_dict_put(ctx, fontdesc, key, p->usage->font[i].fontfile);
- break;
- }
- }
- }
- pdf_drop_font(ctx, p->gs->font);
- p->gs->font = pdf_keep_font(ctx, font);
- p->gs->current_font = i;
- if (i < p->usage->len)
- {
- int j;
- for (j = 0; j < p->usage->font[i].len; j++)
- {
- if (pdf_objcmp(ctx, p->usage->font[i].font[j], dict) == 0)
- return;
- }
- if (p->usage->font[i].len == p->usage->font[i].max)
- {
- int newmax = p->usage->font[i].max * 2;
- p->usage->font[i].font = fz_realloc(ctx, p->usage->font[i].font, sizeof(*p->usage->font[i].font) * newmax);
- p->usage->font[i].max = newmax;
- }
- p->usage->font[i].font[j] = pdf_keep_obj(ctx, dict);
- p->usage->font[i].len++;
- return;
- }
- if (p->usage->max == p->usage->len)
- {
- int n = p->usage->max * 2;
- if (n == 0)
- n = 32;
- p->usage->font = (font_usage_t *)fz_realloc(ctx, p->usage->font, sizeof(*p->usage->font) * n);
- p->usage->max = n;
- }
- p->usage->font[i].is_ttf = is_ttf;
- p->usage->font[i].is_cidfont = is_cidfont;
- p->usage->font[i].fontfile = pdf_keep_obj(ctx, fontfile);
- p->usage->font[i].num = num;
- p->usage->font[i].gen = gen;
- p->usage->font[i].cids.len = 0;
- p->usage->font[i].cids.max = 0;
- p->usage->font[i].cids.heap = NULL;
- p->usage->font[i].gids.len = 0;
- p->usage->font[i].gids.max = 0;
- p->usage->font[i].gids.heap = NULL;
- p->usage->font[i].len = 0;
- p->usage->font[i].max = 0;
- p->usage->font[i].font = NULL;
- memcpy(p->usage->font[i].digest, digest, 16);
- p->usage->len++;
- p->usage->font[i].font = fz_malloc(ctx, sizeof(*p->usage->font[i].font) * 4);
- p->usage->font[i].len = 1;
- p->usage->font[i].max = 4;
- p->usage->font[i].font[0] = pdf_keep_obj(ctx, dict);
- }
- static void
- show_char(fz_context *ctx, font_usage_t *font, int cid, int gid)
- {
- fz_int_heap_insert(ctx, &font->cids, cid);
- fz_int_heap_insert(ctx, &font->gids, gid);
- }
- static void
- show_string(fz_context *ctx, pdf_font_analysis_processor *p, unsigned char *buf, size_t len)
- {
- gstate *gs = p->gs;
- pdf_font_desc *fontdesc = gs->font;
- size_t pos = 0;
- font_usage_t *font;
- // Not an embedded font!
- if (gs->current_font < 0 || fontdesc == NULL)
- return;
- font = &p->usage->font[gs->current_font];
- while (pos < len)
- {
- unsigned int cpt;
- int inc = pdf_decode_cmap(fontdesc->encoding, &buf[pos], &buf[len], &cpt);
- int cid = pdf_lookup_cmap(fontdesc->encoding, cpt);
- if (cid >= 0)
- {
- int gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
- show_char(ctx, font, cid, gid);
- }
- pos += inc;
- }
- }
- static void
- show_text(fz_context *ctx, pdf_font_analysis_processor *p, pdf_obj *text)
- {
- gstate *gs = p->gs;
- pdf_font_desc *fontdesc;
- int i, n;
- if (!gs)
- return;
- fontdesc = gs->font;
- if (!fontdesc)
- return;
- if (pdf_is_string(ctx, text))
- {
- show_string(ctx, p, (unsigned char *)pdf_to_str_buf(ctx, text), pdf_to_str_len(ctx, text));
- }
- else if (pdf_is_array(ctx, text))
- {
- n = pdf_array_len(ctx, text);
- for (i = 0; i < n; i++)
- {
- pdf_obj *item = pdf_array_get(ctx, text, i);
- if (pdf_is_string(ctx, item))
- {
- show_string(ctx, p, (unsigned char *)pdf_to_str_buf(ctx, item), pdf_to_str_len(ctx, item));
- }
- }
- }
- }
- static void
- font_analysis_TJ(fz_context *ctx, pdf_processor *proc, pdf_obj *array)
- {
- pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
- show_text(ctx, p, array);
- }
- static void
- font_analysis_Tj(fz_context *ctx, pdf_processor *proc, char *str, size_t len)
- {
- pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
- show_string(ctx, p, (unsigned char *)str, len);
- }
- static void
- font_analysis_squote(fz_context *ctx, pdf_processor *proc, char *str, size_t len)
- {
- /* Note, we convert all T' operators to (maybe) a T* and a Tj */
- pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
- show_string(ctx, p, (unsigned char *)str, len);
- }
- static void
- font_analysis_dquote(fz_context *ctx, pdf_processor *proc, float aw, float ac, char *str, size_t len)
- {
- /* Note, we convert all T" operators to (maybe) a T*,
- * (maybe) Tc, (maybe) Tw and a Tj. */
- pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
- show_string(ctx, p, (unsigned char*)str, len);
- }
- static void
- font_analysis_Do_form(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *xobj)
- {
- pdf_font_analysis_processor *pr = (pdf_font_analysis_processor *)proc;
- pdf_document *doc = pdf_get_bound_document(ctx, xobj);
- pdf_obj *resources = pdf_xobject_resources(ctx, xobj);
- if (!resources)
- resources = pr->rstack->res;
- pdf_process_contents(ctx, (pdf_processor*)pr, doc, resources, xobj, NULL, NULL);
- }
- static pdf_processor *
- pdf_new_font_analysis_processor(fz_context *ctx, fonts_usage_t *usage)
- {
- pdf_font_analysis_processor *proc = (pdf_font_analysis_processor *)pdf_new_processor(ctx, sizeof *proc);
- proc->super.drop_processor = drop_processor;
- proc->super.push_resources = push_resources;
- proc->super.pop_resources = pop_resources;
- proc->super.op_Do_form = font_analysis_Do_form;
- proc->super.op_Tf = font_analysis_Tf;
- proc->super.op_Tj = font_analysis_Tj;
- proc->super.op_TJ = font_analysis_TJ;
- proc->super.op_squote = font_analysis_squote;
- proc->super.op_dquote = font_analysis_dquote;
- proc->super.op_q = font_analysis_q;
- proc->super.op_Q = font_analysis_Q;
- fz_try(ctx)
- proc->gs = fz_malloc_struct(ctx, gstate);
- fz_catch(ctx)
- {
- fz_free(ctx, proc);
- fz_rethrow(ctx);
- }
- proc->gs->current_font = -1; // no font set yet
- proc->usage = usage;
- return &proc->super;
- }
- static void
- examine_page(fz_context *ctx, pdf_document *doc, pdf_page *page, fonts_usage_t *usage)
- {
- pdf_processor *proc = pdf_new_font_analysis_processor(ctx, usage);
- pdf_obj *contents = pdf_page_contents(ctx, page);
- pdf_obj *resources = pdf_page_resources(ctx, page);
- pdf_annot *annot, *widget;
- fz_try(ctx)
- {
- pdf_process_contents(ctx, proc, doc, resources, contents, NULL, NULL);
- pdf_processor_push_resources(ctx, proc, resources);
- for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
- pdf_process_annot(ctx, proc, annot, NULL);
- for (widget = pdf_first_widget(ctx, page); widget; widget = pdf_next_widget(ctx, widget))
- pdf_process_annot(ctx, proc, widget, NULL);
- pdf_close_processor(ctx, proc);
- }
- fz_always(ctx)
- {
- pdf_drop_processor(ctx, proc);
- }
- fz_catch(ctx)
- fz_rethrow(ctx);
- }
- static void
- subset_ttf(fz_context *ctx, pdf_document *doc, font_usage_t *font, pdf_obj *fontfile, int symbolic, int cidfont)
- {
- fz_buffer *buf = pdf_load_stream(ctx, fontfile);
- fz_buffer *newbuf = NULL;
- if (buf->len == 0)
- {
- fz_drop_buffer(ctx, buf);
- return;
- }
- fz_var(newbuf);
- fz_try(ctx)
- {
- newbuf = fz_subset_ttf_for_gids(ctx, buf, font->gids.heap, font->gids.len, symbolic, cidfont);
- pdf_update_stream(ctx, doc, fontfile, newbuf, 0);
- pdf_dict_put_int(ctx, fontfile, PDF_NAME(Length1), newbuf->len);
- }
- fz_always(ctx)
- {
- fz_drop_buffer(ctx, newbuf);
- fz_drop_buffer(ctx, buf);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- }
- static void
- subset_cff(fz_context *ctx, pdf_document *doc, font_usage_t *font, pdf_obj *fontfile, int symbolic, int cidfont)
- {
- fz_buffer *buf = pdf_load_stream(ctx, fontfile);
- fz_buffer *newbuf = NULL;
- if (buf->len == 0)
- {
- fz_drop_buffer(ctx, buf);
- return;
- }
- fz_var(newbuf);
- fz_try(ctx)
- {
- newbuf = fz_subset_cff_for_gids(ctx, buf, font->gids.heap, font->gids.len, symbolic, cidfont);
- pdf_update_stream(ctx, doc, fontfile, newbuf, 0);
- pdf_dict_put_int(ctx, fontfile, PDF_NAME(Length1), newbuf->len);
- }
- fz_always(ctx)
- {
- fz_drop_buffer(ctx, newbuf);
- fz_drop_buffer(ctx, buf);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- }
- static void
- do_adjust_simple_font(fz_context *ctx, pdf_document *doc, font_usage_t *font, int n)
- {
- pdf_obj *obj = font->font[n];
- int old_firstchar = pdf_dict_get_int(ctx, obj, PDF_NAME(FirstChar));
- pdf_obj *old_widths = pdf_dict_get(ctx, obj, PDF_NAME(Widths));
- int new_firstchar = font->cids.heap[0];
- int new_lastchar = font->cids.heap[font->cids.len-1];
- pdf_obj *widths;
- int i;
- pdf_dict_put_int(ctx, obj, PDF_NAME(FirstChar), new_firstchar);
- pdf_dict_put_int(ctx, obj, PDF_NAME(LastChar), new_lastchar);
- if (old_widths)
- {
- int j = 0;
- widths = pdf_new_array(ctx, doc, new_lastchar - new_firstchar + 1);
- for (i = new_firstchar; i <= new_lastchar; i++)
- {
- if (font->cids.heap[j] == i)
- {
- pdf_array_push_int(ctx, widths, pdf_array_get_int(ctx, old_widths, i - old_firstchar));
- j++;
- }
- else
- pdf_array_push_int(ctx, widths, 0);
- }
- pdf_dict_put_drop(ctx, obj, PDF_NAME(Widths), widths);
- }
- }
- static void
- adjust_simple_font(fz_context *ctx, pdf_document *doc, font_usage_t *font)
- {
- int i;
- for (i = 0; i < font->len; i++)
- do_adjust_simple_font(ctx, doc, font, i);
- }
- static pdf_obj *
- get_fontdesc(fz_context *ctx, pdf_obj *font)
- {
- pdf_obj *fontdesc = pdf_dict_get(ctx, font, PDF_NAME(FontDescriptor));
- if (fontdesc)
- return fontdesc;
- return pdf_dict_get(ctx, pdf_array_get(ctx, pdf_dict_get(ctx, font, PDF_NAME(DescendantFonts)), 0), PDF_NAME(FontDescriptor));
- }
- static void
- prefix_font_name(fz_context *ctx, pdf_document *doc, pdf_obj *font, pdf_obj *file)
- {
- fz_buffer *buf;
- uint32_t digest[4], v;
- pdf_obj *fontdesc = get_fontdesc(ctx, font);
- const char *name = pdf_dict_get_name(ctx, fontdesc, PDF_NAME(FontName));
- char new_name[256];
- size_t len;
- /* If there is no name, just exit. Possibly should throw here. */
- if (name == NULL)
- return;
- len = strlen(name);
- if (len > 6 && name[6] == '+')
- return; /* Already a subset name */
- buf = pdf_load_stream(ctx, file);
- fz_md5_buffer(ctx, buf, (uint8_t *)digest);
- fz_drop_buffer(ctx, buf);
- v = digest[0] ^ digest[1] ^ digest[2] ^ digest[3];
- new_name[0] = 'A' + (v % 26);
- v /= 26;
- new_name[1] = 'A' + (v % 26);
- v /= 26;
- new_name[2] = 'A' + (v % 26);
- v /= 26;
- new_name[3] = 'A' + (v % 26);
- v /= 26;
- new_name[4] = 'A' + (v % 26);
- v /= 26;
- new_name[5] = 'A' + (v % 26);
- new_name[6] = '+';
- memcpy(new_name+7, name, len > sizeof(new_name)-8 ? sizeof(new_name)-8 : len+1);
- new_name[sizeof(new_name)-1] = 0;
- pdf_dict_put_name(ctx, fontdesc, PDF_NAME(FontName), new_name);
- }
- static int
- get_symbolic(fz_context *ctx, font_usage_t *font)
- {
- int i, flags, symbolic, symbolic2;
- pdf_obj *fontdesc;
- if (!font || font->len == 0)
- return 0;
- fontdesc = pdf_dict_get(ctx, font->font[0], PDF_NAME(FontDescriptor));
- flags = pdf_dict_get_int(ctx, fontdesc, PDF_NAME(Flags));
- symbolic = (!!(flags & 4)) | ((flags & 32) == 0);
- for (i = 1; i < font->len; i++)
- {
- fontdesc = pdf_dict_get(ctx, font->font[i], PDF_NAME(FontDescriptor));
- flags = pdf_dict_get_int(ctx, fontdesc, PDF_NAME(Flags));
- symbolic2 = (!!(flags & 4)) | ((flags & 32) == 0);
- if (symbolic != symbolic2)
- {
- fz_warn(ctx, "Font cannot be both symbolic and non-symbolic. Skipping subsetting.");
- return -1;
- }
- }
- return symbolic;
- }
- static pdf_obj *get_subtype(fz_context *ctx, font_usage_t *font)
- {
- /* If we can get the subtype from the fontfile, great. Use that. */
- pdf_obj *subtype = pdf_dict_get(ctx, font->fontfile, PDF_NAME(Subtype));
- int i;
- if (subtype != NULL)
- return subtype;
- /* Otherwise we'll have to get it from the font objects, and they'd
- * all better agree. */
- if (font->len == 0)
- return NULL;
- subtype = pdf_dict_get(ctx, font->font[0], PDF_NAME(Subtype));
- for (i = 1; i < font->len; i++)
- {
- pdf_obj *subtype2 = pdf_dict_get(ctx, font->font[i], PDF_NAME(Subtype));
- if (pdf_objcmp(ctx, subtype, subtype2))
- return NULL;
- }
- return subtype;
- }
- void
- pdf_subset_fonts(fz_context *ctx, pdf_document *doc, int len, const int *pages)
- {
- int i, j;
- pdf_page *page = NULL;
- fonts_usage_t usage = { 0 };
- fz_var(page);
- fz_try(ctx)
- {
- if (len == 0)
- {
- /* Process every page. */
- len = pdf_count_pages(ctx, doc);
- for (i = 0; i < len; i++)
- {
- page = pdf_load_page(ctx, doc, i);
- examine_page(ctx, doc, page, &usage);
- fz_drop_page(ctx, (fz_page *)page);
- page = NULL;
- }
- }
- else
- {
- /* Process just the pages we are given. */
- for (i = 0; i < len; i++)
- {
- page = pdf_load_page(ctx, doc, pages[i]);
- examine_page(ctx, doc, page, &usage);
- fz_drop_page(ctx, (fz_page *)page);
- page = NULL;
- }
- }
- /* All our font usage data is in heaps. Sort the heaps. */
- for (i = 0; i < usage.len; i++)
- {
- font_usage_t *font = &usage.font[i];
- fz_int_heap_sort(ctx, &font->cids);
- fz_int_heap_uniq(ctx, &font->cids);
- fz_int_heap_sort(ctx, &font->gids);
- fz_int_heap_uniq(ctx, &font->gids);
- }
- /* Now, actually subset the fonts. */
- for (i = 0; i < usage.len; i++)
- {
- font_usage_t *font = &usage.font[i];
- pdf_obj *subtype = get_subtype(ctx, font);
- int symbolic = get_symbolic(ctx, font);
- if (symbolic < 0)
- continue;
- /* Not sure this can ever happen, and if it does this is not a great
- * way to handle it, but it'll do for now. */
- if (font->gids.len == 0 || font->cids.len == 0 || subtype == NULL)
- continue;
- #ifdef DEBUG_SUBSETTING
- fz_write_printf(ctx, fz_stddbg(ctx), "font->obj=%d subtype=", pdf_to_num(ctx, font->fontfile));
- pdf_debug_obj(ctx, subtype);
- fz_write_printf(ctx, fz_stddbg(ctx), "\n");
- pdf_debug_obj(ctx, pdf_dict_get(ctx, font->font[0], PDF_NAME(FontDescriptor)));
- #endif
- /* If we hit a (non-SYSTEM) problem subsetting a font, give up for this font alone.
- * This will leave this font alone. */
- fz_try(ctx)
- {
- if (font->is_ttf)
- subset_ttf(ctx, doc, font, font->fontfile, symbolic, font->is_cidfont);
- else if (font->is_cidfont)
- subset_cff(ctx, doc, font, font->fontfile, symbolic, font->is_cidfont);
- }
- fz_catch(ctx)
- {
- fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
- fz_report_error(ctx);
- continue;
- }
- /* Any problems changing these parts of the fonts are really fatal though. */
- if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)) ||
- pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
- {
- adjust_simple_font(ctx, doc, font);
- }
- /* And prefix the name */
- for (j = 0; j < font->len; j++)
- prefix_font_name(ctx, doc, font->font[j], font->fontfile);
- }
- }
- fz_always(ctx)
- {
- fz_drop_page(ctx, (fz_page *)page);
- for (i = 0; i < usage.len; i++)
- {
- pdf_drop_obj(ctx, usage.font[i].fontfile);
- fz_free(ctx, usage.font[i].cids.heap);
- fz_free(ctx, usage.font[i].gids.heap);
- for (j = 0; j < usage.font[i].len; j++)
- pdf_drop_obj(ctx, usage.font[i].font[j]);
- fz_free(ctx, usage.font[i].font);
- }
- fz_free(ctx, usage.font);
- }
- fz_catch(ctx)
- fz_rethrow(ctx);
- }
|