pdf-subset.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842
  1. // Copyright (C) 2004-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include "mupdf/pdf.h"
  24. /* Define the following for some debugging output. */
  25. #undef DEBUG_SUBSETTING
  26. typedef struct gstate
  27. {
  28. struct gstate *next;
  29. int current_font;
  30. pdf_font_desc *font;
  31. } gstate;
  32. typedef struct resources_stack
  33. {
  34. struct resources_stack *next;
  35. pdf_obj *res;
  36. } resources_stack;
  37. typedef struct
  38. {
  39. int num;
  40. int gen;
  41. int is_ttf;
  42. int is_cidfont;
  43. pdf_obj *fontfile;
  44. unsigned char digest[16];
  45. fz_int_heap gids;
  46. fz_int_heap cids;
  47. /* Pointers back to the top level fonts that refer to this. */
  48. int max;
  49. int len;
  50. pdf_obj **font;
  51. } font_usage_t;
  52. typedef struct
  53. {
  54. int max;
  55. int len;
  56. font_usage_t *font;
  57. } fonts_usage_t;
  58. typedef struct
  59. {
  60. pdf_processor super;
  61. resources_stack *rstack;
  62. fonts_usage_t *usage;
  63. gstate *gs;
  64. } pdf_font_analysis_processor;
  65. static void
  66. pop_gstate(fz_context *ctx, pdf_font_analysis_processor *p)
  67. {
  68. gstate *gs = p->gs;
  69. gstate *old;
  70. if (gs == NULL)
  71. return;
  72. old = gs->next;
  73. pdf_drop_font(ctx, gs->font);
  74. fz_free(ctx, gs);
  75. p->gs = old;
  76. }
  77. static void
  78. drop_processor(fz_context *ctx, pdf_processor *proc)
  79. {
  80. pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
  81. while (p->rstack)
  82. {
  83. resources_stack *stk = p->rstack;
  84. p->rstack = stk->next;
  85. pdf_drop_obj(ctx, stk->res);
  86. fz_free(ctx, stk);
  87. }
  88. while (p->gs)
  89. pop_gstate(ctx, p);
  90. }
  91. static void
  92. push_resources(fz_context *ctx, pdf_processor *proc, pdf_obj *res)
  93. {
  94. pdf_font_analysis_processor *p = (pdf_font_analysis_processor *)proc;
  95. resources_stack *stk = fz_malloc_struct(ctx, resources_stack);
  96. stk->next = p->rstack;
  97. p->rstack = stk;
  98. fz_try(ctx)
  99. {
  100. stk->res = pdf_keep_obj(ctx, res);
  101. }
  102. fz_catch(ctx)
  103. {
  104. pdf_drop_obj(ctx, stk->res);
  105. p->rstack = stk->next;
  106. fz_free(ctx, stk);
  107. fz_rethrow(ctx);
  108. }
  109. }
  110. static pdf_obj *
  111. pop_resources(fz_context *ctx, pdf_processor *proc)
  112. {
  113. pdf_font_analysis_processor *p = (pdf_font_analysis_processor *)proc;
  114. resources_stack *stk = p->rstack;
  115. pdf_obj *res = p->rstack->res;
  116. p->rstack = stk->next;
  117. fz_free(ctx, stk);
  118. return res;
  119. }
  120. static void
  121. font_analysis_Q(fz_context *ctx, pdf_processor *proc)
  122. {
  123. pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
  124. pop_gstate(ctx, p);
  125. }
  126. static void
  127. font_analysis_q(fz_context *ctx, pdf_processor *proc)
  128. {
  129. pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
  130. gstate *gs = p->gs;
  131. gstate *new_gs = fz_malloc_struct(ctx, gstate);
  132. p->gs = new_gs;
  133. if (gs)
  134. {
  135. *new_gs = *gs;
  136. new_gs->next = gs;
  137. }
  138. pdf_keep_font(ctx, new_gs->font);
  139. }
  140. static void
  141. font_analysis_Tf(fz_context *ctx, pdf_processor *proc, const char *name, pdf_font_desc *font, float size)
  142. {
  143. pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
  144. pdf_obj *dict = pdf_dict_gets(ctx, pdf_dict_get(ctx, p->rstack->res, PDF_NAME(Font)), name);
  145. pdf_obj *subtype, *fontdesc;
  146. pdf_obj *fontfile = NULL;
  147. pdf_obj *key;
  148. int num, gen, i;
  149. int is_cidfont = 0;
  150. int is_ttf = 0;
  151. unsigned char digest[16];
  152. p->gs->current_font = -1; /* unknown font! */
  153. if (dict == NULL)
  154. return;
  155. /* We can have multiple fonts that rely on the same underlying fontfile
  156. * object. Therefore, resolve down to that. */
  157. subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
  158. if (subtype == PDF_NAME(Type1) || subtype == PDF_NAME(MMType1))
  159. {
  160. // fontfile subtype should be Type1C for us to be able to subset it
  161. key = PDF_NAME(FontFile);
  162. fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
  163. fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile));
  164. is_cidfont = 0;
  165. is_ttf = 0;
  166. }
  167. else if (subtype == PDF_NAME(TrueType))
  168. {
  169. key = PDF_NAME(FontFile2);
  170. fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
  171. fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile2));
  172. is_cidfont = 0;
  173. is_ttf = 1;
  174. }
  175. else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type0)))
  176. {
  177. dict = pdf_array_get(ctx, pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts)), 0);
  178. subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
  179. fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
  180. if (subtype == PDF_NAME(CIDFontType0))
  181. {
  182. // fontfile subtype is either CIDFontType0C or OpenType
  183. key = PDF_NAME(FontFile3);
  184. fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile3));
  185. subtype = pdf_dict_get(ctx, fontfile, PDF_NAME(Subtype));
  186. if (subtype == PDF_NAME(CIDFontType0C))
  187. {
  188. is_cidfont = 1;
  189. is_ttf = 0;
  190. }
  191. else if (subtype == PDF_NAME(OpenType))
  192. {
  193. is_cidfont = 1;
  194. is_ttf = 1;
  195. }
  196. else
  197. {
  198. fontfile = NULL;
  199. }
  200. }
  201. else if (subtype == PDF_NAME(CIDFontType2))
  202. {
  203. key = PDF_NAME(FontFile2);
  204. fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile2));
  205. is_cidfont = 1;
  206. is_ttf = 1;
  207. }
  208. }
  209. if (!fontfile)
  210. {
  211. #ifdef DEBUG_SUBSETTING
  212. fz_write_printf(ctx, fz_stddbg(ctx), "No embedded file found for font of subtype %s\n", pdf_to_name(ctx, subtype));
  213. #endif
  214. return;
  215. }
  216. num = pdf_to_num(ctx, fontfile);
  217. gen = pdf_to_gen(ctx, fontfile);
  218. for (i = 0; i < p->usage->len; i++)
  219. {
  220. if (p->usage->font[i].num == num &&
  221. p->usage->font[i].gen == gen)
  222. break;
  223. }
  224. fz_font_digest(ctx, font->font, digest);
  225. /* Check for duplicate fonts. (Fonts in the document that have
  226. * the font stream included multiple times as different objects).
  227. * This can happen with naive insertion routines. */
  228. if (i == p->usage->len)
  229. {
  230. for (i = 0; i < p->usage->len; i++)
  231. {
  232. if (memcmp(digest, p->usage->font[i].digest, 16) == 0)
  233. {
  234. pdf_dict_put(ctx, fontdesc, key, p->usage->font[i].fontfile);
  235. break;
  236. }
  237. }
  238. }
  239. pdf_drop_font(ctx, p->gs->font);
  240. p->gs->font = pdf_keep_font(ctx, font);
  241. p->gs->current_font = i;
  242. if (i < p->usage->len)
  243. {
  244. int j;
  245. for (j = 0; j < p->usage->font[i].len; j++)
  246. {
  247. if (pdf_objcmp(ctx, p->usage->font[i].font[j], dict) == 0)
  248. return;
  249. }
  250. if (p->usage->font[i].len == p->usage->font[i].max)
  251. {
  252. int newmax = p->usage->font[i].max * 2;
  253. p->usage->font[i].font = fz_realloc(ctx, p->usage->font[i].font, sizeof(*p->usage->font[i].font) * newmax);
  254. p->usage->font[i].max = newmax;
  255. }
  256. p->usage->font[i].font[j] = pdf_keep_obj(ctx, dict);
  257. p->usage->font[i].len++;
  258. return;
  259. }
  260. if (p->usage->max == p->usage->len)
  261. {
  262. int n = p->usage->max * 2;
  263. if (n == 0)
  264. n = 32;
  265. p->usage->font = (font_usage_t *)fz_realloc(ctx, p->usage->font, sizeof(*p->usage->font) * n);
  266. p->usage->max = n;
  267. }
  268. p->usage->font[i].is_ttf = is_ttf;
  269. p->usage->font[i].is_cidfont = is_cidfont;
  270. p->usage->font[i].fontfile = pdf_keep_obj(ctx, fontfile);
  271. p->usage->font[i].num = num;
  272. p->usage->font[i].gen = gen;
  273. p->usage->font[i].cids.len = 0;
  274. p->usage->font[i].cids.max = 0;
  275. p->usage->font[i].cids.heap = NULL;
  276. p->usage->font[i].gids.len = 0;
  277. p->usage->font[i].gids.max = 0;
  278. p->usage->font[i].gids.heap = NULL;
  279. p->usage->font[i].len = 0;
  280. p->usage->font[i].max = 0;
  281. p->usage->font[i].font = NULL;
  282. memcpy(p->usage->font[i].digest, digest, 16);
  283. p->usage->len++;
  284. p->usage->font[i].font = fz_malloc(ctx, sizeof(*p->usage->font[i].font) * 4);
  285. p->usage->font[i].len = 1;
  286. p->usage->font[i].max = 4;
  287. p->usage->font[i].font[0] = pdf_keep_obj(ctx, dict);
  288. }
  289. static void
  290. show_char(fz_context *ctx, font_usage_t *font, int cid, int gid)
  291. {
  292. fz_int_heap_insert(ctx, &font->cids, cid);
  293. fz_int_heap_insert(ctx, &font->gids, gid);
  294. }
  295. static void
  296. show_string(fz_context *ctx, pdf_font_analysis_processor *p, unsigned char *buf, size_t len)
  297. {
  298. gstate *gs = p->gs;
  299. pdf_font_desc *fontdesc = gs->font;
  300. size_t pos = 0;
  301. font_usage_t *font;
  302. // Not an embedded font!
  303. if (gs->current_font < 0 || fontdesc == NULL)
  304. return;
  305. font = &p->usage->font[gs->current_font];
  306. while (pos < len)
  307. {
  308. unsigned int cpt;
  309. int inc = pdf_decode_cmap(fontdesc->encoding, &buf[pos], &buf[len], &cpt);
  310. int cid = pdf_lookup_cmap(fontdesc->encoding, cpt);
  311. if (cid >= 0)
  312. {
  313. int gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
  314. show_char(ctx, font, cid, gid);
  315. }
  316. pos += inc;
  317. }
  318. }
  319. static void
  320. show_text(fz_context *ctx, pdf_font_analysis_processor *p, pdf_obj *text)
  321. {
  322. gstate *gs = p->gs;
  323. pdf_font_desc *fontdesc;
  324. int i, n;
  325. if (!gs)
  326. return;
  327. fontdesc = gs->font;
  328. if (!fontdesc)
  329. return;
  330. if (pdf_is_string(ctx, text))
  331. {
  332. show_string(ctx, p, (unsigned char *)pdf_to_str_buf(ctx, text), pdf_to_str_len(ctx, text));
  333. }
  334. else if (pdf_is_array(ctx, text))
  335. {
  336. n = pdf_array_len(ctx, text);
  337. for (i = 0; i < n; i++)
  338. {
  339. pdf_obj *item = pdf_array_get(ctx, text, i);
  340. if (pdf_is_string(ctx, item))
  341. {
  342. show_string(ctx, p, (unsigned char *)pdf_to_str_buf(ctx, item), pdf_to_str_len(ctx, item));
  343. }
  344. }
  345. }
  346. }
  347. static void
  348. font_analysis_TJ(fz_context *ctx, pdf_processor *proc, pdf_obj *array)
  349. {
  350. pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
  351. show_text(ctx, p, array);
  352. }
  353. static void
  354. font_analysis_Tj(fz_context *ctx, pdf_processor *proc, char *str, size_t len)
  355. {
  356. pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
  357. show_string(ctx, p, (unsigned char *)str, len);
  358. }
  359. static void
  360. font_analysis_squote(fz_context *ctx, pdf_processor *proc, char *str, size_t len)
  361. {
  362. /* Note, we convert all T' operators to (maybe) a T* and a Tj */
  363. pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
  364. show_string(ctx, p, (unsigned char *)str, len);
  365. }
  366. static void
  367. font_analysis_dquote(fz_context *ctx, pdf_processor *proc, float aw, float ac, char *str, size_t len)
  368. {
  369. /* Note, we convert all T" operators to (maybe) a T*,
  370. * (maybe) Tc, (maybe) Tw and a Tj. */
  371. pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc;
  372. show_string(ctx, p, (unsigned char*)str, len);
  373. }
  374. static void
  375. font_analysis_Do_form(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *xobj)
  376. {
  377. pdf_font_analysis_processor *pr = (pdf_font_analysis_processor *)proc;
  378. pdf_document *doc = pdf_get_bound_document(ctx, xobj);
  379. pdf_obj *resources = pdf_xobject_resources(ctx, xobj);
  380. if (!resources)
  381. resources = pr->rstack->res;
  382. pdf_process_contents(ctx, (pdf_processor*)pr, doc, resources, xobj, NULL, NULL);
  383. }
  384. static pdf_processor *
  385. pdf_new_font_analysis_processor(fz_context *ctx, fonts_usage_t *usage)
  386. {
  387. pdf_font_analysis_processor *proc = (pdf_font_analysis_processor *)pdf_new_processor(ctx, sizeof *proc);
  388. proc->super.drop_processor = drop_processor;
  389. proc->super.push_resources = push_resources;
  390. proc->super.pop_resources = pop_resources;
  391. proc->super.op_Do_form = font_analysis_Do_form;
  392. proc->super.op_Tf = font_analysis_Tf;
  393. proc->super.op_Tj = font_analysis_Tj;
  394. proc->super.op_TJ = font_analysis_TJ;
  395. proc->super.op_squote = font_analysis_squote;
  396. proc->super.op_dquote = font_analysis_dquote;
  397. proc->super.op_q = font_analysis_q;
  398. proc->super.op_Q = font_analysis_Q;
  399. fz_try(ctx)
  400. proc->gs = fz_malloc_struct(ctx, gstate);
  401. fz_catch(ctx)
  402. {
  403. fz_free(ctx, proc);
  404. fz_rethrow(ctx);
  405. }
  406. proc->gs->current_font = -1; // no font set yet
  407. proc->usage = usage;
  408. return &proc->super;
  409. }
  410. static void
  411. examine_page(fz_context *ctx, pdf_document *doc, pdf_page *page, fonts_usage_t *usage)
  412. {
  413. pdf_processor *proc = pdf_new_font_analysis_processor(ctx, usage);
  414. pdf_obj *contents = pdf_page_contents(ctx, page);
  415. pdf_obj *resources = pdf_page_resources(ctx, page);
  416. pdf_annot *annot, *widget;
  417. fz_try(ctx)
  418. {
  419. pdf_process_contents(ctx, proc, doc, resources, contents, NULL, NULL);
  420. pdf_processor_push_resources(ctx, proc, resources);
  421. for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
  422. pdf_process_annot(ctx, proc, annot, NULL);
  423. for (widget = pdf_first_widget(ctx, page); widget; widget = pdf_next_widget(ctx, widget))
  424. pdf_process_annot(ctx, proc, widget, NULL);
  425. pdf_close_processor(ctx, proc);
  426. }
  427. fz_always(ctx)
  428. {
  429. pdf_drop_processor(ctx, proc);
  430. }
  431. fz_catch(ctx)
  432. fz_rethrow(ctx);
  433. }
  434. static void
  435. subset_ttf(fz_context *ctx, pdf_document *doc, font_usage_t *font, pdf_obj *fontfile, int symbolic, int cidfont)
  436. {
  437. fz_buffer *buf = pdf_load_stream(ctx, fontfile);
  438. fz_buffer *newbuf = NULL;
  439. if (buf->len == 0)
  440. {
  441. fz_drop_buffer(ctx, buf);
  442. return;
  443. }
  444. fz_var(newbuf);
  445. fz_try(ctx)
  446. {
  447. newbuf = fz_subset_ttf_for_gids(ctx, buf, font->gids.heap, font->gids.len, symbolic, cidfont);
  448. pdf_update_stream(ctx, doc, fontfile, newbuf, 0);
  449. pdf_dict_put_int(ctx, fontfile, PDF_NAME(Length1), newbuf->len);
  450. }
  451. fz_always(ctx)
  452. {
  453. fz_drop_buffer(ctx, newbuf);
  454. fz_drop_buffer(ctx, buf);
  455. }
  456. fz_catch(ctx)
  457. {
  458. fz_rethrow(ctx);
  459. }
  460. }
  461. static void
  462. subset_cff(fz_context *ctx, pdf_document *doc, font_usage_t *font, pdf_obj *fontfile, int symbolic, int cidfont)
  463. {
  464. fz_buffer *buf = pdf_load_stream(ctx, fontfile);
  465. fz_buffer *newbuf = NULL;
  466. if (buf->len == 0)
  467. {
  468. fz_drop_buffer(ctx, buf);
  469. return;
  470. }
  471. fz_var(newbuf);
  472. fz_try(ctx)
  473. {
  474. newbuf = fz_subset_cff_for_gids(ctx, buf, font->gids.heap, font->gids.len, symbolic, cidfont);
  475. pdf_update_stream(ctx, doc, fontfile, newbuf, 0);
  476. pdf_dict_put_int(ctx, fontfile, PDF_NAME(Length1), newbuf->len);
  477. }
  478. fz_always(ctx)
  479. {
  480. fz_drop_buffer(ctx, newbuf);
  481. fz_drop_buffer(ctx, buf);
  482. }
  483. fz_catch(ctx)
  484. {
  485. fz_rethrow(ctx);
  486. }
  487. }
  488. static void
  489. do_adjust_simple_font(fz_context *ctx, pdf_document *doc, font_usage_t *font, int n)
  490. {
  491. pdf_obj *obj = font->font[n];
  492. int old_firstchar = pdf_dict_get_int(ctx, obj, PDF_NAME(FirstChar));
  493. pdf_obj *old_widths = pdf_dict_get(ctx, obj, PDF_NAME(Widths));
  494. int new_firstchar = font->cids.heap[0];
  495. int new_lastchar = font->cids.heap[font->cids.len-1];
  496. pdf_obj *widths;
  497. int i;
  498. pdf_dict_put_int(ctx, obj, PDF_NAME(FirstChar), new_firstchar);
  499. pdf_dict_put_int(ctx, obj, PDF_NAME(LastChar), new_lastchar);
  500. if (old_widths)
  501. {
  502. int j = 0;
  503. widths = pdf_new_array(ctx, doc, new_lastchar - new_firstchar + 1);
  504. for (i = new_firstchar; i <= new_lastchar; i++)
  505. {
  506. if (font->cids.heap[j] == i)
  507. {
  508. pdf_array_push_int(ctx, widths, pdf_array_get_int(ctx, old_widths, i - old_firstchar));
  509. j++;
  510. }
  511. else
  512. pdf_array_push_int(ctx, widths, 0);
  513. }
  514. pdf_dict_put_drop(ctx, obj, PDF_NAME(Widths), widths);
  515. }
  516. }
  517. static void
  518. adjust_simple_font(fz_context *ctx, pdf_document *doc, font_usage_t *font)
  519. {
  520. int i;
  521. for (i = 0; i < font->len; i++)
  522. do_adjust_simple_font(ctx, doc, font, i);
  523. }
  524. static pdf_obj *
  525. get_fontdesc(fz_context *ctx, pdf_obj *font)
  526. {
  527. pdf_obj *fontdesc = pdf_dict_get(ctx, font, PDF_NAME(FontDescriptor));
  528. if (fontdesc)
  529. return fontdesc;
  530. return pdf_dict_get(ctx, pdf_array_get(ctx, pdf_dict_get(ctx, font, PDF_NAME(DescendantFonts)), 0), PDF_NAME(FontDescriptor));
  531. }
  532. static void
  533. prefix_font_name(fz_context *ctx, pdf_document *doc, pdf_obj *font, pdf_obj *file)
  534. {
  535. fz_buffer *buf;
  536. uint32_t digest[4], v;
  537. pdf_obj *fontdesc = get_fontdesc(ctx, font);
  538. const char *name = pdf_dict_get_name(ctx, fontdesc, PDF_NAME(FontName));
  539. char new_name[256];
  540. size_t len;
  541. /* If there is no name, just exit. Possibly should throw here. */
  542. if (name == NULL)
  543. return;
  544. len = strlen(name);
  545. if (len > 6 && name[6] == '+')
  546. return; /* Already a subset name */
  547. buf = pdf_load_stream(ctx, file);
  548. fz_md5_buffer(ctx, buf, (uint8_t *)digest);
  549. fz_drop_buffer(ctx, buf);
  550. v = digest[0] ^ digest[1] ^ digest[2] ^ digest[3];
  551. new_name[0] = 'A' + (v % 26);
  552. v /= 26;
  553. new_name[1] = 'A' + (v % 26);
  554. v /= 26;
  555. new_name[2] = 'A' + (v % 26);
  556. v /= 26;
  557. new_name[3] = 'A' + (v % 26);
  558. v /= 26;
  559. new_name[4] = 'A' + (v % 26);
  560. v /= 26;
  561. new_name[5] = 'A' + (v % 26);
  562. new_name[6] = '+';
  563. memcpy(new_name+7, name, len > sizeof(new_name)-8 ? sizeof(new_name)-8 : len+1);
  564. new_name[sizeof(new_name)-1] = 0;
  565. pdf_dict_put_name(ctx, fontdesc, PDF_NAME(FontName), new_name);
  566. }
  567. static int
  568. get_symbolic(fz_context *ctx, font_usage_t *font)
  569. {
  570. int i, flags, symbolic, symbolic2;
  571. pdf_obj *fontdesc;
  572. if (!font || font->len == 0)
  573. return 0;
  574. fontdesc = pdf_dict_get(ctx, font->font[0], PDF_NAME(FontDescriptor));
  575. flags = pdf_dict_get_int(ctx, fontdesc, PDF_NAME(Flags));
  576. symbolic = (!!(flags & 4)) | ((flags & 32) == 0);
  577. for (i = 1; i < font->len; i++)
  578. {
  579. fontdesc = pdf_dict_get(ctx, font->font[i], PDF_NAME(FontDescriptor));
  580. flags = pdf_dict_get_int(ctx, fontdesc, PDF_NAME(Flags));
  581. symbolic2 = (!!(flags & 4)) | ((flags & 32) == 0);
  582. if (symbolic != symbolic2)
  583. {
  584. fz_warn(ctx, "Font cannot be both symbolic and non-symbolic. Skipping subsetting.");
  585. return -1;
  586. }
  587. }
  588. return symbolic;
  589. }
  590. static pdf_obj *get_subtype(fz_context *ctx, font_usage_t *font)
  591. {
  592. /* If we can get the subtype from the fontfile, great. Use that. */
  593. pdf_obj *subtype = pdf_dict_get(ctx, font->fontfile, PDF_NAME(Subtype));
  594. int i;
  595. if (subtype != NULL)
  596. return subtype;
  597. /* Otherwise we'll have to get it from the font objects, and they'd
  598. * all better agree. */
  599. if (font->len == 0)
  600. return NULL;
  601. subtype = pdf_dict_get(ctx, font->font[0], PDF_NAME(Subtype));
  602. for (i = 1; i < font->len; i++)
  603. {
  604. pdf_obj *subtype2 = pdf_dict_get(ctx, font->font[i], PDF_NAME(Subtype));
  605. if (pdf_objcmp(ctx, subtype, subtype2))
  606. return NULL;
  607. }
  608. return subtype;
  609. }
  610. void
  611. pdf_subset_fonts(fz_context *ctx, pdf_document *doc, int len, const int *pages)
  612. {
  613. int i, j;
  614. pdf_page *page = NULL;
  615. fonts_usage_t usage = { 0 };
  616. fz_var(page);
  617. fz_try(ctx)
  618. {
  619. if (len == 0)
  620. {
  621. /* Process every page. */
  622. len = pdf_count_pages(ctx, doc);
  623. for (i = 0; i < len; i++)
  624. {
  625. page = pdf_load_page(ctx, doc, i);
  626. examine_page(ctx, doc, page, &usage);
  627. fz_drop_page(ctx, (fz_page *)page);
  628. page = NULL;
  629. }
  630. }
  631. else
  632. {
  633. /* Process just the pages we are given. */
  634. for (i = 0; i < len; i++)
  635. {
  636. page = pdf_load_page(ctx, doc, pages[i]);
  637. examine_page(ctx, doc, page, &usage);
  638. fz_drop_page(ctx, (fz_page *)page);
  639. page = NULL;
  640. }
  641. }
  642. /* All our font usage data is in heaps. Sort the heaps. */
  643. for (i = 0; i < usage.len; i++)
  644. {
  645. font_usage_t *font = &usage.font[i];
  646. fz_int_heap_sort(ctx, &font->cids);
  647. fz_int_heap_uniq(ctx, &font->cids);
  648. fz_int_heap_sort(ctx, &font->gids);
  649. fz_int_heap_uniq(ctx, &font->gids);
  650. }
  651. /* Now, actually subset the fonts. */
  652. for (i = 0; i < usage.len; i++)
  653. {
  654. font_usage_t *font = &usage.font[i];
  655. pdf_obj *subtype = get_subtype(ctx, font);
  656. int symbolic = get_symbolic(ctx, font);
  657. if (symbolic < 0)
  658. continue;
  659. /* Not sure this can ever happen, and if it does this is not a great
  660. * way to handle it, but it'll do for now. */
  661. if (font->gids.len == 0 || font->cids.len == 0 || subtype == NULL)
  662. continue;
  663. #ifdef DEBUG_SUBSETTING
  664. fz_write_printf(ctx, fz_stddbg(ctx), "font->obj=%d subtype=", pdf_to_num(ctx, font->fontfile));
  665. pdf_debug_obj(ctx, subtype);
  666. fz_write_printf(ctx, fz_stddbg(ctx), "\n");
  667. pdf_debug_obj(ctx, pdf_dict_get(ctx, font->font[0], PDF_NAME(FontDescriptor)));
  668. #endif
  669. /* If we hit a (non-SYSTEM) problem subsetting a font, give up for this font alone.
  670. * This will leave this font alone. */
  671. fz_try(ctx)
  672. {
  673. if (font->is_ttf)
  674. subset_ttf(ctx, doc, font, font->fontfile, symbolic, font->is_cidfont);
  675. else if (font->is_cidfont)
  676. subset_cff(ctx, doc, font, font->fontfile, symbolic, font->is_cidfont);
  677. }
  678. fz_catch(ctx)
  679. {
  680. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  681. fz_report_error(ctx);
  682. continue;
  683. }
  684. /* Any problems changing these parts of the fonts are really fatal though. */
  685. if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)) ||
  686. pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
  687. {
  688. adjust_simple_font(ctx, doc, font);
  689. }
  690. /* And prefix the name */
  691. for (j = 0; j < font->len; j++)
  692. prefix_font_name(ctx, doc, font->font[j], font->fontfile);
  693. }
  694. }
  695. fz_always(ctx)
  696. {
  697. fz_drop_page(ctx, (fz_page *)page);
  698. for (i = 0; i < usage.len; i++)
  699. {
  700. pdf_drop_obj(ctx, usage.font[i].fontfile);
  701. fz_free(ctx, usage.font[i].cids.heap);
  702. fz_free(ctx, usage.font[i].gids.heap);
  703. for (j = 0; j < usage.font[i].len; j++)
  704. pdf_drop_obj(ctx, usage.font[i].font[j]);
  705. fz_free(ctx, usage.font[i].font);
  706. }
  707. fz_free(ctx, usage.font);
  708. }
  709. fz_catch(ctx)
  710. fz_rethrow(ctx);
  711. }