pdf-write.c 80 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022
  1. // Copyright (C) 2004-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include "pdf-annot-imp.h"
  24. #include <zlib.h>
  25. #include <assert.h>
  26. #include <limits.h>
  27. #include <string.h>
  28. #include <stdio.h> /* for debug printing */
  29. /* #define DEBUG_HEAP_SORT */
  30. /* #define DEBUG_WRITING */
  31. /* #define DEBUG_MARK_AND_SWEEP */
  32. #define SIG_EXTRAS_SIZE (1024)
  33. #define SLASH_BYTE_RANGE ("/ByteRange")
  34. #define SLASH_CONTENTS ("/Contents")
  35. #define SLASH_FILTER ("/Filter")
  36. typedef struct
  37. {
  38. fz_output *out;
  39. int do_incremental;
  40. int do_tight;
  41. int do_ascii;
  42. int do_expand;
  43. int do_compress;
  44. int do_compress_images;
  45. int do_compress_fonts;
  46. int do_garbage;
  47. int do_clean;
  48. int do_encrypt;
  49. int dont_regenerate_id;
  50. int do_snapshot;
  51. int do_preserve_metadata;
  52. int do_use_objstms;
  53. int compression_effort;
  54. int list_len;
  55. int *use_list;
  56. int64_t *ofs_list;
  57. int *gen_list;
  58. int *renumber_map;
  59. pdf_object_labels *labels;
  60. int num_labels;
  61. char *obj_labels[100];
  62. int bias; /* when saving incrementally to a file with garbage before the version marker */
  63. int crypt_object_number;
  64. char opwd_utf8[128];
  65. char upwd_utf8[128];
  66. int permissions;
  67. pdf_crypt *crypt;
  68. pdf_obj *crypt_obj;
  69. pdf_obj *metadata;
  70. } pdf_write_state;
  71. static void
  72. expand_lists(fz_context *ctx, pdf_write_state *opts, int num)
  73. {
  74. int i;
  75. /* objects are numbered 0..num and maybe two additional objects for linearization */
  76. num += 3;
  77. if (num <= opts->list_len)
  78. return;
  79. opts->use_list = fz_realloc_array(ctx, opts->use_list, num, int);
  80. opts->ofs_list = fz_realloc_array(ctx, opts->ofs_list, num, int64_t);
  81. opts->gen_list = fz_realloc_array(ctx, opts->gen_list, num, int);
  82. opts->renumber_map = fz_realloc_array(ctx, opts->renumber_map, num, int);
  83. for (i = opts->list_len; i < num; i++)
  84. {
  85. opts->use_list[i] = 0;
  86. opts->ofs_list[i] = 0;
  87. opts->gen_list[i] = 0;
  88. opts->renumber_map[i] = i;
  89. }
  90. opts->list_len = num;
  91. }
  92. /*
  93. * Garbage collect objects not reachable from the trailer.
  94. */
  95. static void bake_stream_length(fz_context *ctx, pdf_document *doc, int num)
  96. {
  97. if (pdf_obj_num_is_stream(ctx, doc, num))
  98. {
  99. pdf_obj *len;
  100. pdf_obj *obj = NULL;
  101. fz_var(obj);
  102. fz_try(ctx)
  103. {
  104. obj = pdf_load_object(ctx, doc, num);
  105. len = pdf_dict_get(ctx, obj, PDF_NAME(Length));
  106. if (pdf_is_indirect(ctx, len))
  107. pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_to_int(ctx, len));
  108. }
  109. fz_always(ctx)
  110. pdf_drop_obj(ctx, obj);
  111. fz_catch(ctx)
  112. fz_rethrow(ctx);
  113. }
  114. }
  115. /* Mark a reference. If it's been marked already, return NULL (as no further
  116. * processing is required). If it's not, return the resolved object so
  117. * that we can continue our recursive marking. If it's a duff reference
  118. * return the fact so that we can remove the reference at source.
  119. */
  120. static pdf_obj *markref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj, int *duff)
  121. {
  122. int num = pdf_to_num(ctx, obj);
  123. int xref_len = pdf_xref_len(ctx, doc);
  124. if (num <= 0 || num >= xref_len)
  125. {
  126. *duff = 1;
  127. return NULL;
  128. }
  129. expand_lists(ctx, opts, xref_len);
  130. *duff = 0;
  131. if (opts->use_list[num])
  132. return NULL;
  133. opts->use_list[num] = 1;
  134. obj = pdf_resolve_indirect(ctx, obj);
  135. if (obj == NULL || pdf_is_null(ctx, obj))
  136. {
  137. *duff = 1;
  138. opts->use_list[num] = 0;
  139. }
  140. return obj;
  141. }
  142. #ifdef DEBUG_MARK_AND_SWEEP
  143. static int depth = 0;
  144. static
  145. void indent()
  146. {
  147. while (depth > 0)
  148. {
  149. int d = depth;
  150. if (d > 16)
  151. d = 16;
  152. printf("%s", &" "[16-d]);
  153. depth -= d;
  154. }
  155. }
  156. #define DEBUGGING_MARKING(A) do { A; } while (0)
  157. #else
  158. #define DEBUGGING_MARKING(A) do { } while (0)
  159. #endif
  160. /* Recursively mark an object. If any references found are duff, then
  161. * replace them with nulls. */
  162. static int markobj(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj)
  163. {
  164. int i;
  165. DEBUGGING_MARKING(depth++);
  166. while (pdf_is_indirect(ctx, obj))
  167. {
  168. int duff;
  169. DEBUGGING_MARKING(indent(); printf("Marking object %d\n", pdf_to_num(ctx, obj)));
  170. obj = markref(ctx, doc, opts, obj, &duff);
  171. if (duff)
  172. {
  173. DEBUGGING_MARKING(depth--);
  174. return 1;
  175. }
  176. }
  177. if (pdf_is_dict(ctx, obj))
  178. {
  179. int n = pdf_dict_len(ctx, obj);
  180. for (i = 0; i < n; i++)
  181. {
  182. DEBUGGING_MARKING(indent(); printf("DICT[%d/%d] = %s\n", i, n, pdf_to_name(ctx, pdf_dict_get_key(ctx, obj, i))));
  183. if (markobj(ctx, doc, opts, pdf_dict_get_val(ctx, obj, i)))
  184. pdf_dict_put_val_null(ctx, obj, i);
  185. }
  186. }
  187. else if (pdf_is_array(ctx, obj))
  188. {
  189. int n = pdf_array_len(ctx, obj);
  190. for (i = 0; i < n; i++)
  191. {
  192. DEBUGGING_MARKING(indent(); printf("ARRAY[%d/%d]\n", i, n));
  193. if (markobj(ctx, doc, opts, pdf_array_get(ctx, obj, i)))
  194. pdf_array_put(ctx, obj, i, PDF_NULL);
  195. }
  196. }
  197. DEBUGGING_MARKING(depth--);
  198. return 0;
  199. }
  200. /*
  201. * Scan for and remove duplicate objects (slow)
  202. */
  203. static int removeduplicateobjs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
  204. {
  205. int num, other;
  206. int xref_len = pdf_xref_len(ctx, doc);
  207. int changed = 0;
  208. expand_lists(ctx, opts, xref_len);
  209. for (num = 1; num < xref_len; num++)
  210. {
  211. /* Only compare an object to objects preceding it */
  212. for (other = 1; other < num; other++)
  213. {
  214. pdf_obj *a, *b;
  215. int newnum;
  216. if (num == other || num >= opts->list_len || !opts->use_list[num] || !opts->use_list[other])
  217. continue;
  218. /* TODO: resolve indirect references to see if we can omit them */
  219. a = pdf_get_xref_entry_no_null(ctx, doc, num)->obj;
  220. b = pdf_get_xref_entry_no_null(ctx, doc, other)->obj;
  221. if (opts->do_garbage >= 4)
  222. {
  223. if (pdf_objcmp_deep(ctx, a, b))
  224. continue;
  225. }
  226. else
  227. {
  228. if (pdf_objcmp(ctx, a, b))
  229. continue;
  230. }
  231. /* Keep the lowest numbered object */
  232. newnum = fz_mini(num, other);
  233. opts->renumber_map[num] = newnum;
  234. opts->renumber_map[other] = newnum;
  235. opts->use_list[fz_maxi(num, other)] = 0;
  236. /* One duplicate was found, do not look for another */
  237. changed = 1;
  238. break;
  239. }
  240. }
  241. return changed;
  242. }
  243. /*
  244. * Renumber objects sequentially so the xref is more compact
  245. *
  246. * This code assumes that any opts->renumber_map[n] <= n for all n.
  247. */
  248. static void compactxref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
  249. {
  250. int num, newnum;
  251. int xref_len = pdf_xref_len(ctx, doc);
  252. /*
  253. * Update renumber_map in-place, clustering all used
  254. * objects together at low object ids. Objects that
  255. * already should be renumbered will have their new
  256. * object ids be updated to reflect the compaction.
  257. */
  258. if (xref_len > opts->list_len)
  259. expand_lists(ctx, opts, xref_len-1);
  260. newnum = 1;
  261. for (num = 1; num < xref_len; num++)
  262. {
  263. /* If it's not used, map it to zero */
  264. if (!opts->use_list[opts->renumber_map[num]])
  265. {
  266. opts->renumber_map[num] = 0;
  267. }
  268. /* If it's not moved, compact it. */
  269. else if (opts->renumber_map[num] == num)
  270. {
  271. opts->renumber_map[num] = newnum++;
  272. }
  273. /* Otherwise it's used, and moved. We know that it must have
  274. * moved down, so the place it's moved to will be in the right
  275. * place already. */
  276. else
  277. {
  278. opts->renumber_map[num] = opts->renumber_map[opts->renumber_map[num]];
  279. }
  280. }
  281. }
  282. /*
  283. * Update indirect objects according to renumbering established when
  284. * removing duplicate objects and compacting the xref.
  285. */
  286. static void renumberobj(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj)
  287. {
  288. int i;
  289. int xref_len = pdf_xref_len(ctx, doc);
  290. if (pdf_is_dict(ctx, obj))
  291. {
  292. int n = pdf_dict_len(ctx, obj);
  293. for (i = 0; i < n; i++)
  294. {
  295. pdf_obj *key = pdf_dict_get_key(ctx, obj, i);
  296. pdf_obj *val = pdf_dict_get_val(ctx, obj, i);
  297. if (pdf_is_indirect(ctx, val))
  298. {
  299. int o = pdf_to_num(ctx, val);
  300. if (o >= xref_len || o <= 0 || opts->renumber_map[o] == 0)
  301. val = PDF_NULL;
  302. else
  303. val = pdf_new_indirect(ctx, doc, opts->renumber_map[o], 0);
  304. pdf_dict_put_drop(ctx, obj, key, val);
  305. }
  306. else
  307. {
  308. renumberobj(ctx, doc, opts, val);
  309. }
  310. }
  311. }
  312. else if (pdf_is_array(ctx, obj))
  313. {
  314. int n = pdf_array_len(ctx, obj);
  315. for (i = 0; i < n; i++)
  316. {
  317. pdf_obj *val = pdf_array_get(ctx, obj, i);
  318. if (pdf_is_indirect(ctx, val))
  319. {
  320. int o = pdf_to_num(ctx, val);
  321. if (o >= xref_len || o <= 0 || opts->renumber_map[o] == 0)
  322. val = PDF_NULL;
  323. else
  324. val = pdf_new_indirect(ctx, doc, opts->renumber_map[o], 0);
  325. pdf_array_put_drop(ctx, obj, i, val);
  326. }
  327. else
  328. {
  329. renumberobj(ctx, doc, opts, val);
  330. }
  331. }
  332. }
  333. }
  334. static void renumberobjs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
  335. {
  336. pdf_xref_entry *newxref = NULL;
  337. int newlen;
  338. int num;
  339. int *new_use_list;
  340. int xref_len = pdf_xref_len(ctx, doc);
  341. expand_lists(ctx, opts, xref_len);
  342. new_use_list = fz_calloc(ctx, opts->list_len, sizeof(int));
  343. fz_var(newxref);
  344. fz_try(ctx)
  345. {
  346. /* Apply renumber map to indirect references in all objects in xref */
  347. renumberobj(ctx, doc, opts, pdf_trailer(ctx, doc));
  348. for (num = 0; num < xref_len; num++)
  349. {
  350. pdf_obj *obj;
  351. int to = opts->renumber_map[num];
  352. /* If object is going to be dropped, don't bother renumbering */
  353. if (to == 0)
  354. continue;
  355. obj = pdf_get_xref_entry_no_null(ctx, doc, num)->obj;
  356. if (pdf_is_indirect(ctx, obj))
  357. {
  358. obj = pdf_new_indirect(ctx, doc, to, 0);
  359. fz_try(ctx)
  360. pdf_update_object(ctx, doc, num, obj);
  361. fz_always(ctx)
  362. pdf_drop_obj(ctx, obj);
  363. fz_catch(ctx)
  364. fz_rethrow(ctx);
  365. }
  366. else
  367. {
  368. renumberobj(ctx, doc, opts, obj);
  369. }
  370. }
  371. /* Create new table for the reordered, compacted xref */
  372. newxref = Memento_label(fz_malloc_array(ctx, xref_len + 3, pdf_xref_entry), "pdf_xref_entries");
  373. newxref[0] = *pdf_get_xref_entry_no_null(ctx, doc, 0);
  374. /* Move used objects into the new compacted xref */
  375. newlen = 0;
  376. for (num = 1; num < xref_len; num++)
  377. {
  378. if (opts->use_list[num])
  379. {
  380. pdf_xref_entry *e;
  381. if (newlen < opts->renumber_map[num])
  382. newlen = opts->renumber_map[num];
  383. e = pdf_get_xref_entry_no_null(ctx, doc, num);
  384. newxref[opts->renumber_map[num]] = *e;
  385. if (e->obj)
  386. pdf_set_obj_parent(ctx, e->obj, opts->renumber_map[num]);
  387. e->obj = NULL;
  388. e->stm_buf = NULL;
  389. new_use_list[opts->renumber_map[num]] = opts->use_list[num];
  390. }
  391. else
  392. {
  393. pdf_xref_entry *e = pdf_get_xref_entry_no_null(ctx, doc, num);
  394. pdf_drop_obj(ctx, e->obj);
  395. e->obj = NULL;
  396. fz_drop_buffer(ctx, e->stm_buf);
  397. e->stm_buf = NULL;
  398. }
  399. }
  400. pdf_replace_xref(ctx, doc, newxref, newlen + 1);
  401. newxref = NULL;
  402. }
  403. fz_catch(ctx)
  404. {
  405. fz_free(ctx, newxref);
  406. fz_free(ctx, new_use_list);
  407. fz_rethrow(ctx);
  408. }
  409. fz_free(ctx, opts->use_list);
  410. opts->use_list = new_use_list;
  411. for (num = 1; num < xref_len; num++)
  412. {
  413. opts->renumber_map[num] = num;
  414. }
  415. }
  416. /*
  417. * Make sure we have loaded objects from object streams.
  418. */
  419. static void preloadobjstms(fz_context *ctx, pdf_document *doc)
  420. {
  421. pdf_obj *obj;
  422. int num;
  423. pdf_xref_entry *x = NULL;
  424. int load = 1;
  425. /* If we have attempted a repair, then everything will have been
  426. * loaded already. */
  427. if (doc->repair_attempted)
  428. {
  429. /* Bug 707112: But we do need to mark all our 'o' objects as being something else. */
  430. load = 0;
  431. }
  432. fz_var(num);
  433. fz_var(x);
  434. /* xref_len may change due to repair, so check it every iteration */
  435. for (num = 0; num < pdf_xref_len(ctx, doc); num++)
  436. {
  437. fz_try(ctx)
  438. {
  439. for (; num < pdf_xref_len(ctx, doc); num++)
  440. {
  441. x = pdf_get_xref_entry_no_null(ctx, doc, num);
  442. if (x->type == 'o')
  443. {
  444. if (load)
  445. {
  446. obj = pdf_load_object(ctx, doc, num);
  447. pdf_drop_obj(ctx, obj);
  448. }
  449. /* The object is no longer an objstm one. It's a regular object
  450. * held in memory. Previously we used gen to hold the index of
  451. * the obj in the objstm, so reset this to 0. */
  452. x->type = 'n';
  453. x->gen = 0;
  454. }
  455. x = NULL;
  456. }
  457. }
  458. fz_catch(ctx)
  459. {
  460. /* We need to clear the type even in the event of an error, lest we
  461. * hit an assert later. Bug 707110. */
  462. if (x && x->type == 'o')
  463. {
  464. x->type = 'f';
  465. x->gen = 0;
  466. }
  467. /* Ignore the error, so we can carry on trying to load. */
  468. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  469. fz_report_error(ctx);
  470. }
  471. }
  472. }
  473. /*
  474. * Save streams and objects to the output
  475. */
  476. static int is_bitmap_stream(fz_context *ctx, pdf_obj *obj, size_t len, int *w, int *h)
  477. {
  478. pdf_obj *bpc;
  479. pdf_obj *cs;
  480. int stride;
  481. if (pdf_dict_get(ctx, obj, PDF_NAME(Subtype)) != PDF_NAME(Image))
  482. return 0;
  483. *w = pdf_dict_get_int(ctx, obj, PDF_NAME(Width));
  484. *h = pdf_dict_get_int(ctx, obj, PDF_NAME(Height));
  485. stride = (*w + 7) >> 3;
  486. if ((size_t)stride * (*h) != len)
  487. return 0;
  488. if (pdf_dict_get_bool(ctx, obj, PDF_NAME(ImageMask)))
  489. {
  490. return 1;
  491. }
  492. else
  493. {
  494. bpc = pdf_dict_get(ctx, obj, PDF_NAME(BitsPerComponent));
  495. if (!pdf_is_int(ctx, bpc))
  496. return 0;
  497. if (pdf_to_int(ctx, bpc) != 1)
  498. return 0;
  499. cs = pdf_dict_get(ctx, obj, PDF_NAME(ColorSpace));
  500. if (!pdf_name_eq(ctx, cs, PDF_NAME(DeviceGray)))
  501. return 0;
  502. return 1;
  503. }
  504. }
  505. static inline int isbinary(int c)
  506. {
  507. if (c == '\n' || c == '\r' || c == '\t')
  508. return 0;
  509. return c < 32 || c > 127;
  510. }
  511. static int isbinarystream(fz_context *ctx, const unsigned char *data, size_t len)
  512. {
  513. size_t i;
  514. for (i = 0; i < len; i++)
  515. if (isbinary(data[i]))
  516. return 1;
  517. return 0;
  518. }
  519. static fz_buffer *hexbuf(fz_context *ctx, const unsigned char *p, size_t n)
  520. {
  521. static const char hex[17] = "0123456789abcdef";
  522. int x = 0;
  523. size_t len = n * 2 + (n / 32) + 1;
  524. unsigned char *data = Memento_label(fz_malloc(ctx, len), "hexbuf");
  525. fz_buffer *buf = fz_new_buffer_from_data(ctx, data, len);
  526. while (n--)
  527. {
  528. *data++ = hex[*p >> 4];
  529. *data++ = hex[*p & 15];
  530. if (++x == 32)
  531. {
  532. *data++ = '\n';
  533. x = 0;
  534. }
  535. p++;
  536. }
  537. *data++ = '>';
  538. return buf;
  539. }
  540. static void addhexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
  541. {
  542. pdf_obj *f, *dp, *newf, *newdp;
  543. newf = newdp = NULL;
  544. f = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
  545. dp = pdf_dict_get(ctx, dict, PDF_NAME(DecodeParms));
  546. fz_var(newf);
  547. fz_var(newdp);
  548. fz_try(ctx)
  549. {
  550. if (pdf_is_name(ctx, f))
  551. {
  552. newf = pdf_new_array(ctx, doc, 2);
  553. pdf_array_push(ctx, newf, PDF_NAME(ASCIIHexDecode));
  554. pdf_array_push(ctx, newf, f);
  555. f = newf;
  556. if (pdf_is_dict(ctx, dp))
  557. {
  558. newdp = pdf_new_array(ctx, doc, 2);
  559. pdf_array_push(ctx, newdp, PDF_NULL);
  560. pdf_array_push(ctx, newdp, dp);
  561. dp = newdp;
  562. }
  563. }
  564. else if (pdf_is_array(ctx, f))
  565. {
  566. pdf_array_insert(ctx, f, PDF_NAME(ASCIIHexDecode), 0);
  567. if (pdf_is_array(ctx, dp))
  568. pdf_array_insert(ctx, dp, PDF_NULL, 0);
  569. }
  570. else
  571. f = PDF_NAME(ASCIIHexDecode);
  572. pdf_dict_put(ctx, dict, PDF_NAME(Filter), f);
  573. if (dp)
  574. pdf_dict_put(ctx, dict, PDF_NAME(DecodeParms), dp);
  575. }
  576. fz_always(ctx)
  577. {
  578. pdf_drop_obj(ctx, newf);
  579. pdf_drop_obj(ctx, newdp);
  580. }
  581. fz_catch(ctx)
  582. fz_rethrow(ctx);
  583. }
  584. static fz_buffer *deflatebuf(fz_context *ctx, const unsigned char *p, size_t n, int effort)
  585. {
  586. fz_buffer *buf;
  587. uLongf csize;
  588. int t;
  589. uLong longN = (uLong)n;
  590. unsigned char *data;
  591. size_t cap;
  592. int mode;
  593. if (n != (size_t)longN)
  594. fz_throw(ctx, FZ_ERROR_LIMIT, "Buffer too large to deflate");
  595. cap = compressBound(longN);
  596. data = Memento_label(fz_malloc(ctx, cap), "pdf_write_deflate");
  597. buf = fz_new_buffer_from_data(ctx, data, cap);
  598. csize = (uLongf)cap;
  599. if (effort == 0)
  600. mode = Z_DEFAULT_COMPRESSION;
  601. else
  602. mode = effort * Z_BEST_COMPRESSION / 100;
  603. t = compress2(data, &csize, p, longN, mode);
  604. if (t != Z_OK)
  605. {
  606. fz_drop_buffer(ctx, buf);
  607. fz_throw(ctx, FZ_ERROR_LIBRARY, "cannot deflate buffer");
  608. }
  609. fz_try(ctx)
  610. fz_resize_buffer(ctx, buf, csize);
  611. fz_catch(ctx)
  612. {
  613. fz_drop_buffer(ctx, buf);
  614. fz_rethrow(ctx);
  615. }
  616. return buf;
  617. }
  618. static int striphexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
  619. {
  620. pdf_obj *f, *dp;
  621. int is_hex = 0;
  622. f = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
  623. dp = pdf_dict_get(ctx, dict, PDF_NAME(DecodeParms));
  624. if (pdf_is_array(ctx, f))
  625. {
  626. /* Remove ASCIIHexDecode from head of filter list */
  627. if (pdf_array_get(ctx, f, 0) == PDF_NAME(ASCIIHexDecode))
  628. {
  629. is_hex = 1;
  630. pdf_array_delete(ctx, f, 0);
  631. if (pdf_is_array(ctx, dp))
  632. pdf_array_delete(ctx, dp, 0);
  633. }
  634. /* Unpack array if only one filter remains */
  635. if (pdf_array_len(ctx, f) == 1)
  636. {
  637. f = pdf_array_get(ctx, f, 0);
  638. pdf_dict_put(ctx, dict, PDF_NAME(Filter), f);
  639. if (dp)
  640. {
  641. dp = pdf_array_get(ctx, dp, 0);
  642. pdf_dict_put(ctx, dict, PDF_NAME(DecodeParms), dp);
  643. }
  644. }
  645. /* Remove array if no filters remain */
  646. else if (pdf_array_len(ctx, f) == 0)
  647. {
  648. pdf_dict_del(ctx, dict, PDF_NAME(Filter));
  649. pdf_dict_del(ctx, dict, PDF_NAME(DecodeParms));
  650. }
  651. }
  652. else if (f == PDF_NAME(ASCIIHexDecode))
  653. {
  654. is_hex = 1;
  655. pdf_dict_del(ctx, dict, PDF_NAME(Filter));
  656. pdf_dict_del(ctx, dict, PDF_NAME(DecodeParms));
  657. }
  658. return is_hex;
  659. }
  660. static fz_buffer *unhexbuf(fz_context *ctx, const unsigned char *p, size_t n)
  661. {
  662. fz_stream *mstm = NULL;
  663. fz_stream *xstm = NULL;
  664. fz_buffer *out = NULL;
  665. fz_var(mstm);
  666. fz_var(xstm);
  667. fz_try(ctx)
  668. {
  669. mstm = fz_open_memory(ctx, p, n);
  670. xstm = fz_open_ahxd(ctx, mstm);
  671. out = fz_read_all(ctx, xstm, n/2);
  672. }
  673. fz_always(ctx)
  674. {
  675. fz_drop_stream(ctx, xstm);
  676. fz_drop_stream(ctx, mstm);
  677. }
  678. fz_catch(ctx)
  679. fz_rethrow(ctx);
  680. return out;
  681. }
  682. static void write_data(fz_context *ctx, void *arg, const unsigned char *data, size_t len)
  683. {
  684. fz_write_data(ctx, (fz_output *)arg, data, len);
  685. }
  686. static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate, int unenc)
  687. {
  688. fz_buffer *tmp_unhex = NULL, *tmp_comp = NULL, *tmp_hex = NULL, *buf = NULL;
  689. pdf_obj *obj = NULL;
  690. pdf_obj *dp;
  691. size_t len;
  692. unsigned char *data;
  693. int w, h;
  694. fz_var(buf);
  695. fz_var(tmp_comp);
  696. fz_var(tmp_hex);
  697. fz_var(obj);
  698. fz_try(ctx)
  699. {
  700. buf = pdf_load_raw_stream_number(ctx, doc, num);
  701. obj = pdf_copy_dict(ctx, obj_orig);
  702. len = fz_buffer_storage(ctx, buf, &data);
  703. if (do_deflate && striphexfilter(ctx, doc, obj))
  704. {
  705. tmp_unhex = unhexbuf(ctx, data, len);
  706. len = fz_buffer_storage(ctx, tmp_unhex, &data);
  707. }
  708. if (do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME(Filter)))
  709. {
  710. if (is_bitmap_stream(ctx, obj, len, &w, &h))
  711. {
  712. tmp_comp = fz_compress_ccitt_fax_g4(ctx, data, w, h, (w+7)>>3);
  713. pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(CCITTFaxDecode));
  714. dp = pdf_dict_put_dict(ctx, obj, PDF_NAME(DecodeParms), 1);
  715. pdf_dict_put_int(ctx, dp, PDF_NAME(K), -1);
  716. pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), w);
  717. }
  718. else if (do_deflate == 1)
  719. {
  720. tmp_comp = deflatebuf(ctx, data, len, opts->compression_effort);
  721. pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
  722. }
  723. else
  724. {
  725. size_t comp_len;
  726. int mode = (opts->compression_effort == 0 ? FZ_BROTLI_DEFAULT :
  727. FZ_BROTLI_BEST * opts->compression_effort / 100);
  728. unsigned char *comp_data = fz_new_brotli_data(ctx, &comp_len, data, len, mode);
  729. tmp_comp = fz_new_buffer_from_data(ctx, comp_data, comp_len);
  730. pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(BrotliDecode));
  731. }
  732. len = fz_buffer_storage(ctx, tmp_comp, &data);
  733. }
  734. if (opts->do_ascii && isbinarystream(ctx, data, len))
  735. {
  736. tmp_hex = hexbuf(ctx, data, len);
  737. len = fz_buffer_storage(ctx, tmp_hex, &data);
  738. addhexfilter(ctx, doc, obj);
  739. }
  740. fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
  741. if (unenc)
  742. {
  743. pdf_dict_put_int(ctx, obj, PDF_NAME(Length), len);
  744. pdf_print_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii);
  745. fz_write_string(ctx, opts->out, "\nstream\n");
  746. fz_write_data(ctx, opts->out, data, len);
  747. }
  748. else
  749. {
  750. pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_encrypted_len(ctx, opts->crypt, num, gen, len));
  751. pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, opts->crypt, num, gen, NULL);
  752. fz_write_string(ctx, opts->out, "\nstream\n");
  753. pdf_encrypt_data(ctx, opts->crypt, num, gen, write_data, opts->out, data, len);
  754. }
  755. fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
  756. }
  757. fz_always(ctx)
  758. {
  759. fz_drop_buffer(ctx, tmp_unhex);
  760. fz_drop_buffer(ctx, tmp_hex);
  761. fz_drop_buffer(ctx, tmp_comp);
  762. fz_drop_buffer(ctx, buf);
  763. pdf_drop_obj(ctx, obj);
  764. }
  765. fz_catch(ctx)
  766. {
  767. fz_rethrow(ctx);
  768. }
  769. }
  770. static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate, int unenc)
  771. {
  772. fz_buffer *buf = NULL, *tmp_comp = NULL, *tmp_hex = NULL;
  773. pdf_obj *obj = NULL;
  774. pdf_obj *dp;
  775. size_t len;
  776. unsigned char *data;
  777. int w, h;
  778. fz_var(buf);
  779. fz_var(tmp_comp);
  780. fz_var(tmp_hex);
  781. fz_var(obj);
  782. fz_try(ctx)
  783. {
  784. buf = pdf_load_stream_number(ctx, doc, num);
  785. obj = pdf_copy_dict(ctx, obj_orig);
  786. pdf_dict_del(ctx, obj, PDF_NAME(Filter));
  787. pdf_dict_del(ctx, obj, PDF_NAME(DecodeParms));
  788. len = fz_buffer_storage(ctx, buf, &data);
  789. if (do_deflate)
  790. {
  791. if (is_bitmap_stream(ctx, obj, len, &w, &h))
  792. {
  793. tmp_comp = fz_compress_ccitt_fax_g4(ctx, data, w, h, (w+7)>>3);
  794. pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(CCITTFaxDecode));
  795. dp = pdf_dict_put_dict(ctx, obj, PDF_NAME(DecodeParms), 1);
  796. pdf_dict_put_int(ctx, dp, PDF_NAME(K), -1);
  797. pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), w);
  798. }
  799. else if (do_deflate == 1)
  800. {
  801. tmp_comp = deflatebuf(ctx, data, len, opts->compression_effort);
  802. pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
  803. }
  804. else
  805. {
  806. size_t comp_len;
  807. int mode = (opts->compression_effort == 0 ? FZ_BROTLI_DEFAULT :
  808. FZ_BROTLI_BEST * opts->compression_effort / 100);
  809. unsigned char *comp_data = fz_new_brotli_data(ctx, &comp_len, data, len, mode);
  810. tmp_comp = fz_new_buffer_from_data(ctx, comp_data, comp_len);
  811. pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(BrotliDecode));
  812. }
  813. len = fz_buffer_storage(ctx, tmp_comp, &data);
  814. }
  815. if (opts->do_ascii && isbinarystream(ctx, data, len))
  816. {
  817. tmp_hex = hexbuf(ctx, data, len);
  818. len = fz_buffer_storage(ctx, tmp_hex, &data);
  819. addhexfilter(ctx, doc, obj);
  820. }
  821. fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
  822. if (unenc)
  823. {
  824. pdf_dict_put_int(ctx, obj, PDF_NAME(Length), len);
  825. pdf_print_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii);
  826. fz_write_string(ctx, opts->out, "\nstream\n");
  827. fz_write_data(ctx, opts->out, data, len);
  828. }
  829. else
  830. {
  831. pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_encrypted_len(ctx, opts->crypt, num, gen, (int)len));
  832. pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, opts->crypt, num, gen, NULL);
  833. fz_write_string(ctx, opts->out, "\nstream\n");
  834. pdf_encrypt_data(ctx, opts->crypt, num, gen, write_data, opts->out, data, len);
  835. }
  836. fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
  837. }
  838. fz_always(ctx)
  839. {
  840. fz_drop_buffer(ctx, tmp_hex);
  841. fz_drop_buffer(ctx, tmp_comp);
  842. fz_drop_buffer(ctx, buf);
  843. pdf_drop_obj(ctx, obj);
  844. }
  845. fz_catch(ctx)
  846. {
  847. fz_rethrow(ctx);
  848. }
  849. }
  850. static int is_image_filter(pdf_obj *s)
  851. {
  852. return
  853. s == PDF_NAME(CCITTFaxDecode) || s == PDF_NAME(CCF) ||
  854. s == PDF_NAME(DCTDecode) || s == PDF_NAME(DCT) ||
  855. s == PDF_NAME(RunLengthDecode) || s == PDF_NAME(RL) ||
  856. s == PDF_NAME(JBIG2Decode) ||
  857. s == PDF_NAME(JPXDecode);
  858. }
  859. static int filter_implies_image(fz_context *ctx, pdf_obj *o)
  860. {
  861. if (pdf_is_name(ctx, o))
  862. return is_image_filter(o);
  863. if (pdf_is_array(ctx, o))
  864. {
  865. int i, len;
  866. len = pdf_array_len(ctx, o);
  867. for (i = 0; i < len; i++)
  868. if (is_image_filter(pdf_array_get(ctx, o, i)))
  869. return 1;
  870. }
  871. return 0;
  872. }
  873. static int is_jpx_filter(fz_context *ctx, pdf_obj *o)
  874. {
  875. if (o == PDF_NAME(JPXDecode))
  876. return 1;
  877. if (pdf_is_array(ctx, o))
  878. {
  879. int i, len;
  880. len = pdf_array_len(ctx, o);
  881. for (i = 0; i < len; i++)
  882. if (pdf_array_get(ctx, o, i) == PDF_NAME(JPXDecode))
  883. return 1;
  884. }
  885. return 0;
  886. }
  887. int pdf_is_image_stream(fz_context *ctx, pdf_obj *obj)
  888. {
  889. pdf_obj *o;
  890. if ((o = pdf_dict_get(ctx, obj, PDF_NAME(Type)), pdf_name_eq(ctx, o, PDF_NAME(XObject))))
  891. if ((o = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), pdf_name_eq(ctx, o, PDF_NAME(Image))))
  892. return 1;
  893. if (o = pdf_dict_get(ctx, obj, PDF_NAME(Filter)), filter_implies_image(ctx, o))
  894. return 1;
  895. if (pdf_dict_get(ctx, obj, PDF_NAME(Width)) != NULL && pdf_dict_get(ctx, obj, PDF_NAME(Height)) != NULL)
  896. return 1;
  897. return 0;
  898. }
  899. static int is_font_stream(fz_context *ctx, pdf_obj *obj)
  900. {
  901. pdf_obj *o;
  902. if (o = pdf_dict_get(ctx, obj, PDF_NAME(Type)), pdf_name_eq(ctx, o, PDF_NAME(Font)))
  903. return 1;
  904. if (o = pdf_dict_get(ctx, obj, PDF_NAME(Type)), pdf_name_eq(ctx, o, PDF_NAME(FontDescriptor)))
  905. return 1;
  906. if (pdf_dict_get(ctx, obj, PDF_NAME(Length1)) != NULL)
  907. return 1;
  908. if (pdf_dict_get(ctx, obj, PDF_NAME(Length2)) != NULL)
  909. return 1;
  910. if (pdf_dict_get(ctx, obj, PDF_NAME(Length3)) != NULL)
  911. return 1;
  912. if (o = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), pdf_name_eq(ctx, o, PDF_NAME(Type1C)))
  913. return 1;
  914. if (o = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), pdf_name_eq(ctx, o, PDF_NAME(CIDFontType0C)))
  915. return 1;
  916. return 0;
  917. }
  918. static int is_jpx_stream(fz_context *ctx, pdf_obj *obj)
  919. {
  920. pdf_obj *o;
  921. if (o = pdf_dict_get(ctx, obj, PDF_NAME(Filter)), is_jpx_filter(ctx, o))
  922. return 1;
  923. return 0;
  924. }
  925. static int is_xml_metadata(fz_context *ctx, pdf_obj *obj)
  926. {
  927. if (pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Type)), PDF_NAME(Metadata)))
  928. if (pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), PDF_NAME(XML)))
  929. return 1;
  930. return 0;
  931. }
  932. static void writelabel(fz_context *ctx, void *arg, const char *label)
  933. {
  934. pdf_write_state *opts = arg;
  935. if (opts->num_labels < (int)nelem(opts->obj_labels))
  936. opts->obj_labels[opts->num_labels++] = fz_strdup(ctx, label);
  937. }
  938. static int labelcmp(const void *aa, const void *bb)
  939. {
  940. return fz_strverscmp(*(const char **)aa, *(const char **)bb);
  941. }
  942. static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num, int gen, int skip_xrefs, int unenc)
  943. {
  944. pdf_obj *obj = NULL;
  945. fz_buffer *buf = NULL;
  946. int do_deflate = 0;
  947. int do_expand = 0;
  948. int skip = 0;
  949. int i;
  950. fz_var(obj);
  951. fz_var(buf);
  952. if (opts->do_encrypt == PDF_ENCRYPT_NONE)
  953. unenc = 1;
  954. fz_try(ctx)
  955. {
  956. obj = pdf_load_object(ctx, doc, num);
  957. /* skip ObjStm and XRef objects */
  958. if (pdf_is_dict(ctx, obj))
  959. {
  960. pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME(Type));
  961. if (type == PDF_NAME(ObjStm) && !opts->do_use_objstms)
  962. {
  963. if (opts->use_list)
  964. opts->use_list[num] = 0;
  965. skip = 1;
  966. }
  967. if (skip_xrefs && type == PDF_NAME(XRef))
  968. {
  969. if (opts->use_list)
  970. opts->use_list[num] = 0;
  971. skip = 1;
  972. }
  973. }
  974. if (!skip)
  975. {
  976. if (opts->labels)
  977. {
  978. opts->num_labels = 0;
  979. pdf_label_object(ctx, opts->labels, num, writelabel, opts);
  980. if (opts->num_labels == 0)
  981. {
  982. fz_write_string(ctx, opts->out, "% unused\n");
  983. }
  984. else
  985. {
  986. qsort(opts->obj_labels, opts->num_labels, sizeof(char*), labelcmp);
  987. for (i = 0; i < opts->num_labels; ++i)
  988. {
  989. fz_write_printf(ctx, opts->out, "%% %s\n", opts->obj_labels[i]);
  990. fz_free(ctx, opts->obj_labels[i]);
  991. opts->obj_labels[i] = NULL;
  992. }
  993. }
  994. }
  995. if (pdf_obj_num_is_stream(ctx, doc, num))
  996. {
  997. do_deflate = opts->do_compress;
  998. do_expand = opts->do_expand;
  999. if (opts->do_compress_images && pdf_is_image_stream(ctx, obj))
  1000. do_deflate = opts->do_compress ? opts->do_compress : 1, do_expand = 0;
  1001. if (opts->do_compress_fonts && is_font_stream(ctx, obj))
  1002. do_deflate = opts->do_compress ? opts->do_compress : 1, do_expand = 0;
  1003. if (is_xml_metadata(ctx, obj))
  1004. do_deflate = 0, do_expand = 0;
  1005. if (is_jpx_stream(ctx, obj))
  1006. do_deflate = 0, do_expand = 0;
  1007. if (do_expand)
  1008. expandstream(ctx, doc, opts, obj, num, gen, do_deflate, unenc);
  1009. else
  1010. copystream(ctx, doc, opts, obj, num, gen, do_deflate, unenc);
  1011. }
  1012. else
  1013. {
  1014. fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
  1015. pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, unenc ? NULL : opts->crypt, num, gen, NULL);
  1016. fz_write_string(ctx, opts->out, "\nendobj\n\n");
  1017. }
  1018. }
  1019. }
  1020. fz_always(ctx)
  1021. {
  1022. for (i = 0; i < opts->num_labels; ++i)
  1023. {
  1024. fz_free(ctx, opts->obj_labels[i]);
  1025. opts->obj_labels[i] = NULL;
  1026. }
  1027. fz_drop_buffer(ctx, buf);
  1028. pdf_drop_obj(ctx, obj);
  1029. }
  1030. fz_catch(ctx)
  1031. {
  1032. fz_rethrow(ctx);
  1033. }
  1034. }
  1035. static void writexrefsubsect(fz_context *ctx, pdf_write_state *opts, int from, int to)
  1036. {
  1037. int num;
  1038. fz_write_printf(ctx, opts->out, "%d %d\n", from, to - from);
  1039. for (num = from; num < to; num++)
  1040. {
  1041. if (opts->use_list[num])
  1042. fz_write_printf(ctx, opts->out, "%010lu %05d n \n", opts->ofs_list[num] - opts->bias, opts->gen_list[num]);
  1043. else
  1044. fz_write_printf(ctx, opts->out, "%010lu %05d f \n", opts->ofs_list[num] - opts->bias, opts->gen_list[num]);
  1045. }
  1046. }
  1047. static void writexref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int from, int to, int first, int64_t startxref)
  1048. {
  1049. pdf_obj *trailer = NULL;
  1050. pdf_obj *obj;
  1051. fz_write_string(ctx, opts->out, "xref\n");
  1052. if (opts->do_incremental)
  1053. {
  1054. int subfrom = from;
  1055. int subto;
  1056. while (subfrom < to)
  1057. {
  1058. while (subfrom < to && !pdf_xref_is_incremental(ctx, doc, subfrom))
  1059. subfrom++;
  1060. subto = subfrom;
  1061. while (subto < to && pdf_xref_is_incremental(ctx, doc, subto))
  1062. subto++;
  1063. if (subfrom < subto)
  1064. writexrefsubsect(ctx, opts, subfrom, subto);
  1065. subfrom = subto;
  1066. }
  1067. }
  1068. else
  1069. {
  1070. writexrefsubsect(ctx, opts, from, to);
  1071. }
  1072. fz_write_string(ctx, opts->out, "\n");
  1073. fz_var(trailer);
  1074. fz_try(ctx)
  1075. {
  1076. if (opts->do_incremental)
  1077. {
  1078. trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));
  1079. pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), pdf_xref_len(ctx, doc));
  1080. pdf_dict_put_int(ctx, trailer, PDF_NAME(Prev), doc->startxref);
  1081. pdf_dict_del(ctx, trailer, PDF_NAME(XRefStm));
  1082. if (!opts->do_snapshot)
  1083. doc->startxref = startxref - opts->bias;
  1084. }
  1085. else
  1086. {
  1087. trailer = pdf_new_dict(ctx, doc, 5);
  1088. pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), to);
  1089. if (first)
  1090. {
  1091. pdf_obj *otrailer = pdf_trailer(ctx, doc);
  1092. obj = pdf_dict_get(ctx, otrailer, PDF_NAME(Info));
  1093. if (obj)
  1094. pdf_dict_put(ctx, trailer, PDF_NAME(Info), obj);
  1095. obj = pdf_dict_get(ctx, otrailer, PDF_NAME(Root));
  1096. if (obj)
  1097. pdf_dict_put(ctx, trailer, PDF_NAME(Root), obj);
  1098. obj = pdf_dict_get(ctx, otrailer, PDF_NAME(ID));
  1099. if (obj)
  1100. pdf_dict_put(ctx, trailer, PDF_NAME(ID), obj);
  1101. /* The encryption dictionary is kept in the writer state to handle
  1102. the encryption dictionary object being renumbered during repair.*/
  1103. if (opts->crypt_obj)
  1104. {
  1105. /* If the encryption dictionary used to be an indirect reference from the trailer,
  1106. store it the same way in the trailer in the saved file. */
  1107. if (pdf_is_indirect(ctx, opts->crypt_obj))
  1108. pdf_dict_put_indirect(ctx, trailer, PDF_NAME(Encrypt), opts->crypt_object_number);
  1109. else
  1110. pdf_dict_put(ctx, trailer, PDF_NAME(Encrypt), opts->crypt_obj);
  1111. }
  1112. if (opts->metadata)
  1113. pdf_dict_putp(ctx, trailer, "Root/Metadata", opts->metadata);
  1114. }
  1115. }
  1116. fz_write_string(ctx, opts->out, "trailer\n");
  1117. /* Trailer is NOT encrypted */
  1118. pdf_print_obj(ctx, opts->out, trailer, opts->do_tight, opts->do_ascii);
  1119. fz_write_string(ctx, opts->out, "\n");
  1120. fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref - opts->bias);
  1121. doc->last_xref_was_old_style = 1;
  1122. }
  1123. fz_always(ctx)
  1124. {
  1125. pdf_drop_obj(ctx, trailer);
  1126. }
  1127. fz_catch(ctx)
  1128. {
  1129. fz_rethrow(ctx);
  1130. }
  1131. }
  1132. static void writexrefstreamsubsect(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *index, fz_buffer *fzbuf, int from, int to)
  1133. {
  1134. int num;
  1135. pdf_array_push_int(ctx, index, from);
  1136. pdf_array_push_int(ctx, index, to - from);
  1137. for (num = from; num < to; num++)
  1138. {
  1139. int f1, f2, f3;
  1140. pdf_xref_entry *x = pdf_get_xref_entry_no_null(ctx, doc, num);
  1141. if (opts->use_list[num] == 0)
  1142. {
  1143. f1 = 0; /* Free */
  1144. f2 = opts->ofs_list[num];
  1145. f3 = opts->gen_list[num];
  1146. }
  1147. else if (x->type == 'o')
  1148. {
  1149. f1 = 2; /* Object Stream */
  1150. f2 = opts->ofs_list[num];
  1151. f3 = opts->gen_list[num];
  1152. }
  1153. else
  1154. {
  1155. f1 = 1; /* Object */
  1156. f2 = opts->ofs_list[num] - opts->bias;
  1157. f3 = opts->gen_list[num];
  1158. }
  1159. fz_append_byte(ctx, fzbuf, f1);
  1160. fz_append_byte(ctx, fzbuf, f2>>24);
  1161. fz_append_byte(ctx, fzbuf, f2>>16);
  1162. fz_append_byte(ctx, fzbuf, f2>>8);
  1163. fz_append_byte(ctx, fzbuf, f2);
  1164. fz_append_byte(ctx, fzbuf, f3);
  1165. }
  1166. }
  1167. static void writexrefstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int from, int to, int first, int64_t startxref)
  1168. {
  1169. int num;
  1170. pdf_obj *dict = NULL;
  1171. pdf_obj *obj;
  1172. pdf_obj *w = NULL;
  1173. pdf_obj *index;
  1174. fz_buffer *fzbuf = NULL;
  1175. fz_var(dict);
  1176. fz_var(w);
  1177. fz_var(fzbuf);
  1178. fz_try(ctx)
  1179. {
  1180. num = pdf_create_object(ctx, doc);
  1181. expand_lists(ctx, opts, num);
  1182. dict = pdf_new_dict(ctx, doc, 6);
  1183. pdf_update_object(ctx, doc, num, dict);
  1184. to++;
  1185. if (first)
  1186. {
  1187. obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
  1188. if (obj)
  1189. pdf_dict_put(ctx, dict, PDF_NAME(Info), obj);
  1190. obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
  1191. if (obj)
  1192. pdf_dict_put(ctx, dict, PDF_NAME(Root), obj);
  1193. obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
  1194. if (obj)
  1195. pdf_dict_put(ctx, dict, PDF_NAME(ID), obj);
  1196. /* The encryption dictionary is kept in the writer state to handle
  1197. the encryption dictionary object being renumbered during repair.*/
  1198. if (opts->crypt_obj)
  1199. {
  1200. /* If the encryption dictionary used to be an indirect reference from the trailer,
  1201. store it the same way in the xref stream in the saved file. */
  1202. if (pdf_is_indirect(ctx, opts->crypt_obj))
  1203. pdf_dict_put_indirect(ctx, dict, PDF_NAME(Encrypt), opts->crypt_object_number);
  1204. else
  1205. pdf_dict_put(ctx, dict, PDF_NAME(Encrypt), opts->crypt_obj);
  1206. }
  1207. }
  1208. pdf_dict_put_int(ctx, dict, PDF_NAME(Size), to);
  1209. if (opts->do_incremental)
  1210. {
  1211. pdf_dict_put_int(ctx, dict, PDF_NAME(Prev), doc->startxref);
  1212. if (!opts->do_snapshot)
  1213. doc->startxref = startxref - opts->bias;
  1214. }
  1215. pdf_dict_put(ctx, dict, PDF_NAME(Type), PDF_NAME(XRef));
  1216. w = pdf_new_array(ctx, doc, 3);
  1217. pdf_dict_put(ctx, dict, PDF_NAME(W), w);
  1218. pdf_array_push_int(ctx, w, 1);
  1219. pdf_array_push_int(ctx, w, 4);
  1220. pdf_array_push_int(ctx, w, 1);
  1221. index = pdf_new_array(ctx, doc, 2);
  1222. pdf_dict_put_drop(ctx, dict, PDF_NAME(Index), index);
  1223. /* opts->gen_list[num] is already initialized by fz_calloc. */
  1224. opts->use_list[num] = 1;
  1225. opts->ofs_list[num] = startxref;
  1226. fzbuf = fz_new_buffer(ctx, (1 + 4 + 1) * (to-from));
  1227. if (opts->do_incremental)
  1228. {
  1229. int subfrom = from;
  1230. int subto;
  1231. while (subfrom < to)
  1232. {
  1233. while (subfrom < to && !pdf_xref_is_incremental(ctx, doc, subfrom))
  1234. subfrom++;
  1235. subto = subfrom;
  1236. while (subto < to && pdf_xref_is_incremental(ctx, doc, subto))
  1237. subto++;
  1238. if (subfrom < subto)
  1239. writexrefstreamsubsect(ctx, doc, opts, index, fzbuf, subfrom, subto);
  1240. subfrom = subto;
  1241. }
  1242. }
  1243. else
  1244. {
  1245. writexrefstreamsubsect(ctx, doc, opts, index, fzbuf, from, to);
  1246. }
  1247. pdf_update_stream(ctx, doc, dict, fzbuf, 0);
  1248. writeobject(ctx, doc, opts, num, 0, 0, 1);
  1249. fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref - opts->bias);
  1250. if (opts->do_snapshot)
  1251. pdf_delete_object(ctx, doc, num);
  1252. }
  1253. fz_always(ctx)
  1254. {
  1255. pdf_drop_obj(ctx, dict);
  1256. pdf_drop_obj(ctx, w);
  1257. fz_drop_buffer(ctx, fzbuf);
  1258. }
  1259. fz_catch(ctx)
  1260. {
  1261. fz_rethrow(ctx);
  1262. }
  1263. doc->last_xref_was_old_style = 0;
  1264. }
  1265. static void
  1266. dowriteobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num)
  1267. {
  1268. pdf_xref_entry *entry = pdf_get_xref_entry_no_null(ctx, doc, num);
  1269. int gen = opts->gen_list ? opts->gen_list[num] : 0;
  1270. if (entry->type == 'f')
  1271. gen = entry->gen;
  1272. if (entry->type == 'n')
  1273. gen = entry->gen;
  1274. /* If we are renumbering, then make sure all generation numbers are
  1275. * zero (except object 0 which must be free, and have a gen number of
  1276. * 65535). Changing the generation numbers (and indeed object numbers)
  1277. * will break encryption - so only do this if we are renumbering
  1278. * anyway. */
  1279. if (opts->do_garbage >= 2)
  1280. gen = (num == 0 ? 65535 : 0);
  1281. /* For objects in object streams, the gen number gives us the index of
  1282. * the object within the stream. */
  1283. if (entry->type == 'o')
  1284. gen = entry->gen;
  1285. if (opts->gen_list)
  1286. opts->gen_list[num] = gen;
  1287. if (opts->do_garbage && !opts->use_list[num])
  1288. return;
  1289. if (entry->type == 'o' && (!opts->do_incremental || pdf_xref_is_incremental(ctx, doc, num)))
  1290. {
  1291. assert(opts->do_use_objstms);
  1292. opts->ofs_list[num] = entry->ofs;
  1293. return;
  1294. }
  1295. if (entry->type == 'n')
  1296. {
  1297. if (!opts->do_incremental || pdf_xref_is_incremental(ctx, doc, num))
  1298. {
  1299. if (opts->ofs_list)
  1300. opts->ofs_list[num] = fz_tell_output(ctx, opts->out);
  1301. writeobject(ctx, doc, opts, num, gen, 1, num == opts->crypt_object_number);
  1302. }
  1303. }
  1304. else if (opts->use_list)
  1305. opts->use_list[num] = 0;
  1306. }
  1307. static void
  1308. writeobjects(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
  1309. {
  1310. int num;
  1311. int xref_len = pdf_xref_len(ctx, doc);
  1312. if (!opts->do_incremental)
  1313. {
  1314. int version = pdf_version(ctx, doc);
  1315. fz_write_printf(ctx, opts->out, "%%PDF-%d.%d\n", version / 10, version % 10);
  1316. fz_write_string(ctx, opts->out, "%\xC2\xB5\xC2\xB6\n\n");
  1317. }
  1318. for (num = 0; num < xref_len; num++)
  1319. dowriteobject(ctx, doc, opts, num);
  1320. }
  1321. #ifdef DEBUG_WRITING
  1322. static void dump_object_details(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
  1323. {
  1324. int i;
  1325. for (i = 0; i < pdf_xref_len(ctx, doc); i++)
  1326. {
  1327. fprintf(stderr, "%d@%ld: use=%d\n", i, opts->ofs_list[i], opts->use_list[i]);
  1328. }
  1329. }
  1330. #endif
  1331. static void presize_unsaved_signature_byteranges(fz_context *ctx, pdf_document *doc)
  1332. {
  1333. int s;
  1334. for (s = 0; s < doc->num_incremental_sections; s++)
  1335. {
  1336. pdf_xref *xref = &doc->xref_sections[s];
  1337. if (xref->unsaved_sigs)
  1338. {
  1339. /* The ByteRange objects of signatures are initially written out with
  1340. * dummy values, and then overwritten later. We need to make sure their
  1341. * initial form at least takes enough sufficient file space */
  1342. pdf_unsaved_sig *usig;
  1343. int n = 0;
  1344. for (usig = xref->unsaved_sigs; usig; usig = usig->next)
  1345. n++;
  1346. for (usig = xref->unsaved_sigs; usig; usig = usig->next)
  1347. {
  1348. /* There will be segments of bytes at the beginning, at
  1349. * the end and between each consecutive pair of signatures,
  1350. * hence n + 1 */
  1351. int i;
  1352. pdf_obj *byte_range = pdf_dict_getl(ctx, usig->field, PDF_NAME(V), PDF_NAME(ByteRange), NULL);
  1353. for (i = 0; i < n+1; i++)
  1354. {
  1355. pdf_array_push_int(ctx, byte_range, INT_MAX);
  1356. pdf_array_push_int(ctx, byte_range, INT_MAX);
  1357. }
  1358. }
  1359. }
  1360. }
  1361. }
  1362. static void complete_signatures(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
  1363. {
  1364. pdf_obj *byte_range = NULL;
  1365. char *buf = NULL, *ptr;
  1366. int s;
  1367. fz_stream *stm = NULL;
  1368. fz_var(byte_range);
  1369. fz_var(stm);
  1370. fz_var(buf);
  1371. fz_try(ctx)
  1372. {
  1373. for (s = 0; s < doc->num_incremental_sections; s++)
  1374. {
  1375. pdf_xref *xref = &doc->xref_sections[doc->num_incremental_sections - s - 1];
  1376. if (xref->unsaved_sigs)
  1377. {
  1378. pdf_unsaved_sig *usig;
  1379. size_t buf_size = 0;
  1380. size_t i;
  1381. size_t last_end;
  1382. for (usig = xref->unsaved_sigs; usig; usig = usig->next)
  1383. {
  1384. size_t size = usig->signer->max_digest_size(ctx, usig->signer);
  1385. buf_size = fz_maxz(buf_size, size);
  1386. }
  1387. buf_size = buf_size * 2 + SIG_EXTRAS_SIZE;
  1388. buf = fz_calloc(ctx, buf_size, 1);
  1389. stm = fz_stream_from_output(ctx, opts->out);
  1390. /* Locate the byte ranges and contents in the saved file */
  1391. for (usig = xref->unsaved_sigs; usig; usig = usig->next)
  1392. {
  1393. char *bstr, *cstr, *fstr;
  1394. size_t bytes_read;
  1395. int pnum = pdf_obj_parent_num(ctx, pdf_dict_getl(ctx, usig->field, PDF_NAME(V), PDF_NAME(ByteRange), NULL));
  1396. fz_seek(ctx, stm, opts->ofs_list[pnum], SEEK_SET);
  1397. /* SIG_EXTRAS_SIZE is an arbitrary value and its addition above to buf_size
  1398. * could cause an attempt to read off the end of the file. That's not an
  1399. * error, but we need to keep track of how many bytes are read and search
  1400. * for markers only in defined data */
  1401. bytes_read = fz_read(ctx, stm, (unsigned char *)buf, buf_size);
  1402. assert(bytes_read <= buf_size);
  1403. bstr = fz_memmem(buf, bytes_read, SLASH_BYTE_RANGE, sizeof(SLASH_BYTE_RANGE)-1);
  1404. cstr = fz_memmem(buf, bytes_read, SLASH_CONTENTS, sizeof(SLASH_CONTENTS)-1);
  1405. fstr = fz_memmem(buf, bytes_read, SLASH_FILTER, sizeof(SLASH_FILTER)-1);
  1406. if (!(bstr && cstr && fstr && bstr < cstr && cstr < fstr))
  1407. fz_throw(ctx, FZ_ERROR_FORMAT, "Failed to determine byte ranges while writing signature");
  1408. usig->byte_range_start = bstr - buf + sizeof(SLASH_BYTE_RANGE)-1 + opts->ofs_list[pnum];
  1409. usig->byte_range_end = cstr - buf + opts->ofs_list[pnum];
  1410. usig->contents_start = cstr - buf + sizeof(SLASH_CONTENTS)-1 + opts->ofs_list[pnum];
  1411. usig->contents_end = fstr - buf + opts->ofs_list[pnum];
  1412. }
  1413. fz_drop_stream(ctx, stm);
  1414. stm = NULL;
  1415. /* Recreate ByteRange with correct values. */
  1416. byte_range = pdf_new_array(ctx, doc, 4);
  1417. last_end = 0;
  1418. for (usig = xref->unsaved_sigs; usig; usig = usig->next)
  1419. {
  1420. pdf_array_push_int(ctx, byte_range, last_end);
  1421. pdf_array_push_int(ctx, byte_range, usig->contents_start - last_end);
  1422. last_end = usig->contents_end;
  1423. }
  1424. pdf_array_push_int(ctx, byte_range, last_end);
  1425. pdf_array_push_int(ctx, byte_range, xref->end_ofs - last_end);
  1426. /* Copy the new ByteRange to the other unsaved signatures */
  1427. for (usig = xref->unsaved_sigs; usig; usig = usig->next)
  1428. pdf_dict_putl_drop(ctx, usig->field, pdf_copy_array(ctx, byte_range), PDF_NAME(V), PDF_NAME(ByteRange), NULL);
  1429. /* Write the byte range into buf, padding with spaces*/
  1430. ptr = pdf_sprint_obj(ctx, buf, buf_size, &i, byte_range, 1, 0);
  1431. if (ptr != buf) /* should never happen, since data should fit in buf_size */
  1432. fz_free(ctx, ptr);
  1433. memset(buf+i, ' ', buf_size-i);
  1434. /* Write the byte range to the file */
  1435. for (usig = xref->unsaved_sigs; usig; usig = usig->next)
  1436. {
  1437. fz_seek_output(ctx, opts->out, usig->byte_range_start, SEEK_SET);
  1438. fz_write_data(ctx, opts->out, buf, usig->byte_range_end - usig->byte_range_start);
  1439. }
  1440. /* Write the digests into the file */
  1441. for (usig = xref->unsaved_sigs; usig; usig = usig->next)
  1442. pdf_write_digest(ctx, opts->out, byte_range, usig->field, usig->contents_start, usig->contents_end - usig->contents_start, usig->signer);
  1443. /* delete the unsaved_sigs records */
  1444. while ((usig = xref->unsaved_sigs) != NULL)
  1445. {
  1446. xref->unsaved_sigs = usig->next;
  1447. pdf_drop_obj(ctx, usig->field);
  1448. pdf_drop_signer(ctx, usig->signer);
  1449. fz_free(ctx, usig);
  1450. }
  1451. xref->unsaved_sigs_end = NULL;
  1452. pdf_drop_obj(ctx, byte_range);
  1453. byte_range = NULL;
  1454. fz_free(ctx, buf);
  1455. buf = NULL;
  1456. }
  1457. }
  1458. }
  1459. fz_always(ctx)
  1460. {
  1461. pdf_drop_obj(ctx, byte_range);
  1462. }
  1463. fz_catch(ctx)
  1464. {
  1465. fz_drop_stream(ctx, stm);
  1466. fz_free(ctx, buf);
  1467. fz_rethrow(ctx);
  1468. }
  1469. }
  1470. static void clean_content_streams(fz_context *ctx, pdf_document *doc, int sanitize, int ascii, int newlines)
  1471. {
  1472. int n = pdf_count_pages(ctx, doc);
  1473. int i;
  1474. pdf_filter_options options = { 0 };
  1475. pdf_sanitize_filter_options sopts = { 0 };
  1476. pdf_filter_factory list[2] = { 0 };
  1477. options.recurse = 1;
  1478. options.ascii = ascii;
  1479. options.newlines = newlines;
  1480. options.filters = sanitize ? list : NULL;
  1481. list[0].filter = pdf_new_sanitize_filter;
  1482. list[0].options = &sopts;
  1483. for (i = 0; i < n; i++)
  1484. {
  1485. pdf_annot *annot;
  1486. pdf_page *page = pdf_load_page(ctx, doc, i);
  1487. fz_try(ctx)
  1488. {
  1489. pdf_filter_page_contents(ctx, doc, page, &options);
  1490. for (annot = pdf_first_annot(ctx, page); annot != NULL; annot = pdf_next_annot(ctx, annot))
  1491. {
  1492. pdf_filter_annot_contents(ctx, doc, annot, &options);
  1493. }
  1494. }
  1495. fz_always(ctx)
  1496. fz_drop_page(ctx, &page->super);
  1497. fz_catch(ctx)
  1498. fz_rethrow(ctx);
  1499. }
  1500. }
  1501. /* Initialise the pdf_write_state, used dynamically during the write, from the static
  1502. * pdf_write_options, passed into pdf_save_document */
  1503. static void initialise_write_state(fz_context *ctx, pdf_document *doc, const pdf_write_options *in_opts, pdf_write_state *opts)
  1504. {
  1505. int xref_len = pdf_xref_len(ctx, doc);
  1506. opts->do_incremental = in_opts->do_incremental;
  1507. opts->do_ascii = in_opts->do_ascii;
  1508. opts->do_tight = !in_opts->do_pretty;
  1509. opts->do_expand = in_opts->do_decompress;
  1510. opts->do_compress = in_opts->do_compress;
  1511. opts->do_compress_images = in_opts->do_compress_images;
  1512. opts->do_compress_fonts = in_opts->do_compress_fonts;
  1513. opts->do_snapshot = in_opts->do_snapshot;
  1514. opts->compression_effort = in_opts->compression_effort;
  1515. if (opts->compression_effort < 0)
  1516. opts->compression_effort = 0;
  1517. else if (opts->compression_effort > 100)
  1518. opts->compression_effort = 100;
  1519. opts->do_garbage = in_opts->do_garbage;
  1520. opts->do_clean = in_opts->do_clean;
  1521. opts->do_encrypt = in_opts->do_encrypt;
  1522. opts->dont_regenerate_id = in_opts->dont_regenerate_id;
  1523. opts->do_preserve_metadata = in_opts->do_preserve_metadata;
  1524. opts->do_use_objstms = in_opts->do_use_objstms;
  1525. opts->permissions = in_opts->permissions;
  1526. memcpy(opts->opwd_utf8, in_opts->opwd_utf8, nelem(opts->opwd_utf8));
  1527. memcpy(opts->upwd_utf8, in_opts->upwd_utf8, nelem(opts->upwd_utf8));
  1528. /* We deliberately make these arrays long enough to cope with
  1529. * 1 to n access rather than 0..n-1, and add space for 2 new
  1530. * extra entries that may be required for linearization. */
  1531. opts->list_len = 0;
  1532. opts->use_list = NULL;
  1533. opts->ofs_list = NULL;
  1534. opts->gen_list = NULL;
  1535. opts->renumber_map = NULL;
  1536. expand_lists(ctx, opts, xref_len);
  1537. }
  1538. /* Free the resources held by the dynamic write options */
  1539. static void finalise_write_state(fz_context *ctx, pdf_write_state *opts)
  1540. {
  1541. fz_free(ctx, opts->use_list);
  1542. fz_free(ctx, opts->ofs_list);
  1543. fz_free(ctx, opts->gen_list);
  1544. fz_free(ctx, opts->renumber_map);
  1545. pdf_drop_object_labels(ctx, opts->labels);
  1546. }
/*
 * Default write options: every listed flag is off, permissions has all
 * bits set (~0), and both passwords are empty strings.
 *
 * NOTE(review): this is a positional initialiser, so it relies on the
 * member order of pdf_write_options (declared outside this file section)
 * matching the per-line comments below. Members not listed here are
 * zero-initialised by the language.
 */
const pdf_write_options pdf_default_write_options = {
	0, /* do_incremental */
	0, /* do_pretty */
	0, /* do_ascii */
	0, /* do_compress */
	0, /* do_compress_images */
	0, /* do_compress_fonts */
	0, /* do_decompress */
	0, /* do_garbage */
	0, /* do_linear */
	0, /* do_clean */
	0, /* do_sanitize */
	0, /* do_appearance */
	0, /* do_encrypt */
	0, /* dont_regenerate_id */
	~0, /* permissions */
	"", /* opwd_utf8[128] */
	"", /* upwd_utf8[128] */
	0 /* do_snapshot */
};
/*
 * Write options for snapshots. These differ from the defaults in three
 * places only: do_incremental, dont_regenerate_id and do_snapshot are
 * all set to 1.
 *
 * NOTE(review): positional initialiser; relies on the member order of
 * pdf_write_options (declared outside this file section) matching the
 * per-line comments below. Members not listed are zero-initialised.
 */
static const pdf_write_options pdf_snapshot_write_options = {
	1, /* do_incremental */
	0, /* do_pretty */
	0, /* do_ascii */
	0, /* do_compress */
	0, /* do_compress_images */
	0, /* do_compress_fonts */
	0, /* do_decompress */
	0, /* do_garbage */
	0, /* do_linear */
	0, /* do_clean */
	0, /* do_sanitize */
	0, /* do_appearance */
	0, /* do_encrypt */
	1, /* dont_regenerate_id */
	~0, /* permissions */
	"", /* opwd_utf8[128] */
	"", /* upwd_utf8[128] */
	1 /* do_snapshot */
};
  1587. const char *fz_pdf_write_options_usage =
  1588. "PDF output options:\n"
  1589. "\tdecompress: decompress all streams (except compress-fonts/images)\n"
  1590. "\tcompress=yes|flate|brotli: compress all streams, yes defaults to flate\n"
  1591. "\tcompress-fonts: compress embedded fonts\n"
  1592. "\tcompress-images: compress images\n"
  1593. "\tcompress-effort=0|percentage: effort spent compressing, 0 is default, 100 is max effort\n"
  1594. "\tascii: ASCII hex encode binary streams\n"
  1595. "\tpretty: pretty-print objects with indentation\n"
  1596. "\tlabels: print object labels\n"
  1597. "\tlinearize: optimize for web browsers (no longer supported!)\n"
  1598. "\tclean: pretty-print graphics commands in content streams\n"
  1599. "\tsanitize: sanitize graphics commands in content streams\n"
  1600. "\tgarbage: garbage collect unused objects\n"
  1601. "\tor garbage=compact: ... and compact cross reference table\n"
  1602. "\tor garbage=deduplicate: ... and remove duplicate objects\n"
  1603. "\tincremental: write changes as incremental update\n"
  1604. "\tobjstms: use object streams and cross reference streams\n"
  1605. "\tappearance=yes|all: synthesize just missing, or all, annotation/widget apperance streams\n"
  1606. "\tcontinue-on-error: continue saving the document even if there is an error\n"
  1607. "\tdecrypt: write unencrypted document\n"
  1608. "\tencrypt=rc4-40|rc4-128|aes-128|aes-256: write encrypted document\n"
  1609. "\tpermissions=NUMBER: document permissions to grant when encrypting\n"
  1610. "\tuser-password=PASSWORD: password required to read document\n"
  1611. "\towner-password=PASSWORD: password required to edit document\n"
  1612. "\tregenerate-id: (default yes) regenerate document id\n"
  1613. "\n";
  1614. pdf_write_options *
  1615. pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args)
  1616. {
  1617. const char *val;
  1618. memset(opts, 0, sizeof *opts);
  1619. if (fz_has_option(ctx, args, "decompress", &val))
  1620. opts->do_decompress = fz_option_eq(val, "yes");
  1621. if (fz_has_option(ctx, args, "compress", &val))
  1622. {
  1623. if (fz_option_eq(val, "brotli"))
  1624. opts->do_compress = 2;
  1625. else if (fz_option_eq(val, "flate"))
  1626. opts->do_compress = 1;
  1627. else
  1628. opts->do_compress = fz_option_eq(val, "yes");
  1629. }
  1630. if (fz_has_option(ctx, args, "compress-fonts", &val))
  1631. opts->do_compress_fonts = fz_option_eq(val, "yes");
  1632. if (fz_has_option(ctx, args, "compress-images", &val))
  1633. opts->do_compress_images = fz_option_eq(val, "yes");
  1634. if (fz_has_option(ctx, args, "compression-effort", &val))
  1635. opts->compression_effort = fz_atoi(val);
  1636. if (fz_has_option(ctx, args, "labels", &val))
  1637. opts->do_labels = fz_option_eq(val, "yes");
  1638. if (fz_has_option(ctx, args, "ascii", &val))
  1639. opts->do_ascii = fz_option_eq(val, "yes");
  1640. if (fz_has_option(ctx, args, "pretty", &val))
  1641. opts->do_pretty = fz_option_eq(val, "yes");
  1642. if (fz_has_option(ctx, args, "linearize", &val))
  1643. opts->do_linear = fz_option_eq(val, "yes");
  1644. if (fz_has_option(ctx, args, "clean", &val))
  1645. opts->do_clean = fz_option_eq(val, "yes");
  1646. if (fz_has_option(ctx, args, "sanitize", &val))
  1647. opts->do_sanitize = fz_option_eq(val, "yes");
  1648. if (fz_has_option(ctx, args, "incremental", &val))
  1649. opts->do_incremental = fz_option_eq(val, "yes");
  1650. if (fz_has_option(ctx, args, "objstms", &val))
  1651. opts->do_use_objstms = fz_option_eq(val, "yes");
  1652. if (fz_has_option(ctx, args, "regenerate-id", &val))
  1653. opts->dont_regenerate_id = fz_option_eq(val, "no");
  1654. if (fz_has_option(ctx, args, "decrypt", &val))
  1655. opts->do_encrypt = fz_option_eq(val, "yes") ? PDF_ENCRYPT_NONE : PDF_ENCRYPT_KEEP;
  1656. if (fz_has_option(ctx, args, "encrypt", &val))
  1657. {
  1658. if (fz_option_eq(val, "none") || fz_option_eq(val, "no"))
  1659. opts->do_encrypt = PDF_ENCRYPT_NONE;
  1660. else if (fz_option_eq(val, "keep"))
  1661. opts->do_encrypt = PDF_ENCRYPT_KEEP;
  1662. else if (fz_option_eq(val, "rc4-40") || fz_option_eq(val, "yes"))
  1663. opts->do_encrypt = PDF_ENCRYPT_RC4_40;
  1664. else if (fz_option_eq(val, "rc4-128"))
  1665. opts->do_encrypt = PDF_ENCRYPT_RC4_128;
  1666. else if (fz_option_eq(val, "aes-128"))
  1667. opts->do_encrypt = PDF_ENCRYPT_AES_128;
  1668. else if (fz_option_eq(val, "aes-256"))
  1669. opts->do_encrypt = PDF_ENCRYPT_AES_256;
  1670. else
  1671. fz_throw(ctx, FZ_ERROR_ARGUMENT, "unknown encryption in options");
  1672. }
  1673. if (fz_has_option(ctx, args, "owner-password", &val))
  1674. fz_copy_option(ctx, val, opts->opwd_utf8, nelem(opts->opwd_utf8));
  1675. if (fz_has_option(ctx, args, "user-password", &val))
  1676. fz_copy_option(ctx, val, opts->upwd_utf8, nelem(opts->upwd_utf8));
  1677. if (fz_has_option(ctx, args, "permissions", &val))
  1678. opts->permissions = fz_atoi(val);
  1679. else
  1680. opts->permissions = ~0;
  1681. if (fz_has_option(ctx, args, "garbage", &val))
  1682. {
  1683. if (fz_option_eq(val, "yes"))
  1684. opts->do_garbage = 1;
  1685. else if (fz_option_eq(val, "compact"))
  1686. opts->do_garbage = 2;
  1687. else if (fz_option_eq(val, "deduplicate"))
  1688. opts->do_garbage = 3;
  1689. else
  1690. opts->do_garbage = fz_atoi(val);
  1691. }
  1692. if (fz_has_option(ctx, args, "appearance", &val))
  1693. {
  1694. if (fz_option_eq(val, "yes"))
  1695. opts->do_appearance = 1;
  1696. else if (fz_option_eq(val, "all"))
  1697. opts->do_appearance = 2;
  1698. }
  1699. return opts;
  1700. }
  1701. int pdf_can_be_saved_incrementally(fz_context *ctx, pdf_document *doc)
  1702. {
  1703. if (doc->repair_attempted)
  1704. return 0;
  1705. if (doc->redacted)
  1706. return 0;
  1707. return 1;
  1708. }
  1709. static void
  1710. prepare_for_save(fz_context *ctx, pdf_document *doc, const pdf_write_options *in_opts)
  1711. {
  1712. /* Rewrite (and possibly sanitize) the operator streams */
  1713. if (in_opts->do_clean || in_opts->do_sanitize)
  1714. {
  1715. pdf_begin_operation(ctx, doc, "Clean content streams");
  1716. fz_try(ctx)
  1717. {
  1718. clean_content_streams(ctx, doc, in_opts->do_sanitize, in_opts->do_ascii, in_opts->do_pretty);
  1719. pdf_end_operation(ctx, doc);
  1720. }
  1721. fz_catch(ctx)
  1722. {
  1723. pdf_abandon_operation(ctx, doc);
  1724. fz_rethrow(ctx);
  1725. }
  1726. }
  1727. /* When saving a PDF with signatures the file will
  1728. first be written once, then the file will have its
  1729. digests and byte ranges calculated and and then the
  1730. signature dictionary containing them will be updated
  1731. both in memory and in the saved file. By setting this
  1732. flag we avoid a new xref section from being created when
  1733. the signature dictionary is updated. */
  1734. doc->save_in_progress = 1;
  1735. if (!in_opts->do_snapshot)
  1736. presize_unsaved_signature_byteranges(ctx, doc);
  1737. }
  1738. static pdf_obj *
  1739. new_identity(fz_context *ctx, pdf_document *doc)
  1740. {
  1741. unsigned char rnd[32];
  1742. pdf_obj *id;
  1743. fz_memrnd(ctx, rnd, nelem(rnd));
  1744. id = pdf_dict_put_array(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID), 2);
  1745. pdf_array_push_string(ctx, id, (char *) rnd + 0, nelem(rnd) / 2);
  1746. pdf_array_push_string(ctx, id, (char *) rnd + 16, nelem(rnd) / 2);
  1747. return id;
  1748. }
  1749. static void
  1750. change_identity(fz_context *ctx, pdf_document *doc, pdf_obj *id)
  1751. {
  1752. unsigned char rnd[16];
  1753. if (pdf_array_len(ctx, id) >= 2)
  1754. {
  1755. /* Update second half of ID array with new random data. */
  1756. fz_memrnd(ctx, rnd, 16);
  1757. pdf_array_put_string(ctx, id, 1, (char *)rnd, 16);
  1758. }
  1759. }
  1760. static void
  1761. create_encryption_dictionary(fz_context *ctx, pdf_document *doc, pdf_crypt *crypt)
  1762. {
  1763. unsigned char *o, *u;
  1764. pdf_obj *encrypt;
  1765. int r;
  1766. r = pdf_crypt_revision(ctx, crypt);
  1767. encrypt = pdf_dict_put_dict(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt), 10);
  1768. pdf_dict_put_name(ctx, encrypt, PDF_NAME(Filter), "Standard");
  1769. pdf_dict_put_int(ctx, encrypt, PDF_NAME(R), r);
  1770. pdf_dict_put_int(ctx, encrypt, PDF_NAME(V), pdf_crypt_version(ctx, crypt));
  1771. pdf_dict_put_int(ctx, encrypt, PDF_NAME(Length), pdf_crypt_length(ctx, crypt));
  1772. pdf_dict_put_int(ctx, encrypt, PDF_NAME(P), pdf_crypt_permissions(ctx, crypt));
  1773. pdf_dict_put_bool(ctx, encrypt, PDF_NAME(EncryptMetadata), pdf_crypt_encrypt_metadata(ctx, crypt));
  1774. o = pdf_crypt_owner_password(ctx, crypt);
  1775. u = pdf_crypt_user_password(ctx, crypt);
  1776. if (r < 4)
  1777. {
  1778. pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 32);
  1779. pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 32);
  1780. }
  1781. else if (r == 4)
  1782. {
  1783. pdf_obj *cf;
  1784. pdf_dict_put_name(ctx, encrypt, PDF_NAME(StmF), "StdCF");
  1785. pdf_dict_put_name(ctx, encrypt, PDF_NAME(StrF), "StdCF");
  1786. cf = pdf_dict_put_dict(ctx, encrypt, PDF_NAME(CF), 1);
  1787. cf = pdf_dict_put_dict(ctx, cf, PDF_NAME(StdCF), 3);
  1788. pdf_dict_put_name(ctx, cf, PDF_NAME(AuthEvent), "DocOpen");
  1789. pdf_dict_put_name(ctx, cf, PDF_NAME(CFM), "AESV2");
  1790. pdf_dict_put_int(ctx, cf, PDF_NAME(Length), 16);
  1791. pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 32);
  1792. pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 32);
  1793. }
  1794. else if (r == 6)
  1795. {
  1796. unsigned char *oe = pdf_crypt_owner_encryption(ctx, crypt);
  1797. unsigned char *ue = pdf_crypt_user_encryption(ctx, crypt);
  1798. pdf_obj *cf;
  1799. pdf_dict_put_name(ctx, encrypt, PDF_NAME(StmF), "StdCF");
  1800. pdf_dict_put_name(ctx, encrypt, PDF_NAME(StrF), "StdCF");
  1801. cf = pdf_dict_put_dict(ctx, encrypt, PDF_NAME(CF), 1);
  1802. cf = pdf_dict_put_dict(ctx, cf, PDF_NAME(StdCF), 3);
  1803. pdf_dict_put_name(ctx, cf, PDF_NAME(AuthEvent), "DocOpen");
  1804. pdf_dict_put_name(ctx, cf, PDF_NAME(CFM), "AESV3");
  1805. pdf_dict_put_int(ctx, cf, PDF_NAME(Length), 32);
  1806. pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 48);
  1807. pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 48);
  1808. pdf_dict_put_string(ctx, encrypt, PDF_NAME(OE), (char *) oe, 32);
  1809. pdf_dict_put_string(ctx, encrypt, PDF_NAME(UE), (char *) ue, 32);
  1810. pdf_dict_put_string(ctx, encrypt, PDF_NAME(Perms), (char *) pdf_crypt_permissions_encryption(ctx, crypt), 16);
  1811. }
  1812. }
  1813. static void
  1814. ensure_initial_incremental_contents(fz_context *ctx, fz_stream *in, fz_output *out, int64_t len)
  1815. {
  1816. fz_stream *verify;
  1817. unsigned char buf0[4096];
  1818. unsigned char buf1[4096];
  1819. size_t n0, n1;
  1820. int64_t off = 0;
  1821. int same;
  1822. if (!in)
  1823. fz_throw(ctx, FZ_ERROR_ARGUMENT, "no input file for incremental write");
  1824. verify = fz_stream_from_output(ctx, out);
  1825. fz_try(ctx)
  1826. {
  1827. /* Compare current contents of output file (in case we append) */
  1828. if (verify)
  1829. {
  1830. do
  1831. {
  1832. int64_t read = sizeof(buf0);
  1833. if (off + read > len)
  1834. read = len - off;
  1835. fz_seek(ctx, in, off, SEEK_SET);
  1836. n0 = fz_read(ctx, in, buf0, read);
  1837. fz_seek(ctx, verify, off, SEEK_SET);
  1838. n1 = fz_read(ctx, verify, buf1, read);
  1839. same = (n0 == n1 && !memcmp(buf0, buf1, n0));
  1840. off += (int64_t)n0;
  1841. }
  1842. while (same && n0 > 0 && off < len);
  1843. if (same)
  1844. {
  1845. fz_seek_output(ctx, out, len, SEEK_SET);
  1846. fz_truncate_output(ctx, out);
  1847. break; /* return from try */
  1848. }
  1849. fz_seek_output(ctx, out, 0, SEEK_SET);
  1850. }
  1851. /* Copy old contents into new file */
  1852. fz_seek(ctx, in, 0, SEEK_SET);
  1853. off = 0;
  1854. do
  1855. {
  1856. int64_t read = sizeof(buf0);
  1857. if (off + read > len)
  1858. read = len - off;
  1859. n0 = fz_read(ctx, in, buf0, read);
  1860. if (n0)
  1861. fz_write_data(ctx, out, buf0, n0);
  1862. off += n0;
  1863. }
  1864. while (n0 > 0 && off < len);
  1865. if (verify)
  1866. {
  1867. fz_truncate_output(ctx, out);
  1868. fz_seek_output(ctx, out, 0, SEEK_END);
  1869. }
  1870. }
  1871. fz_always(ctx)
  1872. fz_drop_stream(ctx, verify);
  1873. fz_catch(ctx)
  1874. fz_rethrow(ctx);
  1875. }
  1876. #define OBJSTM_MAXOBJS 256
  1877. #define OBJSTM_MAXLEN 1<<24
/*
 * Working state used while gathering objects into object streams.
 * Filled in by objstm_gather and emptied by flush_gathered.
 */
typedef struct
{
	/* Write state; its use/ofs/gen lists are updated when a stream is flushed. */
	pdf_write_state *opts;
	/* Number of objects gathered so far, i.e. the next free slot in objnum/len. */
	int n;
	/* Object number of each gathered object. */
	int objnum[OBJSTM_MAXOBJS];
	/* Number of bytes each gathered object added to content_buf. */
	size_t len[OBJSTM_MAXOBJS];
	/* The printed objects, and the output that writes into that buffer.
	 * Both are created on demand by objstm_gather and dropped by flush_gathered. */
	fz_buffer *content_buf;
	fz_output *content_out;
	/* Object numbers of the trailer's Root and Info entries; these are never gathered. */
	int root_num;
	int info_num;
	/* Reset to 0 by flush_gathered; not otherwise used in the code visible here. */
	int sep;
} objstm_gather_data;
  1890. static void
  1891. flush_gathered(fz_context *ctx, pdf_document *doc, objstm_gather_data *data)
  1892. {
  1893. pdf_obj *obj;
  1894. pdf_obj *ref = NULL;
  1895. fz_buffer *newbuf = NULL;
  1896. fz_output *out = NULL;
  1897. int i;
  1898. if (data->n == 0)
  1899. return;
  1900. obj = pdf_new_dict(ctx, doc, 4);
  1901. fz_var(ref);
  1902. fz_var(newbuf);
  1903. fz_var(out);
  1904. fz_try(ctx)
  1905. {
  1906. size_t pos = 0, first;
  1907. int num;
  1908. newbuf = fz_new_buffer(ctx, 128);
  1909. out = fz_new_output_with_buffer(ctx, newbuf);
  1910. for (i = 0; i < data->n; i++)
  1911. {
  1912. fz_write_printf(ctx, out, "%d %d ", data->objnum[i], pos);
  1913. pos += data->len[i];
  1914. }
  1915. fz_close_output(ctx, out);
  1916. first = fz_tell_output(ctx, out);
  1917. fz_drop_output(ctx, out);
  1918. out = NULL;
  1919. pdf_dict_put_int(ctx, obj, PDF_NAME(First), first);
  1920. pdf_dict_put_int(ctx, obj, PDF_NAME(N), data->n);
  1921. pdf_dict_put(ctx, obj, PDF_NAME(Type), PDF_NAME(ObjStm));
  1922. fz_close_output(ctx, data->content_out);
  1923. fz_append_buffer(ctx, newbuf, data->content_buf);
  1924. doc->xref_base = 0; /* Might have been reset by our caller */
  1925. ref = pdf_add_object(ctx, doc, obj);
  1926. pdf_update_stream(ctx, doc, ref, newbuf, 0);
  1927. num = pdf_to_num(ctx, ref);
  1928. expand_lists(ctx, data->opts, num);
  1929. data->opts->use_list[num] = 1;
  1930. /* Update all the xref entries for the objects to point into this stream. */
  1931. for (i = 0; i < data->n; i++)
  1932. {
  1933. pdf_xref_entry *x = pdf_get_xref_entry_no_null(ctx, doc, data->objnum[i]);
  1934. x->ofs = num; /* ofs = which objstm is this in */
  1935. x->gen = i; /* gen = nth entry in the objstm */
  1936. data->opts->ofs_list[data->objnum[i]] = i;
  1937. data->opts->gen_list[data->objnum[i]] = i;
  1938. }
  1939. data->n = 0;
  1940. data->sep = 0;
  1941. }
  1942. fz_always(ctx)
  1943. {
  1944. fz_drop_output(ctx, data->content_out);
  1945. data->content_out = NULL;
  1946. fz_drop_buffer(ctx, data->content_buf);
  1947. data->content_buf = NULL;
  1948. pdf_drop_obj(ctx, obj);
  1949. pdf_drop_obj(ctx, ref);
  1950. fz_drop_buffer(ctx, newbuf);
  1951. fz_drop_output(ctx, out);
  1952. }
  1953. fz_catch(ctx)
  1954. fz_rethrow(ctx);
  1955. }
  1956. static void
  1957. objstm_gather(fz_context *ctx, pdf_xref_entry *x, int i, pdf_document *doc, objstm_gather_data *data)
  1958. {
  1959. size_t olen, len;
  1960. if (i == data->root_num || i == data->info_num)
  1961. return;
  1962. /* Ensure the object is loaded! */
  1963. if (i == 0)
  1964. return; /* pdf_cache_object does not like being called for i == 0 which should be free. */
  1965. pdf_cache_object(ctx, doc, i);
  1966. /* Both normal objects and stream objects can get put into objstms (because we've already
  1967. * unpacked stream objects from objstms earlier!) Stream objects that are non-incremental
  1968. * will be left as they are by the later check. */
  1969. if ((x->type != 'n' && x->type != 'o') || x->stm_buf != NULL || x->stm_ofs != 0 || x->gen != 0)
  1970. return; /* Objects with generation number != 0 cannot be put in objstms */
  1971. if (i == data->opts->crypt_object_number)
  1972. return; /* Encryption dictionaries can also not be put in objstms */
  1973. /* If we are writing incrementally, then only the last one can be gathered. */
  1974. if (data->opts->do_incremental && !pdf_obj_is_incremental(ctx, x->obj))
  1975. return;
  1976. /* FIXME: Can we do a pass through to check for such objects more exactly? */
  1977. if (pdf_is_int(ctx, x->obj))
  1978. return; /* In case it's a Length value. */
  1979. if (pdf_is_indirect(ctx, x->obj))
  1980. return; /* Bare indirect references are not allowed. */
  1981. if (data->content_buf == NULL)
  1982. data->content_buf = fz_new_buffer(ctx, 128);
  1983. if (data->content_out == NULL)
  1984. data->content_out = fz_new_output_with_buffer(ctx, data->content_buf);
  1985. olen = data->content_buf->len;
  1986. pdf_print_encrypted_obj(ctx, data->content_out, x->obj, 1, 0, NULL, 0, 0, NULL);
  1987. data->objnum[data->n] = i;
  1988. len = data->content_buf->len;
  1989. data->len[data->n] = len - olen;
  1990. x->type = 'o';
  1991. x->gen = data->n;
  1992. data->n++;
  1993. if (data->n == OBJSTM_MAXOBJS || len > OBJSTM_MAXLEN)
  1994. flush_gathered(ctx, doc, data);
  1995. }
  1996. static void
  1997. gather_to_objstms(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int xref_len)
  1998. {
  1999. int count, num;
  2000. objstm_gather_data data = { 0 };
  2001. data.opts = opts;
  2002. data.root_num = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)));
  2003. data.info_num = pdf_to_num(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)));
  2004. count = pdf_xref_len(ctx, doc);
  2005. for (num = 1; num < count; ++num)
  2006. {
  2007. pdf_xref_entry *x = pdf_get_xref_entry_no_change(ctx, doc, num);
  2008. if (x)
  2009. objstm_gather(ctx, x, num, doc, &data);
  2010. }
  2011. flush_gathered(ctx, doc, &data);
  2012. }
  2013. static void
  2014. unpack_objstm_objs(fz_context *ctx, pdf_document *doc, int xref_len)
  2015. {
  2016. int num;
  2017. /* At this point, all our objects are cached already. Let's change
  2018. * all the 'o' objects to be 'n' and get rid of the ObjStm objects
  2019. * they all came from. */
  2020. for (num = 1; num < xref_len; ++num)
  2021. {
  2022. pdf_xref_entry *x = pdf_get_xref_entry_no_change(ctx, doc, num);
  2023. if (!x || x->type != 'o')
  2024. continue;
  2025. /* Change the type of the object to 'n'. */
  2026. x->type = 'n';
  2027. /* This leaves x->ofs etc wrong, but that's OK as the object is
  2028. * in memory, and we'll fix it up after the write. */
  2029. /* We no longer need the ObjStm that this object came from. */
  2030. if (x->ofs != 0)
  2031. {
  2032. pdf_xref_entry *y = pdf_get_xref_entry_no_change(ctx, doc, x->ofs);
  2033. /* The xref entry y for the objstm containing the object identified by
  2034. xref entry x above must exist, otherwise that object would not be labelled
  2035. 'o' in the xref. */
  2036. assert(y != NULL);
  2037. y->type = 'f';
  2038. }
  2039. }
  2040. }
  2041. static void
  2042. prepass(fz_context *ctx, pdf_document *doc)
  2043. {
  2044. int num;
  2045. for (num = 1; num < pdf_xref_len(ctx, doc); ++num)
  2046. {
  2047. if (pdf_object_exists(ctx, doc, num))
  2048. {
  2049. fz_try(ctx)
  2050. pdf_cache_object(ctx, doc, num);
  2051. fz_catch(ctx)
  2052. fz_report_error(ctx);
  2053. }
  2054. }
  2055. }
  2056. static void
  2057. do_pdf_save_document(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, const pdf_write_options *in_opts)
  2058. {
  2059. int lastfree;
  2060. int num;
  2061. int xref_len;
  2062. pdf_obj *id1, *id = NULL;
  2063. int changed;
  2064. int64_t current_offset;
  2065. if (in_opts->do_incremental)
  2066. {
  2067. ensure_initial_incremental_contents(ctx, doc->file, opts->out, doc->file_size);
  2068. /* If no changes, nothing more to write */
  2069. if (!pdf_has_unsaved_changes(ctx, doc))
  2070. {
  2071. doc->save_in_progress = 0;
  2072. return;
  2073. }
  2074. fz_write_string(ctx, opts->out, "\n");
  2075. }
  2076. pdf_begin_operation(ctx, doc, "Save document");
  2077. fz_try(ctx)
  2078. {
  2079. /* First, we do a prepass across the document to load all the objects
  2080. * into memory. We'll end up doing this later on anyway, but by doing
  2081. * it here, we force any repairs to happen before writing proper
  2082. * starts. */
  2083. prepass(ctx, doc);
  2084. xref_len = pdf_xref_len(ctx, doc);
  2085. initialise_write_state(ctx, doc, in_opts, opts);
  2086. if (in_opts->do_labels)
  2087. opts->labels = pdf_load_object_labels(ctx, doc);
  2088. if (!opts->dont_regenerate_id)
  2089. {
  2090. /* Update second half of ID array if it exists. */
  2091. id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
  2092. if (id)
  2093. change_identity(ctx, doc, id);
  2094. }
  2095. /* Remove encryption dictionary if saving without encryption. */
  2096. if (opts->do_encrypt == PDF_ENCRYPT_NONE)
  2097. {
  2098. assert(!in_opts->do_snapshot);
  2099. pdf_dict_del(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
  2100. }
  2101. /* Keep encryption dictionary if saving with old encryption. */
  2102. else if (opts->do_encrypt == PDF_ENCRYPT_KEEP)
  2103. {
  2104. opts->crypt = doc->crypt;
  2105. }
  2106. /* Create encryption dictionary if saving with new encryption. */
  2107. else
  2108. {
  2109. assert(!opts->do_snapshot);
  2110. if (!id)
  2111. id = new_identity(ctx, doc);
  2112. id1 = pdf_array_get(ctx, id, 0);
  2113. opts->crypt = pdf_new_encrypt(ctx, opts->opwd_utf8, opts->upwd_utf8, id1, opts->permissions, opts->do_encrypt);
  2114. create_encryption_dictionary(ctx, doc, opts->crypt);
  2115. }
  2116. /* Stash Encrypt entry in the writer state, in case a repair pass throws away the old trailer. */
  2117. opts->crypt_obj = pdf_keep_obj(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt)));
  2118. /* If we're writing a snapshot, we can't be doing garbage
  2119. * collection, or linearisation, and must be writing
  2120. * incrementally. */
  2121. assert(!opts->do_snapshot || opts->do_garbage == 0);
  2122. /* Make sure any objects hidden in compressed streams have been loaded */
  2123. if (!opts->do_incremental)
  2124. {
  2125. pdf_ensure_solid_xref(ctx, doc, xref_len);
  2126. preloadobjstms(ctx, doc);
  2127. }
  2128. /* If we're using objstms, then the version must be at least 1.5 */
  2129. if (opts->do_use_objstms && pdf_version(ctx, doc) < 15)
  2130. {
  2131. pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
  2132. pdf_obj *version = pdf_dict_get(ctx, root, PDF_NAME(Version));
  2133. doc->version = 15;
  2134. if (opts->do_incremental || version != NULL)
  2135. {
  2136. pdf_dict_put(ctx, root, PDF_NAME(Version), PDF_NAME(1_5));
  2137. }
  2138. }
  2139. if (opts->do_preserve_metadata)
  2140. opts->metadata = pdf_keep_obj(ctx, pdf_metadata(ctx, doc));
  2141. xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
  2142. expand_lists(ctx, opts, xref_len);
  2143. do
  2144. {
  2145. changed = 0;
  2146. /* Sweep & mark objects from the trailer */
  2147. if (opts->do_garbage >= 1)
  2148. {
  2149. /* Start by removing indirect /Length attributes on streams */
  2150. for (num = 0; num < xref_len; num++)
  2151. bake_stream_length(ctx, doc, num);
  2152. (void)markobj(ctx, doc, opts, pdf_trailer(ctx, doc));
  2153. }
  2154. else
  2155. {
  2156. for (num = 0; num < xref_len; num++)
  2157. opts->use_list[num] = 1;
  2158. }
  2159. /* Coalesce and renumber duplicate objects */
  2160. if (opts->do_garbage >= 3)
  2161. changed = removeduplicateobjs(ctx, doc, opts);
  2162. /* Compact xref by renumbering and removing unused objects */
  2163. if (opts->do_garbage >= 2)
  2164. compactxref(ctx, doc, opts);
  2165. /* Make renumbering affect all indirect references and update xref */
  2166. if (opts->do_garbage >= 2)
  2167. renumberobjs(ctx, doc, opts);
  2168. }
  2169. while (changed);
  2170. opts->crypt_object_number = 0;
  2171. if (opts->crypt)
  2172. {
  2173. pdf_obj *crypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
  2174. opts->crypt_object_number = pdf_to_num(ctx, crypt);
  2175. }
  2176. xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
  2177. expand_lists(ctx, opts, xref_len);
  2178. /* If we're about to do a non-incremental write, we can't
  2179. * afford to leave any objects in ObjStms. We might have
  2180. * changed the objects, and we won't know to update the
  2181. * stream. So pull all the objects into memory. */
  2182. if (!opts->do_incremental)
  2183. unpack_objstm_objs(ctx, doc, xref_len);
  2184. if (opts->do_use_objstms)
  2185. gather_to_objstms(ctx, doc, opts, xref_len);
  2186. xref_len = pdf_xref_len(ctx, doc); /* May have changed due to the gather */
  2187. expand_lists(ctx, opts, xref_len);
  2188. /* Truncate the xref after compacting and renumbering */
  2189. if ((opts->do_garbage >= 2) &&
  2190. !opts->do_incremental)
  2191. {
  2192. while (xref_len > 0 && !opts->use_list[xref_len-1])
  2193. xref_len--;
  2194. }
  2195. if (opts->do_incremental)
  2196. {
  2197. int i;
  2198. doc->disallow_new_increments = 1;
  2199. for (i = 0; i < doc->num_incremental_sections; i++)
  2200. {
  2201. doc->xref_base = doc->num_incremental_sections - i - 1;
  2202. xref_len = pdf_xref_len(ctx, doc);
  2203. writeobjects(ctx, doc, opts);
  2204. #ifdef DEBUG_WRITING
  2205. dump_object_details(ctx, doc, opts);
  2206. #endif
  2207. for (num = 0; num < xref_len; num++)
  2208. {
  2209. if (!opts->use_list[num] && pdf_xref_is_incremental(ctx, doc, num))
  2210. {
  2211. /* Make unreusable. FIXME: would be better to link to existing free list */
  2212. opts->gen_list[num] = 65535;
  2213. opts->ofs_list[num] = 0;
  2214. }
  2215. }
  2216. current_offset = fz_tell_output(ctx, opts->out);
  2217. if (!doc->last_xref_was_old_style || opts->do_use_objstms)
  2218. writexrefstream(ctx, doc, opts, 0, xref_len, 1, current_offset);
  2219. else
  2220. writexref(ctx, doc, opts, 0, xref_len, 1, current_offset);
  2221. doc->xref_sections[doc->xref_base].end_ofs = fz_tell_output(ctx, opts->out);
  2222. }
  2223. doc->xref_base = 0;
  2224. doc->disallow_new_increments = 0;
  2225. }
  2226. else
  2227. {
  2228. writeobjects(ctx, doc, opts);
  2229. #ifdef DEBUG_WRITING
  2230. dump_object_details(ctx, doc, opts);
  2231. #endif
  2232. /* Construct linked list of free object slots */
  2233. lastfree = 0;
  2234. for (num = 0; num < xref_len; num++)
  2235. {
  2236. if (!opts->use_list[num])
  2237. {
  2238. opts->gen_list[num]++;
  2239. opts->ofs_list[lastfree] = num;
  2240. lastfree = num;
  2241. }
  2242. }
  2243. opts->gen_list[0] = 0xffff;
  2244. current_offset = fz_tell_output(ctx, opts->out);
  2245. if (opts->do_use_objstms)
  2246. writexrefstream(ctx, doc, opts, 0, xref_len, 1, current_offset);
  2247. else
  2248. writexref(ctx, doc, opts, 0, xref_len, 1, current_offset);
  2249. doc->xref_sections[0].end_ofs = fz_tell_output(ctx, opts->out);
  2250. }
  2251. if (!in_opts->do_snapshot)
  2252. {
  2253. complete_signatures(ctx, doc, opts);
  2254. }
  2255. pdf_end_operation(ctx, doc);
  2256. }
  2257. fz_always(ctx)
  2258. {
  2259. finalise_write_state(ctx, opts);
  2260. if (opts->crypt != doc->crypt)
  2261. pdf_drop_crypt(ctx, opts->crypt);
  2262. pdf_drop_obj(ctx, opts->crypt_obj);
  2263. pdf_drop_obj(ctx, opts->metadata);
  2264. doc->save_in_progress = 0;
  2265. }
  2266. fz_catch(ctx)
  2267. {
  2268. pdf_abandon_operation(ctx, doc);
  2269. fz_rethrow(ctx);
  2270. }
  2271. }
  2272. int pdf_has_unsaved_sigs(fz_context *ctx, pdf_document *doc)
  2273. {
  2274. int s;
  2275. for (s = 0; s < doc->num_incremental_sections; s++)
  2276. {
  2277. pdf_xref *xref = &doc->xref_sections[doc->num_incremental_sections - s - 1];
  2278. if (xref->unsaved_sigs)
  2279. return 1;
  2280. }
  2281. return 0;
  2282. }
  2283. void pdf_write_document(fz_context *ctx, pdf_document *doc, fz_output *out, const pdf_write_options *in_opts)
  2284. {
  2285. pdf_write_options opts_defaults = pdf_default_write_options;
  2286. pdf_write_state opts = { 0 };
  2287. if (!doc || !out)
  2288. return;
  2289. if (!in_opts)
  2290. in_opts = &opts_defaults;
  2291. if (in_opts->do_incremental && doc->repair_attempted)
  2292. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes on a repaired file");
  2293. if (in_opts->do_incremental && in_opts->do_garbage)
  2294. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes with garbage collection");
  2295. if (in_opts->do_linear)
  2296. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Linearisation is no longer supported");
  2297. if (in_opts->do_incremental && in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
  2298. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes when changing encryption");
  2299. if (in_opts->do_snapshot)
  2300. {
  2301. if (in_opts->do_incremental == 0 ||
  2302. in_opts->do_pretty ||
  2303. in_opts->do_ascii ||
  2304. in_opts->do_compress ||
  2305. in_opts->do_compress_images ||
  2306. in_opts->do_compress_fonts ||
  2307. in_opts->do_decompress ||
  2308. in_opts->do_garbage ||
  2309. in_opts->do_linear ||
  2310. in_opts->do_clean ||
  2311. in_opts->do_sanitize ||
  2312. in_opts->do_appearance ||
  2313. in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
  2314. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't use these options when snapshotting!");
  2315. }
  2316. if (pdf_has_unsaved_sigs(ctx, doc) && !fz_output_supports_stream(ctx, out))
  2317. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't write pdf that has unsaved sigs to a fz_output unless it supports fz_stream_from_output!");
  2318. prepare_for_save(ctx, doc, in_opts);
  2319. opts.out = out;
  2320. do_pdf_save_document(ctx, doc, &opts, in_opts);
  2321. }
  2322. void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, const pdf_write_options *in_opts)
  2323. {
  2324. pdf_write_options opts_defaults = pdf_default_write_options;
  2325. pdf_write_state opts = { 0 };
  2326. if (!doc)
  2327. return;
  2328. if (!in_opts)
  2329. in_opts = &opts_defaults;
  2330. if (in_opts->do_incremental && !doc->file)
  2331. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes on a new document");
  2332. if (in_opts->do_incremental && doc->repair_attempted)
  2333. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes on a repaired file");
  2334. if (in_opts->do_incremental && in_opts->do_garbage)
  2335. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes with garbage collection");
  2336. if (in_opts->do_linear)
  2337. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Linearisation is no longer supported");
  2338. if (in_opts->do_incremental && in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
  2339. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes when changing encryption");
  2340. if (in_opts->do_snapshot)
  2341. {
  2342. if (in_opts->do_incremental == 0 ||
  2343. in_opts->do_pretty ||
  2344. in_opts->do_ascii ||
  2345. in_opts->do_compress ||
  2346. in_opts->do_compress_images ||
  2347. in_opts->do_compress_fonts ||
  2348. in_opts->do_decompress ||
  2349. in_opts->do_garbage ||
  2350. in_opts->do_clean ||
  2351. in_opts->do_sanitize ||
  2352. in_opts->do_appearance ||
  2353. in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
  2354. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't use these options when snapshotting!");
  2355. }
  2356. if (in_opts->do_appearance > 0)
  2357. {
  2358. int i, n = pdf_count_pages(ctx, doc);
  2359. for (i = 0; i < n; ++i)
  2360. {
  2361. pdf_page *page = pdf_load_page(ctx, doc, i);
  2362. fz_try(ctx)
  2363. {
  2364. pdf_annot *annot;
  2365. for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
  2366. if (in_opts->do_appearance > 1)
  2367. pdf_annot_request_resynthesis(ctx, annot);
  2368. else
  2369. pdf_annot_request_synthesis(ctx, annot);
  2370. for (annot = pdf_first_widget(ctx, page); annot; annot = pdf_next_widget(ctx, annot))
  2371. if (in_opts->do_appearance > 1)
  2372. pdf_annot_request_resynthesis(ctx, annot);
  2373. else
  2374. pdf_annot_request_synthesis(ctx, annot);
  2375. pdf_update_page(ctx, page);
  2376. }
  2377. fz_always(ctx)
  2378. fz_drop_page(ctx, &page->super);
  2379. fz_catch(ctx)
  2380. fz_warn(ctx, "could not create annotation appearances");
  2381. }
  2382. }
  2383. if (in_opts->do_incremental)
  2384. opts.bias = doc->bias;
  2385. prepare_for_save(ctx, doc, in_opts);
  2386. if (in_opts->do_incremental)
  2387. {
  2388. opts.out = fz_new_output_with_path(ctx, filename, 1);
  2389. }
  2390. else
  2391. {
  2392. opts.out = fz_new_output_with_path(ctx, filename, 0);
  2393. }
  2394. fz_try(ctx)
  2395. {
  2396. do_pdf_save_document(ctx, doc, &opts, in_opts);
  2397. fz_close_output(ctx, opts.out);
  2398. }
  2399. fz_always(ctx)
  2400. {
  2401. fz_drop_output(ctx, opts.out);
  2402. opts.out = NULL;
  2403. }
  2404. fz_catch(ctx)
  2405. {
  2406. fz_rethrow(ctx);
  2407. }
  2408. }
  2409. void pdf_save_snapshot(fz_context *ctx, pdf_document *doc, const char *filename)
  2410. {
  2411. pdf_save_document(ctx, doc, filename, &pdf_snapshot_write_options);
  2412. }
  2413. void pdf_write_snapshot(fz_context *ctx, pdf_document *doc, fz_output *out)
  2414. {
  2415. pdf_write_document(ctx, doc, out, &pdf_snapshot_write_options);
  2416. }
  2417. char *
  2418. pdf_format_write_options(fz_context *ctx, char *buffer, size_t buffer_len, const pdf_write_options *opts)
  2419. {
  2420. #define ADD_OPT(S) do { if (!first) fz_strlcat(buffer, ",", buffer_len); fz_strlcat(buffer, (S), buffer_len); first = 0; } while (0)
  2421. int first = 1;
  2422. *buffer = 0;
  2423. if (opts->do_decompress)
  2424. ADD_OPT("decompress=yes");
  2425. if (opts->do_compress)
  2426. ADD_OPT("compress=yes");
  2427. if (opts->do_compress_fonts)
  2428. ADD_OPT("compress-fonts=yes");
  2429. if (opts->do_compress_images)
  2430. ADD_OPT("compress-images=yes");
  2431. if (opts->do_ascii)
  2432. ADD_OPT("ascii=yes");
  2433. if (opts->do_pretty)
  2434. ADD_OPT("pretty=yes");
  2435. if (opts->do_linear)
  2436. ADD_OPT("linearize=yes");
  2437. if (opts->do_clean)
  2438. ADD_OPT("clean=yes");
  2439. if (opts->do_sanitize)
  2440. ADD_OPT("sanitize=yes");
  2441. if (opts->do_incremental)
  2442. ADD_OPT("incremental=yes");
  2443. if (opts->do_encrypt == PDF_ENCRYPT_NONE)
  2444. ADD_OPT("decrypt=yes");
  2445. else if (opts->do_encrypt == PDF_ENCRYPT_KEEP)
  2446. ADD_OPT("decrypt=no");
  2447. switch(opts->do_encrypt)
  2448. {
  2449. default:
  2450. case PDF_ENCRYPT_UNKNOWN:
  2451. break;
  2452. case PDF_ENCRYPT_NONE:
  2453. ADD_OPT("encrypt=no");
  2454. break;
  2455. case PDF_ENCRYPT_KEEP:
  2456. ADD_OPT("encrypt=keep");
  2457. break;
  2458. case PDF_ENCRYPT_RC4_40:
  2459. ADD_OPT("encrypt=rc4-40");
  2460. break;
  2461. case PDF_ENCRYPT_RC4_128:
  2462. ADD_OPT("encrypt=rc4-128");
  2463. break;
  2464. case PDF_ENCRYPT_AES_128:
  2465. ADD_OPT("encrypt=aes-128");
  2466. break;
  2467. case PDF_ENCRYPT_AES_256:
  2468. ADD_OPT("encrypt=aes-256");
  2469. break;
  2470. }
  2471. if (strlen(opts->opwd_utf8)) {
  2472. ADD_OPT("owner-password=");
  2473. fz_strlcat(buffer, opts->opwd_utf8, buffer_len);
  2474. }
  2475. if (strlen(opts->upwd_utf8)) {
  2476. ADD_OPT("user-password=");
  2477. fz_strlcat(buffer, opts->upwd_utf8, buffer_len);
  2478. }
  2479. {
  2480. char temp[32];
  2481. ADD_OPT("permissions=");
  2482. fz_snprintf(temp, sizeof(temp), "%d", opts->permissions);
  2483. fz_strlcat(buffer, temp, buffer_len);
  2484. }
  2485. switch(opts->do_garbage)
  2486. {
  2487. case 0:
  2488. break;
  2489. case 1:
  2490. ADD_OPT("garbage=yes");
  2491. break;
  2492. case 2:
  2493. ADD_OPT("garbage=compact");
  2494. break;
  2495. case 3:
  2496. ADD_OPT("garbage=deduplicate");
  2497. break;
  2498. default:
  2499. {
  2500. char temp[32];
  2501. fz_snprintf(temp, sizeof(temp), "%d", opts->do_garbage);
  2502. ADD_OPT("garbage=");
  2503. fz_strlcat(buffer, temp, buffer_len);
  2504. break;
  2505. }
  2506. }
  2507. switch(opts->do_appearance)
  2508. {
  2509. case 1:
  2510. ADD_OPT("appearance=yes");
  2511. break;
  2512. case 2:
  2513. ADD_OPT("appearance=all");
  2514. break;
  2515. }
  2516. #undef ADD_OPT
  2517. return buffer;
  2518. }
  2519. typedef struct
  2520. {
  2521. fz_document_writer super;
  2522. pdf_document *pdf;
  2523. pdf_write_options opts;
  2524. fz_output *out;
  2525. fz_rect mediabox;
  2526. pdf_obj *resources;
  2527. fz_buffer *contents;
  2528. } pdf_writer;
  2529. static fz_device *
  2530. pdf_writer_begin_page(fz_context *ctx, fz_document_writer *wri_, fz_rect mediabox)
  2531. {
  2532. pdf_writer *wri = (pdf_writer*)wri_;
  2533. wri->mediabox = mediabox; // TODO: handle non-zero x0,y0
  2534. return pdf_page_write(ctx, wri->pdf, wri->mediabox, &wri->resources, &wri->contents);
  2535. }
  2536. static void
  2537. pdf_writer_end_page(fz_context *ctx, fz_document_writer *wri_, fz_device *dev)
  2538. {
  2539. pdf_writer *wri = (pdf_writer*)wri_;
  2540. pdf_obj *obj = NULL;
  2541. fz_var(obj);
  2542. fz_try(ctx)
  2543. {
  2544. fz_close_device(ctx, dev);
  2545. obj = pdf_add_page(ctx, wri->pdf, wri->mediabox, 0, wri->resources, wri->contents);
  2546. pdf_insert_page(ctx, wri->pdf, -1, obj);
  2547. }
  2548. fz_always(ctx)
  2549. {
  2550. fz_drop_device(ctx, dev);
  2551. pdf_drop_obj(ctx, obj);
  2552. fz_drop_buffer(ctx, wri->contents);
  2553. wri->contents = NULL;
  2554. pdf_drop_obj(ctx, wri->resources);
  2555. wri->resources = NULL;
  2556. }
  2557. fz_catch(ctx)
  2558. fz_rethrow(ctx);
  2559. }
  2560. static void
  2561. pdf_writer_close_writer(fz_context *ctx, fz_document_writer *wri_)
  2562. {
  2563. pdf_writer *wri = (pdf_writer*)wri_;
  2564. pdf_write_document(ctx, wri->pdf, wri->out, &wri->opts);
  2565. fz_close_output(ctx, wri->out);
  2566. }
  2567. static void
  2568. pdf_writer_drop_writer(fz_context *ctx, fz_document_writer *wri_)
  2569. {
  2570. pdf_writer *wri = (pdf_writer*)wri_;
  2571. fz_drop_buffer(ctx, wri->contents);
  2572. pdf_drop_obj(ctx, wri->resources);
  2573. pdf_drop_document(ctx, wri->pdf);
  2574. fz_drop_output(ctx, wri->out);
  2575. }
  2576. fz_document_writer *
  2577. fz_new_pdf_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
  2578. {
  2579. pdf_writer *wri;
  2580. fz_var(wri);
  2581. fz_try(ctx)
  2582. {
  2583. wri = fz_new_derived_document_writer(ctx, pdf_writer, pdf_writer_begin_page, pdf_writer_end_page, pdf_writer_close_writer, pdf_writer_drop_writer);
  2584. pdf_parse_write_options(ctx, &wri->opts, options);
  2585. wri->out = out;
  2586. wri->pdf = pdf_create_document(ctx);
  2587. }
  2588. fz_catch(ctx)
  2589. {
  2590. fz_drop_output(ctx, out);
  2591. pdf_drop_document(ctx, wri->pdf);
  2592. fz_free(ctx, wri);
  2593. fz_rethrow(ctx);
  2594. }
  2595. return (fz_document_writer*)wri;
  2596. }
  2597. fz_document_writer *
  2598. fz_new_pdf_writer(fz_context *ctx, const char *path, const char *options)
  2599. {
  2600. fz_output *out = fz_new_output_with_path(ctx, path ? path : "out.pdf", 0);
  2601. return fz_new_pdf_writer_with_output(ctx, out, options);
  2602. }
  2603. void pdf_write_journal(fz_context *ctx, pdf_document *doc, fz_output *out)
  2604. {
  2605. if (!doc || !out)
  2606. return;
  2607. if (!doc->journal)
  2608. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't write non-existent journal");
  2609. pdf_serialise_journal(ctx, doc, out);
  2610. }
  2611. void pdf_save_journal(fz_context *ctx, pdf_document *doc, const char *filename)
  2612. {
  2613. fz_output *out;
  2614. if (!doc)
  2615. return;
  2616. out = fz_new_output_with_path(ctx, filename, 0);
  2617. fz_try(ctx)
  2618. {
  2619. pdf_write_journal(ctx, doc, out);
  2620. fz_close_output(ctx, out);
  2621. }
  2622. fz_always(ctx)
  2623. fz_drop_output(ctx, out);
  2624. fz_catch(ctx)
  2625. fz_rethrow(ctx);
  2626. }
  2627. void pdf_read_journal(fz_context *ctx, pdf_document *doc, fz_stream *stm)
  2628. {
  2629. pdf_deserialise_journal(ctx, doc, stm);
  2630. }
  2631. void pdf_load_journal(fz_context *ctx, pdf_document *doc, const char *filename)
  2632. {
  2633. fz_stream *stm;
  2634. if (!doc)
  2635. return;
  2636. stm = fz_open_file(ctx, filename);
  2637. fz_try(ctx)
  2638. pdf_read_journal(ctx, doc, stm);
  2639. fz_always(ctx)
  2640. fz_drop_stream(ctx, stm);
  2641. fz_catch(ctx)
  2642. fz_rethrow(ctx);
  2643. }