pdf-xref.c 136 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
7527852795280528152825283528452855286528752885289529052915292529352945295529652975298
  1. // Copyright (C) 2004-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include "pdf-annot-imp.h"
  24. #include "pdf-imp.h"
  25. #include <assert.h>
  26. #include <limits.h>
  27. #include <string.h>
  /* Debug tracing hook. DEBUG_PROGESSIVE_ADVANCE is force-undefined here, so
   * DEBUGMESS(A) normally expands to an empty statement and its argument is
   * never evaluated. When the symbol is defined instead, DEBUGMESS((...))
   * forwards the parenthesised argument list to fz_warn. */
  #undef DEBUG_PROGESSIVE_ADVANCE
  #ifndef DEBUG_PROGESSIVE_ADVANCE
  #define DEBUGMESS(A) do { } while (0)
  #else
  #define DEBUGMESS(A) do { fz_warn A; } while (0)
  #endif
  /* Locale-independent test for an ASCII decimal digit ('0'..'9').
   * The parameter is parenthesised so that compound arguments (for example
   * a conditional expression) expand with the intended precedence. The
   * argument is still evaluated twice, so do not pass expressions with
   * side effects. */
  #define isdigit(c) ((c) >= '0' && (c) <= '9')
  /* Return 1 when ch is one of the six byte values this file treats as
   * whitespace (NUL, TAB, LF, FF, CR, SPACE), otherwise 0. */
  static inline int iswhite(int ch)
  {
      switch (ch)
      {
      case '\000':
      case '\011':
      case '\012':
      case '\014':
      case '\015':
      case '\040':
          return 1;
      default:
          return 0;
      }
  }
  41. /*
  42. * xref tables
  43. */
  44. static void
  45. pdf_drop_xref_subsec(fz_context *ctx, pdf_xref *xref)
  46. {
  47. pdf_xref_subsec *sub = xref->subsec;
  48. pdf_unsaved_sig *usig;
  49. int e;
  50. while (sub != NULL)
  51. {
  52. pdf_xref_subsec *next_sub = sub->next;
  53. for (e = 0; e < sub->len; e++)
  54. {
  55. pdf_xref_entry *entry = &sub->table[e];
  56. pdf_drop_obj(ctx, entry->obj);
  57. fz_drop_buffer(ctx, entry->stm_buf);
  58. }
  59. fz_free(ctx, sub->table);
  60. fz_free(ctx, sub);
  61. sub = next_sub;
  62. }
  63. pdf_drop_obj(ctx, xref->pre_repair_trailer);
  64. pdf_drop_obj(ctx, xref->trailer);
  65. while ((usig = xref->unsaved_sigs) != NULL)
  66. {
  67. xref->unsaved_sigs = usig->next;
  68. pdf_drop_obj(ctx, usig->field);
  69. pdf_drop_signer(ctx, usig->signer);
  70. fz_free(ctx, usig);
  71. }
  72. }
  73. static void pdf_drop_xref_sections_imp(fz_context *ctx, pdf_document *doc, pdf_xref *xref_sections, int num_xref_sections)
  74. {
  75. int x;
  76. for (x = 0; x < num_xref_sections; x++)
  77. pdf_drop_xref_subsec(ctx, &xref_sections[x]);
  78. fz_free(ctx, xref_sections);
  79. }
  80. static void pdf_drop_xref_sections(fz_context *ctx, pdf_document *doc)
  81. {
  82. pdf_drop_xref_sections_imp(ctx, doc, doc->saved_xref_sections, doc->saved_num_xref_sections);
  83. pdf_drop_xref_sections_imp(ctx, doc, doc->xref_sections, doc->num_xref_sections);
  84. doc->saved_xref_sections = NULL;
  85. doc->saved_num_xref_sections = 0;
  86. doc->xref_sections = NULL;
  87. doc->num_xref_sections = 0;
  88. doc->num_incremental_sections = 0;
  89. }
  90. static void
  91. extend_xref_index(fz_context *ctx, pdf_document *doc, int newlen)
  92. {
  93. int i;
  94. doc->xref_index = fz_realloc_array(ctx, doc->xref_index, newlen, int);
  95. for (i = doc->max_xref_len; i < newlen; i++)
  96. {
  97. doc->xref_index[i] = 0;
  98. }
  99. doc->max_xref_len = newlen;
  100. }
  101. static void
  102. resize_xref_sub(fz_context *ctx, pdf_xref *xref, int base, int newlen)
  103. {
  104. pdf_xref_subsec *sub;
  105. int i;
  106. assert(xref != NULL);
  107. sub = xref->subsec;
  108. assert(sub->next == NULL && sub->start == base && sub->len+base == xref->num_objects);
  109. assert(newlen+base > xref->num_objects);
  110. sub->table = fz_realloc_array(ctx, sub->table, newlen, pdf_xref_entry);
  111. for (i = sub->len; i < newlen; i++)
  112. {
  113. sub->table[i].type = 0;
  114. sub->table[i].ofs = 0;
  115. sub->table[i].gen = 0;
  116. sub->table[i].num = 0;
  117. sub->table[i].stm_ofs = 0;
  118. sub->table[i].stm_buf = NULL;
  119. sub->table[i].obj = NULL;
  120. }
  121. sub->len = newlen;
  122. if (newlen+base > xref->num_objects)
  123. xref->num_objects = newlen+base;
  124. }
  125. /* This is only ever called when we already have an incremental
  126. * xref. This means there will only be 1 subsec, and it will be
  127. * a complete subsec. */
  128. static void pdf_resize_xref(fz_context *ctx, pdf_document *doc, int newlen)
  129. {
  130. pdf_xref *xref = &doc->xref_sections[doc->xref_base];
  131. resize_xref_sub(ctx, xref, 0, newlen);
  132. if (doc->max_xref_len < newlen)
  133. extend_xref_index(ctx, doc, newlen);
  134. }
  135. static void pdf_populate_next_xref_level(fz_context *ctx, pdf_document *doc)
  136. {
  137. pdf_xref *xref;
  138. doc->xref_sections = fz_realloc_array(ctx, doc->xref_sections, doc->num_xref_sections + 1, pdf_xref);
  139. doc->num_xref_sections++;
  140. xref = &doc->xref_sections[doc->num_xref_sections - 1];
  141. xref->subsec = NULL;
  142. xref->num_objects = 0;
  143. xref->trailer = NULL;
  144. xref->pre_repair_trailer = NULL;
  145. xref->unsaved_sigs = NULL;
  146. xref->unsaved_sigs_end = NULL;
  147. }
  148. pdf_obj *pdf_trailer(fz_context *ctx, pdf_document *doc)
  149. {
  150. /* Return the document's trailer (of the appropriate vintage) */
  151. pdf_xref *xrefs = doc->xref_sections;
  152. return xrefs ? xrefs[doc->xref_base].trailer : NULL;
  153. }
  154. void pdf_set_populating_xref_trailer(fz_context *ctx, pdf_document *doc, pdf_obj *trailer)
  155. {
  156. /* Update the trailer of the xref section being populated */
  157. pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections - 1];
  158. if (xref->trailer)
  159. {
  160. pdf_drop_obj(ctx, xref->pre_repair_trailer);
  161. xref->pre_repair_trailer = xref->trailer;
  162. }
  163. xref->trailer = pdf_keep_obj(ctx, trailer);
  164. }
  165. int pdf_xref_len(fz_context *ctx, pdf_document *doc)
  166. {
  167. int i = doc->xref_base;
  168. int xref_len = 0;
  169. if (doc->local_xref && doc->local_xref_nesting > 0)
  170. xref_len = doc->local_xref->num_objects;
  171. while (i < doc->num_xref_sections)
  172. xref_len = fz_maxi(xref_len, doc->xref_sections[i++].num_objects);
  173. return xref_len;
  174. }
  175. /* Ensure that the given xref has a single subsection
  176. * that covers the entire range. */
  177. static void
  178. ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num, int which)
  179. {
  180. pdf_xref *xref = &doc->xref_sections[which];
  181. pdf_xref_subsec *sub = xref->subsec;
  182. pdf_xref_subsec *new_sub;
  183. if (num < xref->num_objects)
  184. num = xref->num_objects;
  185. if (sub != NULL && sub->next == NULL && sub->start == 0 && sub->len >= num)
  186. return;
  187. new_sub = fz_malloc_struct(ctx, pdf_xref_subsec);
  188. fz_try(ctx)
  189. {
  190. new_sub->table = fz_malloc_struct_array(ctx, num, pdf_xref_entry);
  191. new_sub->start = 0;
  192. new_sub->len = num;
  193. new_sub->next = NULL;
  194. }
  195. fz_catch(ctx)
  196. {
  197. fz_free(ctx, new_sub);
  198. fz_rethrow(ctx);
  199. }
  200. /* Move objects over to the new subsection and destroy the old
  201. * ones */
  202. sub = xref->subsec;
  203. while (sub != NULL)
  204. {
  205. pdf_xref_subsec *next = sub->next;
  206. int i;
  207. for (i = 0; i < sub->len; i++)
  208. {
  209. new_sub->table[i+sub->start] = sub->table[i];
  210. }
  211. fz_free(ctx, sub->table);
  212. fz_free(ctx, sub);
  213. sub = next;
  214. }
  215. xref->num_objects = num;
  216. xref->subsec = new_sub;
  217. if (doc->max_xref_len < num)
  218. extend_xref_index(ctx, doc, num);
  219. }
  220. static pdf_xref_entry *
  221. pdf_get_local_xref_entry(fz_context *ctx, pdf_document *doc, int num)
  222. {
  223. pdf_xref *xref = doc->local_xref;
  224. pdf_xref_subsec *sub;
  225. if (xref == NULL || doc->local_xref_nesting == 0)
  226. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Local xref not present!");
  227. /* Local xrefs only ever have 1 section, and it should be solid. */
  228. sub = xref->subsec;
  229. assert(sub && !sub->next);
  230. if (num >= sub->start && num < sub->start + sub->len)
  231. return &sub->table[num - sub->start];
  232. /* Expand the xref so we can return a pointer. */
  233. resize_xref_sub(ctx, xref, 0, num+1);
  234. sub = xref->subsec;
  235. return &sub->table[num - sub->start];
  236. }
  237. pdf_xref_entry *pdf_get_populating_xref_entry(fz_context *ctx, pdf_document *doc, int num)
  238. {
  239. /* Return an entry within the xref currently being populated */
  240. pdf_xref *xref;
  241. pdf_xref_subsec *sub;
  242. if (doc->num_xref_sections == 0)
  243. {
  244. doc->xref_sections = fz_malloc_struct(ctx, pdf_xref);
  245. doc->num_xref_sections = 1;
  246. }
  247. if (doc->local_xref && doc->local_xref_nesting > 0)
  248. return pdf_get_local_xref_entry(ctx, doc, num);
  249. /* Prevent accidental heap underflow */
  250. if (num < 0 || num > PDF_MAX_OBJECT_NUMBER)
  251. fz_throw(ctx, FZ_ERROR_ARGUMENT, "object number out of range (%d)", num);
  252. /* Return the pointer to the entry in the last section. */
  253. xref = &doc->xref_sections[doc->num_xref_sections-1];
  254. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  255. {
  256. if (num >= sub->start && num < sub->start + sub->len)
  257. return &sub->table[num-sub->start];
  258. }
  259. /* We've been asked for an object that's not in a subsec. */
  260. ensure_solid_xref(ctx, doc, num+1, doc->num_xref_sections-1);
  261. xref = &doc->xref_sections[doc->num_xref_sections-1];
  262. sub = xref->subsec;
  263. return &sub->table[num-sub->start];
  264. }
  265. /* It is vital that pdf_get_xref_entry_aux called with !solidify_if_needed
  266. * and a value object number, does NOT try/catch or throw. */
  267. static
  268. pdf_xref_entry *pdf_get_xref_entry_aux(fz_context *ctx, pdf_document *doc, int i, int solidify_if_needed)
  269. {
  270. pdf_xref *xref = NULL;
  271. pdf_xref_subsec *sub;
  272. int j;
  273. if (i < 0)
  274. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Negative object number requested");
  275. if (i < doc->max_xref_len)
  276. j = doc->xref_index[i];
  277. else
  278. j = 0;
  279. /* If we have an active local xref, check there first. */
  280. if (doc->local_xref && doc->local_xref_nesting > 0)
  281. {
  282. xref = doc->local_xref;
  283. if (i < xref->num_objects)
  284. {
  285. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  286. {
  287. pdf_xref_entry *entry;
  288. if (i < sub->start || i >= sub->start + sub->len)
  289. continue;
  290. entry = &sub->table[i - sub->start];
  291. if (entry->type)
  292. return entry;
  293. }
  294. }
  295. }
  296. /* We may be accessing an earlier version of the document using xref_base
  297. * and j may be an index into a later xref section */
  298. if (doc->xref_base > j)
  299. j = doc->xref_base;
  300. else
  301. j = 0;
  302. /* Find the first xref section where the entry is defined. */
  303. for (; j < doc->num_xref_sections; j++)
  304. {
  305. xref = &doc->xref_sections[j];
  306. if (i < xref->num_objects)
  307. {
  308. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  309. {
  310. pdf_xref_entry *entry;
  311. if (i < sub->start || i >= sub->start + sub->len)
  312. continue;
  313. entry = &sub->table[i - sub->start];
  314. if (entry->type)
  315. {
  316. /* Don't update xref_index if xref_base may have
  317. * influenced the value of j */
  318. if (doc->xref_base == 0)
  319. doc->xref_index[i] = j;
  320. return entry;
  321. }
  322. }
  323. }
  324. }
  325. /* Didn't find the entry in any section. Return the entry from
  326. * the local_xref (if there is one active), or the final section. */
  327. if (doc->local_xref && doc->local_xref_nesting > 0)
  328. {
  329. if (xref == NULL || i < xref->num_objects)
  330. {
  331. xref = doc->local_xref;
  332. sub = xref->subsec;
  333. assert(sub != NULL && sub->next == NULL);
  334. if (i >= sub->start && i < sub->start + sub->len)
  335. return &sub->table[i - sub->start];
  336. }
  337. /* Expand the xref so we can return a pointer. */
  338. resize_xref_sub(ctx, xref, 0, i+1);
  339. sub = xref->subsec;
  340. return &sub->table[i - sub->start];
  341. }
  342. doc->xref_index[i] = 0;
  343. if (xref == NULL || i < xref->num_objects)
  344. {
  345. xref = &doc->xref_sections[doc->xref_base];
  346. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  347. {
  348. if (i >= sub->start && i < sub->start + sub->len)
  349. return &sub->table[i - sub->start];
  350. }
  351. }
  352. /* Some really hairy code here. When we are reading the file in
  353. * initially, we read from 'newest' to 'oldest' (i.e. from 0 to
  354. * doc->num_xref_sections-1). Each section is created initially
  355. * with num_objects == 0 in it, and remains like that while we
  356. * are parsing the stream from the file. This is the only time
  357. * we'll ever have xref_sections with 0 objects in them. */
  358. if (doc->xref_sections[doc->num_xref_sections-1].num_objects == 0)
  359. {
  360. /* The oldest xref section has 0 objects in it. So we are
  361. * parsing an xref stream while loading. We don't want to
  362. * solidify the xref we are currently parsing for (as it'll
  363. * get very confused, and end up a different 'shape' in
  364. * memory to that which is in the file, and would hence
  365. * render 'fingerprinting' for snapshotting invalid) so
  366. * just give up at this point. */
  367. return NULL;
  368. }
  369. if (!solidify_if_needed)
  370. return NULL;
  371. /* At this point, we solidify the xref. This ensures that we
  372. * can return a pointer. This is the only case where this function
  373. * might throw an exception, and it will never happen when we are
  374. * working within a 'solid' xref. */
  375. ensure_solid_xref(ctx, doc, i+1, 0);
  376. xref = &doc->xref_sections[0];
  377. sub = xref->subsec;
  378. return &sub->table[i - sub->start];
  379. }
  380. pdf_xref_entry *pdf_get_xref_entry(fz_context *ctx, pdf_document *doc, int i)
  381. {
  382. return pdf_get_xref_entry_aux(ctx, doc, i, 1);
  383. }
  384. pdf_xref_entry *pdf_get_xref_entry_no_change(fz_context *ctx, pdf_document *doc, int i)
  385. {
  386. return pdf_get_xref_entry_aux(ctx, doc, i, 0);
  387. }
  388. pdf_xref_entry *pdf_get_xref_entry_no_null(fz_context *ctx, pdf_document *doc, int i)
  389. {
  390. pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, i);
  391. if (entry != NULL)
  392. return entry;
  393. fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot find object in xref (%d 0 R), but not allowed to return NULL", i);
  394. }
  395. void pdf_xref_entry_map(fz_context *ctx, pdf_document *doc, void (*fn)(fz_context *, pdf_xref_entry *, int, pdf_document *, void *), void *arg)
  396. {
  397. int i, j;
  398. pdf_xref_subsec *sub;
  399. int xref_base = doc->xref_base;
  400. fz_try(ctx)
  401. {
  402. /* Map over any active local xref first. */
  403. if (doc->local_xref && doc->local_xref_nesting > 0)
  404. {
  405. pdf_xref *xref = doc->local_xref;
  406. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  407. {
  408. for (i = sub->start; i < sub->start + sub->len; i++)
  409. {
  410. pdf_xref_entry *entry = &sub->table[i - sub->start];
  411. if (entry->type)
  412. fn(ctx, entry, i, doc, arg);
  413. }
  414. }
  415. }
  416. for (j = 0; j < doc->num_xref_sections; j++)
  417. {
  418. pdf_xref *xref = &doc->xref_sections[j];
  419. doc->xref_base = j;
  420. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  421. {
  422. for (i = sub->start; i < sub->start + sub->len; i++)
  423. {
  424. pdf_xref_entry *entry = &sub->table[i - sub->start];
  425. if (entry->type)
  426. fn(ctx, entry, i, doc, arg);
  427. }
  428. }
  429. }
  430. }
  431. fz_always(ctx)
  432. {
  433. doc->xref_base = xref_base;
  434. }
  435. fz_catch(ctx)
  436. fz_rethrow(ctx);
  437. }
  438. /*
  439. Ensure we have an incremental xref section where we can store
  440. updated versions of indirect objects. This is a new xref section
  441. consisting of a single xref subsection.
  442. */
  443. static void ensure_incremental_xref(fz_context *ctx, pdf_document *doc)
  444. {
  445. /* If there are as yet no incremental sections, or if the most recent
  446. * one has been used to sign a signature field, then we need a new one.
  447. * After a signing, any further document changes require a new increment */
  448. if ((doc->num_incremental_sections == 0 || doc->xref_sections[0].unsaved_sigs != NULL)
  449. && !doc->disallow_new_increments)
  450. {
  451. pdf_xref *xref = &doc->xref_sections[0];
  452. pdf_xref *pxref;
  453. pdf_xref_entry *new_table = fz_malloc_struct_array(ctx, xref->num_objects, pdf_xref_entry);
  454. pdf_xref_subsec *sub = NULL;
  455. pdf_obj *trailer = NULL;
  456. int i;
  457. fz_var(trailer);
  458. fz_var(sub);
  459. fz_try(ctx)
  460. {
  461. sub = fz_malloc_struct(ctx, pdf_xref_subsec);
  462. trailer = xref->trailer ? pdf_copy_dict(ctx, xref->trailer) : NULL;
  463. doc->xref_sections = fz_realloc_array(ctx, doc->xref_sections, doc->num_xref_sections + 1, pdf_xref);
  464. xref = &doc->xref_sections[0];
  465. pxref = &doc->xref_sections[1];
  466. memmove(pxref, xref, doc->num_xref_sections * sizeof(pdf_xref));
  467. /* xref->num_objects is already correct */
  468. xref->subsec = sub;
  469. sub = NULL;
  470. xref->trailer = trailer;
  471. xref->pre_repair_trailer = NULL;
  472. xref->unsaved_sigs = NULL;
  473. xref->unsaved_sigs_end = NULL;
  474. xref->subsec->next = NULL;
  475. xref->subsec->len = xref->num_objects;
  476. xref->subsec->start = 0;
  477. xref->subsec->table = new_table;
  478. doc->num_xref_sections++;
  479. doc->num_incremental_sections++;
  480. }
  481. fz_catch(ctx)
  482. {
  483. fz_free(ctx, sub);
  484. fz_free(ctx, new_table);
  485. pdf_drop_obj(ctx, trailer);
  486. fz_rethrow(ctx);
  487. }
  488. /* Update the xref_index */
  489. for (i = 0; i < doc->max_xref_len; i++)
  490. {
  491. doc->xref_index[i]++;
  492. }
  493. }
  494. }
  495. /* Used when altering a document */
  496. pdf_xref_entry *pdf_get_incremental_xref_entry(fz_context *ctx, pdf_document *doc, int i)
  497. {
  498. pdf_xref *xref;
  499. pdf_xref_subsec *sub;
  500. /* Make a new final xref section if we haven't already */
  501. ensure_incremental_xref(ctx, doc);
  502. xref = &doc->xref_sections[doc->xref_base];
  503. if (i >= xref->num_objects)
  504. pdf_resize_xref(ctx, doc, i + 1);
  505. sub = xref->subsec;
  506. assert(sub != NULL && sub->next == NULL);
  507. assert(i >= sub->start && i < sub->start + sub->len);
  508. doc->xref_index[i] = 0;
  509. return &sub->table[i - sub->start];
  510. }
  511. int pdf_xref_is_incremental(fz_context *ctx, pdf_document *doc, int num)
  512. {
  513. pdf_xref *xref = &doc->xref_sections[doc->xref_base];
  514. pdf_xref_subsec *sub = xref->subsec;
  515. assert(sub != NULL && sub->next == NULL && sub->len == xref->num_objects && sub->start == 0);
  516. return num < xref->num_objects && sub->table[num].type;
  517. }
  518. /* Used when clearing signatures. Removes the signature
  519. from the list of unsaved signed signatures. */
  520. void pdf_xref_remove_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field)
  521. {
  522. int num = pdf_to_num(ctx, field);
  523. int idx = doc->xref_index[num];
  524. pdf_xref *xref = &doc->xref_sections[idx];
  525. pdf_unsaved_sig **usigptr = &xref->unsaved_sigs;
  526. pdf_unsaved_sig *usig = xref->unsaved_sigs;
  527. while (usig)
  528. {
  529. pdf_unsaved_sig **nextptr = &usig->next;
  530. pdf_unsaved_sig *next = usig->next;
  531. if (usig->field == field)
  532. {
  533. if (xref->unsaved_sigs_end == &usig->next)
  534. {
  535. if (usig->next)
  536. xref->unsaved_sigs_end = &usig->next->next;
  537. else
  538. xref->unsaved_sigs_end = NULL;
  539. }
  540. if (usigptr)
  541. *usigptr = usig->next;
  542. usig->next = NULL;
  543. pdf_drop_obj(ctx, usig->field);
  544. pdf_drop_signer(ctx, usig->signer);
  545. fz_free(ctx, usig);
  546. break;
  547. }
  548. usig = next;
  549. usigptr = nextptr;
  550. }
  551. }
  552. void pdf_xref_store_unsaved_signature(fz_context *ctx, pdf_document *doc, pdf_obj *field, pdf_pkcs7_signer *signer)
  553. {
  554. pdf_xref *xref = &doc->xref_sections[0];
  555. pdf_unsaved_sig *unsaved_sig;
  556. /* Record details within the document structure so that contents
  557. * and byte_range can be updated with their correct values at
  558. * saving time */
  559. unsaved_sig = fz_malloc_struct(ctx, pdf_unsaved_sig);
  560. unsaved_sig->field = pdf_keep_obj(ctx, field);
  561. unsaved_sig->signer = signer->keep(ctx, signer);
  562. unsaved_sig->next = NULL;
  563. if (xref->unsaved_sigs_end == NULL)
  564. xref->unsaved_sigs_end = &xref->unsaved_sigs;
  565. *xref->unsaved_sigs_end = unsaved_sig;
  566. xref->unsaved_sigs_end = &unsaved_sig->next;
  567. }
  568. int pdf_xref_obj_is_unsaved_signature(pdf_document *doc, pdf_obj *obj)
  569. {
  570. int i;
  571. for (i = 0; i < doc->num_incremental_sections; i++)
  572. {
  573. pdf_xref *xref = &doc->xref_sections[i];
  574. pdf_unsaved_sig *usig;
  575. for (usig = xref->unsaved_sigs; usig; usig = usig->next)
  576. {
  577. if (usig->field == obj)
  578. return 1;
  579. }
  580. }
  581. return 0;
  582. }
  583. void pdf_ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num)
  584. {
  585. if (doc->num_xref_sections == 0)
  586. pdf_populate_next_xref_level(ctx, doc);
  587. ensure_solid_xref(ctx, doc, num, 0);
  588. }
  589. int pdf_xref_ensure_incremental_object(fz_context *ctx, pdf_document *doc, int num)
  590. {
  591. pdf_xref_entry *new_entry, *old_entry;
  592. pdf_xref_subsec *sub = NULL;
  593. int i;
  594. pdf_obj *copy;
  595. /* Make sure we have created an xref section for incremental updates */
  596. ensure_incremental_xref(ctx, doc);
  597. /* Search for the section that contains this object */
  598. for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++)
  599. {
  600. pdf_xref *xref = &doc->xref_sections[i];
  601. if (num < 0 && num >= xref->num_objects)
  602. break;
  603. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  604. {
  605. if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
  606. break;
  607. }
  608. if (sub != NULL)
  609. break;
  610. }
  611. /* sub == NULL implies we did not find it */
  612. /* If we don't find it, or it's already in the incremental section, return */
  613. if (i == 0 || sub == NULL)
  614. return 0;
  615. copy = pdf_deep_copy_obj(ctx, sub->table[num - sub->start].obj);
  616. /* Move the object to the incremental section */
  617. i = doc->xref_index[num];
  618. doc->xref_index[num] = 0;
  619. old_entry = &sub->table[num - sub->start];
  620. fz_try(ctx)
  621. new_entry = pdf_get_incremental_xref_entry(ctx, doc, num);
  622. fz_catch(ctx)
  623. {
  624. pdf_drop_obj(ctx, copy);
  625. doc->xref_index[num] = i;
  626. fz_rethrow(ctx);
  627. }
  628. *new_entry = *old_entry;
  629. if (new_entry->type == 'o')
  630. {
  631. new_entry->type = 'n';
  632. new_entry->gen = 0;
  633. }
  634. /* Better keep a copy. We must override the old entry with
  635. * the copy because the caller may be holding a reference to
  636. * the original and expect it to end up in the new entry */
  637. old_entry->obj = copy;
  638. old_entry->stm_buf = NULL;
  639. return 1;
  640. }
  641. void pdf_xref_ensure_local_object(fz_context *ctx, pdf_document *doc, int num)
  642. {
  643. pdf_xref_entry *new_entry, *old_entry;
  644. pdf_xref_subsec *sub = NULL;
  645. int i;
  646. pdf_xref *xref;
  647. pdf_obj *copy;
  648. /* Is it in the local section already? */
  649. xref = doc->local_xref;
  650. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  651. {
  652. if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
  653. break;
  654. }
  655. /* If we found it, it's in the local section already. */
  656. if (sub != NULL)
  657. return;
  658. /* Search for the section that contains this object */
  659. for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++)
  660. {
  661. xref = &doc->xref_sections[i];
  662. if (num < 0 && num >= xref->num_objects)
  663. break;
  664. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  665. {
  666. if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
  667. break;
  668. }
  669. if (sub != NULL)
  670. break;
  671. }
  672. /* sub == NULL implies we did not find it */
  673. if (sub == NULL)
  674. return; /* No object to find */
  675. copy = pdf_deep_copy_obj(ctx, sub->table[num - sub->start].obj);
  676. /* Copy the object to the local section */
  677. i = doc->xref_index[num];
  678. doc->xref_index[num] = 0;
  679. old_entry = &sub->table[num - sub->start];
  680. fz_try(ctx)
  681. new_entry = pdf_get_local_xref_entry(ctx, doc, num);
  682. fz_catch(ctx)
  683. {
  684. pdf_drop_obj(ctx, copy);
  685. doc->xref_index[num] = i;
  686. fz_rethrow(ctx);
  687. }
  688. *new_entry = *old_entry;
  689. if (new_entry->type == 'o')
  690. {
  691. new_entry->type = 'n';
  692. new_entry->gen = 0;
  693. }
  694. new_entry->stm_buf = NULL;
  695. new_entry->obj = NULL;
  696. /* old entry is incremental and may have changes.
  697. * Better keep a copy. We must override the old entry with
  698. * the copy because the caller may be holding a reference to
  699. * the original and expect it to end up in the new entry */
  700. new_entry->obj = old_entry->obj;
  701. old_entry->obj = copy;
  702. new_entry->stm_buf = NULL; /* FIXME */
  703. }
  704. void pdf_replace_xref(fz_context *ctx, pdf_document *doc, pdf_xref_entry *entries, int n)
  705. {
  706. int *xref_index = NULL;
  707. pdf_xref *xref = NULL;
  708. pdf_xref_subsec *sub;
  709. fz_var(xref_index);
  710. fz_var(xref);
  711. fz_try(ctx)
  712. {
  713. xref_index = fz_calloc(ctx, n, sizeof(int));
  714. xref = fz_malloc_struct(ctx, pdf_xref);
  715. sub = fz_malloc_struct(ctx, pdf_xref_subsec);
  716. }
  717. fz_catch(ctx)
  718. {
  719. fz_free(ctx, xref);
  720. fz_free(ctx, xref_index);
  721. fz_rethrow(ctx);
  722. }
  723. sub->table = entries;
  724. sub->start = 0;
  725. sub->len = n;
  726. xref->subsec = sub;
  727. xref->num_objects = n;
  728. xref->trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));
  729. /* The new table completely replaces the previous separate sections */
  730. pdf_drop_xref_sections(ctx, doc);
  731. doc->xref_sections = xref;
  732. doc->num_xref_sections = 1;
  733. doc->num_incremental_sections = 0;
  734. doc->xref_base = 0;
  735. doc->disallow_new_increments = 0;
  736. doc->max_xref_len = n;
  737. fz_free(ctx, doc->xref_index);
  738. doc->xref_index = xref_index;
  739. }
  740. void pdf_forget_xref(fz_context *ctx, pdf_document *doc)
  741. {
  742. pdf_obj *trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));
  743. pdf_drop_local_xref_and_resources(ctx, doc);
  744. if (doc->saved_xref_sections)
  745. pdf_drop_xref_sections_imp(ctx, doc, doc->saved_xref_sections, doc->saved_num_xref_sections);
  746. doc->saved_xref_sections = doc->xref_sections;
  747. doc->saved_num_xref_sections = doc->num_xref_sections;
  748. doc->xref_sections = NULL;
  749. doc->startxref = 0;
  750. doc->num_xref_sections = 0;
  751. doc->num_incremental_sections = 0;
  752. doc->xref_base = 0;
  753. doc->disallow_new_increments = 0;
  754. fz_try(ctx)
  755. {
  756. pdf_get_populating_xref_entry(ctx, doc, 0);
  757. }
  758. fz_catch(ctx)
  759. {
  760. pdf_drop_obj(ctx, trailer);
  761. fz_rethrow(ctx);
  762. }
  763. /* Set the trailer of the final xref section. */
  764. doc->xref_sections[0].trailer = trailer;
  765. }
  766. /*
  767. * magic version tag and startxref
  768. */
  769. int
  770. pdf_version(fz_context *ctx, pdf_document *doc)
  771. {
  772. int version = doc->version;
  773. fz_try(ctx)
  774. {
  775. pdf_obj *obj = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Version), NULL);
  776. const char *str = pdf_to_name(ctx, obj);
  777. if (*str)
  778. version = 10 * (fz_atof(str) + 0.05f);
  779. }
  780. fz_catch(ctx)
  781. {
  782. fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
  783. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  784. fz_report_error(ctx);
  785. fz_warn(ctx, "Ignoring broken Root/Version number.");
  786. }
  787. return version;
  788. }
  789. static void
  790. pdf_load_version(fz_context *ctx, pdf_document *doc)
  791. {
  792. char buf[1024];
  793. char *s = NULL;
  794. size_t i, n;
  795. /* look for '%PDF' version marker within first kilobyte of file */
  796. fz_seek(ctx, doc->file, 0, SEEK_SET);
  797. n = fz_read(ctx, doc->file, (unsigned char*) buf, sizeof buf);
  798. if (n < 5)
  799. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find version marker");
  800. buf[n-1] = 0;
  801. for (i = 0; i < n - 5; i++)
  802. {
  803. if (memcmp(&buf[i], "%PDF-", 5) == 0 || memcmp(&buf[i], "%FDF-", 5) == 0)
  804. {
  805. s = buf + i;
  806. break;
  807. }
  808. }
  809. if (!s)
  810. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find version marker");
  811. if (s[1] == 'F')
  812. doc->is_fdf = 1;
  813. doc->version = 10 * (fz_atof(s+5) + 0.05f);
  814. if ((doc->version < 10 || doc->version > 17) && doc->version != 20)
  815. fz_warn(ctx, "unknown PDF version: %d.%d", doc->version / 10, doc->version % 10);
  816. if (s != buf)
  817. {
  818. fz_warn(ctx, "garbage bytes before version marker");
  819. doc->bias = s - buf;
  820. }
  821. fz_seek(ctx, doc->file, doc->bias, SEEK_SET);
  822. }
  823. static void
  824. pdf_read_start_xref(fz_context *ctx, pdf_document *doc)
  825. {
  826. unsigned char buf[1024];
  827. size_t i, n;
  828. int64_t t;
  829. fz_seek(ctx, doc->file, 0, SEEK_END);
  830. doc->file_size = fz_tell(ctx, doc->file);
  831. t = fz_maxi64(0, doc->file_size - (int64_t)sizeof buf);
  832. fz_seek(ctx, doc->file, t, SEEK_SET);
  833. n = fz_read(ctx, doc->file, buf, sizeof buf);
  834. if (n < 9)
  835. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find startxref");
  836. i = n - 9;
  837. do
  838. {
  839. if (memcmp(buf + i, "startxref", 9) == 0)
  840. {
  841. i += 9;
  842. while (i < n && iswhite(buf[i]))
  843. i ++;
  844. doc->startxref = 0;
  845. while (i < n && isdigit(buf[i]))
  846. {
  847. if (doc->startxref >= INT64_MAX/10)
  848. fz_throw(ctx, FZ_ERROR_LIMIT, "startxref too large");
  849. doc->startxref = doc->startxref * 10 + (buf[i++] - '0');
  850. }
  851. if (doc->startxref != 0)
  852. return;
  853. break;
  854. }
  855. } while (i-- > 0);
  856. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find startxref");
  857. }
  858. void fz_skip_space(fz_context *ctx, fz_stream *stm)
  859. {
  860. do
  861. {
  862. int c = fz_peek_byte(ctx, stm);
  863. if (c == EOF || c > 32)
  864. return;
  865. (void)fz_read_byte(ctx, stm);
  866. }
  867. while (1);
  868. }
  869. int fz_skip_string(fz_context *ctx, fz_stream *stm, const char *str)
  870. {
  871. while (*str)
  872. {
  873. int c = fz_peek_byte(ctx, stm);
  874. if (c == EOF || c != *str++)
  875. return 1;
  876. (void)fz_read_byte(ctx, stm);
  877. }
  878. return 0;
  879. }
  880. /*
  881. * trailer dictionary
  882. */
  883. static int
  884. pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc)
  885. {
  886. int len;
  887. char *s;
  888. int64_t t;
  889. pdf_token tok;
  890. int c;
  891. int size = 0;
  892. int64_t ofs;
  893. pdf_obj *trailer = NULL;
  894. size_t n;
  895. pdf_lexbuf *buf = &doc->lexbuf.base;
  896. pdf_obj *obj = NULL;
  897. fz_var(trailer);
  898. /* Record the current file read offset so that we can reinstate it */
  899. ofs = fz_tell(ctx, doc->file);
  900. fz_skip_space(ctx, doc->file);
  901. if (fz_skip_string(ctx, doc->file, "xref"))
  902. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find xref marker");
  903. fz_skip_space(ctx, doc->file);
  904. while (1)
  905. {
  906. c = fz_peek_byte(ctx, doc->file);
  907. if (!isdigit(c))
  908. break;
  909. fz_read_line(ctx, doc->file, buf->scratch, buf->size);
  910. s = buf->scratch;
  911. fz_strsep(&s, " "); /* ignore start */
  912. if (!s)
  913. fz_throw(ctx, FZ_ERROR_FORMAT, "xref subsection length missing");
  914. len = fz_atoi(fz_strsep(&s, " "));
  915. if (len < 0)
  916. fz_throw(ctx, FZ_ERROR_FORMAT, "xref subsection length must be positive");
  917. /* broken pdfs where the section is not on a separate line */
  918. if (s && *s != '\0')
  919. fz_seek(ctx, doc->file, -(2 + (int)strlen(s)), SEEK_CUR);
  920. t = fz_tell(ctx, doc->file);
  921. if (t < 0)
  922. fz_throw(ctx, FZ_ERROR_SYSTEM, "cannot tell in file");
  923. /* Spec says xref entries should be 20 bytes, but it's not infrequent
  924. * to see 19, in particular for some PCLm drivers. Cope. */
  925. if (len > 0)
  926. {
  927. n = fz_read(ctx, doc->file, (unsigned char *)buf->scratch, 20);
  928. if (n < 19)
  929. fz_throw(ctx, FZ_ERROR_FORMAT, "malformed xref table");
  930. if (n == 20 && buf->scratch[19] > 32)
  931. n = 19;
  932. }
  933. else
  934. n = 20;
  935. if (len > (int64_t)((INT64_MAX - t) / n))
  936. fz_throw(ctx, FZ_ERROR_LIMIT, "xref has too many entries");
  937. fz_seek(ctx, doc->file, t + n * (int64_t)len, SEEK_SET);
  938. }
  939. fz_try(ctx)
  940. {
  941. tok = pdf_lex(ctx, doc->file, buf);
  942. if (tok != PDF_TOK_TRAILER)
  943. fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer marker");
  944. tok = pdf_lex(ctx, doc->file, buf);
  945. if (tok != PDF_TOK_OPEN_DICT)
  946. fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer dictionary");
  947. trailer = pdf_parse_dict(ctx, doc, doc->file, buf);
  948. obj = pdf_dict_get(ctx, trailer, PDF_NAME(Size));
  949. if (pdf_is_indirect(ctx, obj))
  950. fz_throw(ctx, FZ_ERROR_FORMAT, "trailer Size entry is indirect");
  951. size = pdf_dict_get_int(ctx, trailer, PDF_NAME(Size));
  952. if (size < 0 || size > PDF_MAX_OBJECT_NUMBER + 1)
  953. fz_throw(ctx, FZ_ERROR_FORMAT, "trailer Size entry out of range");
  954. }
  955. fz_always(ctx)
  956. {
  957. pdf_drop_obj(ctx, trailer);
  958. }
  959. fz_catch(ctx)
  960. {
  961. fz_rethrow(ctx);
  962. }
  963. fz_seek(ctx, doc->file, ofs, SEEK_SET);
  964. return size;
  965. }
  966. static pdf_xref_entry *
  967. pdf_xref_find_subsection(fz_context *ctx, pdf_document *doc, int start, int len)
  968. {
  969. pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections-1];
  970. pdf_xref_subsec *sub, *extend = NULL;
  971. int num_objects;
  972. int solidify = 0;
  973. if (len == 0)
  974. return NULL;
  975. /* Different cases here.
  976. * Case 1) We might be asking for a subsection (or a subset of a
  977. * subsection) that we already have - Just return it.
  978. * Case 2) We might be asking for a subsection that overlaps (or
  979. * extends) a subsection we already have - extend the existing one.
  980. * Case 3) We might be asking for a subsection that overlaps multiple
  981. * existing subsections - solidify the whole set.
  982. * Case 4) We might be asking for a completely new subsection - just
  983. * allocate it.
  984. */
  985. /* Sanity check */
  986. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  987. {
  988. if (start >= sub->start && start <= sub->start + sub->len)
  989. {
  990. /* 'start' is in (or immediately after) 'sub' */
  991. if (start + len <= sub->start + sub->len)
  992. {
  993. /* And so is start+len-1 - just return this! Case 1. */
  994. return &sub->table[start-sub->start];
  995. }
  996. /* So we overlap with sub. */
  997. if (extend == NULL)
  998. {
  999. /* Maybe we can extend sub? */
  1000. extend = sub;
  1001. }
  1002. else
  1003. {
  1004. /* OK, so we've already found an overlapping one. We'll need to solidify. Case 3. */
  1005. solidify = 1;
  1006. break;
  1007. }
  1008. }
  1009. else if (start + len > sub->start && start + len < sub->start + sub->len)
  1010. {
  1011. /* The end of the start+len range is in 'sub'. */
  1012. /* For now, we won't support extending sub backwards. Just take this as
  1013. * needing to solidify. Case 3. */
  1014. solidify = 1;
  1015. break;
  1016. }
  1017. else if (start < sub->start && start + len >= sub->start + sub->len)
  1018. {
  1019. /* The end of the start+len range is beyond 'sub'. */
  1020. /* For now, we won't support extending sub backwards. Just take this as
  1021. * needing to solidify. Another variant of case 3. */
  1022. solidify = 1;
  1023. break;
  1024. }
  1025. }
  1026. num_objects = xref->num_objects;
  1027. if (num_objects < start + len)
  1028. num_objects = start + len;
  1029. if (solidify)
  1030. {
  1031. /* Case 3: Solidify the xref */
  1032. ensure_solid_xref(ctx, doc, num_objects, doc->num_xref_sections-1);
  1033. xref = &doc->xref_sections[doc->num_xref_sections-1];
  1034. sub = xref->subsec;
  1035. }
  1036. else if (extend)
  1037. {
  1038. /* Case 2: Extend the subsection */
  1039. int newlen = start + len - extend->start;
  1040. sub = extend;
  1041. sub->table = fz_realloc_array(ctx, sub->table, newlen, pdf_xref_entry);
  1042. memset(&sub->table[sub->len], 0, sizeof(pdf_xref_entry) * (newlen - sub->len));
  1043. sub->len = newlen;
  1044. if (xref->num_objects < sub->start + sub->len)
  1045. xref->num_objects = sub->start + sub->len;
  1046. if (doc->max_xref_len < sub->start + sub->len)
  1047. extend_xref_index(ctx, doc, sub->start + sub->len);
  1048. }
  1049. else
  1050. {
  1051. /* Case 4 */
  1052. sub = fz_malloc_struct(ctx, pdf_xref_subsec);
  1053. fz_try(ctx)
  1054. {
  1055. sub->table = fz_malloc_struct_array(ctx, len, pdf_xref_entry);
  1056. sub->start = start;
  1057. sub->len = len;
  1058. sub->next = xref->subsec;
  1059. xref->subsec = sub;
  1060. }
  1061. fz_catch(ctx)
  1062. {
  1063. fz_free(ctx, sub);
  1064. fz_rethrow(ctx);
  1065. }
  1066. if (xref->num_objects < num_objects)
  1067. xref->num_objects = num_objects;
  1068. if (doc->max_xref_len < num_objects)
  1069. extend_xref_index(ctx, doc, num_objects);
  1070. }
  1071. return &sub->table[start-sub->start];
  1072. }
  1073. static inline void
  1074. validate_object_number_range(fz_context *ctx, int first, int len, const char *what)
  1075. {
  1076. if (first < 0 || first > PDF_MAX_OBJECT_NUMBER)
  1077. fz_throw(ctx, FZ_ERROR_FORMAT, "first object number in %s out of range", what);
  1078. if (len < 0 || len > PDF_MAX_OBJECT_NUMBER)
  1079. fz_throw(ctx, FZ_ERROR_FORMAT, "number of objects in %s out of range", what);
  1080. if (len > 0 && len - 1 > PDF_MAX_OBJECT_NUMBER - first)
  1081. fz_throw(ctx, FZ_ERROR_FORMAT, "last object number in %s out of range", what);
  1082. }
  1083. static pdf_obj *
  1084. pdf_read_old_xref(fz_context *ctx, pdf_document *doc)
  1085. {
  1086. int start, len, c, i, xref_len, carried;
  1087. fz_stream *file = doc->file;
  1088. pdf_xref_entry *table;
  1089. pdf_token tok;
  1090. size_t n;
  1091. char *s, *e;
  1092. pdf_lexbuf *buf = &doc->lexbuf.base;
  1093. xref_len = pdf_xref_size_from_old_trailer(ctx, doc);
  1094. fz_skip_space(ctx, doc->file);
  1095. if (fz_skip_string(ctx, doc->file, "xref"))
  1096. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find xref marker");
  1097. fz_skip_space(ctx, doc->file);
  1098. while (1)
  1099. {
  1100. c = fz_peek_byte(ctx, file);
  1101. if (!isdigit(c))
  1102. break;
  1103. fz_read_line(ctx, file, buf->scratch, buf->size);
  1104. s = buf->scratch;
  1105. start = fz_atoi(fz_strsep(&s, " "));
  1106. len = fz_atoi(fz_strsep(&s, " "));
  1107. /* broken pdfs where the section is not on a separate line */
  1108. if (s && *s != '\0')
  1109. {
  1110. fz_warn(ctx, "broken xref subsection. proceeding anyway.");
  1111. fz_seek(ctx, file, -(2 + (int)strlen(s)), SEEK_CUR);
  1112. }
  1113. validate_object_number_range(ctx, start, len, "xref subsection");
  1114. /* broken pdfs where size in trailer undershoots entries in xref sections */
  1115. if (start + len > xref_len)
  1116. {
  1117. fz_warn(ctx, "broken xref subsection, proceeding anyway.");
  1118. }
  1119. table = pdf_xref_find_subsection(ctx, doc, start, len);
  1120. /* Xref entries SHOULD be 20 bytes long, but we see 19 byte
  1121. * ones more frequently than we'd like (e.g. PCLm drivers).
  1122. * Cope with this by 'carrying' data forward. */
  1123. carried = 0;
  1124. for (i = 0; i < len; i++)
  1125. {
  1126. pdf_xref_entry *entry = &table[i];
  1127. n = fz_read(ctx, file, (unsigned char *) buf->scratch + carried, 20-carried);
  1128. if (n != (size_t)(20-carried))
  1129. fz_throw(ctx, FZ_ERROR_FORMAT, "unexpected EOF in xref table");
  1130. n += carried;
  1131. buf->scratch[n] = '\0';
  1132. if (!entry->type)
  1133. {
  1134. s = buf->scratch;
  1135. e = s + n;
  1136. entry->num = start + i;
  1137. /* broken pdfs where line start with white space */
  1138. while (s < e && iswhite(*s))
  1139. s++;
  1140. if (s == e || !isdigit(*s))
  1141. fz_throw(ctx, FZ_ERROR_FORMAT, "xref offset missing");
  1142. while (s < e && isdigit(*s))
  1143. entry->ofs = entry->ofs * 10 + *s++ - '0';
  1144. while (s < e && iswhite(*s))
  1145. s++;
  1146. if (s == e || !isdigit(*s))
  1147. fz_throw(ctx, FZ_ERROR_FORMAT, "xref generation number missing");
  1148. while (s < e && isdigit(*s))
  1149. entry->gen = entry->gen * 10 + *s++ - '0';
  1150. while (s < e && iswhite(*s))
  1151. s++;
  1152. if (s == e || (*s != 'f' && *s != 'n' && *s != 'o'))
  1153. fz_throw(ctx, FZ_ERROR_FORMAT, "unexpected xref type: 0x%x (%d %d R)", s == e ? 0 : *s, entry->num, entry->gen);
  1154. entry->type = *s++;
  1155. /* If the last byte of our buffer isn't an EOL (or space), carry one byte forward */
  1156. carried = buf->scratch[19] > 32;
  1157. if (carried)
  1158. buf->scratch[0] = buf->scratch[19];
  1159. }
  1160. }
  1161. if (carried)
  1162. fz_unread_byte(ctx, file);
  1163. }
  1164. tok = pdf_lex(ctx, file, buf);
  1165. if (tok != PDF_TOK_TRAILER)
  1166. fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer marker");
  1167. tok = pdf_lex(ctx, file, buf);
  1168. if (tok != PDF_TOK_OPEN_DICT)
  1169. fz_throw(ctx, FZ_ERROR_FORMAT, "expected trailer dictionary");
  1170. doc->last_xref_was_old_style = 1;
  1171. return pdf_parse_dict(ctx, doc, file, buf);
  1172. }
  1173. static void
  1174. pdf_read_new_xref_section(fz_context *ctx, pdf_document *doc, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
  1175. {
  1176. pdf_xref_entry *table;
  1177. int i, n;
  1178. validate_object_number_range(ctx, i0, i1, "xref subsection");
  1179. table = pdf_xref_find_subsection(ctx, doc, i0, i1);
  1180. for (i = i0; i < i0 + i1; i++)
  1181. {
  1182. pdf_xref_entry *entry = &table[i-i0];
  1183. int a = 0;
  1184. int64_t b = 0;
  1185. int c = 0;
  1186. if (fz_is_eof(ctx, stm))
  1187. fz_throw(ctx, FZ_ERROR_FORMAT, "truncated xref stream");
  1188. for (n = 0; n < w0; n++)
  1189. a = (a << 8) + fz_read_byte(ctx, stm);
  1190. for (n = 0; n < w1; n++)
  1191. b = (b << 8) + fz_read_byte(ctx, stm);
  1192. for (n = 0; n < w2; n++)
  1193. c = (c << 8) + fz_read_byte(ctx, stm);
  1194. if (!entry->type)
  1195. {
  1196. int t = w0 ? a : 1;
  1197. entry->type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
  1198. entry->ofs = w1 ? b : 0;
  1199. entry->gen = w2 ? c : 0;
  1200. entry->num = i;
  1201. }
  1202. }
  1203. doc->last_xref_was_old_style = 0;
  1204. }
  1205. /* Entered with file locked, remains locked throughout. */
  1206. static pdf_obj *
  1207. pdf_read_new_xref(fz_context *ctx, pdf_document *doc)
  1208. {
  1209. fz_stream *stm = NULL;
  1210. pdf_obj *trailer = NULL;
  1211. pdf_obj *index = NULL;
  1212. pdf_obj *obj = NULL;
  1213. int gen, num = 0;
  1214. int64_t ofs, stm_ofs;
  1215. int size, w0, w1, w2;
  1216. int t;
  1217. fz_var(trailer);
  1218. fz_var(stm);
  1219. fz_try(ctx)
  1220. {
  1221. ofs = fz_tell(ctx, doc->file);
  1222. trailer = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stm_ofs, NULL);
  1223. if (num == 0)
  1224. fz_throw(ctx, FZ_ERROR_FORMAT, "Trailer object number cannot be 0\n");
  1225. }
  1226. fz_catch(ctx)
  1227. {
  1228. pdf_drop_obj(ctx, trailer);
  1229. fz_rethrow(ctx);
  1230. }
  1231. fz_try(ctx)
  1232. {
  1233. pdf_xref_entry *entry;
  1234. obj = pdf_dict_get(ctx, trailer, PDF_NAME(Size));
  1235. if (!obj)
  1236. fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream missing Size entry (%d 0 R)", num);
  1237. size = pdf_to_int(ctx, obj);
  1238. /* Bug708176: If the PDF file producer has declared Size without
  1239. * including this object, then increment it. */
  1240. if (size == num)
  1241. pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), size+1);
  1242. obj = pdf_dict_get(ctx, trailer, PDF_NAME(W));
  1243. if (!obj)
  1244. fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream missing W entry (%d R)", num);
  1245. if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 0)))
  1246. fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object type field width an indirect object");
  1247. if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 1)))
  1248. fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object field 2 width an indirect object");
  1249. if (pdf_is_indirect(ctx, pdf_array_get(ctx, obj, 2)))
  1250. fz_throw(ctx, FZ_ERROR_FORMAT, "xref stream object field 3 width an indirect object");
  1251. if (doc->file_reading_linearly && pdf_dict_get(ctx, trailer, PDF_NAME(Encrypt)))
  1252. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Cannot read linearly with encryption");
  1253. w0 = pdf_array_get_int(ctx, obj, 0);
  1254. w1 = pdf_array_get_int(ctx, obj, 1);
  1255. w2 = pdf_array_get_int(ctx, obj, 2);
  1256. if (w0 < 0)
  1257. fz_warn(ctx, "xref stream objects have corrupt type");
  1258. if (w1 < 0)
  1259. fz_warn(ctx, "xref stream objects have corrupt offset");
  1260. if (w2 < 0)
  1261. fz_warn(ctx, "xref stream objects have corrupt generation");
  1262. w0 = w0 < 0 ? 0 : w0;
  1263. w1 = w1 < 0 ? 0 : w1;
  1264. w2 = w2 < 0 ? 0 : w2;
  1265. index = pdf_dict_get(ctx, trailer, PDF_NAME(Index));
  1266. stm = pdf_open_stream_with_offset(ctx, doc, num, trailer, stm_ofs);
  1267. if (!index)
  1268. {
  1269. pdf_read_new_xref_section(ctx, doc, stm, 0, size, w0, w1, w2);
  1270. }
  1271. else
  1272. {
  1273. int n = pdf_array_len(ctx, index);
  1274. for (t = 0; t < n; t += 2)
  1275. {
  1276. int i0 = pdf_array_get_int(ctx, index, t + 0);
  1277. int i1 = pdf_array_get_int(ctx, index, t + 1);
  1278. pdf_read_new_xref_section(ctx, doc, stm, i0, i1, w0, w1, w2);
  1279. }
  1280. }
  1281. entry = pdf_get_populating_xref_entry(ctx, doc, num);
  1282. entry->ofs = ofs;
  1283. entry->gen = gen;
  1284. entry->num = num;
  1285. entry->stm_ofs = stm_ofs;
  1286. pdf_drop_obj(ctx, entry->obj);
  1287. entry->obj = pdf_keep_obj(ctx, trailer);
  1288. entry->type = 'n';
  1289. pdf_set_obj_parent(ctx, trailer, num);
  1290. }
  1291. fz_always(ctx)
  1292. {
  1293. fz_drop_stream(ctx, stm);
  1294. }
  1295. fz_catch(ctx)
  1296. {
  1297. pdf_drop_obj(ctx, trailer);
  1298. fz_rethrow(ctx);
  1299. }
  1300. return trailer;
  1301. }
  1302. static pdf_obj *
  1303. pdf_read_xref(fz_context *ctx, pdf_document *doc, int64_t ofs)
  1304. {
  1305. pdf_obj *trailer;
  1306. int c;
  1307. fz_seek(ctx, doc->file, doc->bias + ofs, SEEK_SET);
  1308. while (iswhite(fz_peek_byte(ctx, doc->file)))
  1309. fz_read_byte(ctx, doc->file);
  1310. c = fz_peek_byte(ctx, doc->file);
  1311. if (c == 'x')
  1312. trailer = pdf_read_old_xref(ctx, doc);
  1313. else if (isdigit(c))
  1314. trailer = pdf_read_new_xref(ctx, doc);
  1315. else
  1316. fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize xref format");
  1317. return trailer;
  1318. }
  1319. static int64_t
  1320. read_xref_section(fz_context *ctx, pdf_document *doc, int64_t ofs)
  1321. {
  1322. pdf_obj *trailer = NULL;
  1323. pdf_obj *prevobj;
  1324. int64_t xrefstmofs = 0;
  1325. int64_t prevofs = 0;
  1326. trailer = pdf_read_xref(ctx, doc, ofs);
  1327. fz_try(ctx)
  1328. {
  1329. pdf_set_populating_xref_trailer(ctx, doc, trailer);
  1330. /* FIXME: do we overwrite free entries properly? */
  1331. /* FIXME: Does this work properly with progression? */
  1332. xrefstmofs = pdf_to_int64(ctx, pdf_dict_get(ctx, trailer, PDF_NAME(XRefStm)));
  1333. if (xrefstmofs)
  1334. {
  1335. if (xrefstmofs < 0)
  1336. fz_throw(ctx, FZ_ERROR_FORMAT, "negative xref stream offset");
  1337. /*
  1338. Read the XRefStm stream, but throw away the resulting trailer. We do not
  1339. follow any Prev tag therein, as specified on Page 108 of the PDF reference
  1340. 1.7
  1341. */
  1342. pdf_drop_obj(ctx, pdf_read_xref(ctx, doc, xrefstmofs));
  1343. }
  1344. prevobj = pdf_dict_get(ctx, trailer, PDF_NAME(Prev));
  1345. if (pdf_is_int(ctx, prevobj))
  1346. {
  1347. prevofs = pdf_to_int64(ctx, prevobj);
  1348. if (prevofs <= 0)
  1349. fz_throw(ctx, FZ_ERROR_FORMAT, "invalid offset for previous xref section");
  1350. }
  1351. }
  1352. fz_always(ctx)
  1353. pdf_drop_obj(ctx, trailer);
  1354. fz_catch(ctx)
  1355. fz_rethrow(ctx);
  1356. return prevofs;
  1357. }
  1358. static void
  1359. pdf_read_xref_sections(fz_context *ctx, pdf_document *doc, int64_t ofs, int read_previous)
  1360. {
  1361. int i, len, cap;
  1362. int64_t *offsets;
  1363. int populated = 0;
  1364. int size, xref_len;
  1365. len = 0;
  1366. cap = 10;
  1367. offsets = fz_malloc_array(ctx, cap, int64_t);
  1368. fz_var(populated);
  1369. fz_var(offsets);
  1370. fz_try(ctx)
  1371. {
  1372. while(ofs)
  1373. {
  1374. for (i = 0; i < len; i ++)
  1375. {
  1376. if (offsets[i] == ofs)
  1377. break;
  1378. }
  1379. if (i < len)
  1380. {
  1381. fz_warn(ctx, "ignoring xref section recursion at offset %d", (int)ofs);
  1382. break;
  1383. }
  1384. if (len == cap)
  1385. {
  1386. cap *= 2;
  1387. offsets = fz_realloc_array(ctx, offsets, cap, int64_t);
  1388. }
  1389. offsets[len++] = ofs;
  1390. pdf_populate_next_xref_level(ctx, doc);
  1391. populated = 1;
  1392. ofs = read_xref_section(ctx, doc, ofs);
  1393. if (!read_previous)
  1394. break;
  1395. }
  1396. /* For pathological files, such as chinese-example.pdf, where the original
  1397. * xref in the file is highly fragmented, we can safely solidify it here
  1398. * with no ill effects. */
  1399. ensure_solid_xref(ctx, doc, 0, doc->num_xref_sections-1);
  1400. size = pdf_dict_get_int(ctx, pdf_trailer(ctx, doc), PDF_NAME(Size));
  1401. xref_len = pdf_xref_len(ctx, doc);
  1402. if (xref_len > size)
  1403. {
  1404. if (xref_len == size+1)
  1405. {
  1406. /* Bug 708456 && Bug 708176. Allow for (sadly, quite common
  1407. * PDF generators that can't get size right). */
  1408. fz_warn(ctx, "Trailer Size is off-by-one. Ignoring.");
  1409. pdf_dict_put_int(ctx, pdf_trailer(ctx, doc), PDF_NAME(Size), size+1);
  1410. }
  1411. else
  1412. fz_throw(ctx, FZ_ERROR_FORMAT, "incorrect number of xref entries in trailer, repairing");
  1413. }
  1414. }
  1415. fz_always(ctx)
  1416. {
  1417. fz_free(ctx, offsets);
  1418. }
  1419. fz_catch(ctx)
  1420. {
  1421. /* Undo pdf_populate_next_xref_level if we've done that already. */
  1422. if (populated)
  1423. {
  1424. pdf_drop_xref_subsec(ctx, &doc->xref_sections[doc->num_xref_sections - 1]);
  1425. doc->num_xref_sections--;
  1426. }
  1427. fz_rethrow(ctx);
  1428. }
  1429. }
  1430. void
  1431. pdf_prime_xref_index(fz_context *ctx, pdf_document *doc)
  1432. {
  1433. int i, j;
  1434. int *idx = doc->xref_index;
  1435. for (i = doc->num_xref_sections-1; i >= 0; i--)
  1436. {
  1437. pdf_xref *xref = &doc->xref_sections[i];
  1438. pdf_xref_subsec *subsec = xref->subsec;
  1439. while (subsec != NULL)
  1440. {
  1441. int start = subsec->start;
  1442. int end = subsec->start + subsec->len;
  1443. for (j = start; j < end; j++)
  1444. {
  1445. char t = subsec->table[j-start].type;
  1446. if (t != 0 && t != 'f')
  1447. idx[j] = i;
  1448. }
  1449. subsec = subsec->next;
  1450. }
  1451. }
  1452. }
  1453. static void
  1454. check_xref_entry_offsets(fz_context *ctx, pdf_xref_entry *entry, int i, pdf_document *doc, void *arg)
  1455. {
  1456. int xref_len = (int)(intptr_t)arg;
  1457. if (entry->type == 'n')
  1458. {
  1459. /* Special case code: "0000000000 * n" means free,
  1460. * according to some producers (inc Quartz) */
  1461. if (entry->ofs == 0)
  1462. entry->type = 'f';
  1463. else if (entry->ofs <= 0 || entry->ofs >= doc->file_size)
  1464. fz_throw(ctx, FZ_ERROR_FORMAT, "object offset out of range: %d (%d 0 R)", (int)entry->ofs, i);
  1465. }
  1466. else if (entry->type == 'o')
  1467. {
  1468. /* Read this into a local variable here, because pdf_get_xref_entry
  1469. * may solidify the xref, hence invalidating "entry", meaning we
  1470. * need a stashed value for the throw. */
  1471. int64_t ofs = entry->ofs;
  1472. if (ofs <= 0 || ofs >= xref_len || pdf_get_xref_entry_no_null(ctx, doc, ofs)->type != 'n')
  1473. fz_throw(ctx, FZ_ERROR_FORMAT, "invalid reference to an objstm that does not exist: %d (%d 0 R)", (int)ofs, i);
  1474. }
  1475. }
  1476. /*
  1477. * load xref tables from pdf
  1478. *
  1479. * File locked on entry, throughout and on exit.
  1480. */
  1481. static void
  1482. pdf_load_xref(fz_context *ctx, pdf_document *doc)
  1483. {
  1484. int xref_len;
  1485. pdf_xref_entry *entry;
  1486. pdf_read_start_xref(ctx, doc);
  1487. pdf_read_xref_sections(ctx, doc, doc->startxref, 1);
  1488. if (pdf_xref_len(ctx, doc) == 0)
  1489. fz_throw(ctx, FZ_ERROR_FORMAT, "found xref was empty");
  1490. pdf_prime_xref_index(ctx, doc);
  1491. entry = pdf_get_xref_entry_no_null(ctx, doc, 0);
  1492. /* broken pdfs where first object is missing */
  1493. if (!entry->type)
  1494. {
  1495. entry->type = 'f';
  1496. entry->gen = 65535;
  1497. entry->num = 0;
  1498. }
  1499. /* broken pdfs where first object is not free */
  1500. else if (entry->type != 'f')
  1501. fz_warn(ctx, "first object in xref is not free");
  1502. /* broken pdfs where object offsets are out of range */
  1503. xref_len = pdf_xref_len(ctx, doc);
  1504. pdf_xref_entry_map(ctx, doc, check_xref_entry_offsets, (void *)(intptr_t)xref_len);
  1505. }
  1506. static void
  1507. pdf_check_linear(fz_context *ctx, pdf_document *doc)
  1508. {
  1509. pdf_obj *dict = NULL;
  1510. pdf_obj *o;
  1511. int num, gen;
  1512. int64_t stmofs;
  1513. fz_var(dict);
  1514. fz_try(ctx)
  1515. {
  1516. dict = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stmofs, NULL);
  1517. if (!pdf_is_dict(ctx, dict))
  1518. break;
  1519. o = pdf_dict_get(ctx, dict, PDF_NAME(Linearized));
  1520. if (o == NULL)
  1521. break;
  1522. if (pdf_to_int(ctx, o) != 1)
  1523. break;
  1524. doc->has_linearization_object = 1;
  1525. }
  1526. fz_always(ctx)
  1527. pdf_drop_obj(ctx, dict);
  1528. fz_catch(ctx)
  1529. {
  1530. /* Silently swallow this error. */
  1531. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  1532. fz_report_error(ctx);
  1533. }
  1534. }
  1535. static void
  1536. pdf_load_linear(fz_context *ctx, pdf_document *doc)
  1537. {
  1538. pdf_obj *dict = NULL;
  1539. pdf_obj *hint = NULL;
  1540. pdf_obj *o;
  1541. int num, gen, lin, len;
  1542. int64_t stmofs;
  1543. fz_var(dict);
  1544. fz_var(hint);
  1545. fz_try(ctx)
  1546. {
  1547. pdf_xref_entry *entry;
  1548. dict = pdf_parse_ind_obj(ctx, doc, doc->file, &num, &gen, &stmofs, NULL);
  1549. if (!pdf_is_dict(ctx, dict))
  1550. fz_throw(ctx, FZ_ERROR_FORMAT, "Failed to read linearized dictionary");
  1551. o = pdf_dict_get(ctx, dict, PDF_NAME(Linearized));
  1552. if (o == NULL)
  1553. fz_throw(ctx, FZ_ERROR_FORMAT, "Failed to read linearized dictionary");
  1554. lin = pdf_to_int(ctx, o);
  1555. if (lin != 1)
  1556. fz_throw(ctx, FZ_ERROR_FORMAT, "Unexpected version of Linearized tag (%d)", lin);
  1557. doc->has_linearization_object = 1;
  1558. len = pdf_dict_get_int(ctx, dict, PDF_NAME(L));
  1559. if (len != doc->file_length)
  1560. fz_throw(ctx, FZ_ERROR_ARGUMENT, "File has been updated since linearization");
  1561. pdf_read_xref_sections(ctx, doc, fz_tell(ctx, doc->file), 0);
  1562. doc->linear_page_count = pdf_dict_get_int(ctx, dict, PDF_NAME(N));
  1563. doc->linear_page_refs = fz_realloc_array(ctx, doc->linear_page_refs, doc->linear_page_count, pdf_obj *);
  1564. memset(doc->linear_page_refs, 0, doc->linear_page_count * sizeof(pdf_obj*));
  1565. doc->linear_obj = dict;
  1566. doc->linear_pos = fz_tell(ctx, doc->file);
  1567. doc->linear_page1_obj_num = pdf_dict_get_int(ctx, dict, PDF_NAME(O));
  1568. doc->linear_page_refs[0] = pdf_new_indirect(ctx, doc, doc->linear_page1_obj_num, 0);
  1569. doc->linear_page_num = 0;
  1570. hint = pdf_dict_get(ctx, dict, PDF_NAME(H));
  1571. doc->hint_object_offset = pdf_array_get_int(ctx, hint, 0);
  1572. doc->hint_object_length = pdf_array_get_int(ctx, hint, 1);
  1573. entry = pdf_get_populating_xref_entry(ctx, doc, 0);
  1574. entry->type = 'f';
  1575. }
  1576. fz_catch(ctx)
  1577. {
  1578. pdf_drop_obj(ctx, dict);
  1579. fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
  1580. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  1581. fz_report_error(ctx);
  1582. /* Drop back to non linearized reading mode */
  1583. doc->file_reading_linearly = 0;
  1584. }
  1585. }
  1586. static void
  1587. id_and_password(fz_context *ctx, pdf_document *doc)
  1588. {
  1589. pdf_obj *encrypt, *id;
  1590. pdf_prime_xref_index(ctx, doc);
  1591. encrypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
  1592. id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
  1593. if (pdf_is_dict(ctx, encrypt))
  1594. doc->crypt = pdf_new_crypt(ctx, encrypt, id);
  1595. /* Allow lazy clients to read encrypted files with a blank password */
  1596. (void)pdf_authenticate_password(ctx, doc, "");
  1597. }
  1598. /*
  1599. * Initialize and load xref tables.
  1600. * If password is not null, try to decrypt.
  1601. */
  1602. static void
  1603. pdf_init_document(fz_context *ctx, pdf_document *doc)
  1604. {
  1605. int repaired = 0;
  1606. fz_try(ctx)
  1607. {
  1608. /* Check to see if we should work in progressive mode */
  1609. if (doc->file->progressive)
  1610. {
  1611. doc->file_reading_linearly = 1;
  1612. fz_seek(ctx, doc->file, 0, SEEK_END);
  1613. doc->file_length = fz_tell(ctx, doc->file);
  1614. if (doc->file_length < 0)
  1615. doc->file_length = 0;
  1616. fz_seek(ctx, doc->file, 0, SEEK_SET);
  1617. }
  1618. pdf_load_version(ctx, doc);
  1619. if (doc->is_fdf)
  1620. {
  1621. doc->file_reading_linearly = 0;
  1622. repaired = 1;
  1623. break; /* skip to end of try/catch */
  1624. }
  1625. /* Try to load the linearized file if we are in progressive
  1626. * mode. */
  1627. if (doc->file_reading_linearly)
  1628. pdf_load_linear(ctx, doc);
  1629. else
  1630. /* Even if we're not in progressive mode, check to see
  1631. * if the file claims to be linearized. This is important
  1632. * for checking signatures later on. */
  1633. pdf_check_linear(ctx, doc);
  1634. /* If we aren't in progressive mode (or the linear load failed
  1635. * and has set us back to non-progressive mode), load normally.
  1636. */
  1637. if (!doc->file_reading_linearly)
  1638. pdf_load_xref(ctx, doc);
  1639. }
  1640. fz_catch(ctx)
  1641. {
  1642. pdf_drop_xref_sections(ctx, doc);
  1643. fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
  1644. doc->file_reading_linearly = 0;
  1645. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  1646. fz_report_error(ctx);
  1647. fz_warn(ctx, "trying to repair broken xref");
  1648. repaired = 1;
  1649. }
  1650. if (repaired)
  1651. {
  1652. /* pdf_repair_xref may access xref_index, so reset it properly */
  1653. if (doc->xref_index)
  1654. memset(doc->xref_index, 0, sizeof(int) * doc->max_xref_len);
  1655. pdf_repair_xref_aux(ctx, doc, id_and_password);
  1656. }
  1657. else
  1658. id_and_password(ctx, doc);
  1659. }
  1660. void
  1661. pdf_invalidate_xfa(fz_context *ctx, pdf_document *doc)
  1662. {
  1663. if (doc == NULL)
  1664. return;
  1665. fz_drop_xml(ctx, doc->xfa);
  1666. doc->xfa = NULL;
  1667. }
  1668. static void
  1669. pdf_drop_document_imp(fz_context *ctx, fz_document *doc_)
  1670. {
  1671. pdf_document *doc = (pdf_document*)doc_;
  1672. int i;
  1673. fz_defer_reap_start(ctx);
  1674. /* Type3 glyphs in the glyph cache can contain pdf_obj pointers
  1675. * that we are about to destroy. Simplest solution is to bin the
  1676. * glyph cache at this point. */
  1677. fz_try(ctx)
  1678. fz_purge_glyph_cache(ctx);
  1679. fz_catch(ctx)
  1680. {
  1681. /* Swallow error, but continue dropping */
  1682. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  1683. fz_report_error(ctx);
  1684. }
  1685. pdf_set_doc_event_callback(ctx, doc, NULL, NULL, NULL);
  1686. pdf_drop_js(ctx, doc->js);
  1687. pdf_drop_journal(ctx, doc->journal);
  1688. pdf_drop_resource_tables(ctx, doc);
  1689. pdf_drop_local_xref(ctx, doc->local_xref);
  1690. pdf_drop_xref_sections(ctx, doc);
  1691. fz_free(ctx, doc->xref_index);
  1692. fz_drop_stream(ctx, doc->file);
  1693. pdf_drop_crypt(ctx, doc->crypt);
  1694. pdf_drop_obj(ctx, doc->linear_obj);
  1695. if (doc->linear_page_refs)
  1696. {
  1697. for (i=0; i < doc->linear_page_count; i++)
  1698. pdf_drop_obj(ctx, doc->linear_page_refs[i]);
  1699. fz_free(ctx, doc->linear_page_refs);
  1700. }
  1701. fz_free(ctx, doc->hint_page);
  1702. fz_free(ctx, doc->hint_shared_ref);
  1703. fz_free(ctx, doc->hint_shared);
  1704. fz_free(ctx, doc->hint_obj_offsets);
  1705. for (i=0; i < doc->num_type3_fonts; i++)
  1706. {
  1707. fz_try(ctx)
  1708. fz_decouple_type3_font(ctx, doc->type3_fonts[i], (void *)doc);
  1709. fz_always(ctx)
  1710. fz_drop_font(ctx, doc->type3_fonts[i]);
  1711. fz_catch(ctx)
  1712. {
  1713. /* Swallow error, but continue dropping */
  1714. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  1715. fz_report_error(ctx);
  1716. }
  1717. }
  1718. fz_free(ctx, doc->type3_fonts);
  1719. pdf_drop_ocg(ctx, doc);
  1720. pdf_empty_store(ctx, doc);
  1721. pdf_lexbuf_fin(ctx, &doc->lexbuf.base);
  1722. fz_drop_colorspace(ctx, doc->oi);
  1723. for (i = 0; i < doc->orphans_count; i++)
  1724. pdf_drop_obj(ctx, doc->orphans[i]);
  1725. fz_free(ctx, doc->orphans);
  1726. pdf_drop_page_tree_internal(ctx, doc);
  1727. fz_defer_reap_end(ctx);
  1728. pdf_invalidate_xfa(ctx, doc);
  1729. }
  1730. void
  1731. pdf_drop_document(fz_context *ctx, pdf_document *doc)
  1732. {
  1733. fz_drop_document(ctx, &doc->super);
  1734. }
  1735. pdf_document *
  1736. pdf_keep_document(fz_context *ctx, pdf_document *doc)
  1737. {
  1738. return (pdf_document *)fz_keep_document(ctx, &doc->super);
  1739. }
  1740. /*
  1741. * compressed object streams
  1742. */
  1743. /*
  1744. Do not hold pdf_xref_entry's over call to this function as they
  1745. may be invalidated!
  1746. */
  1747. static pdf_xref_entry *
  1748. pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, pdf_lexbuf *buf, int target)
  1749. {
  1750. fz_stream *stm = NULL;
  1751. pdf_obj *objstm = NULL;
  1752. int *numbuf = NULL;
  1753. int64_t *ofsbuf = NULL;
  1754. pdf_obj *obj;
  1755. int64_t first;
  1756. int count;
  1757. int i;
  1758. pdf_token tok;
  1759. pdf_xref_entry *ret_entry = NULL;
  1760. int ret_idx;
  1761. int xref_len;
  1762. int found;
  1763. fz_stream *sub = NULL;
  1764. fz_var(numbuf);
  1765. fz_var(ofsbuf);
  1766. fz_var(objstm);
  1767. fz_var(stm);
  1768. fz_var(sub);
  1769. fz_try(ctx)
  1770. {
  1771. objstm = pdf_load_object(ctx, doc, num);
  1772. if (pdf_obj_marked(ctx, objstm))
  1773. fz_throw(ctx, FZ_ERROR_FORMAT, "recursive object stream lookup");
  1774. }
  1775. fz_catch(ctx)
  1776. {
  1777. pdf_drop_obj(ctx, objstm);
  1778. fz_rethrow(ctx);
  1779. }
  1780. fz_try(ctx)
  1781. {
  1782. (void)pdf_mark_obj(ctx, objstm);
  1783. count = pdf_dict_get_int(ctx, objstm, PDF_NAME(N));
  1784. first = pdf_dict_get_int(ctx, objstm, PDF_NAME(First));
  1785. if (count < 0 || count > PDF_MAX_OBJECT_NUMBER)
  1786. fz_throw(ctx, FZ_ERROR_FORMAT, "number of objects in object stream out of range");
  1787. numbuf = fz_calloc(ctx, count, sizeof(*numbuf));
  1788. ofsbuf = fz_calloc(ctx, count, sizeof(*ofsbuf));
  1789. xref_len = pdf_xref_len(ctx, doc);
  1790. found = 0;
  1791. stm = pdf_open_stream_number(ctx, doc, num);
  1792. for (i = 0; i < count; i++)
  1793. {
  1794. tok = pdf_lex(ctx, stm, buf);
  1795. if (tok != PDF_TOK_INT)
  1796. fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt object stream (%d 0 R)", num);
  1797. numbuf[found] = buf->i;
  1798. tok = pdf_lex(ctx, stm, buf);
  1799. if (tok != PDF_TOK_INT)
  1800. fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt object stream (%d 0 R)", num);
  1801. ofsbuf[found] = buf->i;
  1802. if (numbuf[found] <= 0 || numbuf[found] >= xref_len)
  1803. fz_warn(ctx, "object stream object out of range, skipping");
  1804. else
  1805. found++;
  1806. }
  1807. ret_idx = -1;
  1808. for (i = 0; i < found; i++)
  1809. {
  1810. pdf_xref_entry *entry;
  1811. uint64_t length;
  1812. int64_t offset;
  1813. offset = first + ofsbuf[i];
  1814. if (i+1 < found)
  1815. length = ofsbuf[i+1] - ofsbuf[i];
  1816. else
  1817. length = UINT64_MAX;
  1818. sub = fz_open_null_filter(ctx, stm, length, offset);
  1819. obj = pdf_parse_stm_obj(ctx, doc, sub, buf);
  1820. fz_drop_stream(ctx, sub);
  1821. sub = NULL;
  1822. entry = pdf_get_xref_entry_no_null(ctx, doc, numbuf[i]);
  1823. pdf_set_obj_parent(ctx, obj, numbuf[i]);
  1824. /* We may have set entry->type to be 'O' from being 'o' to avoid nasty
  1825. * recursions in pdf_cache_object. Accept the type being 'O' here. */
  1826. if ((entry->type == 'o' || entry->type == 'O') && entry->ofs == num)
  1827. {
  1828. /* If we already have an entry for this object,
  1829. * we'd like to drop it and use the new one -
  1830. * but this means that anyone currently holding
  1831. * a pointer to the old one will be left with a
  1832. * stale pointer. Instead, we drop the new one
  1833. * and trust that the old one is correct. */
  1834. if (entry->obj)
  1835. {
  1836. if (pdf_objcmp(ctx, entry->obj, obj))
  1837. fz_warn(ctx, "Encountered new definition for object %d - keeping the original one", numbuf[i]);
  1838. pdf_drop_obj(ctx, obj);
  1839. }
  1840. else
  1841. {
  1842. entry->obj = obj;
  1843. /* If we've just read a 'null' object, don't leave this as a NULL 'o' object,
  1844. * as that will a) confuse the code that called us into thinking that nothing
  1845. * was loaded, and b) cause the entire objstm to be reloaded every time that
  1846. * object is accessed. Instead, just mark it as an 'f'. */
  1847. if (obj == NULL)
  1848. entry->type = 'f';
  1849. fz_drop_buffer(ctx, entry->stm_buf);
  1850. entry->stm_buf = NULL;
  1851. }
  1852. if (numbuf[i] == target)
  1853. ret_idx = i;
  1854. }
  1855. else
  1856. {
  1857. pdf_drop_obj(ctx, obj);
  1858. }
  1859. }
  1860. /* Parsing our way through the stream can cause the xref to be
  1861. * solidified, which will move an entry. We therefore can't
  1862. * read the entry for returning until no more parsing is to be
  1863. * done. Thus we end up reading this entry twice. */
  1864. if (ret_idx >= 0)
  1865. ret_entry = pdf_get_xref_entry_no_null(ctx, doc, numbuf[ret_idx]);
  1866. }
  1867. fz_always(ctx)
  1868. {
  1869. fz_drop_stream(ctx, stm);
  1870. fz_drop_stream(ctx, sub);
  1871. fz_free(ctx, ofsbuf);
  1872. fz_free(ctx, numbuf);
  1873. pdf_unmark_obj(ctx, objstm);
  1874. pdf_drop_obj(ctx, objstm);
  1875. }
  1876. fz_catch(ctx)
  1877. {
  1878. fz_rethrow(ctx);
  1879. }
  1880. return ret_entry;
  1881. }
  1882. /*
  1883. * object loading
  1884. */
  1885. static int
  1886. pdf_obj_read(fz_context *ctx, pdf_document *doc, int64_t *offset, int *nump, pdf_obj **page)
  1887. {
  1888. pdf_lexbuf *buf = &doc->lexbuf.base;
  1889. int num, gen, tok;
  1890. int64_t numofs, genofs, stmofs, tmpofs, newtmpofs;
  1891. int xref_len;
  1892. pdf_xref_entry *entry;
  1893. numofs = *offset;
  1894. fz_seek(ctx, doc->file, doc->bias + numofs, SEEK_SET);
  1895. /* We expect to read 'num' here */
  1896. tok = pdf_lex(ctx, doc->file, buf);
  1897. genofs = fz_tell(ctx, doc->file);
  1898. if (tok != PDF_TOK_INT)
  1899. {
  1900. /* Failed! */
  1901. DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, *offset));
  1902. *offset = genofs;
  1903. return tok == PDF_TOK_EOF;
  1904. }
  1905. *nump = num = buf->i;
  1906. /* We expect to read 'gen' here */
  1907. tok = pdf_lex(ctx, doc->file, buf);
  1908. tmpofs = fz_tell(ctx, doc->file);
  1909. if (tok != PDF_TOK_INT)
  1910. {
  1911. /* Failed! */
  1912. DEBUGMESS((ctx, "skipping unexpected data after \"%d\" (tok=%d) at %d", num, tok, *offset));
  1913. *offset = tmpofs;
  1914. return tok == PDF_TOK_EOF;
  1915. }
  1916. gen = buf->i;
  1917. /* We expect to read 'obj' here */
  1918. do
  1919. {
  1920. tmpofs = fz_tell(ctx, doc->file);
  1921. tok = pdf_lex(ctx, doc->file, buf);
  1922. if (tok == PDF_TOK_OBJ)
  1923. break;
  1924. if (tok != PDF_TOK_INT)
  1925. {
  1926. DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, tmpofs));
  1927. *offset = fz_tell(ctx, doc->file);
  1928. return tok == PDF_TOK_EOF;
  1929. }
  1930. DEBUGMESS((ctx, "skipping unexpected int %d at %d", num, numofs));
  1931. *nump = num = gen;
  1932. numofs = genofs;
  1933. gen = buf->i;
  1934. genofs = tmpofs;
  1935. }
  1936. while (1);
  1937. /* Now we read the actual object */
  1938. xref_len = pdf_xref_len(ctx, doc);
  1939. /* When we are reading a progressive file, we typically see:
  1940. * File Header
  1941. * obj m (Linearization params)
  1942. * xref #1 (refers to objects m-n)
  1943. * obj m+1
  1944. * ...
  1945. * obj n
  1946. * obj 1
  1947. * ...
  1948. * obj n-1
  1949. * xref #2
  1950. *
  1951. * The linearisation params are read elsewhere, hence
  1952. * whenever we read an object it should just go into the
  1953. * previous xref.
  1954. */
  1955. tok = pdf_repair_obj(ctx, doc, buf, &stmofs, NULL, NULL, NULL, page, &newtmpofs, NULL);
  1956. do /* So we can break out of it */
  1957. {
  1958. if (num <= 0 || num >= xref_len)
  1959. {
  1960. fz_warn(ctx, "Not a valid object number (%d %d obj)", num, gen);
  1961. break;
  1962. }
  1963. if (gen != 0)
  1964. {
  1965. fz_warn(ctx, "Unexpected non zero generation number in linearized file");
  1966. }
  1967. entry = pdf_get_populating_xref_entry(ctx, doc, num);
  1968. if (entry->type != 0)
  1969. {
  1970. DEBUGMESS((ctx, "Duplicate object found (%d %d obj)", num, gen));
  1971. break;
  1972. }
  1973. if (page && *page)
  1974. {
  1975. DEBUGMESS((ctx, "Successfully read object %d @ %d - and found page %d!", num, numofs, doc->linear_page_num));
  1976. if (!entry->obj)
  1977. entry->obj = pdf_keep_obj(ctx, *page);
  1978. if (doc->linear_page_refs[doc->linear_page_num] == NULL)
  1979. doc->linear_page_refs[doc->linear_page_num] = pdf_new_indirect(ctx, doc, num, gen);
  1980. }
  1981. else
  1982. {
  1983. DEBUGMESS((ctx, "Successfully read object %d @ %d", num, numofs));
  1984. }
  1985. entry->type = 'n';
  1986. entry->gen = gen; // XXX: was 0
  1987. entry->num = num;
  1988. entry->ofs = numofs;
  1989. entry->stm_ofs = stmofs;
  1990. }
  1991. while (0);
  1992. if (page && *page)
  1993. doc->linear_page_num++;
  1994. if (tok == PDF_TOK_ENDOBJ)
  1995. {
  1996. *offset = fz_tell(ctx, doc->file);
  1997. }
  1998. else
  1999. {
  2000. *offset = newtmpofs;
  2001. }
  2002. return 0;
  2003. }
  2004. static void
  2005. pdf_load_hinted_page(fz_context *ctx, pdf_document *doc, int pagenum)
  2006. {
  2007. pdf_obj *page = NULL;
  2008. if (!doc->hints_loaded || !doc->linear_page_refs)
  2009. return;
  2010. if (doc->linear_page_refs[pagenum])
  2011. return;
  2012. fz_var(page);
  2013. fz_try(ctx)
  2014. {
  2015. int num = doc->hint_page[pagenum].number;
  2016. page = pdf_load_object(ctx, doc, num);
  2017. if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, page, PDF_NAME(Type))))
  2018. {
  2019. /* We have found the page object! */
  2020. DEBUGMESS((ctx, "LoadHintedPage pagenum=%d num=%d", pagenum, num));
  2021. doc->linear_page_refs[pagenum] = pdf_new_indirect(ctx, doc, num, 0);
  2022. }
  2023. }
  2024. fz_always(ctx)
  2025. pdf_drop_obj(ctx, page);
  2026. fz_catch(ctx)
  2027. {
  2028. fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
  2029. /* Swallow the error and proceed as normal */
  2030. fz_report_error(ctx);
  2031. }
  2032. }
  2033. static int
  2034. read_hinted_object(fz_context *ctx, pdf_document *doc, int num)
  2035. {
  2036. /* Try to find the object using our hint table. Find the closest
  2037. * object <= the one we want that has a hint and read forward from
  2038. * there. */
  2039. int expected = num;
  2040. int curr_pos;
  2041. int64_t start, offset;
  2042. while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
  2043. expected--;
  2044. if (expected != num)
  2045. DEBUGMESS((ctx, "object %d is unhinted, will search forward from %d", expected, num));
  2046. if (expected == 0) /* No hints found, just bail */
  2047. return 0;
  2048. curr_pos = fz_tell(ctx, doc->file);
  2049. offset = doc->hint_obj_offsets[expected];
  2050. fz_var(expected);
  2051. fz_try(ctx)
  2052. {
  2053. int found;
  2054. /* Try to read forward from there */
  2055. do
  2056. {
  2057. start = offset;
  2058. DEBUGMESS((ctx, "Searching for object %d @ %d", expected, offset));
  2059. pdf_obj_read(ctx, doc, &offset, &found, 0);
  2060. DEBUGMESS((ctx, "Found object %d - next will be @ %d", found, offset));
  2061. if (found <= expected)
  2062. {
  2063. /* We found the right one (or one earlier than
  2064. * we expected). Update the hints. */
  2065. doc->hint_obj_offsets[expected] = offset;
  2066. doc->hint_obj_offsets[found] = start;
  2067. doc->hint_obj_offsets[found+1] = offset;
  2068. /* Retry with the next one */
  2069. expected = found+1;
  2070. }
  2071. else
  2072. {
  2073. /* We found one later than we expected. */
  2074. doc->hint_obj_offsets[expected] = 0;
  2075. doc->hint_obj_offsets[found] = start;
  2076. doc->hint_obj_offsets[found+1] = offset;
  2077. while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
  2078. expected--;
  2079. if (expected == 0) /* No hints found, we give up */
  2080. break;
  2081. }
  2082. }
  2083. while (found != num);
  2084. }
  2085. fz_always(ctx)
  2086. {
  2087. fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
  2088. }
  2089. fz_catch(ctx)
  2090. {
  2091. fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
  2092. /* FIXME: Currently we ignore the hint. Perhaps we should
  2093. * drop back to non-hinted operation here. */
  2094. doc->hint_obj_offsets[expected] = 0;
  2095. fz_rethrow(ctx);
  2096. }
  2097. return expected != 0;
  2098. }
  2099. pdf_obj *
  2100. pdf_load_unencrypted_object(fz_context *ctx, pdf_document *doc, int num)
  2101. {
  2102. pdf_xref_entry *x;
  2103. if (num <= 0 || num >= pdf_xref_len(ctx, doc))
  2104. fz_throw(ctx, FZ_ERROR_FORMAT, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
  2105. x = pdf_get_xref_entry_no_null(ctx, doc, num);
  2106. if (x->type == 'n')
  2107. {
  2108. fz_seek(ctx, doc->file, doc->bias + x->ofs, SEEK_SET);
  2109. return pdf_parse_ind_obj(ctx, doc, doc->file, NULL, NULL, NULL, NULL);
  2110. }
  2111. return NULL;
  2112. }
  2113. int
  2114. pdf_object_exists(fz_context *ctx, pdf_document *doc, int num)
  2115. {
  2116. pdf_xref_entry *x;
  2117. if (num <= 0 || num >= pdf_xref_len(ctx, doc))
  2118. return 0;
  2119. x = pdf_get_xref_entry(ctx, doc, num);
  2120. if (x && (x->type == 'n' || x->type == 'o'))
  2121. return 1;
  2122. return 0;
  2123. }
/*
	Make sure object 'num' is loaded and return its xref entry.

	Throws FZ_ERROR_FORMAT if num is outside 1 .. pdf_xref_len()-1 or
	no xref entry can be found for it. If the entry already carries an
	object it is returned immediately. Otherwise the entry type decides
	how the object is obtained:

	'f': the entry's object becomes PDF_NULL.
	'n': the object is parsed from the file at doc->bias + x->ofs, and
	     passed through pdf_crypt_obj when doc->crypt is set.
	'o': the object is fetched via pdf_load_obj_stm.
	other: a hinted read is tried if hint offsets exist; failing that
	     FZ_ERROR_TRYLATER or FZ_ERROR_FORMAT is thrown.

	Parse problems may trigger pdf_repair_xref, after which the whole
	lookup is restarted from the 'object_updated' label.

	On the normal exit path the object's parent number is set to num.
*/
pdf_xref_entry *
pdf_cache_object(fz_context *ctx, pdf_document *doc, int num)
{
	pdf_xref_entry *x;
	int rnum, rgen, try_repair;

	/* try_repair is written inside an fz_try block and read after it. */
	fz_var(try_repair);

	if (num <= 0 || num >= pdf_xref_len(ctx, doc))
		fz_throw(ctx, FZ_ERROR_FORMAT, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));

	/* Re-entry point used after a repair or a successful hinted read:
	 * the xref entry is looked up afresh each time round. */
object_updated:
	try_repair = 0;
	rnum = num;

	x = pdf_get_xref_entry(ctx, doc, num);
	if (x == NULL)
		fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find object in xref (%d 0 R)", num);

	/* Already loaded: nothing more to do. */
	if (x->obj != NULL)
		return x;

	if (x->type == 'f')
	{
		x->obj = PDF_NULL;
	}
	else if (x->type == 'n')
	{
		fz_seek(ctx, doc->file, doc->bias + x->ofs, SEEK_SET);

		fz_try(ctx)
		{
			/* rnum/rgen receive the numbers read from the file;
			 * try_repair may be set by the parser. */
			x->obj = pdf_parse_ind_obj(ctx, doc, doc->file,
					&rnum, &rgen, &x->stm_ofs, &try_repair);
		}
		fz_catch(ctx)
		{
			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
			fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
			/* Only swallow the error when the parser asked for a
			 * repair; otherwise pass it on. */
			if (!try_repair)
				fz_rethrow(ctx);
			else
				fz_report_error(ctx);
		}

		/* The object found at this offset is not the one asked for:
		 * discard it, turn the entry into a free one, and request a
		 * repair unless one has already been attempted.
		 * NOTE(review): when a repair was already attempted,
		 * execution carries on below with x->obj == NULL. */
		if (!try_repair && rnum != num)
		{
			pdf_drop_obj(ctx, x->obj);
			x->type = 'f';
			x->ofs = -1;
			x->gen = 0;
			x->num = 0;
			x->stm_ofs = 0;
			x->obj = NULL;
			try_repair = (doc->repair_attempted == 0);
		}

		if (try_repair)
		{
			/* Also jumped to from the 'o' branch below. */
perform_repair:
			fz_try(ctx)
				pdf_repair_xref(ctx, doc);
			fz_catch(ctx)
			{
				fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
				fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
				fz_rethrow_if(ctx, FZ_ERROR_REPAIRED);
				fz_report_error(ctx);
				/* Choose the message by whether the numbers matched. */
				if (rnum == num)
					fz_throw(ctx, FZ_ERROR_FORMAT, "cannot parse object (%d 0 R)", num);
				else
					fz_throw(ctx, FZ_ERROR_FORMAT, "found object (%d 0 R) instead of (%d 0 R)", rnum, num);
			}
			/* The repair completed: start the lookup again. */
			goto object_updated;
		}

		if (doc->crypt)
			pdf_crypt_obj(ctx, doc->crypt, x->obj, x->num, x->gen);
	}
	else if (x->type == 'o')
	{
		if (!x->obj)
		{
			pdf_xref_entry *orig_x = x;
			pdf_xref_entry *ox = x; /* This init is unused, but it shuts warnings up. */
			orig_x->type = 'O'; /* Mark this node so we know we're recursing. */
			fz_try(ctx)
				/* x->ofs is handed to pdf_load_obj_stm; presumably it
				 * identifies the containing object stream - confirm. */
				x = pdf_load_obj_stm(ctx, doc, x->ofs, &doc->lexbuf.base, num);
			fz_always(ctx)
			{
				/* Most of the time ox == orig_x, but if pdf_load_obj_stm performed a
				 * repair, it may not be. It is safe to call pdf_get_xref_entry_no_change
				 * here, as it does not try/catch. */
				ox = pdf_get_xref_entry_no_change(ctx, doc, num);

				/* Bug 706762: ox can be NULL if the object went away during a repair. */
				if (ox && ox->type == 'O')
					ox->type = 'o'; /* Not recursing any more. */
			}
			fz_catch(ctx)
				fz_rethrow(ctx);
			if (x == NULL)
				fz_throw(ctx, FZ_ERROR_FORMAT, "cannot load object stream containing object (%d 0 R)", num);
			if (!x->obj)
			{
				/* The stream was read but did not yield the object:
				 * mark the entries free, then either give up (a
				 * repair was already tried) or go and repair. */
				x->type = 'f';
				if (ox)
					ox->type = 'f';
				if (doc->repair_attempted)
					fz_throw(ctx, FZ_ERROR_FORMAT, "object (%d 0 R) was not found in its object stream", num);
				goto perform_repair;
			}
		}
	}
	else if (doc->hint_obj_offsets && read_hinted_object(ctx, doc, num))
	{
		/* The hinted read reported success: look the entry up again. */
		goto object_updated;
	}
	else if (doc->file_length && doc->linear_pos < doc->file_length)
	{
		/* Not all of the file has been read linearly yet, so the
		 * object may simply not have arrived. */
		fz_throw(ctx, FZ_ERROR_TRYLATER, "cannot find object in xref (%d 0 R) - not loaded yet?", num);
	}
	else
	{
		fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find object in xref (%d 0 R)", num);
	}

	pdf_set_obj_parent(ctx, x->obj, num);
	return x;
}
  2242. pdf_obj *
  2243. pdf_load_object(fz_context *ctx, pdf_document *doc, int num)
  2244. {
  2245. pdf_xref_entry *entry = pdf_cache_object(ctx, doc, num);
  2246. return pdf_keep_obj(ctx, entry->obj);
  2247. }
  2248. pdf_obj *
  2249. pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref)
  2250. {
  2251. if (pdf_is_indirect(ctx, ref))
  2252. {
  2253. pdf_document *doc = pdf_get_indirect_document(ctx, ref);
  2254. int num = pdf_to_num(ctx, ref);
  2255. pdf_xref_entry *entry;
  2256. if (!doc)
  2257. return NULL;
  2258. if (num <= 0)
  2259. {
  2260. fz_warn(ctx, "invalid indirect reference (%d 0 R)", num);
  2261. return NULL;
  2262. }
  2263. fz_try(ctx)
  2264. entry = pdf_cache_object(ctx, doc, num);
  2265. fz_catch(ctx)
  2266. {
  2267. fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
  2268. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  2269. fz_rethrow_if(ctx, FZ_ERROR_REPAIRED);
  2270. fz_report_error(ctx);
  2271. fz_warn(ctx, "cannot load object (%d 0 R) into cache", num);
  2272. return NULL;
  2273. }
  2274. ref = entry->obj;
  2275. }
  2276. return ref;
  2277. }
  2278. pdf_obj *
  2279. pdf_resolve_indirect_chain(fz_context *ctx, pdf_obj *ref)
  2280. {
  2281. int sanity = 10;
  2282. while (pdf_is_indirect(ctx, ref))
  2283. {
  2284. if (--sanity == 0)
  2285. {
  2286. fz_warn(ctx, "too many indirections (possible indirection cycle involving %d 0 R)", pdf_to_num(ctx, ref));
  2287. return NULL;
  2288. }
  2289. ref = pdf_resolve_indirect(ctx, ref);
  2290. }
  2291. return ref;
  2292. }
  2293. int
  2294. pdf_count_objects(fz_context *ctx, pdf_document *doc)
  2295. {
  2296. return pdf_xref_len(ctx, doc);
  2297. }
  2298. int
  2299. pdf_is_local_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
  2300. {
  2301. pdf_xref *xref = doc->local_xref;
  2302. pdf_xref_subsec *sub;
  2303. int num;
  2304. if (!pdf_is_indirect(ctx, obj))
  2305. return 0;
  2306. if (xref == NULL)
  2307. return 0; /* no local xref present */
  2308. num = pdf_to_num(ctx, obj);
  2309. /* Local xrefs only ever have 1 section, and it should be solid. */
  2310. sub = xref->subsec;
  2311. if (num >= sub->start && num < sub->start + sub->len)
  2312. return sub->table[num - sub->start].type != 0;
  2313. return 0;
  2314. }
  2315. static int
  2316. pdf_create_local_object(fz_context *ctx, pdf_document *doc)
  2317. {
  2318. /* TODO: reuse free object slots by properly linking free object chains in the ofs field */
  2319. pdf_xref_entry *entry;
  2320. int num;
  2321. num = doc->local_xref->num_objects;
  2322. entry = pdf_get_local_xref_entry(ctx, doc, num);
  2323. entry->type = 'f';
  2324. entry->ofs = -1;
  2325. entry->gen = 0;
  2326. entry->num = num;
  2327. entry->stm_ofs = 0;
  2328. entry->stm_buf = NULL;
  2329. entry->obj = NULL;
  2330. return num;
  2331. }
  2332. int
  2333. pdf_create_object(fz_context *ctx, pdf_document *doc)
  2334. {
  2335. /* TODO: reuse free object slots by properly linking free object chains in the ofs field */
  2336. pdf_xref_entry *entry;
  2337. int num;
  2338. if (doc->local_xref && doc->local_xref_nesting > 0)
  2339. return pdf_create_local_object(ctx, doc);
  2340. num = pdf_xref_len(ctx, doc);
  2341. if (num > PDF_MAX_OBJECT_NUMBER)
  2342. fz_throw(ctx, FZ_ERROR_LIMIT, "too many objects stored in pdf");
  2343. entry = pdf_get_incremental_xref_entry(ctx, doc, num);
  2344. entry->type = 'f';
  2345. entry->ofs = -1;
  2346. entry->gen = 0;
  2347. entry->num = num;
  2348. entry->stm_ofs = 0;
  2349. entry->stm_buf = NULL;
  2350. entry->obj = NULL;
  2351. pdf_add_journal_fragment(ctx, doc, num, NULL, NULL, 1);
  2352. return num;
  2353. }
  2354. static void
  2355. pdf_delete_local_object(fz_context *ctx, pdf_document *doc, int num)
  2356. {
  2357. pdf_xref_entry *x;
  2358. if (doc->local_xref == NULL || doc->local_xref_nesting == 0)
  2359. fz_throw(ctx, FZ_ERROR_ARGUMENT, "No local xref to delete from!");
  2360. if (num <= 0 || num >= doc->local_xref->num_objects)
  2361. {
  2362. fz_warn(ctx, "local object out of range (%d 0 R); xref size %d", num, doc->local_xref->num_objects);
  2363. return;
  2364. }
  2365. x = pdf_get_local_xref_entry(ctx, doc, num);
  2366. fz_drop_buffer(ctx, x->stm_buf);
  2367. pdf_drop_obj(ctx, x->obj);
  2368. x->type = 'f';
  2369. x->ofs = 0;
  2370. x->gen += 1;
  2371. x->num = 0;
  2372. x->stm_ofs = 0;
  2373. x->stm_buf = NULL;
  2374. x->obj = NULL;
  2375. }
  2376. void
  2377. pdf_delete_object(fz_context *ctx, pdf_document *doc, int num)
  2378. {
  2379. pdf_xref_entry *x;
  2380. pdf_xref *xref;
  2381. int j;
  2382. if (doc->local_xref && doc->local_xref_nesting > 0)
  2383. {
  2384. pdf_delete_local_object(ctx, doc, num);
  2385. return;
  2386. }
  2387. if (num <= 0 || num >= pdf_xref_len(ctx, doc))
  2388. {
  2389. fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
  2390. return;
  2391. }
  2392. x = pdf_get_incremental_xref_entry(ctx, doc, num);
  2393. fz_drop_buffer(ctx, x->stm_buf);
  2394. pdf_drop_obj(ctx, x->obj);
  2395. x->type = 'f';
  2396. x->ofs = 0;
  2397. x->gen += 1;
  2398. x->num = 0;
  2399. x->stm_ofs = 0;
  2400. x->stm_buf = NULL;
  2401. x->obj = NULL;
  2402. /* Currently we've left a 'free' object in the incremental
  2403. * section. This is enough to cause us to think that the
  2404. * document has changes. Check back in the non-incremental
  2405. * sections to see if the last instance of the object there
  2406. * was free (or if this object never appeared). If so, we
  2407. * can mark this object as non-existent in the incremental
  2408. * xref. This is important so we can 'undo' back to emptiness
  2409. * after we save/when we reload a snapshot. */
  2410. for (j = 1; j < doc->num_xref_sections; j++)
  2411. {
  2412. xref = &doc->xref_sections[j];
  2413. if (num < xref->num_objects)
  2414. {
  2415. pdf_xref_subsec *sub;
  2416. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  2417. {
  2418. pdf_xref_entry *entry;
  2419. if (num < sub->start || num >= sub->start + sub->len)
  2420. continue;
  2421. entry = &sub->table[num - sub->start];
  2422. if (entry->type)
  2423. {
  2424. if (entry->type == 'f')
  2425. {
  2426. /* It was free already! */
  2427. x->type = 0;
  2428. x->gen = 0;
  2429. }
  2430. /* It was a real object. */
  2431. return;
  2432. }
  2433. }
  2434. }
  2435. }
  2436. /* It never appeared before. */
  2437. x->type = 0;
  2438. x->gen = 0;
  2439. }
  2440. static void
  2441. pdf_update_local_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *newobj)
  2442. {
  2443. pdf_xref_entry *x;
  2444. if (doc->local_xref == NULL || doc->local_xref_nesting == 0)
  2445. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't update local object without a local xref");
  2446. if (!newobj)
  2447. {
  2448. pdf_delete_local_object(ctx, doc, num);
  2449. return;
  2450. }
  2451. x = pdf_get_local_xref_entry(ctx, doc, num);
  2452. pdf_drop_obj(ctx, x->obj);
  2453. x->type = 'n';
  2454. x->ofs = 0;
  2455. x->obj = pdf_keep_obj(ctx, newobj);
  2456. pdf_set_obj_parent(ctx, newobj, num);
  2457. }
  2458. void
  2459. pdf_update_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *newobj)
  2460. {
  2461. pdf_xref_entry *x;
  2462. if (!doc)
  2463. return;
  2464. if (doc->local_xref && doc->local_xref_nesting > 0)
  2465. {
  2466. pdf_update_local_object(ctx, doc, num, newobj);
  2467. return;
  2468. }
  2469. if (num <= 0 || num >= pdf_xref_len(ctx, doc))
  2470. {
  2471. fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
  2472. return;
  2473. }
  2474. if (!newobj)
  2475. {
  2476. pdf_delete_object(ctx, doc, num);
  2477. return;
  2478. }
  2479. x = pdf_get_incremental_xref_entry(ctx, doc, num);
  2480. pdf_drop_obj(ctx, x->obj);
  2481. x->type = 'n';
  2482. x->ofs = 0;
  2483. x->obj = pdf_keep_obj(ctx, newobj);
  2484. pdf_set_obj_parent(ctx, newobj, num);
  2485. }
  2486. void
  2487. pdf_update_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj, fz_buffer *newbuf, int compressed)
  2488. {
  2489. int num;
  2490. pdf_xref_entry *x;
  2491. if (pdf_is_indirect(ctx, obj))
  2492. num = pdf_to_num(ctx, obj);
  2493. else
  2494. num = pdf_obj_parent_num(ctx, obj);
  2495. /* Write the Length first, as this has the effect of moving the
  2496. * old object into the journal for undo. This also moves the
  2497. * stream buffer with it, keeping it consistent. */
  2498. pdf_dict_put_int(ctx, obj, PDF_NAME(Length), fz_buffer_storage(ctx, newbuf, NULL));
  2499. if (doc->local_xref && doc->local_xref_nesting > 0)
  2500. {
  2501. x = pdf_get_local_xref_entry(ctx, doc, num);
  2502. }
  2503. else
  2504. {
  2505. if (num <= 0 || num >= pdf_xref_len(ctx, doc))
  2506. {
  2507. fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
  2508. return;
  2509. }
  2510. x = pdf_get_xref_entry_no_null(ctx, doc, num);
  2511. }
  2512. fz_drop_buffer(ctx, x->stm_buf);
  2513. x->stm_buf = fz_keep_buffer(ctx, newbuf);
  2514. if (!compressed)
  2515. {
  2516. pdf_dict_del(ctx, obj, PDF_NAME(Filter));
  2517. pdf_dict_del(ctx, obj, PDF_NAME(DecodeParms));
  2518. }
  2519. }
  2520. int
  2521. pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *buf, size_t size)
  2522. {
  2523. if (!strcmp(key, FZ_META_FORMAT))
  2524. {
  2525. int version = pdf_version(ctx, doc);
  2526. return 1 + (int)fz_snprintf(buf, size, "PDF %d.%d", version/10, version % 10);
  2527. }
  2528. if (!strcmp(key, FZ_META_ENCRYPTION))
  2529. {
  2530. if (doc->crypt)
  2531. {
  2532. const char *stream_method = pdf_crypt_stream_method(ctx, doc->crypt);
  2533. const char *string_method = pdf_crypt_string_method(ctx, doc->crypt);
  2534. if (stream_method == string_method)
  2535. return 1 + (int)fz_snprintf(buf, size, "Standard V%d R%d %d-bit %s",
  2536. pdf_crypt_version(ctx, doc->crypt),
  2537. pdf_crypt_revision(ctx, doc->crypt),
  2538. pdf_crypt_length(ctx, doc->crypt),
  2539. pdf_crypt_string_method(ctx, doc->crypt));
  2540. else
  2541. return 1 + (int)fz_snprintf(buf, size, "Standard V%d R%d %d-bit streams: %s strings: %s",
  2542. pdf_crypt_version(ctx, doc->crypt),
  2543. pdf_crypt_revision(ctx, doc->crypt),
  2544. pdf_crypt_length(ctx, doc->crypt),
  2545. pdf_crypt_stream_method(ctx, doc->crypt),
  2546. pdf_crypt_string_method(ctx, doc->crypt));
  2547. }
  2548. else
  2549. return 1 + (int)fz_strlcpy(buf, "None", size);
  2550. }
  2551. if (strstr(key, "info:") == key)
  2552. {
  2553. pdf_obj *info;
  2554. const char *s;
  2555. int n;
  2556. info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
  2557. if (!info)
  2558. return -1;
  2559. info = pdf_dict_gets(ctx, info, key + 5);
  2560. if (!info)
  2561. return -1;
  2562. s = pdf_to_text_string(ctx, info);
  2563. if (strlen(s) <= 0)
  2564. return -1;
  2565. n = 1 + (int)fz_strlcpy(buf, s, size);
  2566. return n;
  2567. }
  2568. return -1;
  2569. }
  2570. void
  2571. pdf_set_metadata(fz_context *ctx, pdf_document *doc, const char *key, const char *value)
  2572. {
  2573. pdf_obj *info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
  2574. pdf_begin_operation(ctx, doc, "Set Metadata");
  2575. fz_try(ctx)
  2576. {
  2577. /* Ensure we have an Info dictionary. */
  2578. if (!pdf_is_dict(ctx, info))
  2579. {
  2580. info = pdf_add_new_dict(ctx, doc, 8);
  2581. pdf_dict_put_drop(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info), info);
  2582. }
  2583. if (!strcmp(key, FZ_META_INFO_TITLE))
  2584. pdf_dict_put_text_string(ctx, info, PDF_NAME(Title), value);
  2585. else if (!strcmp(key, FZ_META_INFO_AUTHOR))
  2586. pdf_dict_put_text_string(ctx, info, PDF_NAME(Author), value);
  2587. else if (!strcmp(key, FZ_META_INFO_SUBJECT))
  2588. pdf_dict_put_text_string(ctx, info, PDF_NAME(Subject), value);
  2589. else if (!strcmp(key, FZ_META_INFO_KEYWORDS))
  2590. pdf_dict_put_text_string(ctx, info, PDF_NAME(Keywords), value);
  2591. else if (!strcmp(key, FZ_META_INFO_CREATOR))
  2592. pdf_dict_put_text_string(ctx, info, PDF_NAME(Creator), value);
  2593. else if (!strcmp(key, FZ_META_INFO_PRODUCER))
  2594. pdf_dict_put_text_string(ctx, info, PDF_NAME(Producer), value);
  2595. else if (!strcmp(key, FZ_META_INFO_CREATIONDATE))
  2596. {
  2597. int64_t time = pdf_parse_date(ctx, value);
  2598. if (time >= 0)
  2599. pdf_dict_put_date(ctx, info, PDF_NAME(CreationDate), time);
  2600. }
  2601. else if (!strcmp(key, FZ_META_INFO_MODIFICATIONDATE))
  2602. {
  2603. int64_t time = pdf_parse_date(ctx, value);
  2604. if (time >= 0)
  2605. pdf_dict_put_date(ctx, info, PDF_NAME(ModDate), time);
  2606. }
  2607. if (!strncmp(key, FZ_META_INFO, strlen(FZ_META_INFO)))
  2608. key += strlen(FZ_META_INFO);
  2609. pdf_dict_put_text_string(ctx, info, pdf_new_name(ctx, key), value);
  2610. pdf_end_operation(ctx, doc);
  2611. }
  2612. fz_catch(ctx)
  2613. {
  2614. pdf_abandon_operation(ctx, doc);
  2615. fz_rethrow(ctx);
  2616. }
  2617. }
  2618. static fz_link_dest
  2619. pdf_resolve_link_imp(fz_context *ctx, fz_document *doc_, const char *uri)
  2620. {
  2621. pdf_document *doc = (pdf_document*)doc_;
  2622. return pdf_resolve_link_dest(ctx, doc, uri);
  2623. }
  2624. char *pdf_format_link_uri(fz_context *ctx, fz_document *doc, fz_link_dest dest)
  2625. {
  2626. return pdf_new_uri_from_explicit_dest(ctx, dest);
  2627. }
  2628. static fz_document *
  2629. as_pdf(fz_context *ctx, fz_document *doc)
  2630. {
  2631. return doc;
  2632. }
  2633. /*
  2634. Initializers for the fz_document interface.
  2635. The functions are split across two files to allow calls to a
  2636. version of the constructor that does not link in the interpreter.
  2637. The interpreter references the built-in font and cmap resources
  2638. which are quite big. Not linking those into the mutool binary
  2639. saves roughly 6MB of space.
  2640. */
  2641. static fz_colorspace *pdf_document_output_intent_imp(fz_context *ctx, fz_document *doc)
  2642. {
  2643. return pdf_document_output_intent(ctx, (pdf_document*)doc);
  2644. }
  2645. int pdf_needs_password_imp(fz_context *ctx, fz_document *doc)
  2646. {
  2647. return pdf_needs_password(ctx, (pdf_document*)doc);
  2648. }
  2649. int pdf_authenticate_password_imp(fz_context *ctx, fz_document *doc, const char *pw)
  2650. {
  2651. return pdf_authenticate_password(ctx, (pdf_document*)doc, pw);
  2652. }
  2653. int pdf_has_permission_imp(fz_context *ctx, fz_document *doc, fz_permission p)
  2654. {
  2655. return pdf_has_permission(ctx, (pdf_document*)doc, p);
  2656. }
  2657. fz_outline_iterator *pdf_new_outline_iterator_imp(fz_context *ctx, fz_document *doc)
  2658. {
  2659. return pdf_new_outline_iterator(ctx, (pdf_document*)doc);
  2660. }
  2661. int pdf_lookup_metadata_imp(fz_context *ctx, fz_document *doc, const char *key, char *ptr, size_t size)
  2662. {
  2663. return pdf_lookup_metadata(ctx, (pdf_document*)doc, key, ptr, size);
  2664. }
  2665. void pdf_set_metadata_imp(fz_context *ctx, fz_document *doc, const char *key, const char *value)
  2666. {
  2667. pdf_set_metadata(ctx, (pdf_document*)doc, key, value);
  2668. }
  2669. void pdf_run_document_structure_imp(fz_context *ctx, fz_document *doc, fz_device *dev, fz_cookie *cookie)
  2670. {
  2671. pdf_run_document_structure(ctx, (pdf_document*)doc, dev, cookie);
  2672. }
  2673. #ifndef NDEBUG
  2674. void pdf_verify_name_table_sanity(void);
  2675. #endif
  2676. static pdf_document *
  2677. pdf_new_document(fz_context *ctx, fz_stream *file)
  2678. {
  2679. pdf_document *doc = fz_new_derived_document(ctx, pdf_document);
  2680. #ifndef NDEBUG
  2681. pdf_verify_name_table_sanity();
  2682. #endif
  2683. doc->super.drop_document = pdf_drop_document_imp;
  2684. doc->super.get_output_intent = pdf_document_output_intent_imp;
  2685. doc->super.needs_password = pdf_needs_password_imp;
  2686. doc->super.authenticate_password = pdf_authenticate_password_imp;
  2687. doc->super.has_permission = pdf_has_permission_imp;
  2688. doc->super.outline_iterator = pdf_new_outline_iterator_imp;
  2689. doc->super.resolve_link_dest = pdf_resolve_link_imp;
  2690. doc->super.format_link_uri = pdf_format_link_uri;
  2691. doc->super.count_pages = pdf_count_pages_imp;
  2692. doc->super.load_page = pdf_load_page_imp;
  2693. doc->super.page_label = pdf_page_label_imp;
  2694. doc->super.lookup_metadata = pdf_lookup_metadata_imp;
  2695. doc->super.set_metadata = pdf_set_metadata_imp;
  2696. doc->super.run_structure = pdf_run_document_structure_imp;
  2697. doc->super.as_pdf = as_pdf;
  2698. pdf_lexbuf_init(ctx, &doc->lexbuf.base, PDF_LEXBUF_LARGE);
  2699. doc->file = fz_keep_stream(ctx, file);
  2700. /* Default to PDF-1.7 if the version header is missing and for new documents */
  2701. doc->version = 17;
  2702. return doc;
  2703. }
  2704. pdf_document *
  2705. pdf_open_document_with_stream(fz_context *ctx, fz_stream *file)
  2706. {
  2707. pdf_document *doc = pdf_new_document(ctx, file);
  2708. fz_try(ctx)
  2709. {
  2710. pdf_init_document(ctx, doc);
  2711. }
  2712. fz_catch(ctx)
  2713. {
  2714. /* fz_drop_document may clobber our error code/message so we have to stash them temporarily. */
  2715. char message[256];
  2716. int code;
  2717. fz_strlcpy(message, fz_convert_error(ctx, &code), sizeof message);
  2718. fz_drop_document(ctx, &doc->super);
  2719. fz_throw(ctx, code, "%s", message);
  2720. }
  2721. return doc;
  2722. }
  2723. /* Uncomment the following to test progressive loading. */
  2724. /* #define TEST_PROGRESSIVE_HACK */
  2725. pdf_document *
  2726. pdf_open_document(fz_context *ctx, const char *filename)
  2727. {
  2728. fz_stream *file = NULL;
  2729. pdf_document *doc = NULL;
  2730. fz_var(file);
  2731. fz_var(doc);
  2732. fz_try(ctx)
  2733. {
  2734. file = fz_open_file(ctx, filename);
  2735. #ifdef TEST_PROGRESSIVE_HACK
  2736. file->progressive = 1;
  2737. #endif
  2738. doc = pdf_new_document(ctx, file);
  2739. pdf_init_document(ctx, doc);
  2740. }
  2741. fz_always(ctx)
  2742. {
  2743. fz_drop_stream(ctx, file);
  2744. }
  2745. fz_catch(ctx)
  2746. {
  2747. /* fz_drop_document may clobber our error code/message so we have to stash them temporarily. */
  2748. char message[256];
  2749. int code;
  2750. fz_strlcpy(message, fz_convert_error(ctx, &code), sizeof message);
  2751. fz_drop_document(ctx, &doc->super);
  2752. fz_throw(ctx, code, "%s", message);
  2753. }
  2754. #ifdef TEST_PROGRESSIVE_HACK
  2755. if (doc->file_reading_linearly)
  2756. {
  2757. fz_try(ctx)
  2758. pdf_progressive_advance(ctx, doc, doc->linear_page_count-1);
  2759. fz_catch(ctx)
  2760. {
  2761. doc->file_reading_linearly = 0;
  2762. /* swallow the error */
  2763. }
  2764. }
  2765. #endif
  2766. return doc;
  2767. }
  2768. static void
  2769. pdf_load_hints(fz_context *ctx, pdf_document *doc, int objnum)
  2770. {
  2771. fz_stream *stream = NULL;
  2772. pdf_obj *dict;
  2773. fz_var(stream);
  2774. fz_var(dict);
  2775. fz_try(ctx)
  2776. {
  2777. int i, j, least_num_page_objs, page_obj_num_bits;
  2778. int least_page_len, page_len_num_bits, shared_hint_offset;
  2779. /* int least_page_offset, page_offset_num_bits; */
  2780. /* int least_content_stream_len, content_stream_len_num_bits; */
  2781. int num_shared_obj_num_bits, shared_obj_num_bits;
  2782. /* int numerator_bits, denominator_bits; */
  2783. int shared;
  2784. int shared_obj_num, shared_obj_offset, shared_obj_count_page1;
  2785. int shared_obj_count_total;
  2786. int least_shared_group_len, shared_group_len_num_bits;
  2787. int max_object_num = pdf_xref_len(ctx, doc);
  2788. stream = pdf_open_stream_number(ctx, doc, objnum);
  2789. dict = pdf_get_xref_entry_no_null(ctx, doc, objnum)->obj;
  2790. if (dict == NULL || !pdf_is_dict(ctx, dict))
  2791. fz_throw(ctx, FZ_ERROR_FORMAT, "malformed hint object");
  2792. shared_hint_offset = pdf_dict_get_int(ctx, dict, PDF_NAME(S));
  2793. /* Malloc the structures (use realloc to cope with the fact we
  2794. * may try this several times before enough data is loaded) */
  2795. doc->hint_page = fz_realloc_array(ctx, doc->hint_page, doc->linear_page_count+1, pdf_hint_page);
  2796. memset(doc->hint_page, 0, sizeof(*doc->hint_page) * (doc->linear_page_count+1));
  2797. doc->hint_obj_offsets = fz_realloc_array(ctx, doc->hint_obj_offsets, max_object_num, int64_t);
  2798. memset(doc->hint_obj_offsets, 0, sizeof(*doc->hint_obj_offsets) * max_object_num);
  2799. doc->hint_obj_offsets_max = max_object_num;
  2800. /* Read the page object hints table: Header first */
  2801. least_num_page_objs = fz_read_bits(ctx, stream, 32);
  2802. /* The following is sometimes a lie, but we read this version,
  2803. * as other table values are built from it. In
  2804. * pdf_reference17.pdf, this points to 2 objects before the
  2805. * first pages page object. */
  2806. doc->hint_page[0].offset = fz_read_bits(ctx, stream, 32);
  2807. if (doc->hint_page[0].offset > doc->hint_object_offset)
  2808. doc->hint_page[0].offset += doc->hint_object_length;
  2809. page_obj_num_bits = fz_read_bits(ctx, stream, 16);
  2810. least_page_len = fz_read_bits(ctx, stream, 32);
  2811. page_len_num_bits = fz_read_bits(ctx, stream, 16);
  2812. /* least_page_offset = */ (void) fz_read_bits(ctx, stream, 32);
  2813. /* page_offset_num_bits = */ (void) fz_read_bits(ctx, stream, 16);
  2814. /* least_content_stream_len = */ (void) fz_read_bits(ctx, stream, 32);
  2815. /* content_stream_len_num_bits = */ (void) fz_read_bits(ctx, stream, 16);
  2816. num_shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
  2817. shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
  2818. /* numerator_bits = */ (void) fz_read_bits(ctx, stream, 16);
  2819. /* denominator_bits = */ (void) fz_read_bits(ctx, stream, 16);
  2820. /* Item 1: Page object numbers */
  2821. doc->hint_page[0].number = doc->linear_page1_obj_num;
  2822. /* We don't care about the number of objects in the first page */
  2823. (void)fz_read_bits(ctx, stream, page_obj_num_bits);
  2824. j = 1;
  2825. for (i = 1; i < doc->linear_page_count; i++)
  2826. {
  2827. int delta_page_objs = fz_read_bits(ctx, stream, page_obj_num_bits);
  2828. doc->hint_page[i].number = j;
  2829. j += least_num_page_objs + delta_page_objs;
  2830. }
  2831. doc->hint_page[i].number = j; /* Not a real page object */
  2832. fz_sync_bits(ctx, stream);
  2833. /* Item 2: Page lengths */
  2834. j = doc->hint_page[0].offset;
  2835. for (i = 0; i < doc->linear_page_count; i++)
  2836. {
  2837. int delta_page_len = fz_read_bits(ctx, stream, page_len_num_bits);
  2838. int old = j;
  2839. doc->hint_page[i].offset = j;
  2840. j += least_page_len + delta_page_len;
  2841. if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
  2842. j += doc->hint_object_length;
  2843. }
  2844. doc->hint_page[i].offset = j;
  2845. fz_sync_bits(ctx, stream);
  2846. /* Item 3: Shared references */
  2847. shared = 0;
  2848. for (i = 0; i < doc->linear_page_count; i++)
  2849. {
  2850. int num_shared_objs = fz_read_bits(ctx, stream, num_shared_obj_num_bits);
  2851. doc->hint_page[i].index = shared;
  2852. shared += num_shared_objs;
  2853. }
  2854. doc->hint_page[i].index = shared;
  2855. doc->hint_shared_ref = fz_realloc_array(ctx, doc->hint_shared_ref, shared, int);
  2856. memset(doc->hint_shared_ref, 0, sizeof(*doc->hint_shared_ref) * shared);
  2857. fz_sync_bits(ctx, stream);
  2858. /* Item 4: Shared references */
  2859. for (i = 0; i < shared; i++)
  2860. {
  2861. int ref = fz_read_bits(ctx, stream, shared_obj_num_bits);
  2862. doc->hint_shared_ref[i] = ref;
  2863. }
  2864. /* Skip items 5,6,7 as we don't use them */
  2865. fz_seek(ctx, stream, doc->bias + shared_hint_offset, SEEK_SET);
  2866. /* Read the shared object hints table: Header first */
  2867. shared_obj_num = fz_read_bits(ctx, stream, 32);
  2868. shared_obj_offset = fz_read_bits(ctx, stream, 32);
  2869. if (shared_obj_offset > doc->hint_object_offset)
  2870. shared_obj_offset += doc->hint_object_length;
  2871. shared_obj_count_page1 = fz_read_bits(ctx, stream, 32);
  2872. shared_obj_count_total = fz_read_bits(ctx, stream, 32);
  2873. shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
  2874. least_shared_group_len = fz_read_bits(ctx, stream, 32);
  2875. shared_group_len_num_bits = fz_read_bits(ctx, stream, 16);
  2876. /* Sanity check the references in Item 4 above to ensure we
  2877. * don't access out of range with malicious files. */
  2878. for (i = 0; i < shared; i++)
  2879. {
  2880. if (doc->hint_shared_ref[i] >= shared_obj_count_total)
  2881. {
  2882. fz_throw(ctx, FZ_ERROR_FORMAT, "malformed hint stream (shared refs)");
  2883. }
  2884. }
  2885. doc->hint_shared = fz_realloc_array(ctx, doc->hint_shared, shared_obj_count_total+1, pdf_hint_shared);
  2886. memset(doc->hint_shared, 0, sizeof(*doc->hint_shared) * (shared_obj_count_total+1));
  2887. /* Item 1: Shared references */
  2888. j = doc->hint_page[0].offset;
  2889. for (i = 0; i < shared_obj_count_page1; i++)
  2890. {
  2891. int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
  2892. int old = j;
  2893. doc->hint_shared[i].offset = j;
  2894. j += off + least_shared_group_len;
  2895. if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
  2896. j += doc->hint_object_length;
  2897. }
  2898. /* FIXME: We would have problems recreating the length of the
  2899. * last page 1 shared reference group. But we'll never need
  2900. * to, so ignore it. */
  2901. j = shared_obj_offset;
  2902. for (; i < shared_obj_count_total; i++)
  2903. {
  2904. int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
  2905. int old = j;
  2906. doc->hint_shared[i].offset = j;
  2907. j += off + least_shared_group_len;
  2908. if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
  2909. j += doc->hint_object_length;
  2910. }
  2911. doc->hint_shared[i].offset = j;
  2912. fz_sync_bits(ctx, stream);
  2913. /* Item 2: Signature flags: read these just so we can skip */
  2914. for (i = 0; i < shared_obj_count_total; i++)
  2915. {
  2916. doc->hint_shared[i].number = fz_read_bits(ctx, stream, 1);
  2917. }
  2918. fz_sync_bits(ctx, stream);
  2919. /* Item 3: Signatures: just skip */
  2920. for (i = 0; i < shared_obj_count_total; i++)
  2921. {
  2922. if (doc->hint_shared[i].number)
  2923. {
  2924. (void) fz_read_bits(ctx, stream, 128);
  2925. }
  2926. }
  2927. fz_sync_bits(ctx, stream);
  2928. /* Item 4: Shared object object numbers */
  2929. j = doc->linear_page1_obj_num; /* FIXME: This is a lie! */
  2930. for (i = 0; i < shared_obj_count_page1; i++)
  2931. {
  2932. doc->hint_shared[i].number = j;
  2933. j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1;
  2934. }
  2935. j = shared_obj_num;
  2936. for (; i < shared_obj_count_total; i++)
  2937. {
  2938. doc->hint_shared[i].number = j;
  2939. j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1;
  2940. }
  2941. doc->hint_shared[i].number = j;
  2942. /* Now, actually use the data we have gathered. */
  2943. for (i = 0 /*shared_obj_count_page1*/; i < shared_obj_count_total; i++)
  2944. {
  2945. if (doc->hint_shared[i].number >= 0 && doc->hint_shared[i].number < max_object_num)
  2946. doc->hint_obj_offsets[doc->hint_shared[i].number] = doc->hint_shared[i].offset;
  2947. }
  2948. for (i = 0; i < doc->linear_page_count; i++)
  2949. {
  2950. if (doc->hint_page[i].number >= 0 && doc->hint_page[i].number < max_object_num)
  2951. doc->hint_obj_offsets[doc->hint_page[i].number] = doc->hint_page[i].offset;
  2952. }
  2953. }
  2954. fz_always(ctx)
  2955. {
  2956. fz_drop_stream(ctx, stream);
  2957. }
  2958. fz_catch(ctx)
  2959. {
  2960. fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
  2961. /* Don't try to load hints again */
  2962. doc->hints_loaded = 1;
  2963. /* We won't use the linearized object anymore. */
  2964. doc->file_reading_linearly = 0;
  2965. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  2966. /* Any other error becomes a TRYLATER */
  2967. fz_report_error(ctx);
  2968. fz_throw(ctx, FZ_ERROR_TRYLATER, "malformed hints object");
  2969. }
  2970. doc->hints_loaded = 1;
  2971. }
  2972. static void
  2973. pdf_load_hint_object(fz_context *ctx, pdf_document *doc)
  2974. {
  2975. pdf_lexbuf *buf = &doc->lexbuf.base;
  2976. int64_t curr_pos;
  2977. curr_pos = fz_tell(ctx, doc->file);
  2978. fz_seek(ctx, doc->file, doc->bias + doc->hint_object_offset, SEEK_SET);
  2979. fz_try(ctx)
  2980. {
  2981. while (1)
  2982. {
  2983. pdf_obj *page = NULL;
  2984. int num, tok;
  2985. tok = pdf_lex(ctx, doc->file, buf);
  2986. if (tok != PDF_TOK_INT)
  2987. break;
  2988. num = buf->i;
  2989. tok = pdf_lex(ctx, doc->file, buf);
  2990. if (tok != PDF_TOK_INT)
  2991. break;
  2992. /* Ignore gen = buf->i */
  2993. tok = pdf_lex(ctx, doc->file, buf);
  2994. if (tok != PDF_TOK_OBJ)
  2995. break;
  2996. (void)pdf_repair_obj(ctx, doc, buf, NULL, NULL, NULL, NULL, &page, NULL, NULL);
  2997. pdf_load_hints(ctx, doc, num);
  2998. }
  2999. }
  3000. fz_always(ctx)
  3001. {
  3002. fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
  3003. }
  3004. fz_catch(ctx)
  3005. {
  3006. fz_rethrow(ctx);
  3007. }
  3008. }
  3009. pdf_obj *pdf_progressive_advance(fz_context *ctx, pdf_document *doc, int pagenum)
  3010. {
  3011. int curr_pos;
  3012. pdf_obj *page = NULL;
  3013. pdf_load_hinted_page(ctx, doc, pagenum);
  3014. if (pagenum < 0 || pagenum >= doc->linear_page_count)
  3015. fz_throw(ctx, FZ_ERROR_ARGUMENT, "page load out of range (%d of %d)", pagenum, doc->linear_page_count);
  3016. if (doc->linear_pos == doc->file_length)
  3017. return doc->linear_page_refs[pagenum];
  3018. /* Only load hints once, and then only after we have got page 0 */
  3019. if (pagenum > 0 && !doc->hints_loaded && doc->hint_object_offset > 0 && doc->linear_pos >= doc->hint_object_offset)
  3020. {
  3021. /* Found hint object */
  3022. pdf_load_hint_object(ctx, doc);
  3023. }
  3024. DEBUGMESS((ctx, "continuing to try to advance from %d", doc->linear_pos));
  3025. curr_pos = fz_tell(ctx, doc->file);
  3026. fz_var(page);
  3027. fz_try(ctx)
  3028. {
  3029. int eof;
  3030. do
  3031. {
  3032. int num;
  3033. eof = pdf_obj_read(ctx, doc, &doc->linear_pos, &num, &page);
  3034. pdf_drop_obj(ctx, page);
  3035. page = NULL;
  3036. }
  3037. while (!eof);
  3038. {
  3039. pdf_obj *catalog;
  3040. pdf_obj *pages;
  3041. doc->linear_pos = doc->file_length;
  3042. pdf_load_xref(ctx, doc);
  3043. catalog = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
  3044. pages = pdf_dict_get(ctx, catalog, PDF_NAME(Pages));
  3045. if (!pdf_is_dict(ctx, pages))
  3046. fz_throw(ctx, FZ_ERROR_FORMAT, "missing page tree");
  3047. break;
  3048. }
  3049. }
  3050. fz_always(ctx)
  3051. {
  3052. fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
  3053. }
  3054. fz_catch(ctx)
  3055. {
  3056. pdf_drop_obj(ctx, page);
  3057. if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
  3058. {
  3059. if (doc->linear_page_refs[pagenum] == NULL)
  3060. {
  3061. /* Still not got a page */
  3062. fz_rethrow(ctx);
  3063. }
  3064. // TODO: should we really swallow this error?
  3065. fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
  3066. fz_report_error(ctx);
  3067. }
  3068. else
  3069. fz_rethrow(ctx);
  3070. }
  3071. return doc->linear_page_refs[pagenum];
  3072. }
  3073. pdf_document *fz_new_pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr)
  3074. {
  3075. if (!ptr || !ptr->as_pdf)
  3076. return NULL;
  3077. return (pdf_document *)fz_keep_document(ctx, ptr->as_pdf(ctx, ptr));
  3078. }
  3079. pdf_document *pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr)
  3080. {
  3081. return (pdf_document *)((ptr && ptr->count_pages == pdf_count_pages_imp) ? ptr : NULL);
  3082. }
  3083. pdf_page *pdf_page_from_fz_page(fz_context *ctx, fz_page *page)
  3084. {
  3085. if (pdf_document_from_fz_document(ctx, page->doc))
  3086. return (pdf_page*) page;
  3087. return NULL;
  3088. }
  3089. pdf_document *pdf_specifics(fz_context *ctx, fz_document *doc)
  3090. {
  3091. return pdf_document_from_fz_document(ctx, doc);
  3092. }
  3093. pdf_obj *
  3094. pdf_add_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
  3095. {
  3096. pdf_document *orig_doc;
  3097. int num;
  3098. orig_doc = pdf_get_bound_document(ctx, obj);
  3099. if (orig_doc && orig_doc != doc)
  3100. fz_throw(ctx, FZ_ERROR_ARGUMENT, "tried to add an object belonging to a different document");
  3101. if (pdf_is_indirect(ctx, obj))
  3102. return pdf_keep_obj(ctx, obj);
  3103. num = pdf_create_object(ctx, doc);
  3104. pdf_update_object(ctx, doc, num, obj);
  3105. return pdf_new_indirect(ctx, doc, num, 0);
  3106. }
  3107. pdf_obj *
  3108. pdf_add_object_drop(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
  3109. {
  3110. pdf_obj *ind = NULL;
  3111. fz_try(ctx)
  3112. ind = pdf_add_object(ctx, doc, obj);
  3113. fz_always(ctx)
  3114. pdf_drop_obj(ctx, obj);
  3115. fz_catch(ctx)
  3116. fz_rethrow(ctx);
  3117. return ind;
  3118. }
  3119. pdf_obj *
  3120. pdf_add_new_dict(fz_context *ctx, pdf_document *doc, int initial)
  3121. {
  3122. return pdf_add_object_drop(ctx, doc, pdf_new_dict(ctx, doc, initial));
  3123. }
  3124. pdf_obj *
  3125. pdf_add_new_array(fz_context *ctx, pdf_document *doc, int initial)
  3126. {
  3127. return pdf_add_object_drop(ctx, doc, pdf_new_array(ctx, doc, initial));
  3128. }
  3129. pdf_obj *
  3130. pdf_add_stream(fz_context *ctx, pdf_document *doc, fz_buffer *buf, pdf_obj *obj, int compressed)
  3131. {
  3132. pdf_obj *ind;
  3133. if (!obj)
  3134. ind = pdf_add_new_dict(ctx, doc, 4);
  3135. else
  3136. ind = pdf_add_object(ctx, doc, obj);
  3137. fz_try(ctx)
  3138. pdf_update_stream(ctx, doc, ind, buf, compressed);
  3139. fz_catch(ctx)
  3140. {
  3141. pdf_drop_obj(ctx, ind);
  3142. fz_rethrow(ctx);
  3143. }
  3144. return ind;
  3145. }
  3146. pdf_document *pdf_create_document(fz_context *ctx)
  3147. {
  3148. pdf_document *doc;
  3149. pdf_obj *root;
  3150. pdf_obj *pages;
  3151. pdf_obj *trailer = NULL;
  3152. fz_var(trailer);
  3153. doc = pdf_new_document(ctx, NULL);
  3154. fz_try(ctx)
  3155. {
  3156. doc->file_size = 0;
  3157. doc->startxref = 0;
  3158. doc->num_xref_sections = 0;
  3159. doc->num_incremental_sections = 0;
  3160. doc->xref_base = 0;
  3161. doc->disallow_new_increments = 0;
  3162. pdf_get_populating_xref_entry(ctx, doc, 0);
  3163. trailer = pdf_new_dict(ctx, doc, 2);
  3164. pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), 3);
  3165. pdf_dict_put_drop(ctx, trailer, PDF_NAME(Root), root = pdf_add_new_dict(ctx, doc, 2));
  3166. pdf_dict_put(ctx, root, PDF_NAME(Type), PDF_NAME(Catalog));
  3167. pdf_dict_put_drop(ctx, root, PDF_NAME(Pages), pages = pdf_add_new_dict(ctx, doc, 3));
  3168. pdf_dict_put(ctx, pages, PDF_NAME(Type), PDF_NAME(Pages));
  3169. pdf_dict_put_int(ctx, pages, PDF_NAME(Count), 0);
  3170. pdf_dict_put_array(ctx, pages, PDF_NAME(Kids), 1);
  3171. /* Set the trailer of the final xref section. */
  3172. doc->xref_sections[0].trailer = trailer;
  3173. }
  3174. fz_catch(ctx)
  3175. {
  3176. pdf_drop_obj(ctx, trailer);
  3177. fz_drop_document(ctx, &doc->super);
  3178. fz_rethrow(ctx);
  3179. }
  3180. return doc;
  3181. }
  /* NULL-terminated list of file extensions; referenced from
   * pdf_document_handler. */
  static const char *pdf_extensions[] = { "pdf", "fdf", "pclm", "ai", NULL };
  /* NULL-terminated list of MIME types; referenced from
   * pdf_document_handler. */
  static const char *pdf_mimetypes[] = { "application/pdf", "application/PCLm", NULL };
  3196. static int
  3197. pdf_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
  3198. {
  3199. const char *match = "%PDF-";
  3200. const char *match2 = "%FDF-";
  3201. int pos = 0;
  3202. int n = 4096+5;
  3203. int c;
  3204. if (state)
  3205. *state = NULL;
  3206. if (free_state)
  3207. *free_state = NULL;
  3208. if (stream == NULL)
  3209. return 0;
  3210. do
  3211. {
  3212. c = fz_read_byte(ctx, stream);
  3213. if (c == EOF)
  3214. return 0;
  3215. if (c == match[pos] || c == match2[pos])
  3216. {
  3217. pos++;
  3218. if (pos == 5)
  3219. return 100;
  3220. }
  3221. else
  3222. {
  3223. /* Restart matching, but recheck c against the start. */
  3224. pos = (c == match[0]);
  3225. }
  3226. }
  3227. while (--n > 0);
  3228. return 0;
  3229. }
  3230. static fz_document *
  3231. open_document(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *zip, void *state)
  3232. {
  3233. if (file == NULL)
  3234. return NULL;
  3235. return (fz_document *)pdf_open_document_with_stream(ctx, file);
  3236. }
  3237. fz_document_handler pdf_document_handler =
  3238. {
  3239. NULL,
  3240. open_document,
  3241. pdf_extensions,
  3242. pdf_mimetypes,
  3243. pdf_recognize_doc_content
  3244. };
  3245. void pdf_mark_xref(fz_context *ctx, pdf_document *doc)
  3246. {
  3247. int x, e;
  3248. for (x = 0; x < doc->num_xref_sections; x++)
  3249. {
  3250. pdf_xref *xref = &doc->xref_sections[x];
  3251. pdf_xref_subsec *sub;
  3252. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  3253. {
  3254. for (e = 0; e < sub->len; e++)
  3255. {
  3256. pdf_xref_entry *entry = &sub->table[e];
  3257. if (entry->obj)
  3258. {
  3259. entry->marked = 1;
  3260. }
  3261. }
  3262. }
  3263. }
  3264. }
  3265. void pdf_clear_xref(fz_context *ctx, pdf_document *doc)
  3266. {
  3267. int x, e;
  3268. for (x = 0; x < doc->num_xref_sections; x++)
  3269. {
  3270. pdf_xref *xref = &doc->xref_sections[x];
  3271. pdf_xref_subsec *sub;
  3272. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  3273. {
  3274. for (e = 0; e < sub->len; e++)
  3275. {
  3276. pdf_xref_entry *entry = &sub->table[e];
  3277. /* We cannot drop objects if the stream
  3278. * buffer has been updated */
  3279. if (entry->obj != NULL && entry->stm_buf == NULL)
  3280. {
  3281. if (pdf_obj_refs(ctx, entry->obj) == 1)
  3282. {
  3283. pdf_drop_obj(ctx, entry->obj);
  3284. entry->obj = NULL;
  3285. }
  3286. }
  3287. }
  3288. }
  3289. }
  3290. }
  3291. void pdf_clear_xref_to_mark(fz_context *ctx, pdf_document *doc)
  3292. {
  3293. int x, e;
  3294. for (x = 0; x < doc->num_xref_sections; x++)
  3295. {
  3296. pdf_xref *xref = &doc->xref_sections[x];
  3297. pdf_xref_subsec *sub;
  3298. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  3299. {
  3300. for (e = 0; e < sub->len; e++)
  3301. {
  3302. pdf_xref_entry *entry = &sub->table[e];
  3303. /* We cannot drop objects if the stream buffer has
  3304. * been updated */
  3305. if (entry->obj != NULL && entry->stm_buf == NULL)
  3306. {
  3307. if (!entry->marked && pdf_obj_refs(ctx, entry->obj) == 1)
  3308. {
  3309. pdf_drop_obj(ctx, entry->obj);
  3310. entry->obj = NULL;
  3311. }
  3312. }
  3313. }
  3314. }
  3315. }
  3316. }
  3317. int
  3318. pdf_count_versions(fz_context *ctx, pdf_document *doc)
  3319. {
  3320. return doc->num_xref_sections-doc->num_incremental_sections-doc->has_linearization_object;
  3321. }
  3322. int
  3323. pdf_count_unsaved_versions(fz_context *ctx, pdf_document *doc)
  3324. {
  3325. return doc->num_incremental_sections;
  3326. }
  3327. int
  3328. pdf_doc_was_linearized(fz_context *ctx, pdf_document *doc)
  3329. {
  3330. return doc->has_linearization_object;
  3331. }
  3332. static int pdf_obj_exists(fz_context *ctx, pdf_document *doc, int i)
  3333. {
  3334. pdf_xref_subsec *sub;
  3335. int j;
  3336. if (i < 0)
  3337. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Negative object number requested");
  3338. if (i <= doc->max_xref_len)
  3339. j = doc->xref_index[i];
  3340. else
  3341. j = 0;
  3342. /* We may be accessing an earlier version of the document using xref_base
  3343. * and j may be an index into a later xref section */
  3344. if (doc->xref_base > j)
  3345. j = doc->xref_base;
  3346. /* Find the first xref section where the entry is defined. */
  3347. for (; j < doc->num_xref_sections; j++)
  3348. {
  3349. pdf_xref *xref = &doc->xref_sections[j];
  3350. if (i < xref->num_objects)
  3351. {
  3352. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  3353. {
  3354. if (i < sub->start || i >= sub->start + sub->len)
  3355. continue;
  3356. if (sub->table[i - sub->start].type)
  3357. return 1;
  3358. }
  3359. }
  3360. }
  3361. return 0;
  3362. }
  /* Bit flags OR-ed into pdf_changes.obj_changes[] entries. */
  enum {
      FIELD_CHANGED = 1 << 0,
      FIELD_CHANGE_VALID = 1 << 1,
      FIELD_CHANGE_INVALID = 1 << 2
  };
  3368. typedef struct
  3369. {
  3370. int num_obj;
  3371. int obj_changes[FZ_FLEXIBLE_ARRAY];
  3372. } pdf_changes;
  3373. static int
  3374. check_unchanged_between(fz_context *ctx, pdf_document *doc, pdf_changes *changes, pdf_obj *nobj, pdf_obj *oobj)
  3375. {
  3376. int marked = 0;
  3377. int changed = 0;
  3378. /* Trivially identical => trivially unchanged. */
  3379. if (nobj == oobj)
  3380. return 0;
  3381. /* Strictly speaking we shouldn't need to call fz_var,
  3382. * but I suspect static analysis tools are not smart
  3383. * enough to figure that out. */
  3384. fz_var(marked);
  3385. if (pdf_is_indirect(ctx, nobj))
  3386. {
  3387. int o_xref_base = doc->xref_base;
  3388. /* Both must be indirect if one is. */
  3389. if (!pdf_is_indirect(ctx, oobj))
  3390. {
  3391. changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
  3392. return 1;
  3393. }
  3394. /* Handle recursing back into ourselves. */
  3395. if (pdf_obj_marked(ctx, nobj))
  3396. {
  3397. if (pdf_obj_marked(ctx, oobj))
  3398. return 0;
  3399. changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
  3400. return 1;
  3401. }
  3402. else if (pdf_obj_marked(ctx, oobj))
  3403. {
  3404. changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
  3405. return 1;
  3406. }
  3407. nobj = pdf_resolve_indirect_chain(ctx, nobj);
  3408. doc->xref_base = o_xref_base+1;
  3409. fz_try(ctx)
  3410. {
  3411. oobj = pdf_resolve_indirect_chain(ctx, oobj);
  3412. if (oobj != nobj)
  3413. {
  3414. /* Different objects, so lock them */
  3415. if (!pdf_obj_marked(ctx, nobj) && !pdf_obj_marked(ctx, oobj))
  3416. {
  3417. (void)pdf_mark_obj(ctx, nobj);
  3418. (void)pdf_mark_obj(ctx, oobj);
  3419. marked = 1;
  3420. }
  3421. }
  3422. }
  3423. fz_always(ctx)
  3424. doc->xref_base = o_xref_base;
  3425. fz_catch(ctx)
  3426. fz_rethrow(ctx);
  3427. if (nobj == oobj)
  3428. return 0; /* Trivially identical */
  3429. }
  3430. fz_var(changed);
  3431. fz_try(ctx)
  3432. {
  3433. if (pdf_is_dict(ctx, nobj))
  3434. {
  3435. int i, n = pdf_dict_len(ctx, nobj);
  3436. if (!pdf_is_dict(ctx, oobj) || n != pdf_dict_len(ctx, oobj))
  3437. {
  3438. change_found:
  3439. changes->obj_changes[pdf_to_num(ctx, nobj)] |= FIELD_CHANGE_INVALID;
  3440. changed = 1;
  3441. break;
  3442. }
  3443. for (i = 0; i < n; i++)
  3444. {
  3445. pdf_obj *key = pdf_dict_get_key(ctx, nobj, i);
  3446. pdf_obj *nval = pdf_dict_get(ctx, nobj, key);
  3447. pdf_obj *oval = pdf_dict_get(ctx, oobj, key);
  3448. changed |= check_unchanged_between(ctx, doc, changes, nval, oval);
  3449. }
  3450. }
  3451. else if (pdf_is_array(ctx, nobj))
  3452. {
  3453. int i, n = pdf_array_len(ctx, nobj);
  3454. if (!pdf_is_array(ctx, oobj) || n != pdf_array_len(ctx, oobj))
  3455. goto change_found;
  3456. for (i = 0; i < n; i++)
  3457. {
  3458. pdf_obj *nval = pdf_array_get(ctx, nobj, i);
  3459. pdf_obj *oval = pdf_array_get(ctx, oobj, i);
  3460. changed |= check_unchanged_between(ctx, doc, changes, nval, oval);
  3461. }
  3462. }
  3463. else if (pdf_objcmp(ctx, nobj, oobj))
  3464. goto change_found;
  3465. }
  3466. fz_always(ctx)
  3467. {
  3468. if (marked)
  3469. {
  3470. pdf_unmark_obj(ctx, nobj);
  3471. pdf_unmark_obj(ctx, oobj);
  3472. }
  3473. }
  3474. fz_catch(ctx)
  3475. fz_rethrow(ctx);
  3476. return changed;
  3477. }
  /* Growable list of heap-allocated strings: `len` of the `max` allocated
   * slots in `list` are in use. */
  typedef struct
  {
      int max;
      int len;
      char **list;
  }
  char_list;

  /* This structure is used to hold the definition of which fields
   * are locked.
   *
   * p == 1 locks every field. Otherwise, when `all` is set every field is
   * locked except those named in `excludes`; when `all` is clear only the
   * fields named in `includes` are locked (see pdf_is_field_locked). */
  struct pdf_locked_fields
  {
      int p;
      int all;
      char_list includes;
      char_list excludes;
  };
  3493. static void
  3494. free_char_list(fz_context *ctx, char_list *c)
  3495. {
  3496. int i;
  3497. if (c == NULL)
  3498. return;
  3499. for (i = c->len-1; i >= 0; i--)
  3500. fz_free(ctx, c->list[i]);
  3501. fz_free(ctx, c->list);
  3502. c->len = 0;
  3503. c->max = 0;
  3504. }
  3505. void
  3506. pdf_drop_locked_fields(fz_context *ctx, pdf_locked_fields *fl)
  3507. {
  3508. if (fl == NULL)
  3509. return;
  3510. free_char_list(ctx, &fl->includes);
  3511. free_char_list(ctx, &fl->excludes);
  3512. fz_free(ctx, fl);
  3513. }
  3514. static void
  3515. char_list_append(fz_context *ctx, char_list *list, const char *s)
  3516. {
  3517. if (list->len == list->max)
  3518. {
  3519. int n = list->max * 2;
  3520. if (n == 0) n = 4;
  3521. list->list = fz_realloc_array(ctx, list->list, n, char *);
  3522. list->max = n;
  3523. }
  3524. list->list[list->len] = fz_strdup(ctx, s);
  3525. list->len++;
  3526. }
  3527. int
  3528. pdf_is_field_locked(fz_context *ctx, pdf_locked_fields *locked, const char *name)
  3529. {
  3530. int i;
  3531. if (locked->p == 1)
  3532. {
  3533. /* Permissions were set, and say that field changes are not to be allowed. */
  3534. return 1; /* Locked */
  3535. }
  3536. if(locked->all)
  3537. {
  3538. /* The only way we might not be unlocked is if
  3539. * we are listed in the excludes. */
  3540. for (i = 0; i < locked->excludes.len; i++)
  3541. if (!strcmp(locked->excludes.list[i], name))
  3542. return 0;
  3543. return 1;
  3544. }
  3545. /* The only way we can be locked is for us to be in the includes. */
  3546. for (i = 0; i < locked->includes.len; i++)
  3547. if (strcmp(locked->includes.list[i], name) == 0)
  3548. return 1;
  3549. /* Anything else is unlocked */
  3550. return 0;
  3551. }
  3552. /* Unfortunately, in C, there is no legal way to define a function
  3553. * type that returns itself. We therefore have to use a struct
  3554. * wrapper. */
  3555. typedef struct filter_wrap
  3556. {
  3557. struct filter_wrap (*func)(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
  3558. } filter_wrap;
  3559. typedef struct filter_wrap (*filter_fn)(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
  3560. #define RETURN_FILTER(f) { filter_wrap rf; rf.func = (f); return rf; }
  3561. static filter_wrap filter_simple(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
  3562. {
  3563. RETURN_FILTER(NULL);
  3564. }
  3565. static filter_wrap filter_transformparams(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
  3566. {
  3567. if (pdf_name_eq(ctx, key, PDF_NAME(Type)) ||
  3568. pdf_name_eq(ctx, key, PDF_NAME(P)) ||
  3569. pdf_name_eq(ctx, key, PDF_NAME(V)) ||
  3570. pdf_name_eq(ctx, key, PDF_NAME(Document)) ||
  3571. pdf_name_eq(ctx, key, PDF_NAME(Msg)) ||
  3572. pdf_name_eq(ctx, key, PDF_NAME(V)) ||
  3573. pdf_name_eq(ctx, key, PDF_NAME(Annots)) ||
  3574. pdf_name_eq(ctx, key, PDF_NAME(Form)) ||
  3575. pdf_name_eq(ctx, key, PDF_NAME(FormEx)) ||
  3576. pdf_name_eq(ctx, key, PDF_NAME(EF)) ||
  3577. pdf_name_eq(ctx, key, PDF_NAME(P)) ||
  3578. pdf_name_eq(ctx, key, PDF_NAME(Action)) ||
  3579. pdf_name_eq(ctx, key, PDF_NAME(Fields)))
  3580. RETURN_FILTER(&filter_simple);
  3581. RETURN_FILTER(NULL);
  3582. }
  3583. static filter_wrap filter_reference(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
  3584. {
  3585. if (pdf_name_eq(ctx, key, PDF_NAME(Type)) ||
  3586. pdf_name_eq(ctx, key, PDF_NAME(TransformMethod)) ||
  3587. pdf_name_eq(ctx, key, PDF_NAME(DigestMethod)) ||
  3588. pdf_name_eq(ctx, key, PDF_NAME(DigestValue)) ||
  3589. pdf_name_eq(ctx, key, PDF_NAME(DigestLocation)))
  3590. RETURN_FILTER(&filter_simple);
  3591. if (pdf_name_eq(ctx, key, PDF_NAME(TransformParams)))
  3592. RETURN_FILTER(&filter_transformparams);
  3593. RETURN_FILTER(NULL);
  3594. }
  3595. static filter_wrap filter_prop_build_sub(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
  3596. {
  3597. if (pdf_name_eq(ctx, key, PDF_NAME(Name)) ||
  3598. pdf_name_eq(ctx, key, PDF_NAME(Date)) ||
  3599. pdf_name_eq(ctx, key, PDF_NAME(R)) ||
  3600. pdf_name_eq(ctx, key, PDF_NAME(PreRelease)) ||
  3601. pdf_name_eq(ctx, key, PDF_NAME(OS)) ||
  3602. pdf_name_eq(ctx, key, PDF_NAME(NonEFontNoWarn)) ||
  3603. pdf_name_eq(ctx, key, PDF_NAME(TrustedMode)) ||
  3604. pdf_name_eq(ctx, key, PDF_NAME(V)) ||
  3605. pdf_name_eq(ctx, key, PDF_NAME(REx)) ||
  3606. pdf_name_eq(ctx, key, PDF_NAME(Preview)))
  3607. RETURN_FILTER(&filter_simple);
  3608. RETURN_FILTER(NULL);
  3609. }
  3610. static filter_wrap filter_prop_build(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
  3611. {
  3612. if (pdf_name_eq(ctx, key, PDF_NAME(Filter)) ||
  3613. pdf_name_eq(ctx, key, PDF_NAME(PubSec)) ||
  3614. pdf_name_eq(ctx, key, PDF_NAME(App)) ||
  3615. pdf_name_eq(ctx, key, PDF_NAME(SigQ)))
  3616. RETURN_FILTER(&filter_prop_build_sub);
  3617. RETURN_FILTER(NULL);
  3618. }
  3619. static filter_wrap filter_v(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
  3620. {
  3621. /* Text can point to a stream object */
  3622. if (pdf_name_eq(ctx, key, PDF_NAME(Length)) && pdf_is_stream(ctx, dict))
  3623. RETURN_FILTER(&filter_simple);
  3624. /* Sigs point to a dict. */
  3625. if (pdf_name_eq(ctx, key, PDF_NAME(Type)) ||
  3626. pdf_name_eq(ctx, key, PDF_NAME(Filter)) ||
  3627. pdf_name_eq(ctx, key, PDF_NAME(SubFilter)) ||
  3628. pdf_name_eq(ctx, key, PDF_NAME(Contents)) ||
  3629. pdf_name_eq(ctx, key, PDF_NAME(Cert)) ||
  3630. pdf_name_eq(ctx, key, PDF_NAME(ByteRange)) ||
  3631. pdf_name_eq(ctx, key, PDF_NAME(Changes)) ||
  3632. pdf_name_eq(ctx, key, PDF_NAME(Name)) ||
  3633. pdf_name_eq(ctx, key, PDF_NAME(M)) ||
  3634. pdf_name_eq(ctx, key, PDF_NAME(Location)) ||
  3635. pdf_name_eq(ctx, key, PDF_NAME(Reason)) ||
  3636. pdf_name_eq(ctx, key, PDF_NAME(ContactInfo)) ||
  3637. pdf_name_eq(ctx, key, PDF_NAME(R)) ||
  3638. pdf_name_eq(ctx, key, PDF_NAME(V)) ||
  3639. pdf_name_eq(ctx, key, PDF_NAME(Prop_AuthTime)) ||
  3640. pdf_name_eq(ctx, key, PDF_NAME(Prop_AuthType)))
  3641. RETURN_FILTER(&filter_simple);
  3642. if (pdf_name_eq(ctx, key, PDF_NAME(Reference)))
  3643. RETURN_FILTER(filter_reference);
  3644. if (pdf_name_eq(ctx, key, PDF_NAME(Prop_Build)))
  3645. RETURN_FILTER(filter_prop_build);
  3646. RETURN_FILTER(NULL);
  3647. }
  3648. static filter_wrap filter_appearance(fz_context *ctx, pdf_obj *dict, pdf_obj *key);
  3649. static filter_wrap filter_xobject_list(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
  3650. {
  3651. /* FIXME: Infinite recursion possible here? */
  3652. RETURN_FILTER(&filter_appearance);
  3653. }
  3654. static filter_wrap filter_font(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
  3655. {
  3656. /* In the example I've seen the /Name field was dropped, so we'll allow
  3657. * local changes, but none that follow an indirection. */
  3658. RETURN_FILTER(NULL);
  3659. }
  3660. /* FIXME: One idea here is to make filter_font_list and filter_xobject_list
  3661. * only accept NEW objects as changes. Will think about this. */
  3662. static filter_wrap filter_font_list(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
  3663. {
  3664. RETURN_FILTER(&filter_font);
  3665. }
  3666. static filter_wrap filter_resources(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
  3667. {
  3668. if (pdf_name_eq(ctx, key, PDF_NAME(XObject)))
  3669. RETURN_FILTER(&filter_xobject_list);
  3670. if (pdf_name_eq(ctx, key, PDF_NAME(Font)))
  3671. RETURN_FILTER(&filter_font_list);
  3672. RETURN_FILTER(NULL);
  3673. }
  3674. static filter_wrap filter_appearance(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
  3675. {
  3676. if (pdf_name_eq(ctx, key, PDF_NAME(Resources)))
  3677. RETURN_FILTER(&filter_resources);
  3678. RETURN_FILTER(NULL);
  3679. }
  3680. static filter_wrap filter_ap(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
  3681. {
  3682. /* Just the /N entry for now. May need to add more later. */
  3683. if (pdf_name_eq(ctx, key, PDF_NAME(N)) && pdf_is_stream(ctx, pdf_dict_get(ctx, dict, key)))
  3684. RETURN_FILTER(&filter_appearance);
  3685. RETURN_FILTER(NULL);
  3686. }
  3687. static filter_wrap filter_xfa(fz_context *ctx, pdf_obj *dict, pdf_obj *key)
  3688. {
  3689. /* Text can point to a stream object */
  3690. if (pdf_is_stream(ctx, dict))
  3691. RETURN_FILTER(&filter_simple);
  3692. RETURN_FILTER(NULL);
  3693. }
  3694. static void
  3695. filter_changes_accepted(fz_context *ctx, pdf_changes *changes, pdf_obj *obj, filter_fn filter)
  3696. {
  3697. int obj_num;
  3698. if (obj == NULL || pdf_obj_marked(ctx, obj))
  3699. return;
  3700. obj_num = pdf_to_num(ctx, obj);
  3701. fz_try(ctx)
  3702. {
  3703. if (obj_num != 0)
  3704. {
  3705. (void)pdf_mark_obj(ctx, obj);
  3706. changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
  3707. }
  3708. if (filter == NULL)
  3709. break;
  3710. if (pdf_is_dict(ctx, obj))
  3711. {
  3712. int i, n = pdf_dict_len(ctx, obj);
  3713. for (i = 0; i < n; i++)
  3714. {
  3715. pdf_obj *key = pdf_dict_get_key(ctx, obj, i);
  3716. pdf_obj *val = pdf_dict_get_val(ctx, obj, i);
  3717. filter_fn f = (filter(ctx, obj, key)).func;
  3718. if (f != NULL)
  3719. filter_changes_accepted(ctx, changes, val, f);
  3720. }
  3721. }
  3722. else if (pdf_is_array(ctx, obj))
  3723. {
  3724. int i, n = pdf_array_len(ctx, obj);
  3725. for (i = 0; i < n; i++)
  3726. {
  3727. pdf_obj *val = pdf_array_get(ctx, obj, i);
  3728. filter_changes_accepted(ctx, changes, val, filter);
  3729. }
  3730. }
  3731. }
  3732. fz_always(ctx)
  3733. if (obj_num != 0)
  3734. pdf_unmark_obj(ctx, obj);
  3735. fz_catch(ctx)
  3736. fz_rethrow(ctx);
  3737. }
  3738. static void
  3739. check_field(fz_context *ctx, pdf_document *doc, pdf_changes *changes, pdf_obj *obj, pdf_locked_fields *locked, const char *name_prefix, pdf_obj *new_v, pdf_obj *old_v)
  3740. {
  3741. pdf_obj *old_obj, *new_obj, *n_v, *o_v;
  3742. int o_xref_base;
  3743. int obj_num;
  3744. char *field_name = NULL;
  3745. /* All fields MUST be indirections, either in the Fields array
  3746. * or AcroForms, or in the Kids array of other Fields. */
  3747. if (!pdf_is_indirect(ctx, obj))
  3748. return;
  3749. obj_num = pdf_to_num(ctx, obj);
  3750. o_xref_base = doc->xref_base;
  3751. new_obj = pdf_resolve_indirect_chain(ctx, obj);
  3752. /* Similarly, all fields must be dicts */
  3753. if (!pdf_is_dict(ctx, new_obj))
  3754. return;
  3755. if (pdf_obj_marked(ctx, obj))
  3756. return;
  3757. fz_var(field_name);
  3758. fz_try(ctx)
  3759. {
  3760. int i, len;
  3761. const char *name;
  3762. size_t n;
  3763. pdf_obj *t;
  3764. int is_locked;
  3765. (void)pdf_mark_obj(ctx, obj);
  3766. /* Do this within the try, so we can catch any problems */
  3767. doc->xref_base = o_xref_base+1;
  3768. old_obj = pdf_resolve_indirect_chain(ctx, obj);
  3769. t = pdf_dict_get(ctx, old_obj, PDF_NAME(T));
  3770. if (t != NULL)
  3771. {
  3772. name = pdf_dict_get_text_string(ctx, old_obj, PDF_NAME(T));
  3773. n = strlen(name)+1;
  3774. if (*name_prefix)
  3775. n += 1 + strlen(name_prefix);
  3776. field_name = fz_malloc(ctx, n);
  3777. if (*name_prefix)
  3778. {
  3779. strcpy(field_name, name_prefix);
  3780. strcat(field_name, ".");
  3781. }
  3782. else
  3783. *field_name = 0;
  3784. strcat(field_name, name);
  3785. name_prefix = field_name;
  3786. }
  3787. doc->xref_base = o_xref_base;
  3788. if (!pdf_is_dict(ctx, old_obj))
  3789. break;
  3790. /* Check V explicitly, allowing for it being inherited. */
  3791. n_v = pdf_dict_get(ctx, new_obj, PDF_NAME(V));
  3792. if (n_v == NULL)
  3793. n_v = new_v;
  3794. o_v = pdf_dict_get(ctx, old_obj, PDF_NAME(V));
  3795. if (o_v == NULL)
  3796. o_v = old_v;
  3797. is_locked = pdf_is_field_locked(ctx, locked, name_prefix);
  3798. if (pdf_name_eq(ctx, pdf_dict_get(ctx, new_obj, PDF_NAME(Type)), PDF_NAME(Annot)) &&
  3799. pdf_name_eq(ctx, pdf_dict_get(ctx, new_obj, PDF_NAME(Subtype)), PDF_NAME(Widget)))
  3800. {
  3801. if (is_locked)
  3802. {
  3803. /* If locked, V must not change! */
  3804. if (check_unchanged_between(ctx, doc, changes, n_v, o_v))
  3805. changes->obj_changes[obj_num] |= FIELD_CHANGE_INVALID;
  3806. }
  3807. else
  3808. {
  3809. /* If not locked, V can change to be filled in! */
  3810. filter_changes_accepted(ctx, changes, n_v, &filter_v);
  3811. changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
  3812. }
  3813. }
  3814. /* Check all the fields in the new object are
  3815. * either the same as the old object, or are
  3816. * expected changes. */
  3817. len = pdf_dict_len(ctx, new_obj);
  3818. for (i = 0; i < len; i++)
  3819. {
  3820. pdf_obj *key = pdf_dict_get_key(ctx, new_obj, i);
  3821. pdf_obj *nval = pdf_dict_get(ctx, new_obj, key);
  3822. pdf_obj *oval = pdf_dict_get(ctx, old_obj, key);
  3823. /* Kids arrays shouldn't change. */
  3824. if (pdf_name_eq(ctx, key, PDF_NAME(Kids)))
  3825. {
  3826. int j, m;
  3827. /* Kids must be an array. If it's not, count it as a difference. */
  3828. if (!pdf_is_array(ctx, nval) || !pdf_is_array(ctx, oval))
  3829. {
  3830. change_found:
  3831. changes->obj_changes[obj_num] |= FIELD_CHANGE_INVALID;
  3832. break;
  3833. }
  3834. m = pdf_array_len(ctx, nval);
  3835. /* Any change in length counts as a difference */
  3836. if (m != pdf_array_len(ctx, oval))
  3837. goto change_found;
  3838. for (j = 0; j < m; j++)
  3839. {
  3840. pdf_obj *nkid = pdf_array_get(ctx, nval, j);
  3841. pdf_obj *okid = pdf_array_get(ctx, oval, j);
  3842. /* Kids arrays are supposed to all be indirect. If they aren't,
  3843. * count it as a difference. */
  3844. if (!pdf_is_indirect(ctx, nkid) || !pdf_is_indirect(ctx, okid))
  3845. goto change_found;
  3846. /* For now at least, we'll count any change in number as a difference. */
  3847. if (pdf_to_num(ctx, nkid) != pdf_to_num(ctx, okid))
  3848. goto change_found;
  3849. check_field(ctx, doc, changes, nkid, locked, name_prefix, n_v, o_v);
  3850. }
  3851. }
  3852. else if (pdf_name_eq(ctx, key, PDF_NAME(V)))
  3853. {
  3854. /* V is checked above */
  3855. }
  3856. else if (pdf_name_eq(ctx, key, PDF_NAME(AP)))
  3857. {
  3858. /* If we're locked, then nothing can change. If not,
  3859. * we can change to be filled in. */
  3860. if (is_locked)
  3861. check_unchanged_between(ctx, doc, changes, nval, oval);
  3862. else
  3863. filter_changes_accepted(ctx, changes, nval, &filter_ap);
  3864. }
  3865. /* All other fields can't change */
  3866. else
  3867. check_unchanged_between(ctx, doc, changes, nval, oval);
  3868. }
  3869. /* Now check all the fields in the old object to
  3870. * make sure none were dropped. */
  3871. len = pdf_dict_len(ctx, old_obj);
  3872. for (i = 0; i < len; i++)
  3873. {
  3874. pdf_obj *key = pdf_dict_get_key(ctx, old_obj, i);
  3875. pdf_obj *nval, *oval;
  3876. /* V is checked above */
  3877. if (pdf_name_eq(ctx, key, PDF_NAME(V)))
  3878. continue;
  3879. nval = pdf_dict_get(ctx, new_obj, key);
  3880. oval = pdf_dict_get(ctx, old_obj, key);
  3881. if (nval == NULL && oval != NULL)
  3882. changes->obj_changes[pdf_to_num(ctx, nval)] |= FIELD_CHANGE_INVALID;
  3883. }
  3884. changes->obj_changes[obj_num] |= FIELD_CHANGE_VALID;
  3885. }
  3886. fz_always(ctx)
  3887. {
  3888. pdf_unmark_obj(ctx, obj);
  3889. fz_free(ctx, field_name);
  3890. doc->xref_base = o_xref_base;
  3891. }
  3892. fz_catch(ctx)
  3893. fz_rethrow(ctx);
  3894. }
  3895. static int
  3896. pdf_obj_changed_in_version(fz_context *ctx, pdf_document *doc, int num, int version)
  3897. {
  3898. if (num < 0 || num > doc->max_xref_len)
  3899. fz_throw(ctx, FZ_ERROR_ARGUMENT, "Invalid object number requested");
  3900. return version == doc->xref_index[num];
  3901. }
  3902. static void
  3903. merge_lock_specification(fz_context *ctx, pdf_locked_fields *fields, pdf_obj *lock)
  3904. {
  3905. pdf_obj *action;
  3906. int i, r, w;
  3907. if (lock == NULL)
  3908. return;
  3909. action = pdf_dict_get(ctx, lock, PDF_NAME(Action));
  3910. if (pdf_name_eq(ctx, action, PDF_NAME(All)))
  3911. {
  3912. /* All fields locked means we don't need any stored
  3913. * includes/excludes. */
  3914. fields->all = 1;
  3915. free_char_list(ctx, &fields->includes);
  3916. free_char_list(ctx, &fields->excludes);
  3917. }
  3918. else
  3919. {
  3920. pdf_obj *f = pdf_dict_get(ctx, lock, PDF_NAME(Fields));
  3921. int len = pdf_array_len(ctx, f);
  3922. if (pdf_name_eq(ctx, action, PDF_NAME(Include)))
  3923. {
  3924. if (fields->all)
  3925. {
  3926. /* Current state = "All except <excludes> are locked".
  3927. * We need to remove <Fields> from <excludes>. */
  3928. for (i = 0; i < len; i++)
  3929. {
  3930. const char *s = pdf_array_get_text_string(ctx, f, i);
  3931. for (r = w = 0; r < fields->excludes.len; r++)
  3932. {
  3933. if (strcmp(s, fields->excludes.list[r]))
  3934. fields->excludes.list[w++] = fields->excludes.list[r];
  3935. }
  3936. fields->excludes.len = w;
  3937. }
  3938. }
  3939. else
  3940. {
  3941. /* Current state = <includes> are locked.
  3942. * We need to add <Fields> to <include> (avoiding repetition). */
  3943. for (i = 0; i < len; i++)
  3944. {
  3945. const char *s = pdf_array_get_text_string(ctx, f, i);
  3946. for (r = 0; r < fields->includes.len; r++)
  3947. {
  3948. if (!strcmp(s, fields->includes.list[r]))
  3949. break;
  3950. }
  3951. if (r == fields->includes.len)
  3952. char_list_append(ctx, &fields->includes, s);
  3953. }
  3954. }
  3955. }
  3956. else if (pdf_name_eq(ctx, action, PDF_NAME(Exclude)))
  3957. {
  3958. if (fields->all)
  3959. {
  3960. /* Current state = "All except <excludes> are locked.
  3961. * We need to remove anything from <excludes> that isn't in <Fields>. */
  3962. for (r = w = 0; r < fields->excludes.len; r++)
  3963. {
  3964. for (i = 0; i < len; i++)
  3965. {
  3966. const char *s = pdf_array_get_text_string(ctx, f, i);
  3967. if (!strcmp(s, fields->excludes.list[r]))
  3968. break;
  3969. }
  3970. if (i != len) /* we found a match */
  3971. fields->excludes.list[w++] = fields->excludes.list[r];
  3972. }
  3973. fields->excludes.len = w;
  3974. }
  3975. else
  3976. {
  3977. /* Current state = <includes> are locked.
  3978. * Set all. <excludes> becomes <Fields> less <includes>. Remove <includes>. */
  3979. fields->all = 1;
  3980. for (i = 0; i < len; i++)
  3981. {
  3982. const char *s = pdf_array_get_text_string(ctx, f, i);
  3983. for (r = 0; r < fields->includes.len; r++)
  3984. {
  3985. if (!strcmp(s, fields->includes.list[r]))
  3986. break;
  3987. }
  3988. if (r == fields->includes.len)
  3989. char_list_append(ctx, &fields->excludes, s);
  3990. }
  3991. free_char_list(ctx, &fields->includes);
  3992. }
  3993. }
  3994. }
  3995. }
  3996. static void
  3997. find_locked_fields_value(fz_context *ctx, pdf_locked_fields *fields, pdf_obj *v)
  3998. {
  3999. pdf_obj *ref = pdf_dict_get(ctx, v, PDF_NAME(Reference));
  4000. int i, n;
  4001. if (!ref)
  4002. return;
  4003. n = pdf_array_len(ctx, ref);
  4004. for (i = 0; i < n; i++)
  4005. {
  4006. pdf_obj *sr = pdf_array_get(ctx, ref, i);
  4007. pdf_obj *tm, *tp, *type;
  4008. /* Type is optional, but if it exists, it'd better be SigRef. */
  4009. type = pdf_dict_get(ctx, sr, PDF_NAME(Type));
  4010. if (type != NULL && !pdf_name_eq(ctx, type, PDF_NAME(SigRef)))
  4011. continue;
  4012. tm = pdf_dict_get(ctx, sr, PDF_NAME(TransformMethod));
  4013. tp = pdf_dict_get(ctx, sr, PDF_NAME(TransformParams));
  4014. if (pdf_name_eq(ctx, tm, PDF_NAME(DocMDP)))
  4015. {
  4016. int p = pdf_dict_get_int(ctx, tp, PDF_NAME(P));
  4017. if (p == 0)
  4018. p = 2;
  4019. if (fields->p == 0)
  4020. fields->p = p;
  4021. else
  4022. fields->p = fz_mini(fields->p, p);
  4023. }
  4024. else if (pdf_name_eq(ctx, tm, PDF_NAME(FieldMDP)))
  4025. merge_lock_specification(ctx, fields, tp);
  4026. }
  4027. }
  4028. static void
  4029. find_locked_fields_aux(fz_context *ctx, pdf_obj *field, pdf_locked_fields *fields, pdf_obj *inherit_v, pdf_obj *inherit_ft)
  4030. {
  4031. int i, n;
  4032. if (!pdf_name_eq(ctx, pdf_dict_get(ctx, field, PDF_NAME(Type)), PDF_NAME(Annot)))
  4033. return;
  4034. if (pdf_obj_marked(ctx, field))
  4035. return;
  4036. fz_try(ctx)
  4037. {
  4038. pdf_obj *kids, *v, *ft;
  4039. (void)pdf_mark_obj(ctx, field);
  4040. v = pdf_dict_get(ctx, field, PDF_NAME(V));
  4041. if (v == NULL)
  4042. v = inherit_v;
  4043. ft = pdf_dict_get(ctx, field, PDF_NAME(FT));
  4044. if (ft == NULL)
  4045. ft = inherit_ft;
  4046. /* We are looking for Widget annotations of type Sig that are
  4047. * signed (i.e. have a 'V' field). */
  4048. if (pdf_name_eq(ctx, pdf_dict_get(ctx, field, PDF_NAME(Subtype)), PDF_NAME(Widget)) &&
  4049. pdf_name_eq(ctx, ft, PDF_NAME(Sig)) &&
  4050. pdf_name_eq(ctx, pdf_dict_get(ctx, v, PDF_NAME(Type)), PDF_NAME(Sig)))
  4051. {
  4052. /* Signed Sig Widgets (i.e. ones with a 'V' field) need
  4053. * to have their lock field respected. */
  4054. merge_lock_specification(ctx, fields, pdf_dict_get(ctx, field, PDF_NAME(Lock)));
  4055. /* Look for DocMDP and FieldMDP entries to see what
  4056. * flavours of alterations are allowed. */
  4057. find_locked_fields_value(ctx, fields, v);
  4058. }
  4059. /* Recurse as required */
  4060. kids = pdf_dict_get(ctx, field, PDF_NAME(Kids));
  4061. if (kids)
  4062. {
  4063. n = pdf_array_len(ctx, kids);
  4064. for (i = 0; i < n; i++)
  4065. find_locked_fields_aux(ctx, pdf_array_get(ctx, kids, i), fields, v, ft);
  4066. }
  4067. }
  4068. fz_always(ctx)
  4069. pdf_unmark_obj(ctx, field);
  4070. fz_catch(ctx)
  4071. fz_rethrow(ctx);
  4072. }
  4073. pdf_locked_fields *
  4074. pdf_find_locked_fields(fz_context *ctx, pdf_document *doc, int version)
  4075. {
  4076. pdf_locked_fields *fields = fz_malloc_struct(ctx, pdf_locked_fields);
  4077. int o_xref_base = doc->xref_base;
  4078. doc->xref_base = version;
  4079. fz_var(fields);
  4080. fz_try(ctx)
  4081. {
  4082. pdf_obj *fobj = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm/Fields");
  4083. int i, len = pdf_array_len(ctx, fobj);
  4084. if (len == 0)
  4085. break;
  4086. for (i = 0; i < len; i++)
  4087. find_locked_fields_aux(ctx, pdf_array_get(ctx, fobj, i), fields, NULL, NULL);
  4088. /* Add in any DocMDP referenced directly from the Perms dict. */
  4089. find_locked_fields_value(ctx, fields, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Perms/DocMDP"));
  4090. }
  4091. fz_always(ctx)
  4092. doc->xref_base = o_xref_base;
  4093. fz_catch(ctx)
  4094. {
  4095. pdf_drop_locked_fields(ctx, fields);
  4096. fz_rethrow(ctx);
  4097. }
  4098. return fields;
  4099. }
  4100. pdf_locked_fields *
  4101. pdf_find_locked_fields_for_sig(fz_context *ctx, pdf_document *doc, pdf_obj *sig)
  4102. {
  4103. pdf_locked_fields *fields = fz_malloc_struct(ctx, pdf_locked_fields);
  4104. fz_var(fields);
  4105. fz_try(ctx)
  4106. {
  4107. pdf_obj *ref;
  4108. int i, len;
  4109. /* Ensure it really is a sig */
  4110. if (!pdf_name_eq(ctx, pdf_dict_get(ctx, sig, PDF_NAME(Subtype)), PDF_NAME(Widget)) ||
  4111. !pdf_name_eq(ctx, pdf_dict_get_inheritable(ctx, sig, PDF_NAME(FT)), PDF_NAME(Sig)))
  4112. break;
  4113. /* Check the locking details given in the V (i.e. what the signature value
  4114. * claims to lock). */
  4115. ref = pdf_dict_getp(ctx, sig, "V/Reference");
  4116. len = pdf_array_len(ctx, ref);
  4117. for (i = 0; i < len; i++)
  4118. {
  4119. pdf_obj *tp = pdf_dict_get(ctx, pdf_array_get(ctx, ref, i), PDF_NAME(TransformParams));
  4120. merge_lock_specification(ctx, fields, tp);
  4121. }
  4122. /* Also, check the locking details given in the Signature definition. This may
  4123. * not strictly be necessary as it's supposed to be "what the form author told
  4124. * the signature that it should lock". A well-formed signature should lock
  4125. * at least that much (possibly with extra fields locked from the XFA). If the
  4126. * signature doesn't lock as much as it was told to, we should be suspicious
  4127. * of the signing application. It is not clear that this test is actually
  4128. * necessary, or in keeping with what Acrobat does. */
  4129. merge_lock_specification(ctx, fields, pdf_dict_get(ctx, sig, PDF_NAME(Lock)));
  4130. }
  4131. fz_catch(ctx)
  4132. {
  4133. pdf_drop_locked_fields(ctx, fields);
  4134. fz_rethrow(ctx);
  4135. }
  4136. return fields;
  4137. }
/*
	Check whether the objects changed in xref section 'version' are
	all acceptable given the set of locked fields 'locked'.

	A flag word is kept for every object number below
	doc->max_xref_len. Each object recorded as changed in 'version'
	starts as FIELD_CHANGED; the passes below then OR in
	FIELD_CHANGE_VALID or FIELD_CHANGE_INVALID as changes are
	explained or rejected.

	Returns 1 if no object (from number 1 upwards) is left as a bare
	FIELD_CHANGED or carries FIELD_CHANGE_INVALID, and every entry of
	the AcroForm Fields array is an indirect reference. Returns 0
	otherwise.

	doc->xref_base is restored before returning or rethrowing.
*/
static int
validate_locked_fields(fz_context *ctx, pdf_document *doc, int version, pdf_locked_fields *locked)
{
	int o_xref_base = doc->xref_base;
	pdf_changes *changes;
	int num_objs;
	int i, n;
	int all_indirects = 1;

	num_objs = doc->max_xref_len;
	changes = fz_malloc_flexible(ctx, pdf_changes, obj_changes, num_objs);
	changes->num_obj = num_objs;

	fz_try(ctx)
	{
		pdf_obj *acroform, *new_acroform, *old_acroform;
		int len, acroform_num;

		doc->xref_base = version;

		/* Detect every object that has changed */
		for (i = 1; i < num_objs; i++)
		{
			if (pdf_obj_changed_in_version(ctx, doc, i, version))
				changes->obj_changes[i] = FIELD_CHANGED;
		}

		/* FIXME: Compare PageTrees and NumberTrees (just to allow for them being regenerated
		 * and having produced stuff that represents the same stuff). */

		/* The metadata of a document may be regenerated. Allow for that. */
		filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Metadata"), &filter_simple);

		/* The ModDate of document info may be regenerated. Allow for that. */
		/* FIXME: We accept all changes in document info, when maybe we ought to just
		 * accept ModDate? */
		filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Info"), &filter_simple);

		/* The Encryption dict may be rewritten for the new Xref. */
		filter_changes_accepted(ctx, changes, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Encrypt"), &filter_simple);

		/* We have to accept certain changes in the top level AcroForms dict,
		 * so get the 2 versions... */
		acroform = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm");
		acroform_num = pdf_to_num(ctx, acroform);
		new_acroform = pdf_resolve_indirect_chain(ctx, acroform);
		/* Temporarily look at the preceding state (version+1) to fetch
		 * the old AcroForm, then switch back. */
		doc->xref_base = version+1;
		old_acroform = pdf_resolve_indirect_chain(ctx, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm"));
		doc->xref_base = version;
		/* Walk the keys of the new AcroForm, handling each kind of
		 * entry according to what is allowed to change within it. */
		n = pdf_dict_len(ctx, new_acroform);
		for (i = 0; i < n; i++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, new_acroform, i);
			pdf_obj *nval = pdf_dict_get(ctx, new_acroform, key);
			pdf_obj *oval = pdf_dict_get(ctx, old_acroform, key);

			if (pdf_name_eq(ctx, key, PDF_NAME(Fields)))
			{
				int j;

				len = pdf_array_len(ctx, nval);
				for (j = 0; j < len; j++)
				{
					pdf_obj *field = pdf_array_get(ctx, nval, j);
					/* A direct (non-indirect) field entry makes the
					 * overall result a failure. */
					if (!pdf_is_indirect(ctx, field))
						all_indirects = 0;
					check_field(ctx, doc, changes, field, locked, "", NULL, NULL);
				}
			}
			else if (pdf_name_eq(ctx, key, PDF_NAME(SigFlags)))
			{
				/* Accept this */
				changes->obj_changes[acroform_num] |= FIELD_CHANGE_VALID;
			}
			else if (pdf_name_eq(ctx, key, PDF_NAME(DR)))
			{
				/* Accept any changes from within the Document Resources */
				filter_changes_accepted(ctx, changes, nval, &filter_resources);
			}
			else if (pdf_name_eq(ctx, key, PDF_NAME(XFA)))
			{
				/* Allow any changes within the XFA streams. */
				filter_changes_accepted(ctx, changes, nval, &filter_xfa);
			}
			else if (pdf_objcmp(ctx, nval, oval))
			{
				/* Any other key whose value differs is an invalid change. */
				changes->obj_changes[acroform_num] |= FIELD_CHANGE_INVALID;
			}
		}

		/* Allow for any object streams/XRefs to be changed. */
		doc->xref_base = version+1;
		for (i = 1; i < num_objs; i++)
		{
			pdf_obj *oobj, *otype;
			/* Only objects still changed-but-unexplained are of interest. */
			if (changes->obj_changes[i] != FIELD_CHANGED)
				continue;
			if (!pdf_obj_exists(ctx, doc, i))
			{
				/* Not present this version - must be newly created, can't be a change. */
				changes->obj_changes[i] |= FIELD_CHANGE_VALID;
				continue;
			}
			oobj = pdf_load_object(ctx, doc, i);
			otype = pdf_dict_get(ctx, oobj, PDF_NAME(Type));
			if (pdf_name_eq(ctx, otype, PDF_NAME(ObjStm)) ||
				pdf_name_eq(ctx, otype, PDF_NAME(XRef)))
			{
				changes->obj_changes[i] |= FIELD_CHANGE_VALID;
			}
			pdf_drop_obj(ctx, oobj);
		}
	}
	fz_always(ctx)
		doc->xref_base = o_xref_base;
	fz_catch(ctx)
	{
		fz_free(ctx, changes);
		fz_rethrow(ctx);
	}

	/* Scan for the first object that rules the update out; if the
	 * loop runs to completion (i == num_objs) nothing objectionable
	 * was found. */
	for (i = 1; i < num_objs; i++)
	{
		if (changes->obj_changes[i] == FIELD_CHANGED)
			/* Change with no reason */
			break;
		if (changes->obj_changes[i] & FIELD_CHANGE_INVALID)
			/* Illegal Change */
			break;
	}

	fz_free(ctx, changes);

	return (i == num_objs) && all_indirects;
}
  4258. int
  4259. pdf_validate_changes(fz_context *ctx, pdf_document *doc, int version)
  4260. {
  4261. int unsaved_versions = pdf_count_unsaved_versions(ctx, doc);
  4262. int n = pdf_count_versions(ctx, doc);
  4263. pdf_locked_fields *locked = NULL;
  4264. int result;
  4265. if (version < 0 || version >= n)
  4266. fz_throw(ctx, FZ_ERROR_ARGUMENT, "There aren't that many changes to find in this document!");
  4267. /* We are wanting to compare version+1 with version to make sure
  4268. * that the only changes made in going to version are conformant
  4269. * with what was allowed in version+1. The production of version
  4270. * might have involved signing a signature field and locking down
  4271. * more fields - this means that taking the list of locked things
  4272. * from version rather than version+1 will give us bad results! */
  4273. locked = pdf_find_locked_fields(ctx, doc, unsaved_versions+version+1);
  4274. fz_try(ctx)
  4275. {
  4276. if (!locked->all && locked->includes.len == 0 && locked->p == 0)
  4277. {
  4278. /* If nothing is locked at all, then all changes are permissible. */
  4279. result = 1;
  4280. }
  4281. else
  4282. result = validate_locked_fields(ctx, doc, unsaved_versions+version, locked);
  4283. }
  4284. fz_always(ctx)
  4285. pdf_drop_locked_fields(ctx, locked);
  4286. fz_catch(ctx)
  4287. fz_rethrow(ctx);
  4288. return result;
  4289. }
  4290. int
  4291. pdf_validate_change_history(fz_context *ctx, pdf_document *doc)
  4292. {
  4293. int num_versions = pdf_count_versions(ctx, doc);
  4294. int v;
  4295. if (num_versions < 2)
  4296. return 0; /* Unless there are at least 2 versions, there have been no updates. */
  4297. for(v = num_versions - 2; v >= 0; v--)
  4298. {
  4299. if (!pdf_validate_changes(ctx, doc, v))
  4300. return v+1;
  4301. }
  4302. return 0;
  4303. }
  4304. /* Return the version that obj appears in, or -1 for not found. */
  4305. static int
  4306. pdf_find_incremental_update_num_for_obj(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
  4307. {
  4308. pdf_xref *xref = NULL;
  4309. pdf_xref_subsec *sub;
  4310. int i, j;
  4311. if (obj == NULL)
  4312. return -1;
  4313. /* obj needs to be indirect for us to get a num out of it. */
  4314. i = pdf_to_num(ctx, obj);
  4315. if (i <= 0)
  4316. return -1;
  4317. /* obj can't be indirect below, so resolve it here. */
  4318. obj = pdf_resolve_indirect_chain(ctx, obj);
  4319. /* Find the first xref section where the entry is defined. */
  4320. for (j = 0; j < doc->num_xref_sections; j++)
  4321. {
  4322. xref = &doc->xref_sections[j];
  4323. if (i < xref->num_objects)
  4324. {
  4325. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  4326. {
  4327. pdf_xref_entry *entry;
  4328. if (i < sub->start || i >= sub->start + sub->len)
  4329. continue;
  4330. entry = &sub->table[i - sub->start];
  4331. if (entry->obj == obj)
  4332. return j;
  4333. }
  4334. }
  4335. }
  4336. return -1;
  4337. }
  4338. int pdf_find_version_for_obj(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
  4339. {
  4340. int v = pdf_find_incremental_update_num_for_obj(ctx, doc, obj);
  4341. int n;
  4342. if (v == -1)
  4343. return -1;
  4344. n = pdf_count_versions(ctx, doc) + pdf_count_unsaved_versions(ctx, doc);
  4345. if (v > n)
  4346. return n;
  4347. return v;
  4348. }
  4349. int pdf_validate_signature(fz_context *ctx, pdf_annot *widget)
  4350. {
  4351. pdf_document *doc;
  4352. int unsaved_versions, num_versions, version, i;
  4353. pdf_locked_fields *locked = NULL;
  4354. int o_xref_base;
  4355. if (!widget->page)
  4356. fz_throw(ctx, FZ_ERROR_ARGUMENT, "annotation not bound to any page");
  4357. doc = widget->page->doc;
  4358. unsaved_versions = pdf_count_unsaved_versions(ctx, doc);
  4359. num_versions = pdf_count_versions(ctx, doc) + unsaved_versions;
  4360. version = pdf_find_version_for_obj(ctx, doc, widget->obj);
  4361. if (version > num_versions-1)
  4362. version = num_versions-1;
  4363. /* Get the locked definition from the object when it was signed. */
  4364. o_xref_base = doc->xref_base;
  4365. doc->xref_base = version;
  4366. fz_var(locked); /* Not really needed, but it stops warnings */
  4367. fz_try(ctx)
  4368. {
  4369. locked = pdf_find_locked_fields_for_sig(ctx, doc, widget->obj);
  4370. for (i = version-1; i >= unsaved_versions; i--)
  4371. {
  4372. doc->xref_base = i;
  4373. if (!validate_locked_fields(ctx, doc, i, locked))
  4374. break;
  4375. }
  4376. }
  4377. fz_always(ctx)
  4378. {
  4379. doc->xref_base = o_xref_base;
  4380. pdf_drop_locked_fields(ctx, locked);
  4381. }
  4382. fz_catch(ctx)
  4383. fz_rethrow(ctx);
  4384. return i+1-unsaved_versions;
  4385. }
  4386. int pdf_was_pure_xfa(fz_context *ctx, pdf_document *doc)
  4387. {
  4388. int num_unsaved_versions = pdf_count_unsaved_versions(ctx, doc);
  4389. int num_versions = pdf_count_versions(ctx, doc);
  4390. int v;
  4391. int o_xref_base = doc->xref_base;
  4392. int pure_xfa = 0;
  4393. fz_var(pure_xfa);
  4394. fz_try(ctx)
  4395. {
  4396. for(v = num_versions + num_unsaved_versions; !pure_xfa && v >= num_unsaved_versions; v--)
  4397. {
  4398. pdf_obj *o;
  4399. doc->xref_base = v;
  4400. o = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm");
  4401. /* If we find a version that had an empty Root/AcroForm/Fields, but had a
  4402. * Root/AcroForm/XFA entry, then we deduce that this was at one time a
  4403. * pure XFA form. */
  4404. if (pdf_array_len(ctx, pdf_dict_get(ctx, o, PDF_NAME(Fields))) == 0 &&
  4405. pdf_dict_get(ctx, o, PDF_NAME(XFA)) != NULL)
  4406. pure_xfa = 1;
  4407. }
  4408. }
  4409. fz_always(ctx)
  4410. doc->xref_base = o_xref_base;
  4411. fz_catch(ctx)
  4412. fz_rethrow(ctx);
  4413. return pure_xfa;
  4414. }
  4415. pdf_xref *pdf_new_local_xref(fz_context *ctx, pdf_document *doc)
  4416. {
  4417. int n = pdf_xref_len(ctx, doc);
  4418. pdf_xref *xref = fz_malloc_struct(ctx, pdf_xref);
  4419. xref->subsec = NULL;
  4420. xref->num_objects = n;
  4421. xref->trailer = NULL;
  4422. xref->pre_repair_trailer = NULL;
  4423. xref->unsaved_sigs = NULL;
  4424. xref->unsaved_sigs_end = NULL;
  4425. fz_try(ctx)
  4426. {
  4427. xref->subsec = fz_malloc_struct(ctx, pdf_xref_subsec);
  4428. xref->subsec->len = n;
  4429. xref->subsec->start = 0;
  4430. xref->subsec->table = fz_malloc_struct_array(ctx, n, pdf_xref_entry);
  4431. xref->subsec->next = NULL;
  4432. }
  4433. fz_catch(ctx)
  4434. {
  4435. fz_free(ctx, xref->subsec);
  4436. fz_free(ctx, xref);
  4437. fz_rethrow(ctx);
  4438. }
  4439. return xref;
  4440. }
  4441. void pdf_drop_local_xref(fz_context *ctx, pdf_xref *xref)
  4442. {
  4443. if (xref == NULL)
  4444. return;
  4445. pdf_drop_xref_subsec(ctx, xref);
  4446. fz_free(ctx, xref);
  4447. }
  4448. void pdf_drop_local_xref_and_resources(fz_context *ctx, pdf_document *doc)
  4449. {
  4450. pdf_purge_local_resources(ctx, doc);
  4451. pdf_purge_locals_from_store(ctx, doc);
  4452. pdf_drop_local_xref(ctx, doc->local_xref);
  4453. doc->local_xref = NULL;
  4454. doc->resynth_required = 1;
  4455. }
  4456. void
  4457. pdf_debug_doc_changes(fz_context *ctx, pdf_document *doc)
  4458. {
  4459. int i, j;
  4460. if (doc->num_incremental_sections == 0)
  4461. fz_write_printf(ctx, fz_stddbg(ctx), "No incremental xrefs");
  4462. else
  4463. {
  4464. for (i = 0; i < doc->num_incremental_sections; i++)
  4465. {
  4466. pdf_xref *xref = &doc->xref_sections[i];
  4467. pdf_xref_subsec *sub;
  4468. fz_write_printf(ctx, fz_stddbg(ctx), "Incremental xref:\n");
  4469. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  4470. {
  4471. fz_write_printf(ctx, fz_stddbg(ctx), " Objects %d->%d\n", sub->start, sub->start + sub->len - 1);
  4472. for (j = 0; j < sub->len; j++)
  4473. {
  4474. pdf_xref_entry *e = &sub->table[j];
  4475. if (e->type == 0)
  4476. continue;
  4477. fz_write_printf(ctx, fz_stddbg(ctx), "%d %d obj (%c)\n", j + sub->start, e->gen, e->type);
  4478. pdf_debug_obj(ctx, e->obj);
  4479. fz_write_printf(ctx, fz_stddbg(ctx), "\nendobj\n");
  4480. }
  4481. }
  4482. }
  4483. }
  4484. if (doc->local_xref == NULL)
  4485. fz_write_printf(ctx, fz_stddbg(ctx), "No local xref");
  4486. else
  4487. {
  4488. for (i = 0; i < doc->num_incremental_sections; i++)
  4489. {
  4490. pdf_xref *xref = doc->local_xref;
  4491. pdf_xref_subsec *sub;
  4492. fz_write_printf(ctx, fz_stddbg(ctx), "Local xref (%sin force):\n", doc->local_xref_nesting == 0 ? "not " : "");
  4493. for (sub = xref->subsec; sub != NULL; sub = sub->next)
  4494. {
  4495. fz_write_printf(ctx, fz_stddbg(ctx), " Objects %d->%d\n", sub->start, sub->start + sub->len - 1);
  4496. for (j = 0; j < sub->len; j++)
  4497. {
  4498. pdf_xref_entry *e = &sub->table[j];
  4499. if (e->type == 0)
  4500. continue;
  4501. fz_write_printf(ctx, fz_stddbg(ctx), "%d %d obj (%c)\n", j + sub->start, e->gen, e->type);
  4502. pdf_debug_obj(ctx, e->obj);
  4503. fz_write_printf(ctx, fz_stddbg(ctx), "\nendobj\n");
  4504. }
  4505. }
  4506. }
  4507. }
  4508. }
  4509. pdf_obj *
  4510. pdf_metadata(fz_context *ctx, pdf_document *doc)
  4511. {
  4512. int initial = doc->xref_base;
  4513. pdf_obj *obj = NULL;
  4514. fz_var(obj);
  4515. fz_try(ctx)
  4516. {
  4517. do
  4518. {
  4519. pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
  4520. obj = pdf_dict_get(ctx, root, PDF_NAME(Metadata));
  4521. if (obj)
  4522. break;
  4523. doc->xref_base++;
  4524. }
  4525. while (doc->xref_base < doc->num_xref_sections);
  4526. }
  4527. fz_always(ctx)
  4528. doc->xref_base = initial;
  4529. fz_catch(ctx)
  4530. fz_rethrow(ctx);
  4531. return obj;
  4532. }
  4533. int pdf_obj_is_incremental(fz_context *ctx, pdf_obj *obj)
  4534. {
  4535. pdf_document *doc = pdf_get_bound_document(ctx, obj);
  4536. int v;
  4537. if (doc == NULL || doc->num_incremental_sections == 0)
  4538. return 0;
  4539. v = pdf_find_incremental_update_num_for_obj(ctx, doc, obj);
  4540. return (v == 0);
  4541. }
  4542. void pdf_minimize_document(fz_context *ctx, pdf_document *doc)
  4543. {
  4544. int i;
  4545. /* Don't throw anything away if we've done a repair! */
  4546. if (doc == NULL || doc->repair_attempted)
  4547. return;
  4548. /* Don't throw anything away in the incremental section, as that's where
  4549. * all our changes will be. */
  4550. for (i = doc->num_incremental_sections; i < doc->num_xref_sections; i++)
  4551. {
  4552. pdf_xref *xref = &doc->xref_sections[i];
  4553. pdf_xref_subsec *sub;
  4554. for (sub = xref->subsec; sub; sub = sub->next)
  4555. {
  4556. int len = sub->len;
  4557. int j;
  4558. for (j = 0; j < len; j++)
  4559. {
  4560. pdf_xref_entry *e = &sub->table[j];
  4561. if (e->obj == NULL)
  4562. continue;
  4563. e->obj = pdf_drop_singleton_obj(ctx, e->obj);
  4564. }
  4565. }
  4566. }
  4567. }
  4568. void pdf_repair_xref(fz_context *ctx, pdf_document *doc)
  4569. {
  4570. pdf_repair_xref_aux(ctx, doc, pdf_prime_xref_index);
  4571. }