| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050 |
- // Copyright (C) 2004-2025 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- /*
- For the purposes of this code, and to save my tiny brain from
- overload, we will adopt the following notation:
- 1) The PDF file contains bytes of data. These bytes are looked
- up in the MuPDF font handling to resolve to 'glyph ids' (gids).
- These account for all the different encodings etc in use,
- including the 'cmap' table within the font.
- 2) We are given the list of gids that are used in the document.
- We arrange to keep any entries in the cmap or post tables that
- map to these gids.
- We map the gids to the bottom of the range. This means that the
- cmap and post tables need to be updated.
- A similar optimisation would be to compress the range of cids
- used to a prefix of the range used. This would mean that the
- calling code needs to rewrite the data within the PDF file -
- both in terms of the strings used with the PDF streams, and in
- terms of the ToUnicode tables there (and the Widths etc).
- For now, we'll ignore this optimisation.
- Possibly, in the case of 'Identity' ToUnicode mappings we
- wouldn't actually want to do this range compression? It'd only
- make the file larger.
- */
/*
	A character code to glyph id lookup loaded from one cmap subtable.

	pid, psid: platform id and platform specific id of the subtable
	the data was loaded from.
	max: number of usable entries in gid[].
	gid: glyph id for each character code; 0 is treated as "no glyph"
	by the code that consumes this table. The loaders allocate extra
	space after the struct when more than 256 codes are needed, so
	gid[] is indexed up to max-1 rather than 255.
*/
typedef struct
{
	uint16_t pid;
	uint16_t psid;
	uint32_t max;
	uint16_t gid[256];
} encoding_t;
- typedef struct
- {
- uint32_t tag;
- uint32_t checksum;
- fz_buffer *tab;
- } tagged_table_t;
- typedef struct
- {
- int is_otf;
- int symbolic;
- encoding_t *encoding;
- uint16_t orig_num_glyphs;
- uint16_t new_num_glyphs;
- uint16_t index_to_loc_format;
- uint8_t *index_to_loc_formatp;
- uint16_t orig_num_long_hor_metrics;
- uint16_t new_num_long_hor_metrics;
- /* Pointer to the old tables (in the tagged table below) */
- uint8_t *loca;
- size_t *loca_len;
- uint8_t *maxp;
- /* Maps from old gid to new gid */
- uint16_t *gid_renum;
- int max;
- int len;
- tagged_table_t *table;
- } ttf_t;
- static uint32_t
- checksum(fz_buffer *buf)
- {
- size_t i;
- const uint8_t *d = (const uint8_t *)buf->data;
- uint32_t cs = 0;
- for (i = buf->len>>2; i > 0; i--)
- {
- cs += d[0]<<24;
- cs += d[1]<<16;
- cs += d[2]<<8;
- cs += d[3];
- d += 4;
- }
- i = buf->len - (buf->len & ~3);
- switch (i)
- {
- case 3:
- cs += d[2]<<8;
- /* fallthrough */
- case 2:
- cs += d[1]<<16;
- /* fallthrough */
- case 1:
- cs += d[0]<<24;
- default:
- break;
- }
- return cs;
- }
- static uint32_t
- find_table(fz_context *ctx, fz_stream *stm, uint32_t tag, uint32_t *len)
- {
- int num_tables;
- int i;
- fz_seek(ctx, stm, 4, SEEK_SET);
- num_tables = fz_read_int16(ctx, stm);
- fz_seek(ctx, stm, 12, SEEK_SET);
- for (i = 0; i < num_tables; i++)
- {
- uint32_t t = fz_read_uint32(ctx, stm);
- uint32_t cs = fz_read_uint32(ctx, stm);
- uint32_t off = fz_read_uint32(ctx, stm);
- (void) cs; /* UNUSED */
- *len = fz_read_uint32(ctx, stm);
- if (t == tag)
- return off;
- }
- return 0;
- }
- static fz_buffer *
- read_table(fz_context *ctx, fz_stream *stm, uint32_t tag, int compulsory)
- {
- uint32_t size;
- uint32_t off = find_table(ctx, stm, tag, &size);
- fz_buffer *buf;
- if (off == 0)
- {
- if (compulsory)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Required %c%c%c%c table missing", tag>>24, (tag>>16)&0xff, (tag>>8)&0xff, tag & 0xff);
- return NULL;
- }
- fz_seek(ctx, stm, off, SEEK_SET);
- buf = fz_new_buffer(ctx, size);
- fz_try(ctx)
- {
- fz_read(ctx, stm, buf->data, size);
- buf->len = size;
- }
- fz_catch(ctx)
- {
- fz_drop_buffer(ctx, buf);
- fz_rethrow(ctx);
- }
- return buf;
- }
/* Pack the first four characters of s into a 32-bit table tag, first
 * character in the most significant byte. The argument is parenthesised
 * so expressions such as TAG(p + 1) work, and each byte is widened to
 * uint32_t before shifting so bytes >= 0x80 cannot overflow a signed int. */
#define TAG(s) \
	( ((uint32_t)(uint8_t)(s)[0] << 24) | \
	  ((uint32_t)(uint8_t)(s)[1] << 16) | \
	  ((uint32_t)(uint8_t)(s)[2] << 8) | \
	  ((uint32_t)(uint8_t)(s)[3]) )
- static void
- add_table(fz_context *ctx, ttf_t *ttf, uint32_t tag, fz_buffer *tab)
- {
- fz_try(ctx)
- {
- if (ttf->max == ttf->len)
- {
- int n = ttf->max * 2;
- if (n == 0)
- n = 16;
- ttf->table = fz_realloc(ctx, ttf->table, sizeof(*ttf->table) * n);
- ttf->max = n;
- }
- ttf->table[ttf->len].tag = tag;
- ttf->table[ttf->len].tab = tab;
- ttf->len++;
- }
- fz_catch(ctx)
- {
- fz_drop_buffer(ctx, tab);
- fz_rethrow(ctx);
- }
- }
- static void
- copy_table(fz_context *ctx, ttf_t *ttf, fz_stream *stm, uint32_t tag, int compulsory)
- {
- fz_buffer *t;
- t = read_table(ctx, stm, tag, compulsory);
- if (t)
- add_table(ctx, ttf, tag, t);
- }
- static int
- tabcmp(const void *a_, const void *b_)
- {
- const tagged_table_t *a = (const tagged_table_t *)a_;
- const tagged_table_t *b = (const tagged_table_t *)b_;
- return (a->tag - b->tag);
- }
- static void
- sort_tables(fz_context *ctx, ttf_t *ttf)
- {
- /* Avoid scanbuild/coverity false warning with this unnecessary test */
- if (ttf->table == NULL || ttf->len == 0)
- return;
- qsort(ttf->table, ttf->len, sizeof(tagged_table_t), tabcmp);
- }
- static void
- checksum_tables(fz_context *ctx, ttf_t *ttf)
- {
- int i;
- for (i = 0; i < ttf->len; i++)
- ttf->table[i].checksum = checksum(ttf->table[i].tab);
- }
- static void
- write_tables(fz_context *ctx, ttf_t *ttf, fz_output *out)
- {
- int i = 0;
- uint32_t offset;
- /* scalar type - TTF for now - may need to cope with other types later. */
- if (ttf->is_otf)
- fz_write_int32_be(ctx, out, 0x4f54544f);
- else
- fz_write_int32_be(ctx, out, 0x00010000);
- /* number of tables */
- fz_write_uint16_be(ctx, out, ttf->len);
- while (1<<(i+1) <= ttf->len)
- i++;
- /* searchRange */
- fz_write_uint16_be(ctx, out, (1<<i)<<4);
- /* entrySelector */
- fz_write_uint16_be(ctx, out, i);
- /* rangeShift*/
- fz_write_uint16_be(ctx, out, (ttf->len - (1<<i))<<4);
- /* Table directory */
- offset = 12 + ttf->len * 16;
- for (i = 0; i < ttf->len; i++)
- {
- fz_write_uint32_be(ctx, out, ttf->table[i].tag);
- fz_write_uint32_be(ctx, out, ttf->table[i].checksum);
- fz_write_uint32_be(ctx, out, offset);
- fz_write_uint32_be(ctx, out, (uint32_t)ttf->table[i].tab->len);
- offset += (uint32_t)ttf->table[i].tab->len;
- }
- /* Now the tables in turn */
- for (i = 0; i < ttf->len; i++)
- {
- fz_write_buffer(ctx, out, ttf->table[i].tab);
- }
- }
- static void
- fix_checksum(fz_context *ctx, fz_buffer *buf)
- {
- uint8_t *data;
- uint32_t sum = 0;
- size_t len = fz_buffer_storage(ctx, buf, &data);
- uint32_t namesize;
- fz_stream *stm = fz_open_buffer(ctx, buf);
- uint32_t csumpos = find_table(ctx, stm, TAG("head"), &namesize) + 8;
- (void) len; // UNUSED
- fz_drop_stream(ctx, stm);
- /* First off, blat the old checksum */
- memset(data+csumpos, 0, 4);
- sum = checksum(buf);
- sum = 0xb1b0afba-sum;
- /* Insert it. */
- data[csumpos] = sum>>24;
- data[csumpos+1] = sum>>16;
- data[csumpos+2] = sum>>8;
- data[csumpos+3] = sum;
- }
/*
	The six 16-bit fields of one 12 byte record in the 'name' table,
	in the order they are stored. The len and offset fields are the
	ones subset_name_table() reads at bytes 8 and 10 of each record.
*/
typedef struct
{
	uint16_t platform_id;
	uint16_t platform_specific_id;
	uint16_t language_id;
	uint16_t name_id;
	uint16_t len;
	uint16_t offset;
} name_record_t;
/* Read a big-endian 32-bit value from d[0..3]. The bytes are widened to
 * uint32_t before shifting so that a top byte >= 0x80 does not overflow
 * a signed int. */
static uint32_t get32(const uint8_t *d)
{
	return ((uint32_t)d[0]<<24) | ((uint32_t)d[1]<<16) | ((uint32_t)d[2]<<8) | (uint32_t)d[3];
}
/* Read a big-endian 16-bit value from d[0..1]; the result is returned
 * in a uint32_t. */
static uint32_t get16(const uint8_t *d)
{
	uint32_t hi = d[0];
	uint32_t lo = d[1];

	return (hi << 8) | lo;
}
/* Store v big-endian into d[0..3]. */
static void put32(uint8_t *d, uint32_t v)
{
	d[3] = (uint8_t)(v & 0xff);
	d[2] = (uint8_t)((v >> 8) & 0xff);
	d[1] = (uint8_t)((v >> 16) & 0xff);
	d[0] = (uint8_t)((v >> 24) & 0xff);
}
/* Store the low 16 bits of v big-endian into d[0..1]. */
static void put16(uint8_t *d, uint32_t v)
{
	d[1] = (uint8_t)(v & 0xff);
	d[0] = (uint8_t)((v >> 8) & 0xff);
}
/*
	A growable list of pointers to fixed size records inside a block
	of memory.
*/
typedef struct
{
	/* The block the records live in, and its size. The list
	 * operations themselves do not need these; they record the
	 * bounds for every offset used within the list. */
	uint8_t *block;
	size_t block_len;

	/* Entries in use, entries allocated, and the entries. */
	uint32_t len;
	uint32_t max;
	uint8_t **ptr;
} ptr_list_t;
- static void
- ptr_list_add(fz_context *ctx, ptr_list_t *pl, uint8_t *ptr)
- {
- if (pl->len == pl->max)
- {
- int n = pl->max * 2;
- if (n == 0)
- n = 32;
- pl->ptr = fz_realloc(ctx, pl->ptr, sizeof(*pl->ptr) * n);
- pl->max = n;
- }
- pl->ptr[pl->len++] = ptr;
- }
- typedef int (cmp_t)(const uint8_t **a, const uint8_t **b);
- typedef int (void_cmp_t)(const void *, const void *);
- static void
- ptr_list_sort(fz_context *ctx, ptr_list_t *pl, cmp_t *cmp)
- {
- /* Avoid scanbuild/coverity false warning with this unnecessary test */
- if (pl->ptr == NULL || pl->len == 0)
- return;
- qsort(pl->ptr, pl->len, sizeof(*pl->ptr), (void_cmp_t *)cmp);
- }
- static void
- drop_ptr_list(fz_context *ctx, ptr_list_t *pl)
- {
- fz_free(ctx, pl->ptr);
- }
- /* return 1 to keep, 0 to drop. */
- typedef int (filter_t)(const uint8_t *ptr, const uint8_t *blk, size_t len);
- /* This makes a pointer list from a filtered block, moving the underlying data as it filters. */
- static void
- ptr_list_compact(fz_context *ctx, ptr_list_t *pl, filter_t *fil, uint8_t *base, int n, size_t eltsize, uint8_t *block, size_t block_len)
- {
- int i;
- uint8_t *s = base;
- uint8_t *d = base;
- pl->block = block;
- pl->block_len = block_len;
- if (base < block || (size_t)(base - block) > block_len || (size_t)(base - block) + n * eltsize >= block_len)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Ptr List creation failed");
- for (i = 0; i < n; i++)
- {
- if (fil(s, block, block_len))
- {
- ptr_list_add(ctx, pl, d);
- if (s != d)
- memmove(d, s, eltsize);
- d += eltsize;
- }
- s += eltsize;
- }
- }
/*
	Sort order for name records: descending by the 16-bit length field
	stored 8 bytes into each record.
*/
static int
names_by_size(const uint8_t **a, const uint8_t **b)
{
	uint32_t len_a = get16(*a + 8);
	uint32_t len_b = get16(*b + 8);

	return len_b - len_a;
}
/*
	Record filter for the name table.
	FIXME: For now every record is kept, so the arguments are unused.
*/
static int
filter_name_tables(const uint8_t *ptr, const uint8_t *block, size_t block_len)
{
	(void) ptr;
	(void) block;
	(void) block_len;

	return 1;
}
/* Returned by find_string_in_block() when there is no match. */
#define UNFOUND ((uint32_t)-1)

/*
	Find the first occurrence of the str_len bytes at str within the
	block_len bytes at block.

	Returns the byte offset of the match, or UNFOUND if there is none.
	An empty block, or a string longer than the block, never matches.
	That second case is checked explicitly rather than asserted, so a
	build without assertions cannot underflow the search range and read
	beyond the block.
*/
static uint32_t
find_string_in_block(const uint8_t *str, size_t str_len, const uint8_t *block, size_t block_len)
{
	size_t pos;
	size_t last;

	if (block_len == 0 || str_len > block_len)
		return UNFOUND;

	/* Last offset at which str still fits inside the block. */
	last = block_len - str_len;
	for (pos = 0; pos <= last; pos++)
	{
		if (memcmp(str, block + pos, str_len) == 0)
			return (uint32_t)pos;
	}

	return UNFOUND;
}
- static void
- subset_name_table(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
- {
- fz_buffer *t = read_table(ctx, stm, TAG("name"), 0);
- uint8_t *d;
- uint32_t i, n, off;
- ptr_list_t pl = { 0 };
- size_t name_data_size;
- uint8_t *new_name_data = NULL;
- size_t new_len;
- if (t == NULL)
- return; /* No name table */
- d = t->data;
- fz_var(new_name_data);
- fz_try(ctx)
- {
- if (get16(d) != 0 || t->len < 6)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported name table format");
- n = get16(d+2);
- off = get16(d+4);
- name_data_size = t->len - 6 - 12*n;
- if (t->len < 6 + 12*n)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name table");
- ptr_list_compact(ctx, &pl, filter_name_tables, d+6, n, 12, d, t->len);
- /* Sort our list so that the ones with the largest name data blocks come first. */
- ptr_list_sort(ctx, &pl, names_by_size);
- new_name_data = fz_malloc(ctx, name_data_size);
- new_len = 0;
- for (i = 0; i < pl.len; i++)
- {
- uint32_t name_len, offset, name_off;
- uint8_t *name;
- if (t->len < (size_t) (pl.ptr[i] - t->data) + 8 + 2)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name length in name table");
- name_len = get16(pl.ptr[i] + 8);
- if (t->len < (size_t) (pl.ptr[i] - t->data) + 10 + 2)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name offset in name table");
- name_off = off + get16(pl.ptr[i] + 10);
- name = d + name_off;
- if (t->len < name_off + name_len)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name in name table");
- offset = find_string_in_block(name, name_len, new_name_data, new_len);
- if (offset == UNFOUND)
- {
- if (name_data_size < new_len + name_len)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Bad name table in TTF");
- memcpy(new_name_data + new_len, name, name_len);
- offset = (uint32_t)new_len;
- new_len += name_len;
- }
- put16(pl.ptr[i]+10, offset);
- }
- memcpy(d + 6 + 12*pl.len, new_name_data, new_len);
- t->len = 6 + 12*pl.len + new_len;
- put16(d+4, 6 + 12*pl.len);
- }
- fz_always(ctx)
- {
- drop_ptr_list(ctx, &pl);
- fz_free(ctx, new_name_data);
- }
- fz_catch(ctx)
- {
- fz_drop_buffer(ctx, t);
- fz_rethrow(ctx);
- }
- add_table(ctx, ttf, TAG("name"), t);
- }
- static encoding_t *
- load_enc_tab0(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset)
- {
- encoding_t *enc;
- int i;
- if (data_size < 262)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated cmap 0 format table");
- enc = fz_malloc_struct(ctx, encoding_t);
- d += offset + 6;
- enc->max = 256;
- for (i = 0; i < 256; i++)
- enc->gid[i] = d[i];
- return enc;
- }
- static encoding_t *
- load_enc_tab4(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset)
- {
- encoding_t *enc;
- uint16_t seg_count;
- uint32_t i;
- if (data_size < offset + 26)
- fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small");
- seg_count = get16(d+offset+6); /* 2 * seg_count */
- if (seg_count & 1)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap4 table");
- seg_count >>= 1;
- enc = fz_calloc(ctx, 1, sizeof(encoding_t) + sizeof(uint16_t) * (65536 - 256));
- enc->max = 65536;
- fz_try(ctx)
- {
- /* Run through the segments, counting how many are used. */
- for (i = 0; i < seg_count; i++)
- {
- uint16_t seg_end, seg_start, delta, target, inner_offset;
- uint32_t offset_ptr, s;
- if (data_size < offset + 14 + 6 * seg_count + 2 + 2 * i + 2)
- fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small");
- seg_end = get16(d + offset + 14 + 2 * i);
- seg_start = get16(d + offset + 14 + 2 * seg_count + 2 + 2 * i);
- delta = get16(d + offset + 14 + 4 * seg_count + 2 + 2 * i);
- offset_ptr = offset + 14 + 6 * seg_count + 2 + 2 * i;
- inner_offset = get16(d + offset_ptr);
- if (seg_start >= enc->max || seg_end >= enc->max || seg_end < seg_start)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap4 table.");
- for (s = seg_start; s <= seg_end; s++)
- {
- if (inner_offset == 0)
- {
- target = delta + s;
- }
- else
- {
- if (data_size < offset_ptr + inner_offset + 2 * (s - seg_start) + 2)
- fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small");
- /* Yes. This is very screwy. The inner_offset is from the offset_ptr in use. */
- target = get16(d + offset_ptr + inner_offset + 2 * (s - seg_start));
- if (target != 0)
- target += delta;
- }
- if (target != 0)
- enc->gid[s] = target;
- }
- }
- }
- fz_catch(ctx)
- {
- fz_free(ctx, enc);
- fz_rethrow(ctx);
- }
- return enc;
- }
- static encoding_t *
- load_enc_tab6(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset)
- {
- encoding_t *enc;
- uint16_t first_code;
- uint16_t entry_count;
- uint16_t length;
- uint32_t i;
- if (data_size < 10)
- fz_throw(ctx, FZ_ERROR_FORMAT, "cmap6 too small");
- length = get16(d+offset+2);
- first_code = get16(d+offset+6);
- entry_count = get16(d+offset+8);
- if (length < entry_count*2 + 10)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap6 table");
- enc = fz_calloc(ctx, 1, sizeof(encoding_t) + sizeof(uint16_t) * (first_code + entry_count - 256));
- enc->max = first_code + entry_count;
- /* Run through the segments, counting how many are used. */
- for (i = 0; i < entry_count; i++)
- {
- enc->gid[first_code+i] = get16(d+offset+10+i*2);
- }
- return enc;
- }
- static int
- is_encoding_all_zeros(fz_context *ctx, encoding_t *enc)
- {
- uint32_t i;
- if (enc != NULL)
- for (i = 0; i < enc->max; i++)
- if (enc->gid[i] != 0)
- return 0;
- return 1;
- }
- static encoding_t *
- load_enc(fz_context *ctx, fz_buffer *t, int pid, int psid)
- {
- uint8_t *d = t->data;
- size_t data_size = t->len;
- uint32_t i, n;
- if (data_size < 6 || get16(d) != 0)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported cmap table format");
- n = get16(d+2);
- if (data_size < 4 + 8*n)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated cmap table");
- for (i = 0; i < n; i++)
- {
- uint16_t plat_id = get16(d + 4 + i * 8);
- uint16_t plat_spec_id = get16(d + 4 + i * 8 + 2);
- uint32_t offset = get32(d + 4 + i * 8 + 4);
- uint16_t fmt;
- encoding_t *enc;
- if (plat_id != pid || plat_spec_id != psid)
- continue;
- if (offset < 4 + 8 * n || offset + 2 >= data_size)
- fz_throw(ctx, FZ_ERROR_FORMAT, "cmap table data out of range");
- fmt = get16(d+offset);
- switch(fmt)
- {
- case 0:
- enc = load_enc_tab0(ctx, d, data_size, offset);
- break;
- case 4:
- enc = load_enc_tab4(ctx, d, data_size, offset);
- break;
- case 6:
- enc = load_enc_tab6(ctx, d, data_size, offset);
- break;
- default:
- fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported cmap table format %d", fmt);
- }
- enc->pid = pid;
- enc->psid = psid;
- if (is_encoding_all_zeros(ctx, enc))
- {
- // ignore any encoding that is all zeros
- fz_free(ctx, enc);
- enc = NULL;
- }
- return enc;
- }
- return NULL;
- }
- static void
- load_encoding(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
- {
- fz_buffer *t = read_table(ctx, stm, TAG("cmap"), 1);
- encoding_t *enc = NULL;
- fz_var(enc);
- fz_try(ctx)
- {
- if (ttf->symbolic)
- {
- /* For symbolic fonts, we look for (3,0) as per PDF Spec, then (1,0). */
- enc = load_enc(ctx, t, 3, 0);
- if (!enc)
- enc = load_enc(ctx, t, 1, 0);
- }
- else
- {
- /* For non symbolic fonts, we look for (3,1) then (1,0), then (0,1), and finally (0,3). */
- enc = load_enc(ctx, t, 3, 1);
- if (!enc)
- enc = load_enc(ctx, t, 1, 0);
- if (!enc)
- enc = load_enc(ctx, t, 0, 1);
- if (!enc)
- enc = load_enc(ctx, t, 0, 3);
- }
- if (!enc)
- fz_throw(ctx, FZ_ERROR_FORMAT, "No suitable cmap table found");
- }
- fz_always(ctx)
- {
- fz_drop_buffer(ctx, t);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
- ttf->encoding = enc;
- }
- static void
- reduce_encoding(fz_context *ctx, ttf_t *ttf, int *gids, int num_gids)
- {
- int i;
- encoding_t *enc = ttf->encoding;
- int n = enc->max;
- for (i = 0; i < n; i++)
- {
- int gid = enc->gid[i];
- int lo, hi;
- if (gid == 0)
- continue;
- lo = 0;
- hi = num_gids;
- while (lo < hi)
- {
- int mid = (lo + hi)>>1;
- int g = gids[mid];
- if (g < gid)
- lo = mid+1;
- else if (g > gid)
- hi = mid;
- else
- goto found; /* Leave this one as is. */
- }
- /* Not found */
- enc->gid[i] = 0;
- found:
- {}
- }
- }
- static void
- make_cmap(fz_context *ctx, ttf_t *ttf)
- {
- uint32_t i;
- uint32_t len;
- uint32_t segs = 0;
- uint32_t seg, seg_start, seg_end;
- encoding_t *enc = ttf->encoding;
- uint32_t n = enc->max;
- uint32_t entries = 0;
- fz_buffer *buf;
- uint8_t *d;
- uint32_t offset;
- /* Make a type 4 table. */
- /* Count the number of segments. */
- for (i = 0; i < n; i++)
- {
- if (enc->gid[i] == 0)
- continue;
- seg_start = i;
- seg_end = i;
- for (i++; i<n; i++)
- {
- if (enc->gid[i] != 0)
- seg_end = i;
- else if (i - seg_end > 4)
- break;
- }
- entries += seg_end - seg_start + 1;
- segs++;
- }
- segs++; /* For the terminator */
- len = 12 + 14 + 2 + segs * 2 * 4 + entries * 2;
- buf = fz_new_buffer(ctx, len);
- d = buf->data;
- /* cmap header */
- put16(d, 0); /* version */
- put16(d+2, 1); /* num sub tables */
- put16(d+4, enc->pid);
- put16(d+6, enc->psid);
- put32(d+8, 12); /* offset */
- d += 12;
- put16(d, 4); /* Format */
- put16(d + 2, len-12); /* Length */
- put16(d + 4, 0); /* FIXME: Language */
- put16(d + 6, segs * 2);
- i = 0;
- while (1U<<(i+1) <= segs)
- i++;
- /* So 1<<i <= segs < 1<<(i+1) */
- put16(d + 8, 1<<(i+1)); /* searchRange */
- put16(d + 10, i); /* entrySelector */
- put16(d + 12, 2 * segs - (1<<(i+1))); /* rangeShift */
- put16(d + 14 + segs * 2, 0); /* reserved */
- /* Now output the segment data */
- entries = 14 + segs * 2 * 4 + 2; /* offset of where to put entries.*/
- seg = 0;
- for (i = 0; i < n; i++)
- {
- if (enc->gid[i] == 0)
- continue;
- seg_start = i;
- seg_end = i;
- offset = 14 + segs * 2 * 3 + 2 + seg * 2;
- put16(d + offset - segs * 2, 0); /* Delta - always 0 for now. */
- put16(d + offset, entries - offset); /* offset */
- /* Insert an entry */
- if (!ttf->is_otf && ttf->gid_renum && i < enc->max && enc->gid[i] < ttf->orig_num_glyphs)
- put16(d + entries, (ttf->is_otf || ttf->gid_renum == NULL) ? enc->gid[i] : ttf->gid_renum[enc->gid[i]]);
- else
- put16(d + entries, enc->gid[i]);
- entries += 2;
- for (i++; i < n; i++)
- {
- if (enc->gid[i] != 0)
- {
- /* Include i in the range, which means we need to add entries for
- * seg_end to i inclusive. */
- while (seg_end < i)
- {
- seg_end++;
- if (!ttf->is_otf && ttf->gid_renum && seg_end < enc->max && enc->gid[seg_end] < ttf->orig_num_glyphs)
- put16(d + entries, ttf->gid_renum[enc->gid[seg_end]]);
- else
- put16(d + entries, enc->gid[seg_end]);
- entries += 2;
- }
- }
- else if (i - seg_end > 4)
- break;
- }
- put16(d + 14 + segs * 2 + seg * 2 + 2, seg_start);
- put16(d + 14 + seg * 2, seg_end);
- seg++;
- }
- offset = 14 + segs * 2 * 3 + 2 + seg * 2;
- put16(d + 14 + segs * 2 + seg * 2 + 2, 0xffff);
- put16(d + 14 + seg * 2, 0xffff);
- put16(d + offset - segs * 2, 1); /* Delta */
- put16(d + offset, 0); /* offset */
- buf->len = entries + 12;
- assert(buf->len == buf->cap);
- add_table(ctx, ttf, TAG("cmap"), buf);
- }
- static void
- read_maxp(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
- {
- fz_buffer *t = read_table(ctx, stm, TAG("maxp"), 1);
- if (t->len < 6)
- {
- fz_drop_buffer(ctx, t);
- fz_throw(ctx, FZ_ERROR_FORMAT, "truncated maxp table");
- }
- ttf->orig_num_glyphs = get16(t->data+4);
- add_table(ctx, ttf, TAG("maxp"), t);
- ttf->maxp = t->data;
- }
- static void
- read_head(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
- {
- uint32_t version;
- fz_buffer *t = read_table(ctx, stm, TAG("head"), 1);
- if (t->len < 54)
- {
- fz_drop_buffer(ctx, t);
- fz_throw(ctx, FZ_ERROR_FORMAT, "truncated head table");
- }
- version = get32(t->data);
- if (version != 0x00010000)
- {
- fz_drop_buffer(ctx, t);
- fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported head table version 0x%08x", version);
- }
- ttf->index_to_loc_formatp = t->data+50;
- ttf->index_to_loc_format = get16(ttf->index_to_loc_formatp);
- if (ttf->index_to_loc_format & ~1)
- {
- fz_drop_buffer(ctx, t);
- fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported index_to_loc_format 0x%04x", ttf->index_to_loc_format);
- }
- add_table(ctx, ttf, TAG("head"), t);
- }
- static void
- read_loca(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
- {
- fz_buffer *t;
- uint32_t len = (2<<ttf->index_to_loc_format) * (ttf->orig_num_glyphs+1);
- t = read_table(ctx, stm, TAG("loca"), 1);
- if (t->len < len)
- {
- fz_drop_buffer(ctx, t);
- fz_throw(ctx, FZ_ERROR_FORMAT, "truncated loca table");
- }
- ttf->loca = t->data;
- ttf->loca_len = &t->len;
- add_table(ctx, ttf, TAG("loca"), t);
- }
- static void
- read_hhea(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
- {
- uint32_t version;
- fz_buffer *t = read_table(ctx, stm, TAG("hhea"), 1);
- uint16_t i;
- if (t->len < 36)
- {
- fz_drop_buffer(ctx, t);
- fz_throw(ctx, FZ_ERROR_FORMAT, "truncated hhea table");
- }
- version = get32(t->data);
- if (version != 0x00010000)
- {
- fz_drop_buffer(ctx, t);
- fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported hhea table version 0x%08x", version);
- }
- ttf->orig_num_long_hor_metrics = get16(t->data+34);
- if (ttf->orig_num_long_hor_metrics > ttf->orig_num_glyphs)
- {
- fz_drop_buffer(ctx, t);
- fz_throw(ctx, FZ_ERROR_FORMAT, "Overlong hhea table");
- }
- add_table(ctx, ttf, TAG("hhea"), t);
- /* Previously gids 0 to orig_num_long_hor_metrics-1 were described with
- * hor metrics, and the ones afterwards were fixed widths. Find where
- * that dividing line is in our new reduced set. */
- if (ttf->encoding && !ttf->is_otf && ttf->orig_num_long_hor_metrics > 0)
- {
- /* i = 0 is always kept long in subset_hmtx(). */
- ttf->new_num_long_hor_metrics = 1;
- for (i = ttf->orig_num_long_hor_metrics-1; i > 0; i--)
- if (ttf->gid_renum[i])
- {
- ttf->new_num_long_hor_metrics = ttf->gid_renum[i]+1;
- break;
- }
- put16(t->data+34, ttf->new_num_long_hor_metrics);
- }
- else
- {
- ttf->new_num_long_hor_metrics = ttf->orig_num_long_hor_metrics;
- }
- }
- static uint32_t
- get_loca(fz_context *ctx, ttf_t *ttf, uint32_t n)
- {
- if (ttf->index_to_loc_format == 0)
- {
- /* Short index - convert from words to bytes */
- return get16(ttf->loca + n*2) * 2;
- }
- else
- {
- /* Long index - in bytes already */
- return get32(ttf->loca + n*4);
- }
- }
- static void
- put_loca(fz_context *ctx, ttf_t *ttf, uint32_t n, uint32_t off)
- {
- if (ttf->index_to_loc_format == 0)
- {
- /* Short index - convert from bytes to words */
- assert((off & 1) == 0);
- put16(ttf->loca + n*2, off/2);
- }
- else
- {
- /* Long index - in bytes already */
- put32(ttf->loca + n*4, off);
- }
- }
- static void
- glyph_used(fz_context *ctx, ttf_t *ttf, fz_buffer *glyf, uint16_t i)
- {
- uint32_t offset, len;
- const uint8_t *data;
- uint16_t flags;
- if (i >= ttf->orig_num_glyphs)
- {
- fz_warn(ctx, "TTF subsetting; gid >= num_gids!");
- return;
- }
- if (ttf->gid_renum[i] != 0)
- return;
- ttf->gid_renum[i] = 1;
- /* If this glyf is composite, then we need to add any dependencies of it. */
- offset = get_loca(ctx, ttf, i);
- len = get_loca(ctx, ttf, i+1) - offset;
- if (len == 0)
- return;
- if (offset+2 > glyf->len)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
- data = glyf->data + offset;
- if ((int16_t)get16(data) >= 0)
- return; /* Single glyph - no dependencies */
- data += 4 * 2 + 2;
- if (len < 4*2 + 2)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
- len -= 4 * 2 + 2;
- do
- {
- uint16_t idx, skip;
- if (len < 4)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
- flags = get16(data);
- idx = get16(data+2);
- glyph_used(ctx, ttf, glyf, idx);
- #define ARGS_1_AND_2_ARE_WORDS 1
- #define ARGS_ARE_XY_VALUES 2
- #define WE_HAVE_A_SCALE 8
- #define MORE_COMPONENTS 32
- #define WE_HAVE_AN_X_AND_Y_SCALE 64
- #define WE_HAVE_A_TWO_BY_TWO 128
- /* Skip the X and Y offsets */
- if (flags & ARGS_1_AND_2_ARE_WORDS)
- skip = 4 + 4;
- else
- skip = 4 + 2;
- /* Skip the transformation */
- switch (flags & (WE_HAVE_A_SCALE + WE_HAVE_AN_X_AND_Y_SCALE + WE_HAVE_A_TWO_BY_TWO))
- {
- case 0:
- /* No extra to skip */
- break;
- case WE_HAVE_A_SCALE:
- skip += 2;
- break;
- case WE_HAVE_AN_X_AND_Y_SCALE:
- skip += 4;
- break;
- case WE_HAVE_A_TWO_BY_TWO:
- skip += 8;
- break;
- }
- if (len < skip)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
- data += skip;
- len -= skip;
- }
- while(flags & MORE_COMPONENTS);
- }
- static void
- renumber_composite(fz_context *ctx, ttf_t *ttf, uint8_t *data, uint32_t len)
- {
- uint16_t flags;
- uint16_t x;
- data += 4 * 2 + 2;
- if (len < 4*2 + 2)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
- len -= 4 * 2 + 2;
- do
- {
- uint16_t skip;
- if (len < 4)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
- flags = get16(data);
- x = get16(data+2);
- if (x >= ttf->orig_num_glyphs)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
- put16(data+2, ttf->gid_renum[x]);
- /* Skip the X and Y offsets */
- if (flags & ARGS_1_AND_2_ARE_WORDS)
- skip = 4 + 4;
- else
- skip = 4 + 2;
- /* Skip the transformation */
- switch (flags & (WE_HAVE_A_SCALE + WE_HAVE_AN_X_AND_Y_SCALE + WE_HAVE_A_TWO_BY_TWO))
- {
- case 0:
- /* No extra to skip */
- break;
- case WE_HAVE_A_SCALE:
- skip += 2;
- break;
- case WE_HAVE_AN_X_AND_Y_SCALE:
- skip += 4;
- break;
- case WE_HAVE_A_TWO_BY_TWO:
- skip += 8;
- break;
- }
- if (len < skip)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data");
- data += skip;
- len -= skip;
- }
- while(flags & MORE_COMPONENTS);
- }
- static void
- read_glyf(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids)
- {
- uint32_t len = get_loca(ctx, ttf, ttf->orig_num_glyphs);
- fz_buffer *t = read_table(ctx, stm, TAG("glyf"), 1);
- encoding_t *enc = ttf->encoding;
- uint32_t last_loca, i, j, k;
- uint32_t new_start, old_start, old_end, last_loca_ofs;
- if (t->len < len)
- {
- fz_drop_buffer(ctx, t);
- fz_throw(ctx, FZ_ERROR_FORMAT, "truncated glyf table");
- }
- add_table(ctx, ttf, TAG("glyf"), t);
- /* Now, make the renumber list for the glyphs. */
- ttf->gid_renum = fz_calloc(ctx, ttf->orig_num_glyphs, sizeof(uint16_t));
- /* Initially, we'll use it just as a usage list. 0 = unused, 1 used */
- /* glyph 0 is always used. */
- glyph_used(ctx, ttf, t, 0);
- if (enc)
- {
- uint32_t n = enc->max;
- /* If we have an encoding table, run through it, and keep anything needed from there. */
- for (i = 0; i < n; i++)
- if (enc->gid[i])
- glyph_used(ctx, ttf, t, enc->gid[i]);
- /* Now convert from a usage table to a renumbering table. */
- if (ttf->orig_num_glyphs > 0)
- {
- ttf->gid_renum[0] = 0;
- j = 1;
- for (i = 1; i < ttf->orig_num_glyphs; i++)
- if (ttf->gid_renum[i])
- ttf->gid_renum[i] = j++;
- ttf->new_num_glyphs = j;
- }
- else
- {
- ttf->new_num_glyphs = 0;
- }
- }
- else
- {
- /* We're a cid font. The cids are gids. */
- for (i = 0; i < (uint32_t)num_gids; i++)
- glyph_used(ctx, ttf, t, gids[i]);
- ttf->new_num_glyphs = ttf->orig_num_glyphs;
- }
- /* Now subset the glyf table. */
- if (enc)
- {
- old_start = get_loca(ctx, ttf, 0);
- if (old_start > t->len)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
- old_end = get_loca(ctx, ttf, 1);
- if (old_end > t->len || old_end < old_start)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
- len = old_end - old_start;
- new_start = 0;
- put_loca(ctx, ttf, 0, new_start);
- last_loca = 0;
- last_loca_ofs = len;
- for (i = 0; i < ttf->orig_num_glyphs; i++)
- {
- old_end = get_loca(ctx, ttf, i + 1);
- if (old_end > t->len || old_end < old_start)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
- len = old_end - old_start;
- if (len > 0 && (i == 0 || ttf->gid_renum[i] != 0))
- {
- memmove(t->data + new_start, t->data + old_start, len);
- if ((int16_t)get16(t->data + new_start) < 0)
- renumber_composite(ctx, ttf, t->data + new_start, len);
- for (k = last_loca + 1; k <= ttf->gid_renum[i]; k++)
- put_loca(ctx, ttf, k, last_loca_ofs);
- new_start += len;
- last_loca = ttf->gid_renum[i];
- last_loca_ofs = new_start;
- }
- old_start = old_end;
- }
- for (k = last_loca + 1; k <= ttf->new_num_glyphs; k++)
- put_loca(ctx, ttf, k, last_loca_ofs);
- }
- else
- {
- new_start = 0;
- old_start = get_loca(ctx, ttf, 0);
- if (old_start > t->len)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
- for (i = 0; i < ttf->orig_num_glyphs; i++)
- {
- old_end = get_loca(ctx, ttf, i + 1);
- if (old_end > t->len || old_end < old_start)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value");
- len = old_end - old_start;
- if (len > 0 && ttf->gid_renum[i] != 0)
- {
- memmove(t->data + new_start, t->data + old_start, len);
- put_loca(ctx, ttf, i, new_start);
- new_start += len;
- }
- else
- {
- put_loca(ctx, ttf, i, new_start);
- }
- old_start = old_end;
- }
- put_loca(ctx, ttf, ttf->orig_num_glyphs, new_start);
- }
- *ttf->loca_len = (size_t) (ttf->new_num_glyphs + 1) * (2<<ttf->index_to_loc_format);
- t->len = new_start;
- }
- static void
- update_num_glyphs(fz_context *ctx, ttf_t *ttf)
- {
- put16(ttf->maxp + 4, ttf->new_num_glyphs);
- }
- static void
- subset_hmtx(fz_context *ctx, ttf_t *ttf, fz_stream *stm)
- {
- fz_buffer *t = read_table(ctx, stm, TAG("hmtx"), 1);
- uint16_t long_metrics, short_metrics, i, k;
- uint8_t *s = t->data;
- uint8_t *d = t->data;
- int cidfont = (ttf->encoding == NULL);
- long_metrics = ttf->orig_num_long_hor_metrics;
- if (long_metrics > ttf->orig_num_glyphs)
- long_metrics = ttf->orig_num_glyphs;
- if (long_metrics > t->len / 4)
- long_metrics = (uint16_t)(t->len / 4);
- short_metrics = (uint16_t)((t->len - long_metrics * 4) / 2);
- if (short_metrics > ttf->orig_num_glyphs - long_metrics)
- short_metrics = ttf->orig_num_glyphs - long_metrics;
- for (i = 0; i < long_metrics; i++)
- {
- if (i == 0 || ttf->is_otf || (i < ttf->orig_num_glyphs && ttf->gid_renum[i]))
- {
- put32(d, get32(s));
- d += 4;
- }
- else if (cidfont)
- {
- put32(d, 0);
- d += 4;
- }
- s += 4;
- }
- for (k = 0 ; k < short_metrics; k++, i++)
- {
- if (i == 0 || ttf->is_otf || (i < ttf->orig_num_glyphs && ttf->gid_renum[i]))
- {
- put16(d, get16(s));
- d += 2;
- }
- else if (cidfont)
- {
- put16(d, 0);
- d += 2;
- }
- s += 2;
- }
- t->len = (d - t->data);
- add_table(ctx, ttf, TAG("hmtx"), t);
- }
- static void
- shrink_loca_if_possible(fz_context *ctx, ttf_t *ttf)
- {
- uint32_t len;
- uint16_t i, n;
- uint8_t *loca;
- if (ttf->index_to_loc_format == 0)
- return; /* Can't shrink cos it's already shrunk! */
- n = ttf->new_num_glyphs;
- len = get_loca(ctx, ttf, n);
- if (len >= 65536)
- return; /* We can't shrink it, cos it's too big. */
- loca = ttf->loca;
- for (i = 0; i <= n; i++)
- {
- if (get32(loca + 4*i) & 1)
- return; /* Can't shrink it, because an offset is not even */
- }
- for (i = 0; i <= n; i++)
- {
- put16(loca + 2*i, get32(loca + 4*i)/2);
- }
- *ttf->loca_len = 2*(n+1);
- put16(ttf->index_to_loc_formatp, 0);
- }
/* Standard Macintosh glyph names, each paired with the index that
 * subset_post2() stores in the 'post' table in place of a name string.
 * The entries MUST stay sorted in strcmp() order, as
 * find_macroman_string() binary searches them. */
static const struct { const char *charname; int idx; } macroman[] =
{
	{ ".notdef", 0},
	{ ".null", 1},
	{ "A", 36},
	{ "AE", 144},
	{ "Aacute", 201},
	{ "Acircumflex", 199},
	{ "Adieresis", 98},
	{ "Agrave", 173},
	{ "Aring", 99},
	{ "Atilde", 174},
	{ "B", 37},
	{ "C", 38},
	{ "Cacute", 253},
	{ "Ccaron", 255},
	{ "Ccedilla", 100},
	{ "D", 39},
	{ "Delta", 168},
	{ "E", 40},
	{ "Eacute", 101},
	{ "Ecircumflex", 200},
	{ "Edieresis", 202},
	{ "Egrave", 203},
	{ "Eth", 233},
	{ "F", 41},
	{ "G", 42},
	{ "Gbreve", 248},
	{ "H", 43},
	{ "I", 44},
	{ "Iacute", 204},
	{ "Icircumflex", 205},
	{ "Idieresis", 206},
	{ "Idotaccent", 250},
	{ "Igrave", 207},
	{ "J", 45},
	{ "K", 46},
	{ "L", 47},
	{ "Lslash", 226},
	{ "M", 48},
	{ "N", 49},
	{ "Ntilde", 102},
	{ "O", 50},
	{ "OE", 176},
	{ "Oacute", 208},
	{ "Ocircumflex", 209},
	{ "Odieresis", 103},
	{ "Ograve", 211},
	{ "Omega", 159},
	{ "Oslash", 145},
	{ "Otilde", 175},
	{ "P", 51},
	{ "Q", 52},
	{ "R", 53},
	{ "S", 54},
	{ "Scaron", 228},
	{ "Scedilla", 251},
	{ "T", 55},
	{ "Thorn", 237},
	{ "U", 56},
	{ "Uacute", 212},
	{ "Ucircumflex", 213},
	{ "Udieresis", 104},
	{ "Ugrave", 214},
	{ "V", 57},
	{ "W", 58},
	{ "X", 59},
	{ "Y", 60},
	{ "Yacute", 235},
	{ "Ydieresis", 187},
	{ "Z", 61},
	{ "Zcaron", 230},
	{ "a", 68},
	{ "aacute", 105},
	{ "acircumflex", 107},
	{ "acute", 141},
	{ "adieresis", 108},
	{ "ae", 160},
	{ "agrave", 106},
	{ "ampersand", 9},
	{ "apple", 210},
	{ "approxequal", 167},
	{ "aring", 110},
	{ "asciicircum", 65},
	{ "asciitilde", 97},
	{ "asterisk", 13},
	{ "at", 35},
	{ "atilde", 109},
	{ "b", 69},
	{ "backslash", 63},
	{ "bar", 95},
	{ "braceleft", 94},
	{ "braceright", 96},
	{ "bracketleft", 62},
	{ "bracketright", 64},
	{ "breve", 219},
	{ "brokenbar", 232},
	{ "bullet", 135},
	{ "c", 70},
	{ "cacute", 254},
	{ "caron", 225},
	{ "ccaron", 256},
	{ "ccedilla", 111},
	{ "cedilla", 222},
	{ "cent", 132},
	{ "circumflex", 216},
	{ "colon", 29},
	{ "comma", 15},
	{ "copyright", 139},
	{ "currency", 189},
	{ "d", 71},
	{ "dagger", 130},
	{ "daggerdbl", 194},
	{ "dcroat", 257},
	{ "degree", 131},
	{ "dieresis", 142},
	{ "divide", 184},
	{ "dollar", 7},
	{ "dotaccent", 220},
	{ "dotlessi", 215},
	{ "e", 72},
	{ "eacute", 112},
	{ "ecircumflex", 114},
	{ "edieresis", 115},
	{ "egrave", 113},
	{ "eight", 27},
	{ "ellipsis", 171},
	{ "emdash", 179},
	{ "endash", 178},
	{ "equal", 32},
	{ "eth", 234},
	{ "exclam", 4},
	{ "exclamdown", 163},
	{ "f", 73},
	{ "fi", 192},
	{ "five", 24},
	{ "fl", 193},
	{ "florin", 166},
	{ "four", 23},
	{ "fraction", 188},
	{ "franc", 247},
	{ "g", 74},
	{ "gbreve", 249},
	{ "germandbls", 137},
	{ "grave", 67},
	{ "greater", 33},
	{ "greaterequal", 149},
	{ "guillemotleft", 169},
	{ "guillemotright", 170},
	{ "guilsinglleft", 190},
	{ "guilsinglright", 191},
	{ "h", 75},
	{ "hungarumlaut", 223},
	{ "hyphen", 16},
	{ "i", 76},
	{ "iacute", 116},
	{ "icircumflex", 118},
	{ "idieresis", 119},
	{ "igrave", 117},
	{ "infinity", 146},
	{ "integral", 156},
	{ "j", 77},
	{ "k", 78},
	{ "l", 79},
	{ "less", 31},
	{ "lessequal", 148},
	{ "logicalnot", 164},
	{ "lozenge", 185},
	{ "lslash", 227},
	{ "m", 80},
	{ "macron", 218},
	{ "minus", 239},
	{ "mu", 151},
	{ "multiply", 240},
	{ "n", 81},
	{ "nine", 28},
	{ "nonbreakingspace", 172},
	{ "nonmarkingreturn", 2},
	{ "notequal", 143},
	{ "ntilde", 120},
	{ "numbersign", 6},
	{ "o", 82},
	{ "oacute", 121},
	{ "ocircumflex", 123},
	{ "odieresis", 124},
	{ "oe", 177},
	{ "ogonek", 224},
	{ "ograve", 122},
	{ "one", 20},
	{ "onehalf", 244},
	{ "onequarter", 245},
	{ "onesuperior", 241},
	{ "ordfeminine", 157},
	{ "ordmasculine", 158},
	{ "oslash", 161},
	{ "otilde", 125},
	{ "p", 83},
	{ "paragraph", 136},
	{ "parenleft", 11},
	{ "parenright", 12},
	{ "partialdiff", 152},
	{ "percent", 8},
	{ "period", 17},
	{ "periodcentered", 195},
	{ "perthousand", 198},
	{ "pi", 155},
	{ "plus", 14},
	{ "plusminus", 147},
	{ "product", 154},
	{ "q", 84},
	{ "question", 34},
	{ "questiondown", 162},
	{ "quotedbl", 5},
	{ "quotedblbase", 197},
	{ "quotedblleft", 180},
	{ "quotedblright", 181},
	{ "quoteleft", 182},
	{ "quoteright", 183},
	{ "quotesinglbase", 196},
	{ "quotesingle", 10},
	{ "r", 85},
	{ "radical", 165},
	{ "registered", 138},
	{ "ring", 221},
	{ "s", 86},
	{ "scaron", 229},
	{ "scedilla", 252},
	{ "section", 134},
	{ "semicolon", 30},
	{ "seven", 26},
	{ "six", 25},
	{ "slash", 18},
	{ "space", 3},
	{ "sterling", 133},
	{ "summation", 153},
	{ "t", 87},
	{ "thorn", 238},
	{ "three", 22},
	{ "threequarters", 246},
	{ "threesuperior", 243},
	{ "tilde", 217},
	{ "trademark", 140},
	{ "two", 21},
	{ "twosuperior", 242},
	{ "u", 88},
	{ "uacute", 126},
	{ "ucircumflex", 128},
	{ "udieresis", 129},
	{ "ugrave", 127},
	{ "underscore", 66},
	{ "v", 89},
	{ "w", 90},
	{ "x", 91},
	{ "y", 92},
	{ "yacute", 236},
	{ "ydieresis", 186},
	{ "yen", 150},
	{ "z", 93},
	{ "zcaron", 231},
	{ "zero", 19},
};

/*
	Look up a glyph name in the macroman table.

	s: NUL terminated glyph name.

	Returns the index paired with the name, or -1 if the name is not in
	the table.
*/
static int
find_macroman_string(const char *s)
{
	int l, r, m;
	int comparison;

	/* Binary search over the closed interval [l, r]. r has to start on
	 * the last valid entry: starting one past it lets m reach the
	 * element count, reading beyond the end of the table for any name
	 * that sorts after the final entry. */
	l = 0;
	r = (int)(sizeof(macroman) / sizeof(macroman[0])) - 1;
	while (l <= r)
	{
		m = (l + r) >> 1;
		comparison = strcmp(s, macroman[m].charname);
		if (comparison < 0)
			r = m - 1;
		else if (comparison > 0)
			l = m + 1;
		else
			return macroman[m].idx;
	}

	return -1;
}
- static size_t
- subset_post2(fz_context *ctx, ttf_t *ttf, uint8_t *d, size_t len, int *gids, int num_gids)
- {
- int i, n, new_glyphs, old_strings, new_strings;
- int j;
- fz_int2_heap heap = { 0 };
- uint8_t *d0, *e, *p;
- if (len < (size_t) 2 + 2 * ttf->orig_num_glyphs)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated post table");
- n = get16(d);
- if ((uint32_t)n != ttf->orig_num_glyphs)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table");
- d0 = d;
- d += 2; len -= 2;
- e = d;
- p = d;
- /* Store all kept indexes. */
- if (len < (size_t)n*2)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table");
- old_strings = 0;
- new_strings = 0;
- new_glyphs = 0;
- j = 0;
- len -= (size_t)n*2;
- for (i = 0; i < n; i++)
- {
- uint16_t o = get16(d);
- fz_int2 i2;
- p += 2;
- if (o >= 258)
- old_strings++;
- /* We're only keeping gids we want. */
- /* Note we need to keep both the gids we were given by the caller, but also
- * those required as composites (in gid_renum, if we have it). */
- if (i != 0 && (j >= num_gids || gids[j] != i) && (ttf->gid_renum == NULL || ttf->gid_renum[i] == 0))
- {
- memmove(d, d + 2, (n - i - 1) * 2);
- continue;
- }
- if (j < num_gids && gids[j] == i)
- j++;
- d += 2;
- e += 2;
- /* We want this gid. */
- new_glyphs++;
- /* 257 or smaller: same as in the basic order, keep it as such. */
- if (o <= 257)
- continue;
- /* check if string is one of the macroman standard ones, and use its index if so. */
- {
- uint8_t *q = d0 + 2 + (size_t) n * 2;
- int k;
- char buf[257] = { 0 };
- int macidx;
- for (k = 0; k < o - 258; k++)
- q += 1 + *q;
- for (k = 0; k < *q; k++)
- buf[k] = *(q + 1 + k);
- macidx = find_macroman_string(buf);
- if (macidx >= 0)
- {
- put16(d - 2, macidx);
- continue;
- }
- }
- /* We want this gid, and it is a string. */
- new_strings++;
- /* Store the index. */
- i2.a = o - 258;
- i2.b = i;
- fz_int2_heap_insert(ctx, &heap, i2);
- /* Update string index value in table entry. */
- put16(d - 2, 257 + new_strings);
- }
- d = p;
- /* Update number of indexes */
- put16(d0, new_glyphs);
- fz_int2_heap_sort(ctx, &heap);
- /* So, the heap is sorted on i2.a (the string indexes we want to keep),
- * and i2.b is the gid that refers to that index. */
- /* Run through the list moving the strings down that we care about. */
- j = 0;
- n = old_strings;
- for (i = 0; i < n; i++)
- {
- uint8_t slen;
- if (len < 1)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table");
- slen = *d+1;
- if (len < slen)
- fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table");
- len -= slen;
- if (j >= heap.len || heap.heap[j].a != i)
- {
- /* Drop this one. */
- d += slen;
- continue;
- }
- memmove(e, d, slen);
- d += slen;
- e += slen;
- j++;
- }
- fz_free(ctx, heap.heap);
- return e - d0;
- }
- static void
- subset_post(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids)
- {
- fz_buffer *t = read_table(ctx, stm, TAG("post"), 0);
- uint8_t *d;
- size_t len;
- uint32_t fmt;
- if (t == NULL)
- return;
- d = t->data;
- len = t->len;
- if (len < 32)
- {
- fz_drop_buffer(ctx, t);
- fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated post table");
- }
- fmt = get32(d);
- if (fmt != 0x00020000)
- {
- /* Fmt 1: Nothing to be gained by having this table. The cmap should
- * have all the mappings anyway, and we'll have broken it by renumbering
- * the gids down anyway. */
- /* Fmt 2.5 deprecated. */
- /* Fmt 3 and 4: should not be used for PDF. */
- /* No other formats defined. */
- fz_drop_buffer(ctx, t);
- return;
- }
- d += 32; len -= 32;
- fz_try(ctx)
- len = subset_post2(ctx, ttf, d, len, gids, num_gids);
- fz_catch(ctx)
- {
- fz_drop_buffer(ctx, t);
- fz_rethrow(ctx);
- }
- t->len = 32 + len;
- add_table(ctx, ttf, TAG("post"), t);
- }
- static void
- subset_CFF(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids, int symbolic, int cidfont)
- {
- fz_buffer *t = read_table(ctx, stm, TAG("CFF "), 1);
- fz_buffer *sub = NULL;
- fz_var(sub);
- fz_try(ctx)
- sub = fz_subset_cff_for_gids(ctx, t, gids, num_gids, symbolic, cidfont);
- fz_always(ctx)
- fz_drop_buffer(ctx, t);
- fz_catch(ctx)
- fz_rethrow(ctx);
- add_table(ctx, ttf, TAG("CFF "), sub);
- }
- fz_buffer *
- fz_subset_ttf_for_gids(fz_context *ctx, fz_buffer *orig, int *gids, int num_gids, int symbolic, int cidfont)
- {
- fz_stream *stm = fz_open_buffer(ctx, orig);
- ttf_t ttf = { 0 };
- fz_buffer *newbuf = NULL;
- fz_output *out = NULL;
- fz_var(newbuf);
- fz_var(out);
- fz_try(ctx)
- {
- ttf.is_otf = (fz_read_uint32_le(ctx, stm) == 0x4f54544f);
- ttf.symbolic = symbolic;
- /* Subset the name table. No other dependencies. */
- subset_name_table(ctx, &ttf, stm);
- if (!cidfont)
- {
- /* Load the encoding. Populates the encoding table from the cmap table
- * in the original. cmap table is then discarded. */
- load_encoding(ctx, &ttf, stm);
- /* Blank out the bits of the encoding we don't need. */
- reduce_encoding(ctx, &ttf, gids, num_gids);
- }
- /* Read maxp and store the table. Remember orig_num_glyphs. */
- read_maxp(ctx, &ttf, stm);
- /* Read head and store the table. Remember the loca index size. */
- read_head(ctx, &ttf, stm);
- if (ttf.is_otf)
- {
- subset_CFF(ctx, &ttf, stm, gids, num_gids, symbolic, cidfont);
- }
- /* Read loca and store it. Stash a pointer to the table for quick access. */
- if (!ttf.is_otf)
- {
- read_loca(ctx, &ttf, stm);
- /* Read the glyf data, and scan it for composites. This makes the gid_renum table,
- * subsets the glyf data, and rewrites the loca table. */
- read_glyf(ctx, &ttf, stm, gids, num_gids);
- }
- /* Read hhea and store it. Remember numOfLongHorMetrics. */
- read_hhea(ctx, &ttf, stm);
- /* Read and subset hmtx. */
- subset_hmtx(ctx, &ttf, stm);
- #ifdef DEBUG_SUBSETTING
- if (!cidfont)
- {
- encoding_t *enc = ttf.encoding;
- uint32_t i, n = enc->max;
- for (i = 0; i < n; i++)
- if (enc->gid[i])
- printf("cid %x '%c'-> orig gid %d -> gid %d\n", i, (char)i, enc->gid[i], ttf.gid_renum[enc->gid[i]]);
- }
- {
- uint32_t i;
- for (i = 0; i < ttf.orig_num_glyphs; i++)
- if (ttf.gid_renum[i])
- printf("gid %d -> %d\n", i, ttf.gid_renum[i]);
- for (i = 0; i <= ttf.new_num_glyphs; i++)
- printf("LOCA %d = %x\n", i, get_loca(ctx, &ttf, i));
- }
- #endif
- if (!ttf.is_otf)
- {
- shrink_loca_if_possible(ctx, &ttf);
- update_num_glyphs(ctx, &ttf);
- }
- if (!cidfont)
- {
- /* Now we can make the new cmap. */
- make_cmap(ctx, &ttf);
- }
- if (!cidfont)
- {
- /* subset the post table */
- subset_post(ctx, &ttf, stm, gids, num_gids);
- }
- copy_table(ctx, &ttf, stm, TAG("OS/2"), 0);
- copy_table(ctx, &ttf, stm, TAG("cvt "), 0);
- copy_table(ctx, &ttf, stm, TAG("fpgm"), 0);
- copy_table(ctx, &ttf, stm, TAG("prep"), 0);
- sort_tables(ctx, &ttf);
- checksum_tables(ctx, &ttf);
- newbuf = fz_new_buffer(ctx, 1024);
- out = fz_new_output_with_buffer(ctx, newbuf);
- write_tables(ctx, &ttf, out);
- fz_close_output(ctx, out);
- fix_checksum(ctx, newbuf);
- }
- fz_always(ctx)
- {
- int i;
- fz_drop_output(ctx, out);
- fz_drop_stream(ctx, stm);
- for (i = 0; i < ttf.len; i++)
- fz_drop_buffer(ctx, ttf.table[i].tab);
- fz_free(ctx, ttf.table);
- fz_free(ctx, ttf.gid_renum);
- fz_free(ctx, ttf.encoding);
- }
- fz_catch(ctx)
- {
- fz_drop_buffer(ctx, newbuf);
- fz_rethrow(ctx);
- }
- return newbuf;
- }
|