| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673 |
- // Copyright (C) 2004-2025 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #include "mupdf/pdf.h"
- #include <assert.h>
- #include <ft2build.h>
- #include FT_FREETYPE_H
- #include FT_ADVANCES_H
- #ifdef FT_FONT_FORMATS_H
- #include FT_FONT_FORMATS_H
- #else
- #include FT_XFREE86_H
- #endif
- #include FT_TRUETYPE_TABLES_H
- #ifndef FT_SFNT_HEAD
- #define FT_SFNT_HEAD ft_sfnt_head
- #endif
- void
- pdf_load_encoding(const char **estrings, const char *encoding)
- {
- const char * const *bstrings = NULL;
- int i;
- if (!strcmp(encoding, "StandardEncoding"))
- bstrings = fz_glyph_name_from_adobe_standard;
- if (!strcmp(encoding, "MacRomanEncoding"))
- bstrings = fz_glyph_name_from_mac_roman;
- if (!strcmp(encoding, "MacExpertEncoding"))
- bstrings = fz_glyph_name_from_mac_expert;
- if (!strcmp(encoding, "WinAnsiEncoding"))
- bstrings = fz_glyph_name_from_win_ansi;
- if (bstrings)
- for (i = 0; i < 256; i++)
- estrings[i] = bstrings[i];
- }
- static void pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict,
- const char *collection, const char *basefont, int iscidfont);
- static const char *base_font_names[][10] =
- {
- { "Courier", "CourierNew", "CourierNewPSMT", NULL },
- { "Courier-Bold", "CourierNew,Bold", "Courier,Bold",
- "CourierNewPS-BoldMT", "CourierNew-Bold", NULL },
- { "Courier-Oblique", "CourierNew,Italic", "Courier,Italic",
- "CourierNewPS-ItalicMT", "CourierNew-Italic", NULL },
- { "Courier-BoldOblique", "CourierNew,BoldItalic", "Courier,BoldItalic",
- "CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL },
- { "Helvetica", "ArialMT", "Arial", NULL },
- { "Helvetica-Bold", "Arial-BoldMT", "Arial,Bold", "Arial-Bold",
- "Helvetica,Bold", NULL },
- { "Helvetica-Oblique", "Arial-ItalicMT", "Arial,Italic", "Arial-Italic",
- "Helvetica,Italic", "Helvetica-Italic", NULL },
- { "Helvetica-BoldOblique", "Arial-BoldItalicMT",
- "Arial,BoldItalic", "Arial-BoldItalic",
- "Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL },
- { "Times-Roman", "TimesNewRomanPSMT", "TimesNewRoman",
- "TimesNewRomanPS", NULL },
- { "Times-Bold", "TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold",
- "TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL },
- { "Times-Italic", "TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic",
- "TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL },
- { "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT",
- "TimesNewRoman,BoldItalic", "TimesNewRomanPS-BoldItalic",
- "TimesNewRoman-BoldItalic", NULL },
- { "Symbol", "Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic",
- "SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold", "SymbolMT,BoldItalic", NULL },
- { "ZapfDingbats", NULL }
- };
- const unsigned char *
- pdf_lookup_substitute_font(fz_context *ctx, int mono, int serif, int bold, int italic, int *len)
- {
- if (mono) {
- if (bold) {
- if (italic) return fz_lookup_base14_font(ctx, "Courier-BoldOblique", len);
- else return fz_lookup_base14_font(ctx, "Courier-Bold", len);
- } else {
- if (italic) return fz_lookup_base14_font(ctx, "Courier-Oblique", len);
- else return fz_lookup_base14_font(ctx, "Courier", len);
- }
- } else if (serif) {
- if (bold) {
- if (italic) return fz_lookup_base14_font(ctx, "Times-BoldItalic", len);
- else return fz_lookup_base14_font(ctx, "Times-Bold", len);
- } else {
- if (italic) return fz_lookup_base14_font(ctx, "Times-Italic", len);
- else return fz_lookup_base14_font(ctx, "Times-Roman", len);
- }
- } else {
- if (bold) {
- if (italic) return fz_lookup_base14_font(ctx, "Helvetica-BoldOblique", len);
- else return fz_lookup_base14_font(ctx, "Helvetica-Bold", len);
- } else {
- if (italic) return fz_lookup_base14_font(ctx, "Helvetica-Oblique", len);
- else return fz_lookup_base14_font(ctx, "Helvetica", len);
- }
- }
- }
/*
 * Return 1 when name matches one of the patterns tested here, else 0:
 * it contains "HuaTian" or "MingLi", it starts with "DF" or "DLC", or
 * it contains "+DF" or "+DLC".
 */
static int is_dynalab(char *name)
{
	if (strstr(name, "HuaTian") != NULL || strstr(name, "MingLi") != NULL)
		return 1;
	if (strncmp(name, "DF", 2) == 0 || strstr(name, "+DF") != NULL)
		return 1;
	if (strncmp(name, "DLC", 3) == 0 || strstr(name, "+DLC") != NULL)
		return 1;
	return 0;
}
/*
 * Compare two strings while ignoring every ' ' character in either one.
 *
 * Returns 0 when the strings are equal after removing spaces and 1
 * otherwise. Unlike strcmp, the result carries no ordering information.
 */
static int strcmp_ignore_space(const char *a, const char *b)
{
	for (;;)
	{
		while (*a == ' ')
			a++;
		while (*b == ' ')
			b++;
		if (*a != *b)
			return 1;
		/* Both characters are equal here, so one terminator means both ended. */
		if (*a == 0)
			return 0;
		a++;
		b++;
	}
}
- const char *pdf_clean_font_name(const char *fontname)
- {
- int i, k;
- for (i = 0; i < (int)nelem(base_font_names); i++)
- for (k = 0; base_font_names[i][k]; k++)
- if (!strcmp_ignore_space(base_font_names[i][k], fontname))
- return base_font_names[i][0];
- return fontname;
- }
- /*
- * FreeType and Rendering glue
- */
- enum { UNKNOWN, TYPE1, TRUETYPE };
- static int ft_kind(fz_context *ctx, FT_Face face)
- {
- const char *kind;
- fz_ft_lock(ctx);
- #ifdef FT_FONT_FORMATS_H
- kind = FT_Get_Font_Format(face);
- #else
- kind = FT_Get_X11_Font_Format(face);
- #endif
- fz_ft_unlock(ctx);
- if (!strcmp(kind, "TrueType")) return TRUETYPE;
- if (!strcmp(kind, "Type 1")) return TYPE1;
- if (!strcmp(kind, "CFF")) return TYPE1;
- if (!strcmp(kind, "CID Type 1")) return TYPE1;
- return UNKNOWN;
- }
- static int ft_cid_to_gid(pdf_font_desc *fontdesc, int cid)
- {
- if (fontdesc->to_ttf_cmap)
- {
- cid = pdf_lookup_cmap(fontdesc->to_ttf_cmap, cid);
- /* vertical presentation forms */
- if (fontdesc->font->flags.ft_substitute && fontdesc->wmode)
- {
- switch (cid)
- {
- case 0x0021: cid = 0xFE15; break; /* ! */
- case 0x0028: cid = 0xFE35; break; /* ( */
- case 0x0029: cid = 0xFE36; break; /* ) */
- case 0x002C: cid = 0xFE10; break; /* , */
- case 0x003A: cid = 0xFE13; break; /* : */
- case 0x003B: cid = 0xFE14; break; /* ; */
- case 0x003F: cid = 0xFE16; break; /* ? */
- case 0x005B: cid = 0xFE47; break; /* [ */
- case 0x005D: cid = 0xFE48; break; /* ] */
- case 0x005F: cid = 0xFE33; break; /* _ */
- case 0x007B: cid = 0xFE37; break; /* { */
- case 0x007D: cid = 0xFE38; break; /* } */
- case 0x2013: cid = 0xFE32; break; /* EN DASH */
- case 0x2014: cid = 0xFE31; break; /* EM DASH */
- case 0x2025: cid = 0xFE30; break; /* TWO DOT LEADER */
- case 0x2026: cid = 0xFE19; break; /* HORIZONTAL ELLIPSIS */
- case 0x3001: cid = 0xFE11; break; /* IDEOGRAPHIC COMMA */
- case 0x3002: cid = 0xFE12; break; /* IDEOGRAPHIC FULL STOP */
- case 0x3008: cid = 0xFE3F; break; /* OPENING ANGLE BRACKET */
- case 0x3009: cid = 0xFE40; break; /* CLOSING ANGLE BRACKET */
- case 0x300A: cid = 0xFE3D; break; /* LEFT DOUBLE ANGLE BRACKET */
- case 0x300B: cid = 0xFE3E; break; /* RIGHT DOUBLE ANGLE BRACKET */
- case 0x300C: cid = 0xFE41; break; /* LEFT CORNER BRACKET */
- case 0x300D: cid = 0xFE42; break; /* RIGHT CORNER BRACKET */
- case 0x300E: cid = 0xFE43; break; /* LEFT WHITE CORNER BRACKET */
- case 0x300F: cid = 0xFE44; break; /* RIGHT WHITE CORNER BRACKET */
- case 0x3010: cid = 0xFE3B; break; /* LEFT BLACK LENTICULAR BRACKET */
- case 0x3011: cid = 0xFE3C; break; /* RIGHT BLACK LENTICULAR BRACKET */
- case 0x3014: cid = 0xFE39; break; /* LEFT TORTOISE SHELL BRACKET */
- case 0x3015: cid = 0xFE3A; break; /* RIGHT TORTOISE SHELL BRACKET */
- case 0x3016: cid = 0xFE17; break; /* LEFT WHITE LENTICULAR BRACKET */
- case 0x3017: cid = 0xFE18; break; /* RIGHT WHITE LENTICULAR BRACKET */
- case 0xFF01: cid = 0xFE15; break; /* FULLWIDTH EXCLAMATION MARK */
- case 0xFF08: cid = 0xFE35; break; /* FULLWIDTH LEFT PARENTHESIS */
- case 0xFF09: cid = 0xFE36; break; /* FULLWIDTH RIGHT PARENTHESIS */
- case 0xFF0C: cid = 0xFE10; break; /* FULLWIDTH COMMA */
- case 0xFF1A: cid = 0xFE13; break; /* FULLWIDTH COLON */
- case 0xFF1B: cid = 0xFE14; break; /* FULLWIDTH SEMICOLON */
- case 0xFF1F: cid = 0xFE16; break; /* FULLWIDTH QUESTION MARK */
- case 0xFF3B: cid = 0xFE47; break; /* FULLWIDTH LEFT SQUARE BRACKET */
- case 0xFF3D: cid = 0xFE48; break; /* FULLWIDTH RIGHT SQUARE BRACKET */
- case 0xFF3F: cid = 0xFE33; break; /* FULLWIDTH LOW LINE */
- case 0xFF5B: cid = 0xFE37; break; /* FULLWIDTH LEFT CURLY BRACKET */
- case 0xFF5D: cid = 0xFE38; break; /* FULLWIDTH RIGHT CURLY BRACKET */
- case 0x30FC: cid = 0xFE31; break; /* KATAKANA-HIRAGANA PROLONGED SOUND MARK */
- case 0xFF0D: cid = 0xFE31; break; /* FULLWIDTH HYPHEN-MINUS */
- }
- }
- return ft_char_index(fontdesc->font->ft_face, cid);
- }
- if (fontdesc->cid_to_gid && (size_t)cid < fontdesc->cid_to_gid_len && cid >= 0)
- return fontdesc->cid_to_gid[cid];
- return cid;
- }
- int
- pdf_font_cid_to_gid(fz_context *ctx, pdf_font_desc *fontdesc, int cid)
- {
- if (fontdesc->font->ft_face)
- {
- int gid;
- fz_ft_lock(ctx);
- gid = ft_cid_to_gid(fontdesc, cid);
- fz_ft_unlock(ctx);
- return gid;
- }
- return cid;
- }
- static int ft_width(fz_context *ctx, pdf_font_desc *fontdesc, int cid)
- {
- int mask = FT_LOAD_NO_SCALE | FT_LOAD_NO_HINTING | FT_LOAD_NO_BITMAP | FT_LOAD_IGNORE_TRANSFORM;
- int gid = ft_cid_to_gid(fontdesc, cid);
- FT_Fixed adv = 0;
- int fterr;
- FT_Face face = fontdesc->font->ft_face;
- FT_UShort units_per_EM;
- fterr = FT_Get_Advance(face, gid, mask, &adv);
- if (fterr && fterr != FT_Err_Invalid_Argument)
- fz_warn(ctx, "FT_Get_Advance(%d): %s", gid, ft_error_string(fterr));
- units_per_EM = face->units_per_EM;
- if (units_per_EM == 0)
- units_per_EM = 2048;
- return adv * 1000 / units_per_EM;
- }
- static const struct { int code; const char *name; } mre_diff_table[] =
- {
- { 173, "notequal" },
- { 176, "infinity" },
- { 178, "lessequal" },
- { 179, "greaterequal" },
- { 182, "partialdiff" },
- { 183, "summation" },
- { 184, "product" },
- { 185, "pi" },
- { 186, "integral" },
- { 189, "Omega" },
- { 195, "radical" },
- { 197, "approxequal" },
- { 198, "Delta" },
- { 215, "lozenge" },
- { 219, "Euro" },
- { 240, "apple" },
- };
- static int lookup_mre_code(const char *name)
- {
- int i;
- for (i = 0; i < (int)nelem(mre_diff_table); ++i)
- if (!strcmp(name, mre_diff_table[i].name))
- return mre_diff_table[i].code;
- for (i = 0; i < 256; i++)
- if (fz_glyph_name_from_mac_roman[i] && !strcmp(name, fz_glyph_name_from_mac_roman[i]))
- return i;
- return -1;
- }
- static int ft_find_glyph_by_unicode_name(FT_Face face, const char *name)
- {
- int unicode, glyph;
- /* Prefer exact unicode match if available. */
- unicode = fz_unicode_from_glyph_name_strict(name);
- if (unicode > 0)
- {
- glyph = ft_char_index(face, unicode);
- if (glyph > 0)
- return glyph;
- }
- /* Fall back to font glyph name if we can. */
- glyph = ft_name_index(face, name);
- if (glyph > 0)
- return glyph;
- /* Fuzzy unicode match as last attempt. */
- unicode = fz_unicode_from_glyph_name(name);
- if (unicode > 0)
- return ft_char_index(face, unicode);
- /* Failed. */
- return 0;
- }
- /*
- * Load font files.
- */
- static void
- pdf_load_builtin_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int has_descriptor)
- {
- FT_Face face;
- const char *clean_name = pdf_clean_font_name(fontname);
- if (clean_name == fontname)
- clean_name = "Times-Roman";
- fontdesc->font = fz_load_system_font(ctx, fontname, 0, 0, !has_descriptor);
- if (!fontdesc->font)
- {
- const unsigned char *data;
- int len;
- data = fz_lookup_base14_font(ctx, clean_name, &len);
- if (!data)
- fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin font: '%s'", fontname);
- fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1);
- fontdesc->font->flags.is_serif = !!strstr(clean_name, "Times");
- }
- if (!strcmp(clean_name, "Symbol") || !strcmp(clean_name, "ZapfDingbats"))
- fontdesc->flags |= PDF_FD_SYMBOLIC;
- face = fontdesc->font->ft_face;
- fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM;
- fontdesc->descent = 1000.0f * face->descender / face->units_per_EM;
- }
- static void
- pdf_load_substitute_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int mono, int serif, int bold, int italic)
- {
- fontdesc->font = fz_load_system_font(ctx, fontname, bold, italic, 0);
- if (!fontdesc->font)
- {
- const unsigned char *data;
- int len;
- data = pdf_lookup_substitute_font(ctx, mono, serif, bold, italic, &len);
- if (!data)
- fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find substitute font");
- fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1);
- fontdesc->font->flags.fake_bold = bold && !fontdesc->font->flags.is_bold;
- fontdesc->font->flags.fake_italic = italic && !fontdesc->font->flags.is_italic;
- fontdesc->font->flags.is_mono = mono;
- fontdesc->font->flags.is_serif = serif;
- fontdesc->font->flags.is_bold = bold;
- fontdesc->font->flags.is_italic = italic;
- }
- fontdesc->font->flags.ft_substitute = 1;
- fontdesc->font->flags.ft_stretch = 1;
- }
- static void
- pdf_load_substitute_cjk_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int ros, int serif)
- {
- fontdesc->font = fz_load_system_cjk_font(ctx, fontname, ros, serif);
- if (!fontdesc->font)
- {
- const unsigned char *data;
- int size;
- int subfont;
- data = fz_lookup_cjk_font(ctx, ros, &size, &subfont);
- if (!data)
- fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin CJK font");
- /* A glyph bbox cache is too big for CJK fonts. */
- fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, size, subfont, 0);
- }
- fontdesc->font->flags.ft_substitute = 1;
- fontdesc->font->flags.ft_stretch = 0;
- fontdesc->font->flags.cjk = 1;
- fontdesc->font->flags.cjk_lang = ros;
- }
- static struct { int ros, serif; const char *name; } known_cjk_fonts[] = {
- { FZ_ADOBE_GB, 1, "SimFang" },
- { FZ_ADOBE_GB, 0, "SimHei" },
- { FZ_ADOBE_GB, 1, "SimKai" },
- { FZ_ADOBE_GB, 1, "SimLi" },
- { FZ_ADOBE_GB, 1, "SimSun" },
- { FZ_ADOBE_GB, 1, "Song" },
- { FZ_ADOBE_CNS, 1, "MingLiU" },
- { FZ_ADOBE_JAPAN, 0, "Gothic" },
- { FZ_ADOBE_JAPAN, 1, "Mincho" },
- { FZ_ADOBE_KOREA, 1, "Batang" },
- { FZ_ADOBE_KOREA, 0, "Gulim" },
- { FZ_ADOBE_KOREA, 0, "Dotum" },
- };
/* Return 1 when ref occurs anywhere inside s, and 0 otherwise. */
static int match_font_name(const char *s, const char *ref)
{
	return strstr(s, ref) != NULL;
}
- static void
- pdf_load_system_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, const char *collection)
- {
- int bold = 0;
- int italic = 0;
- int serif = 0;
- int mono = 0;
- if (strstr(fontname, "Bold"))
- bold = 1;
- if (strstr(fontname, "Italic"))
- italic = 1;
- if (strstr(fontname, "Oblique"))
- italic = 1;
- if (fontdesc->flags & PDF_FD_FIXED_PITCH)
- mono = 1;
- if (fontdesc->flags & PDF_FD_SERIF)
- serif = 1;
- if (fontdesc->flags & PDF_FD_ITALIC)
- italic = 1;
- if (fontdesc->flags & PDF_FD_FORCE_BOLD)
- bold = 1;
- if (collection)
- {
- if (!strcmp(collection, "Adobe-CNS1"))
- pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_CNS, serif);
- else if (!strcmp(collection, "Adobe-GB1"))
- pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_GB, serif);
- else if (!strcmp(collection, "Adobe-Japan1"))
- pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_JAPAN, serif);
- else if (!strcmp(collection, "Adobe-Korea1"))
- pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_KOREA, serif);
- else
- {
- size_t i;
- if (strcmp(collection, "Adobe-Identity") != 0)
- fz_warn(ctx, "unknown cid collection: %s", collection);
- // Recognize common CJK fonts when using Identity or other non-CJK CMap
- for (i = 0; i < nelem(known_cjk_fonts); ++i)
- {
- if (match_font_name(fontname, known_cjk_fonts[i].name))
- {
- pdf_load_substitute_cjk_font(ctx, fontdesc, fontname,
- known_cjk_fonts[i].ros, known_cjk_fonts[i].serif);
- return;
- }
- }
- pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic);
- }
- }
- else
- {
- pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic);
- }
- }
/*
 * Read big-endian 16- and 32-bit unsigned values from a byte pointer.
 *
 * Each byte is widened to uint32_t before shifting: shifting the
 * int-promoted byte left by 24 would overflow a signed int (undefined
 * behaviour) whenever the top byte is 0x80 or above. The expansions are
 * fully parenthesised so they combine safely with other operators.
 */
#define TTF_U16(p) ((uint16_t) (((uint32_t)(p)[0] << 8) | (uint32_t)(p)[1]))
#define TTF_U32(p) (((uint32_t)(p)[0] << 24) | ((uint32_t)(p)[1] << 16) | ((uint32_t)(p)[2] << 8) | (uint32_t)(p)[3])
- static fz_buffer *
- pdf_extract_cff_subtable(fz_context *ctx, unsigned char *data, size_t size)
- {
- size_t num_tables = TTF_U16(data + 4);
- size_t i;
- if (12 + num_tables * 16 > size)
- fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid TTF header");
- for (i = 0; i < num_tables; ++i)
- {
- unsigned char *record = data + 12 + i * 16;
- if (!memcmp("CFF ", record, 4))
- {
- uint64_t offset = TTF_U32(record + 8);
- uint64_t length = TTF_U32(record + 12);
- uint64_t end = offset + length;
- if (end > size)
- fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid TTF subtable offset/length");
- return fz_new_buffer_from_copied_data(ctx, data + offset, length);
- }
- }
- return NULL;
- }
- static void
- pdf_load_embedded_font(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, const char *fontname, pdf_obj *stmref)
- {
- fz_buffer *buf;
- unsigned char *data;
- size_t size;
- fz_var(buf);
- buf = pdf_load_stream(ctx, stmref);
- fz_try(ctx)
- {
- /* Extract CFF subtable for OpenType fonts: */
- size = fz_buffer_storage(ctx, buf, &data);
- if (size > 12) {
- if (!memcmp("OTTO", data, 4)) {
- fz_buffer *cff = pdf_extract_cff_subtable(ctx, data, size);
- if (cff)
- {
- fz_drop_buffer(ctx, buf);
- buf = cff;
- }
- }
- }
- fontdesc->font = fz_new_font_from_buffer(ctx, fontname, buf, 0, 1);
- }
- fz_always(ctx)
- fz_drop_buffer(ctx, buf);
- fz_catch(ctx)
- fz_rethrow(ctx);
- fontdesc->size += fz_buffer_storage(ctx, buf, NULL);
- fontdesc->is_embedded = 1;
- }
- /*
- * Create and destroy
- */
- pdf_font_desc *
- pdf_keep_font(fz_context *ctx, pdf_font_desc *fontdesc)
- {
- return fz_keep_storable(ctx, &fontdesc->storable);
- }
- void
- pdf_drop_font(fz_context *ctx, pdf_font_desc *fontdesc)
- {
- fz_drop_storable(ctx, &fontdesc->storable);
- }
- static int
- pdf_font_is_droppable(fz_context *ctx, fz_storable *fontdesc)
- {
- /* If we aren't holding the FT lock, then we can drop. */
- return !fz_ft_lock_held(ctx);
- }
- static void
- pdf_drop_font_imp(fz_context *ctx, fz_storable *fontdesc_)
- {
- pdf_font_desc *fontdesc = (pdf_font_desc *)fontdesc_;
- fz_drop_font(ctx, fontdesc->font);
- pdf_drop_cmap(ctx, fontdesc->encoding);
- pdf_drop_cmap(ctx, fontdesc->to_ttf_cmap);
- pdf_drop_cmap(ctx, fontdesc->to_unicode);
- fz_free(ctx, fontdesc->cid_to_gid);
- fz_free(ctx, fontdesc->cid_to_ucs);
- fz_free(ctx, fontdesc->hmtx);
- fz_free(ctx, fontdesc->vmtx);
- fz_free(ctx, fontdesc);
- }
- pdf_font_desc *
- pdf_new_font_desc(fz_context *ctx)
- {
- pdf_font_desc *fontdesc;
- fontdesc = fz_malloc_struct(ctx, pdf_font_desc);
- FZ_INIT_AWKWARD_STORABLE(fontdesc, 1, pdf_drop_font_imp, pdf_font_is_droppable);
- fontdesc->size = sizeof(pdf_font_desc);
- fontdesc->font = NULL;
- fontdesc->flags = 0;
- fontdesc->italic_angle = 0;
- fontdesc->ascent = 800;
- fontdesc->descent = -200;
- fontdesc->cap_height = 800;
- fontdesc->x_height = 500;
- fontdesc->missing_width = 0;
- fontdesc->encoding = NULL;
- fontdesc->to_ttf_cmap = NULL;
- fontdesc->cid_to_gid_len = 0;
- fontdesc->cid_to_gid = NULL;
- fontdesc->to_unicode = NULL;
- fontdesc->cid_to_ucs_len = 0;
- fontdesc->cid_to_ucs = NULL;
- fontdesc->wmode = 0;
- fontdesc->hmtx_cap = 0;
- fontdesc->vmtx_cap = 0;
- fontdesc->hmtx_len = 0;
- fontdesc->vmtx_len = 0;
- fontdesc->hmtx = NULL;
- fontdesc->vmtx = NULL;
- fontdesc->dhmtx.lo = 0x0000;
- fontdesc->dhmtx.hi = 0xFFFF;
- fontdesc->dhmtx.w = 1000;
- fontdesc->dvmtx.lo = 0x0000;
- fontdesc->dvmtx.hi = 0xFFFF;
- fontdesc->dvmtx.x = 0;
- fontdesc->dvmtx.y = 880;
- fontdesc->dvmtx.w = -1000;
- fontdesc->is_embedded = 0;
- return fontdesc;
- }
- /*
- * Simple fonts (Type1 and TrueType)
- */
- static FT_CharMap
- select_type1_cmap(FT_Face face)
- {
- int i;
- for (i = 0; i < face->num_charmaps; i++)
- if (face->charmaps[i]->platform_id == 7)
- return face->charmaps[i];
- if (face->num_charmaps > 0)
- return face->charmaps[0];
- return NULL;
- }
- static FT_CharMap
- select_truetype_cmap(fz_context *ctx, FT_Face face, int symbolic)
- {
- int i;
- /* First look for a Microsoft symbolic cmap, if applicable */
- if (symbolic)
- {
- for (i = 0; i < face->num_charmaps; i++)
- if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 0)
- return face->charmaps[i];
- }
- fz_ft_lock(ctx);
- /* Then look for a Microsoft Unicode cmap */
- for (i = 0; i < face->num_charmaps; i++)
- if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 1)
- if (FT_Get_CMap_Format(face->charmaps[i]) != -1)
- {
- fz_ft_unlock(ctx);
- return face->charmaps[i];
- }
- /* Finally look for an Apple MacRoman cmap */
- for (i = 0; i < face->num_charmaps; i++)
- if (face->charmaps[i]->platform_id == 1 && face->charmaps[i]->encoding_id == 0)
- if (FT_Get_CMap_Format(face->charmaps[i]) != -1)
- {
- fz_ft_unlock(ctx);
- return face->charmaps[i];
- }
- if (face->num_charmaps > 0)
- if (FT_Get_CMap_Format(face->charmaps[0]) != -1)
- {
- fz_ft_unlock(ctx);
- return face->charmaps[0];
- }
- fz_ft_unlock(ctx);
- return NULL;
- }
- static FT_CharMap
- select_unknown_cmap(FT_Face face)
- {
- if (face->num_charmaps > 0)
- return face->charmaps[0];
- return NULL;
- }
- static int use_s22pdf_workaround(fz_context *ctx, pdf_obj *dict, pdf_obj *descriptor)
- {
- if (descriptor)
- {
- if (pdf_dict_get(ctx, dict, PDF_NAME(Encoding)) != PDF_NAME(WinAnsiEncoding))
- return 0;
- if (pdf_dict_get_int(ctx, descriptor, PDF_NAME(Flags)) != 4)
- return 0;
- return 1;
- }
- return 0;
- }
- static pdf_font_desc *
- pdf_load_simple_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
- {
- const char *basefont;
- pdf_obj *descriptor;
- pdf_obj *encoding;
- pdf_obj *widths;
- unsigned short *etable = NULL;
- pdf_font_desc *fontdesc = NULL;
- pdf_obj *subtype;
- FT_Face face;
- FT_CharMap cmap;
- int symbolic;
- int kind;
- int glyph;
- const char *estrings[256];
- char ebuffer[256][32];
- int i, k, n;
- int fterr;
- int has_lock = 0;
- fz_var(fontdesc);
- fz_var(etable);
- fz_var(has_lock);
- /* Load font file */
- fz_try(ctx)
- {
- fontdesc = pdf_new_font_desc(ctx);
- basefont = pdf_dict_get_name(ctx, dict, PDF_NAME(BaseFont));
- descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
- if (descriptor)
- pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, NULL, basefont, 0);
- else
- pdf_load_builtin_font(ctx, fontdesc, basefont, 0);
- /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */
- if (use_s22pdf_workaround(ctx, dict, descriptor))
- {
- char *cp936fonts[] = {
- "\xCB\xCE\xCC\xE5", "SimSun,Regular",
- "\xBA\xDA\xCC\xE5", "SimHei,Regular",
- "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular",
- "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular",
- "\xC1\xA5\xCA\xE9", "SimLi,Regular",
- NULL
- };
- for (i = 0; cp936fonts[i]; i += 2)
- if (!strcmp(basefont, cp936fonts[i]))
- break;
- if (cp936fonts[i])
- {
- fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings");
- pdf_drop_font(ctx, fontdesc);
- fontdesc = NULL;
- fontdesc = pdf_new_font_desc(ctx);
- pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, "Adobe-GB1", cp936fonts[i+1], 0);
- fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H");
- fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
- fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
- goto skip_encoding;
- }
- }
- face = fontdesc->font->ft_face;
- kind = ft_kind(ctx, face);
- /* Encoding */
- symbolic = fontdesc->flags & 4;
- /* Bug 703273: If non-symbolic, we're not symbolic. */
- if (fontdesc->flags & 32)
- symbolic = 0;
- if (kind == TYPE1)
- cmap = select_type1_cmap(face);
- else if (kind == TRUETYPE)
- cmap = select_truetype_cmap(ctx, face, symbolic);
- else
- cmap = select_unknown_cmap(face);
- if (cmap)
- {
- fz_ft_lock(ctx);
- fterr = FT_Set_Charmap(face, cmap);
- fz_ft_unlock(ctx);
- if (fterr)
- fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr));
- }
- else
- fz_warn(ctx, "freetype could not find any cmaps");
- /* FIXME: etable may leak on error. */
- etable = Memento_label(fz_malloc_array(ctx, 256, unsigned short), "cid_to_gid");
- fontdesc->size += 256 * sizeof(unsigned short);
- for (i = 0; i < 256; i++)
- {
- estrings[i] = NULL;
- etable[i] = 0;
- }
- encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding));
- if (encoding)
- {
- if (pdf_is_name(ctx, encoding))
- pdf_load_encoding(estrings, pdf_to_name(ctx, encoding));
- if (pdf_is_dict(ctx, encoding))
- {
- pdf_obj *base, *diff, *item;
- base = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding));
- if (pdf_is_name(ctx, base))
- pdf_load_encoding(estrings, pdf_to_name(ctx, base));
- else if (!fontdesc->is_embedded && !symbolic)
- pdf_load_encoding(estrings, "StandardEncoding");
- diff = pdf_dict_get(ctx, encoding, PDF_NAME(Differences));
- if (pdf_is_array(ctx, diff))
- {
- n = pdf_array_len(ctx, diff);
- k = 0;
- for (i = 0; i < n; i++)
- {
- item = pdf_array_get(ctx, diff, i);
- if (pdf_is_int(ctx, item))
- k = pdf_to_int(ctx, item);
- if (pdf_is_name(ctx, item) && k >= 0 && k < (int)nelem(estrings))
- estrings[k++] = pdf_to_name(ctx, item);
- }
- }
- }
- }
- else if (!fontdesc->is_embedded && !symbolic)
- pdf_load_encoding(estrings, "StandardEncoding");
- fz_ft_lock(ctx);
- has_lock = 1;
- /* start with the builtin encoding */
- for (i = 0; i < 256; i++)
- etable[i] = ft_char_index(face, i);
- /* built-in and substitute fonts may be a different type than what the document expects */
- subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
- if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
- kind = TYPE1;
- else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1)))
- kind = TYPE1;
- else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)))
- kind = TRUETYPE;
- else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0)))
- kind = TYPE1;
- else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2)))
- kind = TRUETYPE;
- /* encode by glyph name where we can */
- if (kind == TYPE1)
- {
- for (i = 0; i < 256; i++)
- {
- if (estrings[i])
- {
- glyph = ft_name_index(face, estrings[i]);
- if (glyph > 0)
- etable[i] = glyph;
- }
- }
- }
- /* encode by glyph name where we can */
- if (kind == TRUETYPE)
- {
- /* Unicode cmap */
- if (!symbolic && face->charmap && face->charmap->platform_id == 3)
- {
- for (i = 0; i < 256; i++)
- {
- if (estrings[i])
- {
- glyph = ft_find_glyph_by_unicode_name(face, estrings[i]);
- if (glyph > 0)
- etable[i] = glyph;
- }
- }
- }
- /* MacRoman cmap */
- else if (!symbolic && face->charmap && face->charmap->platform_id == 1)
- {
- for (i = 0; i < 256; i++)
- {
- if (estrings[i])
- {
- int mrcode = lookup_mre_code(estrings[i]);
- glyph = 0;
- if (mrcode > 0)
- glyph = ft_char_index(face, mrcode);
- if (glyph == 0)
- glyph = ft_name_index(face, estrings[i]);
- if (glyph > 0)
- etable[i] = glyph;
- }
- }
- }
- /* Symbolic cmap */
- else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL)
- {
- for (i = 0; i < 256; i++)
- {
- if (estrings[i])
- {
- glyph = ft_name_index(face, estrings[i]);
- if (glyph > 0)
- etable[i] = glyph;
- }
- }
- }
- }
- /* try to reverse the glyph names from the builtin encoding */
- for (i = 0; i < 256; i++)
- {
- if (etable[i] && !estrings[i])
- {
- if (FT_HAS_GLYPH_NAMES(face))
- {
- fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32);
- if (fterr)
- fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr));
- if (ebuffer[i][0])
- estrings[i] = ebuffer[i];
- }
- else
- {
- estrings[i] = (char*) fz_glyph_name_from_win_ansi[i]; /* discard const */
- }
- }
- }
- /* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */
- if (kind == TYPE1 && symbolic)
- {
- for (i = 0; i < 256; i++)
- if (etable[i] && estrings[i] && !fz_unicode_from_glyph_name(estrings[i]))
- estrings[i] = (char*) fz_glyph_name_from_adobe_standard[i];
- }
- fz_ft_unlock(ctx);
- has_lock = 0;
- fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1);
- fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
- fontdesc->cid_to_gid_len = 256;
- fontdesc->cid_to_gid = etable;
- fz_try(ctx)
- {
- pdf_load_to_unicode(ctx, doc, fontdesc, estrings, NULL, pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode)));
- }
- fz_catch(ctx)
- {
- fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
- fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
- fz_report_error(ctx);
- fz_warn(ctx, "cannot load ToUnicode CMap");
- }
- skip_encoding:
- /* Widths */
- pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width);
- widths = pdf_dict_get(ctx, dict, PDF_NAME(Widths));
- if (widths)
- {
- int first, last;
- first = pdf_dict_get_int(ctx, dict, PDF_NAME(FirstChar));
- last = pdf_dict_get_int(ctx, dict, PDF_NAME(LastChar));
- if (first < 0 || last > 255 || first > last)
- first = last = 0;
- for (i = 0; i < last - first + 1; i++)
- {
- int wid = pdf_array_get_int(ctx, widths, i);
- pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid);
- }
- }
- else
- {
- fz_ft_lock(ctx);
- has_lock = 1;
- for (i = 0; i < 256; i++)
- pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i));
- fz_ft_unlock(ctx);
- has_lock = 0;
- }
- pdf_end_hmtx(ctx, fontdesc);
- }
- fz_catch(ctx)
- {
- if (has_lock)
- fz_ft_unlock(ctx);
- if (fontdesc && etable != fontdesc->cid_to_gid)
- fz_free(ctx, etable);
- pdf_drop_font(ctx, fontdesc);
- fz_rethrow(ctx);
- }
- return fontdesc;
- }
- static int
- hail_mary_make_hash_key(fz_context *ctx, fz_store_hash *hash, void *key_)
- {
- hash->u.pi.i = 0;
- hash->u.pi.ptr = NULL;
- return 1;
- }
- static void *
- hail_mary_keep_key(fz_context *ctx, void *key)
- {
- return key;
- }
- static void
- hail_mary_drop_key(fz_context *ctx, void *key)
- {
- }
- static int
- hail_mary_cmp_key(fz_context *ctx, void *k0, void *k1)
- {
- return k0 == k1;
- }
- static void
- hail_mary_format_key(fz_context *ctx, char *s, size_t n, void *key_)
- {
- fz_strlcpy(s, "(hail mary font)", n);
- }
- static int hail_mary_store_key; /* Dummy */
- static const fz_store_type hail_mary_store_type =
- {
- "hail-mary",
- hail_mary_make_hash_key,
- hail_mary_keep_key,
- hail_mary_drop_key,
- hail_mary_cmp_key,
- hail_mary_format_key,
- NULL
- };
- pdf_font_desc *
- pdf_load_hail_mary_font(fz_context *ctx, pdf_document *doc)
- {
- pdf_font_desc *fontdesc;
- pdf_font_desc *existing;
- if ((fontdesc = fz_find_item(ctx, pdf_drop_font_imp, &hail_mary_store_key, &hail_mary_store_type)) != NULL)
- {
- return fontdesc;
- }
- /* FIXME: Get someone with a clue about fonts to fix this */
- fontdesc = pdf_load_simple_font(ctx, doc, NULL);
- existing = fz_store_item(ctx, &hail_mary_store_key, fontdesc, fontdesc->size, &hail_mary_store_type);
- assert(existing == NULL);
- (void)existing; /* Silence warning in release builds */
- return fontdesc;
- }
- /*
- * CID Fonts
- */
- static pdf_font_desc *
- load_cid_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode)
- {
- pdf_obj *widths;
- pdf_obj *descriptor;
- pdf_font_desc *fontdesc = NULL;
- fz_buffer *buf = NULL;
- pdf_cmap *cmap;
- FT_Face face;
- char collection[256];
- const char *basefont;
- int i, k, fterr;
- pdf_obj *cidtogidmap;
- pdf_obj *obj;
- int dw;
- fz_var(fontdesc);
- fz_var(buf);
- fz_try(ctx)
- {
- /* Get font name and CID collection */
- basefont = pdf_dict_get_name(ctx, dict, PDF_NAME(BaseFont));
- {
- pdf_obj *cidinfo;
- const char *reg, *ord;
- cidinfo = pdf_dict_get(ctx, dict, PDF_NAME(CIDSystemInfo));
- if (cidinfo)
- {
- reg = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Registry), NULL);
- ord = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Ordering), NULL);
- fz_snprintf(collection, sizeof collection, "%s-%s", reg, ord);
- }
- else
- {
- fz_warn(ctx, "CIDFont is missing CIDSystemInfo dictionary; assuming Adobe-Identity");
- fz_strlcpy(collection, "Adobe-Identity", sizeof collection);
- }
- }
- /* Encoding */
- if (pdf_is_name(ctx, encoding))
- {
- cmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, encoding));
- }
- else if (pdf_is_indirect(ctx, encoding))
- {
- cmap = pdf_load_embedded_cmap(ctx, doc, encoding);
- }
- else
- {
- fz_throw(ctx, FZ_ERROR_SYNTAX, "font missing encoding");
- }
- /* Load font file */
- fontdesc = pdf_new_font_desc(ctx);
- fontdesc->encoding = cmap;
- fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
- pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding));
- descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
- if (!descriptor)
- fz_throw(ctx, FZ_ERROR_SYNTAX, "missing font descriptor");
- pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, collection, basefont, 1);
- face = fontdesc->font->ft_face;
- /* Apply encoding */
- cidtogidmap = pdf_dict_get(ctx, dict, PDF_NAME(CIDToGIDMap));
- if (pdf_is_stream(ctx, cidtogidmap))
- {
- size_t z, len;
- unsigned char *data;
- buf = pdf_load_stream(ctx, cidtogidmap);
- len = fz_buffer_storage(ctx, buf, &data);
- fontdesc->cid_to_gid_len = len / 2;
- fontdesc->cid_to_gid = Memento_label(fz_malloc_array(ctx, fontdesc->cid_to_gid_len, unsigned short), "cid_to_gid_map");
- fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short);
- for (z = 0; z < fontdesc->cid_to_gid_len; z++)
- fontdesc->cid_to_gid[z] = (data[z * 2] << 8) + data[z * 2 + 1];
- }
- else if (cidtogidmap && !pdf_name_eq(ctx, PDF_NAME(Identity), cidtogidmap))
- {
- fz_warn(ctx, "ignoring unknown CIDToGIDMap entry");
- }
- /* if font is external, cidtogidmap should not be identity */
- /* so we map from cid to unicode and then map that through the (3 1) */
- /* unicode cmap to get a glyph id */
- else if (fontdesc->font->flags.ft_substitute)
- {
- fz_ft_lock(ctx);
- fterr = FT_Select_Charmap(face, ft_encoding_unicode);
- fz_ft_unlock(ctx);
- if (fterr)
- fz_throw(ctx, FZ_ERROR_SYNTAX, "no unicode cmap when emulating CID font: %s", ft_error_string(fterr));
- if (!strcmp(collection, "Adobe-CNS1"))
- fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
- else if (!strcmp(collection, "Adobe-GB1"))
- fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
- else if (!strcmp(collection, "Adobe-Japan1"))
- fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
- else if (!strcmp(collection, "Adobe-Japan2"))
- fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2");
- else if (!strcmp(collection, "Adobe-Korea1"))
- fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
- }
- pdf_load_to_unicode(ctx, doc, fontdesc, NULL, collection, to_unicode);
- /* If we have an identity encoding, we're supposed to use the glyph ids directly.
- * If we only have a substitute font, that won't work.
- * Make a last ditch attempt by using
- * the ToUnicode table if it exists to map via the substitute font's cmap. */
- if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->flags.ft_substitute)
- {
- if (!fontdesc->to_ttf_cmap)
- {
- if (fontdesc->to_unicode)
- {
- // Use ToUnicode from PDF file if possible.
- fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode);
- }
- else
- {
- // Attempt a generic ToUnicode (default MacRoman ordering for TrueType)
- fontdesc->to_ttf_cmap = pdf_load_builtin_cmap(ctx, "TrueType-UCS2");
- }
- }
- if (fontdesc->to_ttf_cmap)
- {
- fz_warn(ctx, "non-embedded font using identity encoding: %s (mapping via %s)", basefont, fontdesc->to_ttf_cmap->cmap_name);
- if (!fontdesc->to_unicode)
- fontdesc->to_unicode = pdf_keep_cmap(ctx, fontdesc->to_ttf_cmap);
- }
- else
- fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont);
- }
- /* Horizontal */
- dw = pdf_dict_get_int_default(ctx, dict, PDF_NAME(DW), 1000);
- pdf_set_default_hmtx(ctx, fontdesc, dw);
- widths = pdf_dict_get(ctx, dict, PDF_NAME(W));
- if (widths)
- {
- int c0, c1, w, n, m;
- n = pdf_array_len(ctx, widths);
- for (i = 0; i < n; )
- {
- c0 = pdf_array_get_int(ctx, widths, i);
- obj = pdf_array_get(ctx, widths, i + 1);
- if (pdf_is_array(ctx, obj))
- {
- m = pdf_array_len(ctx, obj);
- for (k = 0; k < m; k++)
- {
- w = pdf_array_get_int(ctx, obj, k);
- pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w);
- }
- i += 2;
- }
- else
- {
- c1 = pdf_to_int(ctx, obj);
- w = pdf_array_get_int(ctx, widths, i + 2);
- pdf_add_hmtx(ctx, fontdesc, c0, c1, w);
- i += 3;
- }
- }
- }
- pdf_end_hmtx(ctx, fontdesc);
- /* Vertical */
- if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1)
- {
- int dw2y = 880;
- int dw2w = -1000;
- obj = pdf_dict_get(ctx, dict, PDF_NAME(DW2));
- if (obj)
- {
- dw2y = pdf_array_get_int(ctx, obj, 0);
- dw2w = pdf_array_get_int(ctx, obj, 1);
- }
- pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w);
- widths = pdf_dict_get(ctx, dict, PDF_NAME(W2));
- if (widths)
- {
- int c0, c1, w, x, y, n;
- n = pdf_array_len(ctx, widths);
- for (i = 0; i < n; )
- {
- c0 = pdf_array_get_int(ctx, widths, i);
- obj = pdf_array_get(ctx, widths, i + 1);
- if (pdf_is_array(ctx, obj))
- {
- int m = pdf_array_len(ctx, obj);
- for (k = 0; k * 3 < m; k ++)
- {
- w = pdf_array_get_int(ctx, obj, k * 3 + 0);
- x = pdf_array_get_int(ctx, obj, k * 3 + 1);
- y = pdf_array_get_int(ctx, obj, k * 3 + 2);
- pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w);
- }
- i += 2;
- }
- else
- {
- c1 = pdf_to_int(ctx, obj);
- w = pdf_array_get_int(ctx, widths, i + 2);
- x = pdf_array_get_int(ctx, widths, i + 3);
- y = pdf_array_get_int(ctx, widths, i + 4);
- pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w);
- i += 5;
- }
- }
- }
- pdf_end_vmtx(ctx, fontdesc);
- }
- }
- fz_always(ctx)
- fz_drop_buffer(ctx, buf);
- fz_catch(ctx)
- {
- pdf_drop_font(ctx, fontdesc);
- fz_rethrow(ctx);
- }
- return fontdesc;
- }
- static pdf_font_desc *
- pdf_load_type0_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
- {
- pdf_obj *dfonts;
- pdf_obj *dfont;
- pdf_obj *subtype;
- pdf_obj *encoding;
- pdf_obj *to_unicode;
- dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts));
- if (!dfonts)
- fz_throw(ctx, FZ_ERROR_SYNTAX, "cid font is missing descendant fonts");
- dfont = pdf_array_get(ctx, dfonts, 0);
- subtype = pdf_dict_get(ctx, dfont, PDF_NAME(Subtype));
- encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding));
- to_unicode = pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode));
- if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0)))
- return load_cid_font(ctx, doc, dfont, encoding, to_unicode);
- if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2)))
- return load_cid_font(ctx, doc, dfont, encoding, to_unicode);
- fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown cid font type");
- }
- /*
- * FontDescriptor
- */
- static void
- pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict,
- const char *collection, const char *basefont, int iscidfont)
- {
- pdf_obj *obj1, *obj2, *obj3, *obj;
- const char *fontname;
- FT_Face face;
- /* Prefer BaseFont; don't bother with FontName */
- fontname = basefont;
- fontdesc->flags = pdf_dict_get_int(ctx, dict, PDF_NAME(Flags));
- fontdesc->italic_angle = pdf_dict_get_real(ctx, dict, PDF_NAME(ItalicAngle));
- /* fontdesc->ascent and descent have already been set to sane defaults */
- fontdesc->cap_height = pdf_dict_get_real(ctx, dict, PDF_NAME(CapHeight));
- fontdesc->x_height = pdf_dict_get_real(ctx, dict, PDF_NAME(XHeight));
- fontdesc->missing_width = pdf_dict_get_real(ctx, dict, PDF_NAME(MissingWidth));
- obj1 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile));
- obj2 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile2));
- obj3 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile3));
- obj = obj1 ? obj1 : obj2 ? obj2 : obj3;
- if (pdf_is_indirect(ctx, obj))
- {
- fz_try(ctx)
- {
- pdf_load_embedded_font(ctx, doc, fontdesc, fontname, obj);
- }
- fz_catch(ctx)
- {
- fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
- fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
- fz_report_error(ctx);
- fz_warn(ctx, "ignored error when loading embedded font; attempting to load system font");
- if (!iscidfont && fontname != pdf_clean_font_name(fontname))
- pdf_load_builtin_font(ctx, fontdesc, fontname, 1);
- else
- pdf_load_system_font(ctx, fontdesc, fontname, collection);
- }
- }
- else
- {
- if (!iscidfont && fontname != pdf_clean_font_name(fontname))
- pdf_load_builtin_font(ctx, fontdesc, fontname, 1);
- else
- pdf_load_system_font(ctx, fontdesc, fontname, collection);
- }
- /* Check for DynaLab fonts that must use hinting */
- face = fontdesc->font->ft_face;
- if (ft_kind(ctx, face) == TRUETYPE)
- {
- /* FreeType's own 'tricky' font detection needs a bit of help */
- if (is_dynalab(fontdesc->font->name))
- face->face_flags |= FT_FACE_FLAG_TRICKY;
- fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM;
- fontdesc->descent = 1000.0f * face->descender / face->units_per_EM;
- }
- /* Prefer FontDescriptor Ascent/Descent values to embedded font's */
- fontdesc->ascent = pdf_dict_get_real_default(ctx, dict, PDF_NAME(Ascent), fontdesc->ascent);
- fontdesc->descent = pdf_dict_get_real_default(ctx, dict, PDF_NAME(Descent), fontdesc->descent);
- /* Allow for naughty producers that give us a positive descent. */
- if (fontdesc->descent > 0)
- fontdesc->descent = -fontdesc->descent;
- if (fontdesc->ascent <= 0 || fontdesc->ascent > FZ_MAX_TRUSTWORTHY_ASCENT * 1000 ||
- fontdesc->descent < FZ_MAX_TRUSTWORTHY_DESCENT * 1000)
- {
- fz_warn(ctx, "bogus font ascent/descent values (%g / %g)", fontdesc->ascent, fontdesc->descent);
- fontdesc->font->ascender = 0.8f;
- fontdesc->font->descender = -0.2f;
- fontdesc->font->ascdesc_src = FZ_ASCDESC_DEFAULT;
- }
- else
- {
- fontdesc->font->ascender = fontdesc->ascent / 1000.0f;
- fontdesc->font->descender = fontdesc->descent / 1000.0f;
- fontdesc->font->ascdesc_src = FZ_ASCDESC_FROM_FONT;
- }
- }
- static void
- pdf_make_width_table(fz_context *ctx, pdf_font_desc *fontdesc)
- {
- fz_font *font = fontdesc->font;
- int i, k, n, cid, gid;
- n = 0;
- for (i = 0; i < fontdesc->hmtx_len; i++)
- {
- for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++)
- {
- cid = pdf_lookup_cmap(fontdesc->encoding, k);
- gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
- if (gid > n)
- n = gid;
- }
- }
- font->width_count = n + 1;
- font->width_table = Memento_label(fz_malloc_array(ctx, font->width_count, short), "font_widths");
- fontdesc->size += font->width_count * sizeof(short);
- font->width_default = fontdesc->dhmtx.w;
- for (i = 0; i < font->width_count; i++)
- font->width_table[i] = -1;
- for (i = 0; i < fontdesc->hmtx_len; i++)
- {
- for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++)
- {
- cid = pdf_lookup_cmap(fontdesc->encoding, k);
- gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
- if (gid >= 0 && gid < font->width_count)
- font->width_table[gid] = fz_maxi(fontdesc->hmtx[i].w, font->width_table[gid]);
- }
- }
- for (i = 0; i < font->width_count; i++)
- if (font->width_table[i] == -1)
- font->width_table[i] = font->width_default;
- }
- pdf_font_desc *
- pdf_load_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict)
- {
- pdf_obj *subtype;
- pdf_obj *dfonts;
- pdf_obj *charprocs;
- pdf_font_desc *fontdesc = NULL;
- int type3 = 0;
- if ((fontdesc = pdf_find_item(ctx, pdf_drop_font_imp, dict)) != NULL)
- {
- if (fontdesc->t3loading)
- {
- pdf_drop_font(ctx, fontdesc);
- fz_throw(ctx, FZ_ERROR_SYNTAX, "recursive type3 font");
- }
- return fontdesc;
- }
- subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
- dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts));
- charprocs = pdf_dict_get(ctx, dict, PDF_NAME(CharProcs));
- if (pdf_name_eq(ctx, subtype, PDF_NAME(Type0)))
- fontdesc = pdf_load_type0_font(ctx, doc, dict);
- else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
- fontdesc = pdf_load_simple_font(ctx, doc, dict);
- else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1)))
- fontdesc = pdf_load_simple_font(ctx, doc, dict);
- else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)))
- fontdesc = pdf_load_simple_font(ctx, doc, dict);
- else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type3)))
- {
- fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict);
- type3 = 1;
- }
- else if (charprocs)
- {
- fz_warn(ctx, "unknown font format, guessing type3.");
- fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict);
- type3 = 1;
- }
- else if (dfonts)
- {
- fz_warn(ctx, "unknown font format, guessing type0.");
- fontdesc = pdf_load_type0_font(ctx, doc, dict);
- }
- else
- {
- fz_warn(ctx, "unknown font format, guessing type1 or truetype.");
- fontdesc = pdf_load_simple_font(ctx, doc, dict);
- }
- fz_try(ctx)
- {
- /* Create glyph width table for stretching substitute fonts and text extraction. */
- pdf_make_width_table(ctx, fontdesc);
- pdf_store_item(ctx, dict, fontdesc, fontdesc->size);
- /* Load CharProcs */
- if (type3)
- {
- fontdesc->t3loading = 1;
- fz_try(ctx)
- pdf_load_type3_glyphs(ctx, doc, fontdesc);
- fz_always(ctx)
- fontdesc->t3loading = 0;
- fz_catch(ctx)
- {
- pdf_remove_item(ctx, fontdesc->storable.drop, dict);
- fz_rethrow(ctx);
- }
- }
- }
- fz_catch(ctx)
- {
- pdf_drop_font(ctx, fontdesc);
- fz_rethrow(ctx);
- }
- return fontdesc;
- }
- void
- pdf_print_font(fz_context *ctx, fz_output *out, pdf_font_desc *fontdesc)
- {
- int i;
- fz_write_printf(ctx, out, "fontdesc {\n");
- if (fontdesc->font->ft_face)
- fz_write_printf(ctx, out, "\tfreetype font\n");
- if (fontdesc->font->t3procs)
- fz_write_printf(ctx, out, "\ttype3 font\n");
- fz_write_printf(ctx, out, "\twmode %d\n", fontdesc->wmode);
- fz_write_printf(ctx, out, "\tDW %d\n", fontdesc->dhmtx.w);
- fz_write_printf(ctx, out, "\tW {\n");
- for (i = 0; i < fontdesc->hmtx_len; i++)
- fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d\n",
- fontdesc->hmtx[i].lo, fontdesc->hmtx[i].hi, fontdesc->hmtx[i].w);
- fz_write_printf(ctx, out, "\t}\n");
- if (fontdesc->wmode)
- {
- fz_write_printf(ctx, out, "\tDW2 [%d %d]\n", fontdesc->dvmtx.y, fontdesc->dvmtx.w);
- fz_write_printf(ctx, out, "\tW2 {\n");
- for (i = 0; i < fontdesc->vmtx_len; i++)
- fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d %d %d\n", fontdesc->vmtx[i].lo, fontdesc->vmtx[i].hi,
- fontdesc->vmtx[i].x, fontdesc->vmtx[i].y, fontdesc->vmtx[i].w);
- fz_write_printf(ctx, out, "\t}\n");
- }
- }
|