| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217 |
- #!/usr/bin/env python3
- # Parse a CMap file and dump it as a C struct.
- import sys
- # Decode a subset of CMap syntax (only what is needed for our built-in resources)
- # We require that tokens are whitespace separated.
def _parse_cmap(filename):
    """Parse the subset of CMap syntax used by the built-in resources.

    Tokens must be whitespace separated and every entry must sit on its own
    line.  Lines whose first character is '%' are skipped as comments.

    Returns a tuple ``(cmapname, usecmap, wmode, codespacerange, mapping)``:
    ``codespacerange`` is a list of ``(byte_count, low, high)`` tuples and
    ``mapping`` maps each source code either to an int (a single output
    value) or to a list of ints (a one-to-many mapping).
    """
    # "N beginX" and bare "beginX" both open section X; any "end..." closes it.
    section_names = ('codespacerange', 'cidrange', 'bfrange', 'cidchar', 'bfchar')
    begin_keywords = {'begin' + name: name for name in section_names}

    codespacerange = []
    usecmap = ""
    cmapname = ""
    wmode = 0
    mapping = {}

    def tocode(s):
        # <hex> tokens are hexadecimal, anything else is a decimal integer.
        if s[0] == '<' and s[-1] == '>':
            return int(s[1:-1], 16)
        return int(s, 10)

    def bf_values(bf):
        # Strip the enclosing < > and split into 4-hex-digit units.
        digits = bf[1:-1]
        return [int(digits[i:i + 4], 16) for i in range(0, len(digits), 4)]

    def add_bf(lo, v):
        # Decode unicode surrogate pairs into a single code point.
        if len(v) == 2 and 0xd800 <= v[0] <= 0xdbff and 0xdc00 <= v[1] <= 0xdfff:
            mapping[lo] = ((v[0] - 0xd800) << 10) + (v[1] - 0xdc00) + 0x10000
        elif len(v) == 1:
            mapping[lo] = v[0]
        elif len(v) <= 8:
            # Store a copy: bfrange keeps mutating its list between calls.
            mapping[lo] = v[:]
        else:
            print("/* warning: too long one-to-many mapping: %s */" % (v,))

    current = None
    # The context manager closes the file even if a malformed token raises.
    with open(filename, "r") as cmap_file:
        for raw_line in cmap_file:
            # Only a '%' in the very first column marks a comment line.
            if raw_line.startswith('%'):
                continue
            tok = raw_line.split()
            if not tok:
                continue
            if tok[0] == '/CMapName' and len(tok) > 1:
                cmapname = tok[1][1:]  # drop the leading '/'
            elif tok[0] == '/WMode' and len(tok) > 1:
                wmode = int(tok[1])
            elif len(tok) > 1 and tok[1] == 'usecmap':
                usecmap = tok[0][1:]  # drop the leading '/'
            elif len(tok) > 1 and tok[1] in begin_keywords:
                current = begin_keywords[tok[1]]
            elif tok[0] in begin_keywords:
                current = begin_keywords[tok[0]]
            elif tok[0].startswith("end"):
                current = None
            elif current == 'codespacerange' and len(tok) == 2:
                # Byte width comes from the hex digit count of the low bound;
                # integer division keeps it an int for the later %d formatting.
                nbytes = (len(tok[0]) - 2) // 2
                codespacerange.append((nbytes, tocode(tok[0]), tocode(tok[1])))
            elif current == 'cidrange' and len(tok) == 3:
                lo, hi, cid = tocode(tok[0]), tocode(tok[1]), tocode(tok[2])
                for offset in range(hi - lo + 1):
                    mapping[lo + offset] = cid + offset
            elif current == 'cidchar' and len(tok) == 2:
                code, cid = tocode(tok[0]), tocode(tok[1])
                mapping[code] = cid
            elif current == 'bfchar' and len(tok) == 2:
                add_bf(tocode(tok[0]), bf_values(tok[1]))
            elif current == 'bfrange' and len(tok) == 3:
                lo, hi = tocode(tok[0]), tocode(tok[1])
                v = bf_values(tok[2])
                while lo <= hi:
                    add_bf(lo, v)
                    lo += 1
                    # Only the last unit advances across the range.
                    v[-1] += 1

    return cmapname, usecmap, wmode, codespacerange, mapping


def _build_ranges(mapping):
    """Coalesce a code mapping into the tables that get printed.

    Returns ``(ranges, xranges, mranges, mdata)``.  Runs of consecutive codes
    with consecutive int values become ``(lo, hi, first_value)`` tuples, which
    go to ``xranges`` when lo, hi or first_value exceeds 0xffff and to
    ``ranges`` otherwise.  List values become ``(code, offset)`` entries in
    ``mranges``, with ``[len, values...]`` appended to ``mdata`` at offset.
    """
    ranges = []
    xranges = []
    mranges = []
    mdata = []

    def flush(lo, hi, v_lo):
        # A negative lo is the "no open run" sentinel.
        if lo < 0:
            return
        if lo > 0xffff or hi > 0xffff or v_lo > 0xffff:
            xranges.append((lo, hi, v_lo))
        else:
            ranges.append((lo, hi, v_lo))

    out_lo = out_hi = -100
    out_v_lo = out_v_hi = 0
    for code in sorted(mapping):
        v = mapping[code]
        if not isinstance(v, int):
            # One-to-many entry: close the open run and record it separately.
            flush(out_lo, out_hi, out_v_lo)
            out_lo = out_hi = -100
            mranges.append((code, len(mdata)))
            mdata.append(len(v))
            mdata.extend(v)
        elif code == out_hi + 1 and v == out_v_hi + 1:
            # Both code and value continue the open run.
            out_hi += 1
            out_v_hi += 1
        else:
            flush(out_lo, out_hi, out_v_lo)
            out_lo = out_hi = code
            out_v_lo = out_v_hi = v
    flush(out_lo, out_hi, out_v_lo)

    return ranges, xranges, mranges, mdata


def dumpcmap(filename):
    """Parse the CMap file *filename* and print it to stdout as C source.

    The output is a set of static range/table arrays (only the non-empty ones)
    followed by a ``pdf_cmap`` struct initializer that references them.  '-' in
    the CMap name is replaced by '_' to form the C identifiers.

    Raises whatever ``open`` raises for an unreadable file, and ValueError for
    tokens that are not valid hex/decimal numbers.
    """
    cmapname, usecmap, wmode, codespacerange, mapping = _parse_cmap(filename)
    ranges, xranges, mranges, mdata = _build_ranges(mapping)

    # Print C file
    cname = cmapname.replace('-', '_')

    print()
    print("/*", cmapname, "*/")
    print()

    if ranges:
        print("static const pdf_range cmap_%s_ranges[] = {" % cname)
        for r in ranges:
            print("{0x%x,0x%x,0x%x}," % r)
        print("};")
        print()

    if xranges:
        print("static const pdf_xrange cmap_%s_xranges[] = {" % cname)
        for r in xranges:
            print("{0x%x,0x%x,0x%x}," % r)
        print("};")
        print()

    if mranges:
        print("static const pdf_mrange cmap_%s_mranges[] = {" % cname)
        for r in mranges:
            print("{0x%x,0x%x}," % r)
        print("};")
        print()
        print("static const int cmap_%s_table[] = {" % cname)
        # mdata is a sequence of [len, values...] records; start a new output
        # line after each record (len + 1 numbers).
        n = mdata[0]
        i = 0
        for r in mdata:
            if i <= n:
                sys.stdout.write("0x%x," % r)
                i = i + 1
            else:
                sys.stdout.write("\n0x%x," % r)
                i = 1
                n = r
        sys.stdout.write("\n")
        print("};")
        print()

    print("static pdf_cmap cmap_%s = {" % cname)
    print("\t{ -1, pdf_drop_cmap_imp },")
    print("\t/* cmapname */ \"%s\"," % cmapname)
    print("\t/* usecmap */ \"%s\", NULL," % usecmap)
    print("\t/* wmode */ %d," % wmode)
    print("\t/* codespaces */ %d, {" % len(codespacerange))
    if codespacerange:
        for codespace in codespacerange:
            # Zero-pad the bounds to two hex digits per codespace byte.
            fmt = "\t\t{ %%d, 0x%%0%dx, 0x%%0%dx }," % (codespace[0]*2, codespace[0]*2)
            print(fmt % codespace)
    else:
        print("\t\t{ 0, 0, 0 },")
    print("\t},")

    if ranges:
        print("\t%d, %d, (pdf_range*)cmap_%s_ranges," % (len(ranges),len(ranges),cname))
    else:
        print("\t0, 0, NULL, /* ranges */")

    if xranges:
        print("\t%d, %d, (pdf_xrange*)cmap_%s_xranges," % (len(xranges),len(xranges),cname))
    else:
        print("\t0, 0, NULL, /* xranges */")

    if mranges:
        print("\t%d, %d, (pdf_mrange*)cmap_%s_mranges," % (len(mranges),len(mranges),cname))
    else:
        print("\t0, 0, NULL, /* mranges */")

    if mdata:
        print("\t%d, %d, (int*)cmap_%s_table," % (len(mdata),len(mdata),cname))
    else:
        print("\t0, 0, NULL, /* table */")

    print("\t0, 0, 0, NULL /* splay tree */")
    print("};")
# Emit the "generated file" banner once, then dump one C struct for every
# CMap file named on the command line.
sys.stdout.write("/* This is an automatically generated file. Do not edit. */" + "\n")
for cmap_path in sys.argv[1:]:
    dumpcmap(cmap_path)
|