#!/usr/bin/env python3
# Parse a Uni* CMap file and flatten it: every 'cidrange' is expanded so the
# output, printed on stdout, contains a single 'cidchar' section.
#
# The Uni* CMap files only have 'cidchar' and 'cidrange' sections, never
# 'bfchar' or 'bfrange'.
import sys
  7. def flattencmap(filename):
  8. codespacerange = []
  9. usecmap = ""
  10. cmapname = ""
  11. cmapversion = "1.0"
  12. csi_registry = "(Adobe)"
  13. csi_ordering = "(Unknown)"
  14. csi_supplement = 1
  15. wmode = 0
  16. map = {}
  17. def tocode(s):
  18. if s[0] == '<' and s[-1] == '>':
  19. return int(s[1:-1], 16)
  20. return int(s, 10)
  21. def map_cidchar(lo, v):
  22. map[lo] = v
  23. def map_cidrange(lo, hi, v):
  24. while lo <= hi:
  25. map[lo] = v
  26. lo = lo + 1
  27. v = v + 1
  28. current = None
  29. for line in open(filename, "r").readlines():
  30. if line[0] == '%':
  31. continue
  32. line = line.strip().split()
  33. if len(line) == 0:
  34. continue
  35. if line[0] == '/CMapVersion': cmapversion = line[1]
  36. elif line[0] == '/CMapName': cmapname = line[1][1:]
  37. elif line[0] == '/WMode': wmode = int(line[1])
  38. elif line[0] == '/Registry': csi_registry = line[1]
  39. elif line[0] == '/Ordering': csi_ordering = line[1]
  40. elif line[0] == '/Supplement': csi_supplement = line[1]
  41. elif len(line) > 1 and line[1] == 'usecmap': usecmap = line[0][1:]
  42. elif len(line) > 1 and line[1] == 'begincodespacerange': current = 'codespacerange'
  43. elif len(line) > 1 and line[1] == 'begincidrange': current = 'cidrange'
  44. elif len(line) > 1 and line[1] == 'begincidchar': current = 'cidchar'
  45. elif line[0].startswith("end"):
  46. current = None
  47. elif current == 'codespacerange' and len(line) == 2:
  48. n, a, b = (len(line[0])-2)/2, tocode(line[0]), tocode(line[1])
  49. codespacerange.append((n, a, b))
  50. elif current == 'cidrange' and len(line) == 3:
  51. a, b, c = tocode(line[0]), tocode(line[1]), tocode(line[2])
  52. map_cidrange(a, b, c)
  53. elif current == 'cidchar' and len(line) == 2:
  54. a, b = tocode(line[0]), tocode(line[1])
  55. map_cidchar(a, b)
  56. # Print flattened CMap file
  57. print("%!PS-Adobe-3.0 Resource-CMap")
  58. print("%%DocumentNeededResources: procset (CIDInit)")
  59. print("%%IncludeResource: procset (CIDInit)")
  60. print("%%%%BeginResource: CMap (%s)" % cmapname)
  61. print("%%%%Version: %s" % cmapversion)
  62. print("%%EndComments")
  63. print("/CIDInit /ProcSet findresource begin")
  64. print("12 dict begin")
  65. print("begincmap")
  66. if usecmap: print("/%s usecmap" % usecmap)
  67. print("/CIDSystemInfo 3 dict dup begin")
  68. print(" /Registry %s def" % csi_registry)
  69. print(" /Ordering %s def" % csi_ordering)
  70. print(" /Supplement %s def" % csi_supplement)
  71. print("end def")
  72. print("/CMapName /%s def" % cmapname)
  73. print("/CMapVersion %s def" % cmapversion)
  74. print("/CMapType 1 def")
  75. print("/WMode %d def" % wmode)
  76. if len(codespacerange):
  77. print("%d begincodespacerange" % len(codespacerange))
  78. for r in codespacerange:
  79. fmt = "<%%0%dx> <%%0%dx>" % (r[0]*2, r[0]*2)
  80. print(fmt % (r[1], r[2]))
  81. print("endcodespacerange")
  82. keys = list(map.keys())
  83. keys.sort()
  84. print("%d begincidchar" % len(keys))
  85. for code in keys:
  86. v = map[code]
  87. print("<%04x> %d" % (code, v))
  88. print("endcidchar")
  89. print("endcmap")
  90. print("CMapName currentdict /CMap defineresource pop")
  91. print("end")
  92. print("end")
  93. print("%%EndResource")
  94. print("%%EOF")
  95. for arg in sys.argv[1:]:
  96. flattencmap(arg)