| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859 |
- #!/usr/bin/env python3
- # Find and extract common CMap subsets.
- # Takes flattened CMaps as input, using only the 'cidchar' sections.
- # The outputs are truncated; so use 'cmapflatten.py' to clean them up.
- import sys, os
def load_cmap_set(filename):
    """Collect the lines found inside the 'cidchar' sections of a CMap file.

    Every line is stripped of surrounding whitespace before it is examined.
    A line ending in "begincidchar" switches collection on for the lines
    that follow it; a line ending in "endcidchar" switches collection off
    and is itself not collected.

    Args:
        filename: Path of the CMap file to read.

    Returns:
        A set holding the stripped lines seen while collection was on.
    """
    cmap = set()
    active = False
    # The context manager closes the file even if reading fails part-way;
    # the previous version left the handle open until garbage collection.
    with open(filename) as cmap_file:
        for line in cmap_file:
            line = line.strip()
            # Order matters: switch off before collecting so the
            # "endcidchar" line is skipped, and switch on after collecting
            # so the opening "begincidchar" line is skipped as well.
            if line.endswith("endcidchar"):
                active = False
            if active:
                cmap.add(line)
            if line.endswith("begincidchar"):
                active = True
    return cmap
def load_cmap_prologue(filename):
    """Return the lines of a CMap file that precede its first cidchar section.

    Every line is stripped of surrounding whitespace. Reading stops at the
    first line ending in "begincidchar"; that line is not included. If no
    such line exists, all lines of the file are returned.

    Args:
        filename: Path of the CMap file to read.

    Returns:
        A list of the stripped prologue lines, in file order.
    """
    prologue = []
    # The context manager closes the file even though the loop exits early;
    # the previous version never closed the handle explicitly.
    with open(filename) as cmap_file:
        for line in cmap_file:
            line = line.strip()
            if line.endswith("begincidchar"):
                break
            prologue.append(line)
    return prologue
# Lines emitted after the cidchar entries of every CMap this script writes.
epilogue = ["endcidchar"]
# Fail with a usage message instead of an IndexError traceback when the
# output name or the first input CMap is missing from the command line.
if len(sys.argv) < 3:
    sys.exit("usage: %s OUTPUT-CMAP INPUT-CMAP [INPUT-CMAP ...]" % sys.argv[0])

# The base name of the output file doubles as the name of the shared CMap.
common_name = os.path.basename(sys.argv[1])

# First find the common subset: the cidchar lines present in every input.
common = load_cmap_set(sys.argv[2])
for f in sys.argv[3:]:
    common &= load_cmap_set(f)
def print_cmap(filename, prologue, cmap):
    """Write a CMap file consisting of a prologue and one cidchar section.

    Prologue lines are copied to the output except those ending in
    "usecmap". Directly after a line equal to 'begincmap', a
    "/<common_name> usecmap" line is written, where ``common_name`` is the
    module-level name of the shared CMap. The entries of ``cmap`` follow in
    sorted order, preceded by a "<count> begincidchar" line and followed by
    the lines of the module-level ``epilogue``.

    Args:
        filename: Path of the file to create or overwrite.
        prologue: Iterable of lines to write ahead of the cidchar section.
        cmap: Collection of cidchar entry lines to write.
    """
    # The context manager guarantees the output is flushed and closed; the
    # previous version relied on garbage collection to close the handle.
    with open(filename, "w") as out:
        for line in prologue:
            # Any existing 'usecmap' reference is dropped ...
            if not line.endswith("usecmap"):
                print(line, file=out)
            # ... and a reference to the shared CMap is added after 'begincmap'.
            if line == 'begincmap':
                print("/"+common_name, "usecmap", file=out)
        print(len(cmap), "begincidchar", file=out)
        for line in sorted(cmap):
            print(line, file=out)
        for line in epilogue:
            print(line, file=out)
# Write the shared subset to the output file named on the command line.
common_prologue = ["/CMapName /%s" % common_name]
print_cmap(sys.argv[1], common_prologue, common)

# For every input, write the entries not in the shared subset to a sibling
# file carrying a ".shared" suffix, reusing that input's own prologue.
for source_path in sys.argv[2:]:
    unique_entries = load_cmap_set(source_path).difference(common)
    source_prologue = load_cmap_prologue(source_path)
    print_cmap(source_path + ".shared", source_prologue, unique_entries)
|