cmapshare.py 1.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. #!/usr/bin/env python3
  2. # Find and extract common CMap subsets.
  3. # Takes flattened CMaps as input, using only the 'cidchar' sections.
  4. # The outputs are truncated, so use 'cmapflatten.py' to clean them up.
  5. import sys, os
  6. def load_cmap_set(filename):
  7. cmap = set()
  8. active = False
  9. for line in open(filename).readlines():
  10. line = line.strip()
  11. if line.endswith("endcidchar"): active = False
  12. if active: cmap.add(line)
  13. if line.endswith("begincidchar"): active = True
  14. return cmap
  15. def load_cmap_prologue(filename):
  16. prologue = []
  17. for line in open(filename).readlines():
  18. line = line.strip()
  19. if line.endswith("begincidchar"):
  20. break
  21. prologue.append(line)
  22. return prologue
  23. epilogue = [
  24. 'endcidchar',
  25. ]
  26. common_name = os.path.basename(sys.argv[1])
  27. # First find the common subset
  28. common = load_cmap_set(sys.argv[2])
  29. for f in sys.argv[3:]:
  30. common &= load_cmap_set(f)
  31. def print_cmap(filename, prologue, cmap):
  32. out = open(filename, "w")
  33. for line in prologue:
  34. if not line.endswith("usecmap"):
  35. print(line, file=out)
  36. if line == 'begincmap':
  37. print("/"+common_name, "usecmap", file=out)
  38. print(len(cmap), "begincidchar", file=out)
  39. for line in sorted(cmap):
  40. print(line, file=out)
  41. for line in epilogue:
  42. print(line, file=out)
  43. # Print common subset
  44. print_cmap(sys.argv[1], ["/CMapName /%s" % common_name], common)
  45. # Now find unique bits
  46. for f in sys.argv[2:]:
  47. cmap = load_cmap_set(f) - common
  48. prologue = load_cmap_prologue(f)
  49. print_cmap(f+".shared", prologue, cmap)