makesubset.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. #!/usr/bin/env python3
  2. # Convert MES-2 (or WGL4) character set to list of glyphs for font subsetting.
  3. # Also add small-caps glyph names for small letters and ligatures.
  4. import sys
  # Map each Unicode code point (int) to the list of glyph names that
  # scripts/glyphlist.txt assigns to it.  Entries have the form 'name;XXXX'.
  glyphs = {}
  # A context manager closes the glyph list as soon as it has been read.
  with open("scripts/glyphlist.txt") as glyph_file:
      for line in glyph_file:
          entry = line.rstrip()
          # Skip blank lines and '#' comment lines.  The test is made on the
          # stripped text: a raw line always still holds its newline, so
          # checking the raw length would never filter out a blank line.
          if not entry or entry[0] == '#':
              continue
          n, u = entry.split(';')
          # Only entries whose code field is exactly four characters are used
          # (presumably this drops multi-code-point sequences -- confirm).
          if len(u) != 4:
              continue
          u = int(u, base=16)
          if 0x0000 <= u <= 0x001F:
              continue  # control block 1
          if 0x007F <= u <= 0x009F:
              continue  # control block 2
          if 0x2500 <= u <= 0x25FF:
              continue  # Box Drawing, Block Elements, Geometric Shapes
          glyphs.setdefault(u, []).append(n)

  # Ligatures are mapped to 'fi' and 'fl'; we also want them using the 'f_i' convention.
  # Maps every glyph name selected for the subset to its code point.
  table = {}
  # When true, '.sc' (small-caps) names are added as well; set by the '-sc' flag.
  do_small = False
  def load_table(fn):
      """Add glyph names for every code point listed in the file *fn*.

      The first whitespace-separated field of each line is parsed as a
      hexadecimal code point.  Blank lines and lines whose first field
      starts with '#' are skipped.  For each code point:

      * every name recorded for it in ``glyphs`` is stored in ``table``;
      * if it is 128 or above, a 'uniXXXX' name is stored as well;
      * if ``do_small`` is true and the line contains 'SMALL LETTER' or
        'SMALL LIGATURE', a '.sc' variant of each stored name is added too.

      Raises ValueError if the first field of a data line is not hexadecimal.
      """
      # The context manager closes the file even if a line fails to parse.
      with open(fn) as charset_file:
          for line in charset_file:
              fields = line.split()
              # Nothing to parse on blank or comment lines.
              if not fields or fields[0].startswith('#'):
                  continue
              is_small = ('SMALL LETTER' in line) or ('SMALL LIGATURE' in line)
              u = int(fields[0], 16)
              for n in glyphs.get(u, ()):
                  table[n] = u
                  if do_small and is_small:
                      table[n + '.sc'] = u
              if u >= 128:
                  table['uni%04X' % u] = u
                  if do_small and is_small:
                      table['uni%04X.sc' % u] = u
  def load_ligs():
      """Register the five f-ligature glyph names in ``table``.

      The plain names 'ff', 'fi', 'fl', 'ffi' and 'ffl' are always stored with
      code points U+FB00..U+FB04.  When ``do_small`` is true, the small-caps
      names 'f_f.sc', 'f_i.sc', 'f_l.sc', 'f_f_i.sc' and 'f_f_l.sc' are stored
      for the same code points.
      """
      table.update({
          'ff': 0xFB00,
          'fi': 0xFB01,
          'fl': 0xFB02,
          'ffi': 0xFB03,
          'ffl': 0xFB04,
      })
      if not do_small:
          return
      table.update({
          'f_f.sc': 0xFB00,
          'f_i.sc': 0xFB01,
          'f_l.sc': 0xFB02,
          'f_f_i.sc': 0xFB03,
          'f_f_l.sc': 0xFB04,
      })
  def main(argv):
      """Process the command line *argv* and print the resulting glyph list.

      Arguments after the program name are handled in order:

      * '-sc'      turns on small-caps names for files listed after it;
      * '-lig'     adds the f-ligature names via load_ligs();
      * '-scdump'  selects the small-caps dump output format;
      * anything else is a character set file passed to load_table().

      Without '-scdump' the sorted glyph names are printed comma-separated.
      With it, one '{0xXXXX, "name"},' entry is printed for each '.sc' name
      that does not start with 'uni' and has a non-zero code point.
      If no arguments are given, a usage line is written to stderr.
      """
      global do_small
      if len(argv) < 2:
          print('usage: python3 scripts/makesubset.py scripts/MES-2.TXT', file=sys.stderr)
          return

      dump_small_caps = False
      for arg in argv[1:]:
          if arg == '-sc':
              do_small = True
          elif arg == '-lig':
              load_ligs()
          elif arg == '-scdump':
              # Treat this as a flag.  Falling through to load_table() would
              # try to open a file literally named '-scdump'.
              dump_small_caps = True
          else:
              load_table(arg)

      if dump_small_caps:
          smcp = sorted(
              '{0x%04X, "%s"},' % (u, n)
              for n, u in table.items()
              if u > 0 and n.endswith('.sc') and not n.startswith('uni')
          )
          print('\n\t'.join(smcp))
      else:
          print(','.join(sorted(table)))


  if __name__ == '__main__':
      main(sys.argv)