| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485 |
# Step 04 - generate Java literals.
#
# Java byte-code has severe restrictions. There is no such thing as an
# "array literal" - arrays are initialized by a series of data[x] = y;
# statements. As a consequence, an N-byte array uses 7N bytes in the class,
# plus N bytes in the instantiated variable. Also, no literal can be longer
# than 64KiB.
#
# To keep dictionary data compact both in source code and in compiled format
# we use the following tricks:
#  * use String as a data container
#  * store only the lowest 7 bits; i.e. all characters fit the ASCII table;
#    this allows efficient conversion to a byte array; also ASCII characters
#    use only 1 byte of memory (UTF-8 encoding)
#  * RLE-compress the sequence of 8th bits
#
# This script generates literals used in Java code.
# Python 2 provides `unichr`; on Python 3 the name is missing and `chr` does
# the same job. Bind one name that maps a code point to a text character.
try:
    unichr  # Python 2
except NameError:
    unichr = chr  # Python 3

bin_path = "dictionary.bin"

with open(bin_path, "rb") as raw:
    data = raw.read()

# `low` receives one character per input byte, holding its lowest 7 bits.
# `hi` receives the run-length encoding of the 8th bits: each entry is a
# character whose code point is `skip_flip_offset` plus the length of a run of
# consecutive bytes sharing the same 8th bit. Runs alternate, starting with a
# "skip" run (8th bit clear), which is empty when the first byte has it set.
low = []
hi = []
is_skip = True
# NOTE(review): the bias must agree with whatever decodes SKIP_FLIP - confirm.
skip_flip_offset = 36
# No byte has been consumed yet, so the first run starts at length zero.
cntr = skip_flip_offset
# bytearray yields integers on both Python 2 and Python 3. Iterating `bytes`
# directly yields ints on Python 3, where calling ord() on them raises
# TypeError.
for value in bytearray(data):
    low.append(chr(value & 0x7F))
    if (value < 0x80) == is_skip:
        # The 8th bit matches the kind of the current run: extend it.
        cntr += 1
    else:
        # The 8th bit flipped: emit the finished run. The current byte is
        # already the first member of the next run, hence the "+ 1".
        is_skip = not is_skip
        hi.append(unichr(cntr))
        cntr = skip_flip_offset + 1
# Emit the run that was still open when the input ended.
hi.append(unichr(cntr))

# The 7-bit data is emitted as two literals of (nearly) equal size;
# presumably this keeps each one under the literal size limit - confirm.
half = len(low) // 2
low0 = low[:half]
low1 = low[half:]
def escape(chars):
    """Escape characters for embedding in a Java string literal.

    Args:
        chars: iterable of single-character strings.

    Returns:
        A list with one string per input character: a short backslash escape
        for CR, LF, TAB, double quote and backslash; a ``\\uXXXX`` escape for
        code points below 32 or at/above 127; the character itself otherwise.
    """
    short_escapes = {
        "\r": "\\r",
        "\n": "\\n",
        "\t": "\\t",
        "\"": "\\\"",
        "\\": "\\\\",
    }
    pieces = []
    for ch in chars:
        if ch in short_escapes:
            pieces.append(short_escapes[ch])
            continue
        code = ord(ch)
        if 32 <= code < 127:
            # Printable ASCII is emitted verbatim.
            pieces.append(ch)
        else:
            pieces.append("\\u%04X" % code)
    return pieces
# Destination of the generated Java declarations.
src_path = "DictionaryData.inc.java"

# Each Java constant is paired with the character sequence stored in it.
declarations = (
    ("DATA0", low0),
    ("DATA1", low1),
    ("SKIP_FLIP", hi),
)

# Build the output as a flat list of fragments: declaration prefix, escaped
# payload, closing quote with terminator - three fragments per constant.
source_code = []
for constant_name, payload in declarations:
    source_code.append(" private static final String %s = \"" % constant_name)
    source_code.append("".join(escape(payload)))
    source_code.append("\";\n")

with open(src_path, "w") as source:
    source.write("".join(source_code))
|