gen_test_tab.php 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. <?php
  2. /* Generate lookup table from unicode.org mapping file (SHIFTJIS.TXT by default). */
  3. /*
  4. libzint - the open source barcode library
  5. Copyright (C) 2019-2022 Robin Stuart <rstuart114@gmail.com>
  6. */
/* To create backend/tests/test_sjis_tab.h (from the project root directory):
 *
 *   php backend/tests/tools/gen_test_tab.php
 *
 * To create backend/tests/test_gb2312_tab.h:
 *
 *   php backend/tests/tools/gen_test_tab.php -f GB2312.TXT -s gb2312_tab
 *
 * To create backend/tests/test_gbk_tab.h:
 *
 *   php backend/tests/tools/gen_test_tab.php -f CP936.TXT -s gbk_tab
 *
 * To create backend/tests/test_gb18030_tab.h (note that backend/tests/tools/data/GB18030.TXT
 * will have to be downloaded first from https://haible.de/bruno/charsets/conversion-tables/GB18030.html
 * using the version jdk-1.4.2/GB18030.TXT):
 *
 *   php backend/tests/tools/gen_test_tab.php -f GB18030.TXT -s gb18030_tab
 *
 * To create backend/tests/test_big5_tab.h:
 *
 *   php backend/tests/tools/gen_test_tab.php -f BIG5.TXT -s big5_tab
 *
 * To create backend/tests/test_ksx1001_tab.h:
 *
 *   php backend/tests/tools/gen_test_tab.php -f KSX1001.TXT -s ksx1001_tab
 *
 */
  34. $basename = basename(__FILE__);
  35. $dirname = dirname(__FILE__);
  36. $opts = getopt('d:f:o:s:');
  37. $data_dirname = isset($opts['d']) ? $opts['d'] : ($dirname . '/../../tools/data'); // Where to load file from.
  38. $file_name = isset($opts['f']) ? $opts['f'] : 'SHIFTJIS.TXT'; // Name of file.
  39. $out_dirname = isset($opts['o']) ? $opts['o'] : ($dirname . '/..'); // Where to put output.
  40. $suffix_name = isset($opts['s']) ? $opts['s'] : 'sjis_tab'; // Suffix of table and output file.
  41. $file = $data_dirname . '/' . $file_name;
  42. // Read the file.
  43. if (($get = file_get_contents($file)) === false) {
  44. error_log($error = "$basename: ERROR: Could not read mapping file \"$file\"");
  45. exit($error . PHP_EOL);
  46. }
  47. $lines = explode("\n", $get);
  48. // Parse the file.
  49. $tab_lines = array();
  50. $sort = array();
  51. foreach ($lines as $line) {
  52. $line = trim($line);
  53. if ($line === '' || strncmp($line, '0x', 2) !== 0 || strpos($line, "*** NO MAPPING ***") !== false) {
  54. continue;
  55. }
  56. if (preg_match('/^0x([0-9A-F]{2,8})[ \t]+0x([0-9A-F]{5})/', $line)) { // Exclude U+10000..10FFFF to save space
  57. continue;
  58. }
  59. $tab_lines[] = preg_replace_callback('/^0x([0-9A-F]{2,8})[ \t]+0x([0-9A-F]{4}).*$/', function ($matches) {
  60. global $sort;
  61. $mb = hexdec($matches[1]);
  62. $unicode = hexdec($matches[2]);
  63. $sort[] = $unicode;
  64. return sprintf(" 0x%04X, 0x%04X,", $mb, $unicode);
  65. }, $line);
  66. }
  67. array_multisort($sort, $tab_lines);
  68. // Output.
  69. $out = array();
  70. $out[] = '/* Generated by ' . $basename . ' from ' . $file_name . ' */';
  71. $out[] = 'static const unsigned int test_' . $suffix_name . '[] = {';
  72. $out = array_merge($out, $tab_lines);
  73. $out[] = '};';
  74. $out[] = '';
  75. $out[] = 'static const unsigned int test_' . $suffix_name . '_ind[] = {';
  76. $first = 0;
  77. foreach ($sort as $ind => $unicode) {
  78. $div = (int)($unicode / 0x400);
  79. while ($div >= $first) {
  80. $out[] = ' ' . ($ind * 2) . ',';
  81. $first++;
  82. }
  83. }
  84. $out[] = '};';
  85. file_put_contents($out_dirname . '/test_' . $suffix_name . '.h', implode("\n", $out) . "\n");
  86. /* vim: set ts=4 sw=4 et : */