encodings.c 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. // Copyright (C) 2004-2021 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
#include "mupdf/fitz.h"
#include "mupdf/pdf.h"

#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include "encodings.h"
#include "glyphlist.h"
#include "smallcaps.h"
  29. #define FROM_UNICODE(ENC) \
  30. int l = 0; \
  31. int r = nelem(ENC##_from_unicode) - 1; \
  32. if (u < 128) \
  33. return u; \
  34. while (l <= r) \
  35. { \
  36. int m = (l + r) >> 1; \
  37. if (u < ENC##_from_unicode[m].u) \
  38. r = m - 1; \
  39. else if (u > ENC##_from_unicode[m].u) \
  40. l = m + 1; \
  41. else \
  42. return ENC##_from_unicode[m].c; \
  43. } \
  44. return -1; \
  45. int fz_iso8859_1_from_unicode(int u) { FROM_UNICODE(iso8859_1) }
  46. int fz_iso8859_7_from_unicode(int u) { FROM_UNICODE(iso8859_7) }
  47. int fz_koi8u_from_unicode(int u) { FROM_UNICODE(koi8u) }
  48. int fz_windows_1250_from_unicode(int u) { FROM_UNICODE(windows_1250) }
  49. int fz_windows_1251_from_unicode(int u) { FROM_UNICODE(windows_1251) }
  50. int fz_windows_1252_from_unicode(int u) { FROM_UNICODE(windows_1252) }
  51. int
  52. fz_unicode_from_glyph_name_strict(const char *name)
  53. {
  54. int l = 0;
  55. int r = nelem(single_name_list) - 1;
  56. while (l <= r)
  57. {
  58. int m = (l + r) >> 1;
  59. int c = strcmp(name, single_name_list[m]);
  60. if (c < 0)
  61. r = m - 1;
  62. else if (c > 0)
  63. l = m + 1;
  64. else
  65. return single_code_list[m];
  66. }
  67. return 0;
  68. }
  69. static int
  70. read_num(const char *p, int base)
  71. {
  72. char *e;
  73. int v = strtol(p, &e, base);
  74. if (*e != 0)
  75. return 0;
  76. return v;
  77. }
  78. int
  79. fz_unicode_from_glyph_name(const char *name)
  80. {
  81. char buf[64];
  82. char *p;
  83. int l = 0;
  84. int r = nelem(single_name_list) - 1;
  85. int code = 0;
  86. fz_strlcpy(buf, name, sizeof buf);
  87. /* kill anything after first period and underscore */
  88. p = strchr(buf, '.');
  89. if (p) p[0] = 0;
  90. p = strchr(buf, '_');
  91. if (p)
  92. {
  93. /* Hacky tests for alternative ligature names */
  94. if (buf[0] == 'f')
  95. {
  96. if (!strcmp(buf, "f_f"))
  97. strcpy(buf, "ff");
  98. else if (!strcmp(buf, "f_f_i"))
  99. strcpy(buf, "ffi");
  100. else if (!strcmp(buf, "f_f_l"))
  101. strcpy(buf, "ffl");
  102. else if (!strcmp(buf, "f_i"))
  103. strcpy(buf, "fi");
  104. else if (!strcmp(buf, "f_l"))
  105. strcpy(buf, "fl");
  106. else
  107. p[0] = 0;
  108. }
  109. else
  110. p[0] = 0;
  111. }
  112. while (l <= r)
  113. {
  114. int m = (l + r) >> 1;
  115. int c = strcmp(buf, single_name_list[m]);
  116. if (c < 0)
  117. r = m - 1;
  118. else if (c > 0)
  119. l = m + 1;
  120. else
  121. return single_code_list[m];
  122. }
  123. if (buf[0] == 'u' && buf[1] == 'n' && buf[2] == 'i' && strlen(buf) == 7)
  124. code = read_num(buf+3, 16);
  125. else if (buf[0] == 'u')
  126. code = read_num(buf+1, 16);
  127. else if (buf[0] == 'a' && buf[1] != 0 && buf[2] != 0)
  128. code = read_num(buf+1, 10);
  129. else
  130. code = read_num(buf, 10);
  131. return (code > 0 && code <= 0x10ffff) ? code : FZ_REPLACEMENT_CHARACTER;
  132. }
  133. static const char *empty_dup_list[] = { 0 };
  134. const char **
  135. fz_duplicate_glyph_names_from_unicode(int ucs)
  136. {
  137. int l = 0;
  138. int r = nelem(agl_dup_offsets) / 2 - 1;
  139. while (l <= r)
  140. {
  141. int m = (l + r) >> 1;
  142. if (ucs < agl_dup_offsets[m << 1])
  143. r = m - 1;
  144. else if (ucs > agl_dup_offsets[m << 1])
  145. l = m + 1;
  146. else
  147. return agl_dup_names + agl_dup_offsets[(m << 1) + 1];
  148. }
  149. return empty_dup_list;
  150. }
  151. const char *
  152. fz_glyph_name_from_unicode_sc(int u)
  153. {
  154. int l = 0;
  155. int r = nelem(glyph_name_from_unicode_sc) / 2 - 1;
  156. while (l <= r)
  157. {
  158. int m = (l + r) >> 1;
  159. if (u < glyph_name_from_unicode_sc[m].u)
  160. r = m - 1;
  161. else if (u > glyph_name_from_unicode_sc[m].u)
  162. l = m + 1;
  163. else
  164. return glyph_name_from_unicode_sc[m].n;
  165. }
  166. return NULL;
  167. }