TextEncoderTest.cpp 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. /*
  2. * Copyright 2021 gitlost
  3. */
  4. // SPDX-License-Identifier: Apache-2.0
  5. #include "CharacterSet.h"
  6. #include "TextDecoder.h"
  7. #include "TextEncoder.h"
  8. #include "gtest/gtest.h"
  9. using namespace ZXing;
  10. using namespace testing;
  11. template <typename CharT>
  12. void EnDeCode(CharacterSet cs, const CharT* in, std::string_view out)
  13. {
  14. std::string bytes = TextEncoder::FromUnicode(reinterpret_cast<const char*>(in), cs);
  15. EXPECT_EQ(bytes, out);
  16. std::string dec;
  17. TextDecoder::Append(dec, reinterpret_cast<const uint8_t*>(bytes.data()), bytes.size(), cs);
  18. EXPECT_EQ(dec, reinterpret_cast<const char*>(in));
  19. }
  20. TEST(TextEncoderTest, FullCycleEncodeDecode)
  21. {
  22. EnDeCode(CharacterSet::Cp437, u8"\u00C7", "\x80"); // LATIN CAPITAL LETTER C WITH CEDILLA
  23. EnDeCode(CharacterSet::ISO8859_1, u8"\u00A0", "\xA0"); // NO-BREAK SPACE
  24. EnDeCode(CharacterSet::ISO8859_2, u8"\u0104", "\xA1"); // LATIN CAPITAL LETTER A WITH OGONEK
  25. EnDeCode(CharacterSet::ISO8859_3, u8"\u0126", "\xA1"); // LATIN CAPITAL LETTER H WITH STROKE
  26. EnDeCode(CharacterSet::ISO8859_4, u8"\u0138", "\xA2"); // LATIN SMALL LETTER KRA
  27. EnDeCode(CharacterSet::ISO8859_5, u8"\u045F", "\xFF"); // CYRILLIC SMALL LETTER DZHE
  28. EnDeCode(CharacterSet::ISO8859_6, u8"\u0652", "\xF2"); // ARABIC SUKUN
  29. EnDeCode(CharacterSet::ISO8859_7, u8"\u03CE", "\xFE"); // GREEK SMALL LETTER OMEGA WITH TONOS
  30. EnDeCode(CharacterSet::ISO8859_8, u8"\u05EA", "\xFA"); // HEBREW LETTER TAV
  31. EnDeCode(CharacterSet::ISO8859_9, u8"\u011E", "\xD0"); // LATIN CAPITAL LETTER G WITH BREVE
  32. EnDeCode(CharacterSet::ISO8859_10, u8"\u0138", "\xFF"); // LATIN SMALL LETTER KRA
  33. EnDeCode(CharacterSet::ISO8859_11, u8"\u0E5B", "\xFB"); // THAI CHARACTER KHOMUT
  34. EnDeCode(CharacterSet::ISO8859_13, u8"\u2019", "\xFF"); // RIGHT SINGLE QUOTATION MARK
  35. EnDeCode(CharacterSet::ISO8859_14, u8"\u1E6B", "\xF7"); // LATIN SMALL LETTER T WITH DOT ABOVE
  36. EnDeCode(CharacterSet::ISO8859_15, u8"\u00BF", "\xBF"); // INVERTED QUESTION MARK
  37. EnDeCode(CharacterSet::ISO8859_16, u8"\u017C", "\xBF"); // LATIN SMALL LETTER Z WITH DOT ABOVE
  38. // EnDeCode(CharacterSet::Shift_JIS, u8"\u00A5", "\x5C"); // YEN SIGN Mapped to backslash
  39. // EnDeCode(CharacterSet::Shift_JIS, u8"\u203E", "\x7E"); // OVERLINE Mapped to tilde
  40. EnDeCode(CharacterSet::Shift_JIS, u8"\u3000", "\x81\x40"); // IDEOGRAPHIC SPACE
  41. EnDeCode(CharacterSet::Cp1250, u8"\u20AC", "\x80"); // EURO SIGN
  42. EnDeCode(CharacterSet::Cp1251, u8"\u045F", "\x9F"); // CYRILLIC SMALL LETTER DZHE
  43. EnDeCode(CharacterSet::Cp1252, u8"\u02DC", "\x98"); // SMALL TILDE
  44. EnDeCode(CharacterSet::Cp1256, u8"\u0686", "\x8D"); // ARABIC LETTER TCHEH
  45. EnDeCode(CharacterSet::UTF16BE, u8"\u20AC", "\x20\xAC"); // EURO SIGN
  46. EnDeCode(CharacterSet::UTF8, u8"\u20AC", "\xE2\x82\xAC"); // EURO SIGN
  47. EnDeCode(CharacterSet::ASCII, u8"#", "#");
  48. EnDeCode(CharacterSet::Big5, u8"\u3000", "\xA1\x40"); // IDEOGRAPHIC SPACE
  49. EnDeCode(CharacterSet::GB2312, u8"\u3000", "\xA1\xA1"); // IDEOGRAPHIC SPACE
  50. EnDeCode(CharacterSet::EUC_KR, u8"\u3000", "\xA1\xA1"); // IDEOGRAPHIC SPACE
  51. // EnDeCode(CharacterSet::GBK, u8"\u3000", "\xA1\xA1"); // IDEOGRAPHIC SPACE
  52. EnDeCode(CharacterSet::GB18030, u8"\u3000", "\xA1\xA1"); // IDEOGRAPHIC SPACE
  53. EnDeCode(CharacterSet::UTF16LE, u8"\u20AC", "\xAC\x20"); // EURO SIGN
  54. EnDeCode(CharacterSet::UTF32BE, u8"\u20AC", std::string("\x00\x00\x20\xAC", 4)); // EURO SIGN
  55. EnDeCode(CharacterSet::UTF32LE, u8"\u20AC", std::string("\xAC\x20\x00\x00", 4)); // EURO SIGN
  56. // EnDeCode(CharacterSet::ISO646_Inv, "%", "%");
  57. EnDeCode(CharacterSet::BINARY, u8"\u0080\u00FF", "\x80\xFF");
  58. EnDeCode(CharacterSet::Unknown, u8"\u0080", "\x80"); // Treated as binary
  59. EnDeCode(CharacterSet::EUC_JP, u8"\u0080", "\x80"); // Not supported, treated as binary
  60. }