Content.cpp 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. /*
  2. * Copyright 2022 Axel Waggershauser
  3. */
  4. // SPDX-License-Identifier: Apache-2.0
  5. #include "Content.h"
  6. #include "CharacterSet.h"
  7. #include "ECI.h"
  8. #include "HRI.h"
  9. #include "TextDecoder.h"
  10. #include "Utf.h"
  11. #include "ZXAlgorithms.h"
  12. #if !defined(ZXING_READERS) && !defined(ZXING_WRITERS)
  13. #include "Version.h"
  14. #endif
  15. #include <cctype>
  16. namespace ZXing {
  17. std::string ToString(ContentType type)
  18. {
  19. const char* t2s[] = {"Text", "Binary", "Mixed", "GS1", "ISO15434", "UnknownECI"};
  20. return t2s[static_cast<int>(type)];
  21. }
  22. template <typename FUNC>
  23. void Content::ForEachECIBlock(FUNC func) const
  24. {
  25. ECI defaultECI = hasECI ? ECI::ISO8859_1 : ECI::Unknown;
  26. if (encodings.empty())
  27. func(defaultECI, 0, Size(bytes));
  28. else if (encodings.front().pos != 0)
  29. func(defaultECI, 0, encodings.front().pos);
  30. for (int i = 0; i < Size(encodings); ++i) {
  31. auto [eci, start] = encodings[i];
  32. int end = i + 1 == Size(encodings) ? Size(bytes) : encodings[i + 1].pos;
  33. if (start != end)
  34. func(eci, start, end);
  35. }
  36. }
  37. void Content::switchEncoding(ECI eci, bool isECI)
  38. {
  39. // remove all non-ECI entries on first ECI entry
  40. if (isECI && !hasECI)
  41. encodings.clear();
  42. if (isECI || !hasECI)
  43. encodings.push_back({eci, Size(bytes)});
  44. hasECI |= isECI;
  45. }
  46. Content::Content() {}
  47. Content::Content(ByteArray&& bytes, SymbologyIdentifier si) : bytes(std::move(bytes)), symbology(si) {}
  48. void Content::switchEncoding(CharacterSet cs)
  49. {
  50. switchEncoding(ToECI(cs), false);
  51. }
  52. void Content::append(const Content& other)
  53. {
  54. if (!hasECI && other.hasECI)
  55. encodings.clear();
  56. if (other.hasECI || !hasECI)
  57. for (auto& e : other.encodings)
  58. encodings.push_back({e.eci, Size(bytes) + e.pos});
  59. append(other.bytes);
  60. hasECI |= other.hasECI;
  61. }
  62. void Content::erase(int pos, int n)
  63. {
  64. bytes.erase(bytes.begin() + pos, bytes.begin() + pos + n);
  65. for (auto& e : encodings)
  66. if (e.pos > pos)
  67. pos -= n;
  68. }
  69. void Content::insert(int pos, const std::string& str)
  70. {
  71. bytes.insert(bytes.begin() + pos, str.begin(), str.end());
  72. for (auto& e : encodings)
  73. if (e.pos > pos)
  74. pos += Size(str);
  75. }
  76. bool Content::canProcess() const
  77. {
  78. return std::all_of(encodings.begin(), encodings.end(), [](Encoding e) { return CanProcess(e.eci); });
  79. }
  80. std::string Content::render(bool withECI) const
  81. {
  82. if (empty() || !canProcess())
  83. return {};
  84. #ifdef ZXING_READERS
  85. std::string res;
  86. if (withECI)
  87. res = symbology.toString(true);
  88. ECI lastECI = ECI::Unknown;
  89. auto fallbackCS = defaultCharset;
  90. if (!hasECI && fallbackCS == CharacterSet::Unknown)
  91. fallbackCS = guessEncoding();
  92. ForEachECIBlock([&](ECI eci, int begin, int end) {
  93. // first determine how to decode the content (choose character set)
  94. // * eci == ECI::Unknown implies !hasECI and we guess
  95. // * if !IsText(eci) the ToCharcterSet(eci) will return Unknown and we decode as binary
  96. CharacterSet cs = eci == ECI::Unknown ? fallbackCS : ToCharacterSet(eci);
  97. if (withECI) {
  98. // then find the eci to report back in the ECI designator
  99. if (IsText(ToECI(cs))) // everything decoded as text is reported as utf8
  100. eci = ECI::UTF8;
  101. else if (eci == ECI::Unknown) // implies !hasECI and fallbackCS is Unknown or Binary
  102. eci = ECI::Binary;
  103. if (lastECI != eci)
  104. res += ToString(eci);
  105. lastECI = eci;
  106. std::string tmp;
  107. TextDecoder::Append(tmp, bytes.data() + begin, end - begin, cs);
  108. for (auto c : tmp) {
  109. res += c;
  110. if (c == '\\') // in the ECI protocol a '\' has to be doubled
  111. res += c;
  112. }
  113. } else {
  114. TextDecoder::Append(res, bytes.data() + begin, end - begin, cs);
  115. }
  116. });
  117. return res;
  118. #else
  119. //TODO: replace by proper construction from encoded data from within zint
  120. return std::string(bytes.asString());
  121. #endif
  122. }
  123. std::string Content::text(TextMode mode) const
  124. {
  125. switch (mode) {
  126. case TextMode::Plain: return render(false);
  127. case TextMode::ECI: return render(true);
  128. case TextMode::HRI:
  129. switch (type()) {
  130. #ifdef ZXING_READERS
  131. case ContentType::GS1: {
  132. auto plain = render(false);
  133. auto hri = HRIFromGS1(plain);
  134. return hri.empty() ? plain : hri;
  135. }
  136. case ContentType::ISO15434: return HRIFromISO15434(render(false));
  137. case ContentType::Text: return render(false);
  138. #endif
  139. default: return text(TextMode::Escaped);
  140. }
  141. case TextMode::Hex: return ToHex(bytes);
  142. case TextMode::Escaped: return EscapeNonGraphical(render(false));
  143. }
  144. return {}; // silence compiler warning
  145. }
  146. std::wstring Content::utfW() const
  147. {
  148. return FromUtf8(render(false));
  149. }
  150. ByteArray Content::bytesECI() const
  151. {
  152. if (empty())
  153. return {};
  154. std::string res = symbology.toString(true);
  155. ForEachECIBlock([&](ECI eci, int begin, int end) {
  156. if (hasECI)
  157. res += ToString(eci);
  158. for (int i = begin; i != end; ++i) {
  159. char c = static_cast<char>(bytes[i]);
  160. res += c;
  161. if (c == '\\') // in the ECI protocol a '\' has to be doubled
  162. res += c;
  163. }
  164. });
  165. return ByteArray(res);
  166. }
  167. CharacterSet Content::guessEncoding() const
  168. {
  169. #ifdef ZXING_READERS
  170. // assemble all blocks with unknown encoding
  171. ByteArray input;
  172. ForEachECIBlock([&](ECI eci, int begin, int end) {
  173. if (eci == ECI::Unknown)
  174. input.insert(input.end(), bytes.begin() + begin, bytes.begin() + end);
  175. });
  176. if (input.empty())
  177. return CharacterSet::Unknown;
  178. return TextDecoder::GuessEncoding(input.data(), input.size(), CharacterSet::ISO8859_1);
  179. #else
  180. return CharacterSet::Unknown;
  181. #endif
  182. }
  183. ContentType Content::type() const
  184. {
  185. #ifdef ZXING_READERS
  186. if (empty())
  187. return ContentType::Text;
  188. if (!canProcess())
  189. return ContentType::UnknownECI;
  190. if (symbology.aiFlag == AIFlag::GS1)
  191. return ContentType::GS1;
  192. // check for the absolut minimum of a ISO 15434 conforming message ("[)>" + RS + digit + digit)
  193. if (bytes.size() > 6 && bytes.asString(0, 4) == "[)>\x1E" && std::isdigit(bytes[4]) && std::isdigit(bytes[5]))
  194. return ContentType::ISO15434;
  195. ECI fallback = ToECI(guessEncoding());
  196. std::vector<bool> binaryECIs;
  197. ForEachECIBlock([&](ECI eci, int begin, int end) {
  198. if (eci == ECI::Unknown)
  199. eci = fallback;
  200. binaryECIs.push_back((!IsText(eci)
  201. || (ToInt(eci) > 0 && ToInt(eci) < 28 && ToInt(eci) != 25
  202. && std::any_of(bytes.begin() + begin, bytes.begin() + end,
  203. [](auto c) { return c < 0x20 && c != 0x9 && c != 0xa && c != 0xd; }))));
  204. });
  205. if (!Contains(binaryECIs, true))
  206. return ContentType::Text;
  207. if (!Contains(binaryECIs, false))
  208. return ContentType::Binary;
  209. return ContentType::Mixed;
  210. #else
  211. //TODO: replace by proper construction from encoded data from within zint
  212. return ContentType::Text;
  213. #endif
  214. }
  215. } // namespace ZXing