// sw.cpp (13 KB)
  1. void build(Solution &s)
  2. {
  3. auto &tess = s.addProject("google.tesseract", "main");
  4. tess += Git("https://github.com/tesseract-ocr/tesseract", "", "{v}");
  5. auto cppstd = cpp17;
  6. auto &libtesseract = tess.addTarget<LibraryTarget>("libtesseract");
  7. {
  8. libtesseract.setChecks("libtesseract");
  9. libtesseract.PackageDefinitions = true;
  10. libtesseract += cppstd;
  11. libtesseract += "TESS_API"_api;
  12. libtesseract += "include/.*"_rr;
  13. libtesseract += "src/.+/.*"_rr;
  14. libtesseract -= "src/lstm/.*\\.cc"_rr;
  15. libtesseract -= "src/training/.*"_rr;
  16. libtesseract.Public += "include"_idir;
  17. libtesseract.Protected +=
  18. "src/ccmain"_id,
  19. "src/api"_id,
  20. "src/dict"_id,
  21. "src/viewer"_id,
  22. "src/wordrec"_id,
  23. "src/ccstruct"_id,
  24. "src/cutil"_id,
  25. "src/textord"_id,
  26. "src/ccutil"_id,
  27. "src/lstm"_id,
  28. "src/classify"_id,
  29. "src/arch"_id,
  30. "src/training"_id;
  31. if (libtesseract.getCompilerType() == CompilerType::MSVC ||
  32. libtesseract.getCompilerType() == CompilerType::ClangCl)
  33. {
  34. libtesseract += "__SSE4_1__"_def;
  35. libtesseract.CompileOptions.push_back("-arch:AVX2");
  36. // openmp
  37. //if (libtesseract.getOptions()["openmp"] == "true")
  38. if (0)
  39. {
  40. if (libtesseract.getCompilerType() == CompilerType::MSVC)
  41. libtesseract.CompileOptions.push_back("-openmp");
  42. else
  43. libtesseract.CompileOptions.push_back("-fopenmp");
  44. libtesseract += "_OPENMP=201107"_def;
  45. if (libtesseract.getBuildSettings().Native.ConfigurationType == ConfigurationType::Debug)
  46. libtesseract += "vcompd.lib"_slib;
  47. else
  48. libtesseract += "vcomp.lib"_slib;
  49. }
  50. }
  51. auto win_or_mingw =
  52. libtesseract.getBuildSettings().TargetOS.Type == OSType::Windows ||
  53. libtesseract.getBuildSettings().TargetOS.Type == OSType::Mingw
  54. ;
  55. // check fma flags
  56. libtesseract -= "src/arch/dotproductfma.cpp";
  57. // check arch (arm)
  58. libtesseract -= "src/arch/dotproductneon.cpp";
  59. if (libtesseract.getBuildSettings().TargetOS.Type != OSType::Windows &&
  60. libtesseract.getBuildSettings().TargetOS.Arch != ArchType::aarch64)
  61. {
  62. libtesseract["src/arch/dotproductavx.cpp"].args.push_back("-mavx");
  63. libtesseract["src/arch/dotproductavx512.cpp"].args.push_back("-mavx512f");
  64. libtesseract["src/arch/dotproductsse.cpp"].args.push_back("-msse4.1");
  65. libtesseract["src/arch/intsimdmatrixsse.cpp"].args.push_back("-msse4.1");
  66. libtesseract["src/arch/intsimdmatrixavx2.cpp"].args.push_back("-mavx2");
  67. }
  68. if (!win_or_mingw)
  69. {
  70. #if SW_MODULE_ABI_VERSION > 29
  71. if (!libtesseract.getBuildSettings().TargetOS.Android)
  72. #endif
  73. libtesseract += "pthread"_slib;
  74. }
  75. if (libtesseract.getBuildSettings().TargetOS.Arch == ArchType::aarch64)
  76. {
  77. libtesseract += "src/arch/dotproductneon.cpp";
  78. }
  79. libtesseract.Public += "HAVE_CONFIG_H"_d;
  80. libtesseract.Public += "_SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS=1"_d;
  81. libtesseract.Public += "HAVE_LIBARCHIVE"_d;
  82. libtesseract.Public += "org.sw.demo.danbloomberg.leptonica"_dep;
  83. libtesseract.Public += "org.sw.demo.libarchive.libarchive"_dep;
  84. if (win_or_mingw)
  85. {
  86. libtesseract.Public += "ws2_32.lib"_slib;
  87. libtesseract.Protected += "NOMINMAX"_def;
  88. }
  89. if (libtesseract.getCompilerType() == CompilerType::MSVC)
  90. libtesseract.Protected.CompileOptions.push_back("-utf-8");
  91. libtesseract.Variables["TESSERACT_MAJOR_VERSION"] = libtesseract.Variables["PACKAGE_MAJOR_VERSION"];
  92. libtesseract.Variables["TESSERACT_MINOR_VERSION"] = libtesseract.Variables["PACKAGE_MINOR_VERSION"];
  93. libtesseract.Variables["TESSERACT_MICRO_VERSION"] = libtesseract.Variables["PACKAGE_PATCH_VERSION"];
  94. libtesseract.Variables["TESSERACT_VERSION_STR"] = "master";
  95. libtesseract.configureFile("include/tesseract/version.h.in", "tesseract/version.h");
  96. }
  97. //
  98. auto &tesseract = tess.addExecutable("tesseract");
  99. {
  100. tesseract += cppstd;
  101. tesseract += "src/tesseract.cpp";
  102. tesseract += libtesseract;
  103. }
  104. auto &svpaint = tess.addExecutable("svpaint");
  105. {
  106. svpaint += cppstd;
  107. svpaint += "src/svpaint.cpp";
  108. svpaint += libtesseract;
  109. }
  110. auto &training = tess.addDirectory("training");
  111. //
  112. auto &common_training = training.addLibrary("common_training");
  113. {
  114. common_training += "TESS_COMMON_TRAINING_API"_api;
  115. common_training += cppstd;
  116. common_training += "src/training/common/.*"_rr;
  117. common_training.Public += "src/training/common"_idir;
  118. common_training.Public += libtesseract;
  119. }
  120. //
  121. auto &unicharset_training = training.addLibrary("unicharset_training");
  122. {
  123. unicharset_training += "TESS_UNICHARSET_TRAINING_API"_api;
  124. unicharset_training += cppstd;
  125. unicharset_training += "src/training/unicharset/.*"_rr;
  126. unicharset_training.Public += "src/training/unicharset"_idir;
  127. unicharset_training.Public += common_training;
  128. unicharset_training.Public += "org.sw.demo.unicode.icu.i18n"_dep;
  129. auto win_or_mingw =
  130. unicharset_training.getBuildSettings().TargetOS.Type == OSType::Windows ||
  131. unicharset_training.getBuildSettings().TargetOS.Type == OSType::Mingw
  132. ;
  133. if (!win_or_mingw)
  134. unicharset_training += "pthread"_slib;
  135. }
  136. //
  137. #define ADD_EXE(n, ...) \
  138. auto &n = training.addExecutable(#n); \
  139. n += cppstd; \
  140. n += "src/training/" #n ".*"_rr; \
  141. n.Public += __VA_ARGS__; \
  142. n
  143. ADD_EXE(ambiguous_words, common_training);
  144. ADD_EXE(classifier_tester, common_training);
  145. ADD_EXE(combine_lang_model, unicharset_training);
  146. ADD_EXE(combine_tessdata, common_training);
  147. ADD_EXE(cntraining, common_training);
  148. ADD_EXE(dawg2wordlist, common_training);
  149. ADD_EXE(mftraining, common_training) += "src/training/mergenf.*"_rr;
  150. ADD_EXE(shapeclustering, common_training);
  151. ADD_EXE(unicharset_extractor, unicharset_training);
  152. ADD_EXE(wordlist2dawg, common_training);
  153. ADD_EXE(lstmeval, unicharset_training);
  154. ADD_EXE(lstmtraining, unicharset_training);
  155. ADD_EXE(set_unicharset_properties, unicharset_training);
  156. ADD_EXE(merge_unicharsets, common_training);
  157. //
  158. auto &pango_training = training.addLibrary("pango_training");
  159. {
  160. pango_training += "TESS_PANGO_TRAINING_API"_api;
  161. pango_training += cppstd;
  162. pango_training += "src/training/pango/.*"_rr;
  163. pango_training.Public += "src/training/pango"_idir;
  164. pango_training.Public += unicharset_training;
  165. pango_training.Public += "org.sw.demo.gnome.pango.pangocairo"_dep;
  166. }
  167. ADD_EXE(text2image, pango_training);
  168. {
  169. text2image += cppstd;
  170. text2image +=
  171. "src/training/degradeimage.cpp",
  172. "src/training/degradeimage.h",
  173. "src/training/text2image.cpp"
  174. ;
  175. }
  176. if (!s.getExternalVariables()["with-tests"])
  177. return;
  178. // tests
  179. {
  180. auto &test = tess.addDirectory("test");
  181. test.Scope = TargetScope::Test;
  182. String skipped_tests_str;
  183. if (s.getExternalVariables()["skip-tests"])
  184. skipped_tests_str = s.getExternalVariables()["skip-tests"].getValue();
  185. auto skipped_tests = split_string(skipped_tests_str, ",");
  186. auto add_test = [&test, &s, &cppstd, &libtesseract, &pango_training, &skipped_tests](const String &name) -> decltype(auto)
  187. {
  188. auto &t = test.addTarget<ExecutableTarget>(name);
  189. t += cppstd;
  190. t += FileRegex("unittest", name + "_test.*", false);
  191. t += "unittest"_idir;
  192. t += "SW_TESTING"_def;
  193. auto datadir = test.SourceDir / "tessdata_unittest";
  194. if (s.getExternalVariables()["test-data-dir"])
  195. datadir = fs::current_path() / s.getExternalVariables()["test-data-dir"].getValue();
  196. t += Definition("TESSBIN_DIR=\"" + ""s + "\"");
  197. t += Definition("TESTING_DIR=\"" + to_printable_string(normalize_path(test.SourceDir / "test/testing")) + "\"");
  198. t += Definition("TESTDATA_DIR=\"" + to_printable_string(normalize_path(test.SourceDir / "test/testdata")) + "\"");
  199. t += Definition("LANGDATA_DIR=\"" + to_printable_string(normalize_path(datadir / "langdata_lstm")) + "\"");
  200. t += Definition("TESSDATA_DIR=\"" + to_printable_string(normalize_path(datadir / "tessdata")) + "\"");
  201. t += Definition("TESSDATA_BEST_DIR=\"" + to_printable_string(normalize_path(datadir / "tessdata_best")) + "\"");
  202. // we push all deps to all tests simplify things
  203. t += pango_training;
  204. t += "org.sw.demo.google.googletest.gmock.main"_dep;
  205. t += "org.sw.demo.google.googletest.gtest.main"_dep;
  206. if (t.getCompilerType() == CompilerType::MSVC)
  207. t.CompileOptions.push_back("-utf-8");
  208. auto win_or_mingw =
  209. t.getBuildSettings().TargetOS.Type == OSType::Windows ||
  210. t.getBuildSettings().TargetOS.Type == OSType::Mingw
  211. ;
  212. if (!win_or_mingw)
  213. t += "pthread"_slib;
  214. auto tst = libtesseract.addTest(t, name);
  215. for (auto &st : skipped_tests)
  216. {
  217. std::regex r(st);
  218. if (std::regex_match(name, r))
  219. {
  220. tst.skip(true);
  221. break;
  222. }
  223. }
  224. return t;
  225. };
  226. Strings tests
  227. {
  228. "apiexample",
  229. "applybox",
  230. "baseapi",
  231. "baseapi_thread",
  232. "bitvector",
  233. "capiexample",
  234. "capiexample_c",
  235. "cleanapi",
  236. "colpartition",
  237. "commandlineflags",
  238. "denorm",
  239. "equationdetect",
  240. "fileio",
  241. "heap",
  242. "imagedata",
  243. "indexmapbidi",
  244. "intfeaturemap",
  245. "intsimdmatrix",
  246. "lang_model",
  247. "layout",
  248. "ligature_table",
  249. "linlsq",
  250. "list",
  251. "lstm_recode",
  252. "lstm_squashed",
  253. "lstm",
  254. "lstmtrainer",
  255. "loadlang",
  256. "mastertrainer",
  257. "matrix",
  258. "networkio",
  259. "normstrngs",
  260. "nthitem",
  261. "osd",
  262. "pagesegmode",
  263. "pango_font_info",
  264. "paragraphs",
  265. "params_model",
  266. "progress",
  267. "qrsequence",
  268. "recodebeam",
  269. "rect",
  270. "resultiterator",
  271. "scanutils",
  272. "shapetable",
  273. "stats",
  274. "stringrenderer",
  275. "stridemap",
  276. "tablefind",
  277. "tablerecog",
  278. "tabvector",
  279. "textlineprojection",
  280. "tfile",
  281. "unichar",
  282. "unicharcompress",
  283. "unicharset",
  284. "validate_grapheme",
  285. "validate_indic",
  286. "validate_khmer",
  287. "validate_myanmar",
  288. "validator",
  289. };
  290. for (auto t : tests)
  291. add_test(t);
  292. auto &dt = add_test("dawg");
  293. dt += Definition("wordlist2dawg_prog=\"" + to_printable_string(normalize_path(wordlist2dawg.getOutputFile())) + "\"");
  294. dt += Definition("dawg2wordlist_prog=\"" + to_printable_string(normalize_path(dawg2wordlist.getOutputFile())) + "\"");
  295. auto &tw = add_test("tatweel");
  296. tw += "unittest/util/.*"_rr;
  297. tw += "unittest/third_party/.*"_rr;
  298. tw -= "unittest/third_party/googletest/.*"_rr;
  299. }
  300. }
  301. void check(Checker &c)
  302. {
  303. auto &s = c.addSet("libtesseract");
  304. s.checkFunctionExists("getline");
  305. s.checkIncludeExists("dlfcn.h");
  306. s.checkIncludeExists("inttypes.h");
  307. s.checkIncludeExists("memory.h");
  308. s.checkIncludeExists("stdint.h");
  309. s.checkIncludeExists("stdlib.h");
  310. s.checkIncludeExists("string.h");
  311. s.checkIncludeExists("sys/stat.h");
  312. s.checkIncludeExists("sys/types.h");
  313. s.checkIncludeExists("tiffio.h");
  314. s.checkIncludeExists("unistd.h");
  315. s.checkTypeSize("long long int");
  316. s.checkTypeSize("size_t");
  317. s.checkTypeSize("void *");
  318. s.checkTypeSize("wchar_t");
  319. {
  320. auto &c = s.checkSymbolExists("snprintf");
  321. c.Parameters.Includes.push_back("stdio.h");
  322. }
  323. }