TextPage.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. #include "mupdf/fitz.h"
  2. #include "mupdf/pdf.h"
  3. #include "MuPDF.h"
  4. #ifndef __TEXTPAGE
  5. #define __TEXTPAGE
  6. using namespace System;
  7. using namespace System::Collections;
  8. using namespace System::Text;
  9. #pragma once
  10. namespace MuPDF {
  11. [FlagsAttribute()]
  12. public enum class TextOption {
  13. None,
  14. PreserveLigatures = 1,
  15. PreserveWhitespace = 2,
  16. PreserveImages = 4,
  17. InhibitSpaces = 8,
  18. Dehyphenate = 16,
  19. PreserveSpans = 32,
  20. MediaBoxClip = 64,
  21. UseCidForUnknownUnicode = 128,
  22. CollectStructure = 256,
  23. AccurateBBoxes = 512,
  24. CollectVectors = 1024,
  25. IgnoreActualtext = 2048,
  26. Segment = 4096,
  27. ParagraphBreak = 8192,
  28. TableHunt = 16384,
  29. CollectStyles = 32768,
  30. UseGidForUnknownUnicode = 65536,
  31. };
  32. [FlagsAttribute]
  33. public enum class FontFlags {
  34. None,
  35. Mono,
  36. Serif = 1 << 1,
  37. Bold = 1 << 2,
  38. Italic = 1 << 3,
  39. UseSubstituteMetrics = 1 << 4,
  40. StretchToMatchPDFMetrics = 1 << 5,
  41. FakeBold = 1 << 6,
  42. FakeItalic = 1 << 7,
  43. HasOpenType = 1 << 8,
  44. InvalidBBox = 1 << 9,
  45. CJK = 1 << 10,
  46. Lang0 = 1 << 11,
  47. Lang1 = 1 << 12,
  48. Embed = 1 << 13,
  49. NeverEmbed = 1 << 14,
  50. };
  51. public ref class TextOptions {
  52. public:
  53. TextOption Flags;
  54. /// <summary>
  55. /// Defines scale ratio for text rendition. Base resolution is 96 DPI.
  56. /// </summary>
  57. float Scale = 1;
  58. static operator fz_stext_options(TextOptions^ options);
  59. };
  // in mupdf_load_system_font.c
  // Native C helpers declared here and, per the note above, implemented in that C source file.
  // NOTE(review): presumably load_windows_font resolves a system font by name and style - confirm against the C file.
  extern "C" static fz_font* load_windows_font(fz_context* ctx, const char* fontname, int bold, int italic,
  int needs_exact_metrics);
  // NOTE(review): presumably builds the list of installed system fonts - confirm against the C file.
  extern "C" static void init_system_font_list(void);
  64. public ref class TextFont : IEquatable<TextFont^> {
  65. public:
  66. property String^ Name {
  67. String^ get() {
  68. return _name && _font->name == _namePtr ? _name : (_name = gcnew String(_font->name));
  69. }
  70. }
  71. property int GlyphCount {
  72. int get() { return _font->glyph_count; }
  73. }
  74. property int WidthCount {
  75. int get() { return _font->width_count; }
  76. }
  77. property short WidthDefault {
  78. short get() { return _font->width_default; }
  79. }
  80. property FontFlags Flags {
  81. FontFlags get() { return (FontFlags)*(int*)&(_font->flags); }
  82. }
  83. array<Byte>^ GetFontNameBytes() {
  84. GcnewArray(Byte, b, 32);
  85. auto n = _font->name;
  86. System::Runtime::InteropServices::Marshal::Copy((System::IntPtr)(void*)n, b, 0, 32);
  87. return b;
  88. }
  89. array<short>^ GetWidths() {
  90. GcnewArray(short, a, _font->width_count);
  91. System::Runtime::InteropServices::Marshal::Copy((System::IntPtr)(void*)_font->width_table, a, 0, _font->width_count);
  92. return a;
  93. }
  94. int GetCharacter(int cid) {
  95. return ft_char_index(_font->ft_face, cid);
  96. }
  97. /// <summary>
  98. /// Find the glyph id for a given unicode character within a font.
  99. /// </summary>
  100. /// <param name="unicode">The unicode character to encode.</param>
  101. /// <returns>Returns the glyph id for the given unicode value, or 0 if unknown.</returns>
  102. int Encode(int unicode) {
  103. return fz_encode_character(Context::Ptr, _font, unicode);
  104. }
  105. /// <summary>
  106. /// Return the advance for a given glyph.
  107. /// </summary>
  108. /// <param name="glyph">The glyph id.</param>
  109. /// <param name="vertical">True for vertical writing mode, false for horizontal mode.</param>
  110. float Advance(int glyph, bool vertical) {
  111. return fz_advance_glyph(Context::Ptr, _font, glyph, vertical);
  112. }
  113. Equatable(TextFont, _font)
  114. internal:
  115. TextFont(fz_font* font) : _font(font), _namePtr(font->name) {};
  116. private:
  117. fz_font* _font;
  118. String^ _name;
  119. char* _namePtr;
  120. };
  121. public ref class TextChar : Generic::IEnumerable<TextChar^>, IEquatable<TextChar^> {
  122. public:
  123. /// <summary>
  124. /// Gets the Unicode code point for this character.
  125. /// </summary>
  126. property int Character {
  127. int get() { return _ch->c; }
  128. }
  129. /// <summary>
  130. /// Gets the sRGB Hex color (alpha in top 8 bits, then r, then g, then b in low bits).
  131. /// </summary>
  132. property int Color {
  133. int get() { return _ch->argb; }
  134. }
  135. property float Size {
  136. float get() { return _ch->size; }
  137. }
  138. /// <summary>
  139. /// Gets a pointer to the internal font, for font comparision without creating new TextFont instances.
  140. /// </summary>
  141. property IntPtr FontPtr {
  142. IntPtr get() { return (IntPtr)(void*)_ch->font; }
  143. }
  144. property TextChar^ Next {
  145. TextChar^ get() { return _ch->next ? gcnew TextChar(_ch->next) : nullptr; }
  146. }
  147. property Point Origin {
  148. Point get() { return _ch->origin; }
  149. }
  150. property MuPDF::Quad Quad {
  151. MuPDF::Quad get() { return _ch->quad; }
  152. }
  153. property TextFont^ Font {
  154. TextFont^ get() { return _Font ? _Font : gcnew TextFont(_ch->font); }
  155. }
  156. /// <summary>
  157. /// Compares whether other TextChar has the same font as the current one.
  158. /// </summary>
  159. /// <param name="other">Another TextChar</param>
  160. bool HasSameFont(TextChar^ other) {
  161. return other && _ch->font == other->_ch->font;
  162. }
  163. /// <summary>
  164. /// Compares whether other TextChar has the same font, size and color as the current one.
  165. /// </summary>
  166. /// <param name="other">Another TextChar</param>
  167. bool HasSameStyle(TextChar^ other) {
  168. return other && _ch->size == other->_ch->size && _ch->argb == other->_ch->argb && _ch->font == other->_ch->font;
  169. }
  170. String^ ToString() override {
  171. return Char::ConvertFromUtf32(_ch->c);
  172. }
  173. static operator Char(TextChar^ ch) {
  174. return ch->_ch->c;
  175. }
  176. internal:
  177. TextChar(fz_stext_char* ch) : _ch(ch) {}
  178. property fz_stext_char* Ptr {
  179. fz_stext_char* get() { return _ch; }
  180. }
  181. private:
  182. fz_stext_char* _ch;
  183. TextFont^ _Font;
  184. #pragma region IEnumerator
  185. public:
  186. virtual Generic::IEnumerator<MuPDF::TextChar^>^ GetEnumerator() sealed = Generic::IEnumerable<MuPDF::TextChar^>::GetEnumerator {
  187. return gcnew Enumerator<TextChar, fz_stext_char>(_ch, _ch->next);
  188. }
  189. virtual System::Collections::IEnumerator^ GetEnumeratorBase() sealed = System::Collections::IEnumerable::GetEnumerator {
  190. return GetEnumerator();
  191. }
  192. #pragma endregion
  193. #pragma region IEquatable
  194. Equatable(TextChar, _ch)
  195. #pragma endregion
  196. };
  197. public ref class TextSpan {
  198. public:
  199. initonly int Color;
  200. initonly float Size;
  201. initonly TextFont^ Font;
  202. initonly Point Origin;
  203. initonly Box Bound;
  204. initonly bool IsVertical;
  205. String^ ToString() override;
  206. internal:
  207. TextSpan(fz_stext_char* ch, int length, Box bound, bool vertical) : _ch(ch), _length(length), Font(gcnew TextFont(ch->font)), Size(ch->size), Color(ch->argb), Origin((Point)ch->origin), Bound(bound), IsVertical(vertical) { }
  208. private:
  209. fz_stext_char* _ch;
  210. int _length;
  211. };
  212. public ref class TextLine : Generic::IEnumerable<TextChar^>, IEquatable<MuPDF::TextLine^> {
  213. public:
  214. property bool IsVertical {
  215. bool get() { return _line->wmode; }
  216. }
  217. property Box Bound {
  218. Box get() { return _line->bbox; }
  219. }
  220. property TextChar^ FirstCharacter {
  221. TextChar^ get() { return gcnew TextChar(_line->first_char); }
  222. }
  223. property TextChar^ LastCharacter {
  224. TextChar^ get() { return gcnew TextChar(_line->last_char); }
  225. }
  226. /// <summary>
  227. /// Gets the first font used in TextLine.
  228. /// </summary>
  229. property TextFont^ Font {
  230. TextFont^ get() { return gcnew TextFont(_line->first_char->font); }
  231. }
  232. Generic::IEnumerable<MuPDF::TextSpan^>^ GetSpans() {
  233. return gcnew TextLineSpanContainer(this);
  234. }
  235. String^ ToString() override;
  236. internal:
  237. TextLine(fz_stext_line* line) : _line(line) {}
  238. property fz_stext_line* Ptr {
  239. fz_stext_line* get() { return _line; }
  240. }
  241. private:
  242. fz_stext_line* _line;
  243. ref class TextLineSpanContainer : Generic::IEnumerable<TextSpan^>, Generic::IEnumerator<MuPDF::TextSpan^> {
  244. public:
  245. TextLineSpanContainer(TextLine^ line) : _Line(line), _start(_Line->_line->first_char) { }
  246. property TextSpan^ Current {
  247. virtual TextSpan^ get() sealed { return _Current; }
  248. }
  249. property Object^ CurrentBase {
  250. virtual Object^ get() sealed = System::Collections::IEnumerator::Current::get {
  251. return _Current;
  252. }
  253. }
  254. virtual bool MoveNext();
  255. virtual void Reset() = System::Collections::IEnumerator::Reset;
  256. virtual Generic::IEnumerator<MuPDF::TextSpan^>^ GetEnumerator() sealed = Generic::IEnumerable<MuPDF::TextSpan^>::GetEnumerator {
  257. return this;
  258. }
  259. virtual System::Collections::IEnumerator^ GetEnumeratorBase() sealed = System::Collections::IEnumerable::GetEnumerator{
  260. return GetEnumerator();
  261. }
  262. private:
  263. ~TextLineSpanContainer() {}
  264. TextLine^ _Line;
  265. TextSpan^ _Current;
  266. fz_stext_char* _start;
  267. fz_stext_char* _active;
  268. };
  269. #pragma region IEnumerator
  270. public:
  271. virtual Generic::IEnumerator<MuPDF::TextChar^>^ GetEnumerator() sealed = Generic::IEnumerable<MuPDF::TextChar^>::GetEnumerator {
  272. return gcnew Enumerator<TextChar, fz_stext_char>(_line->first_char, _line->last_char);
  273. }
  274. virtual System::Collections::IEnumerator^ GetEnumeratorBase() sealed = System::Collections::IEnumerable::GetEnumerator{
  275. return GetEnumerator();
  276. }
  277. #pragma endregion
  278. #pragma region IEquatable
  279. Equatable(TextLine, _line)
  280. #pragma endregion
  281. };
  282. public enum class BlockType {
  283. Text = 0,
  284. Image = 1,
  285. Struct = 2,
  286. Vector = 3,
  287. Grid = 4
  288. };
  289. public ref class TextBlock : Generic::IEnumerable<TextLine^>, IEquatable<TextBlock^> {
  290. public:
  291. property BlockType Type {
  292. BlockType get() { return (BlockType)_block->type; }
  293. }
  294. property Box Bound {
  295. Box get() { return _block->bbox; }
  296. }
  297. virtual String^ ToString() override;
  298. internal:
  299. TextBlock(fz_stext_block* block) : _block(block) {
  300. }
  301. property fz_stext_block* Ptr {
  302. fz_stext_block* get() { return _block; }
  303. }
  304. private:
  305. fz_stext_block* _block;
  306. #pragma region IEnumerator
  307. public:
  308. virtual Generic::IEnumerator<MuPDF::TextLine^>^ GetEnumerator() sealed = Generic::IEnumerable<MuPDF::TextLine^>::GetEnumerator{
  309. return _block->type == FZ_STEXT_BLOCK_TEXT
  310. ? gcnew Enumerator<TextLine, fz_stext_line>(_block->u.t.first_line, _block->u.t.last_line)
  311. : EmptyCollection<MuPDF::TextLine^>::GetEnumerator();
  312. }
  313. virtual System::Collections::IEnumerator^ GetEnumeratorBase() sealed = System::Collections::IEnumerable::GetEnumerator{
  314. return GetEnumerator();
  315. }
  316. #pragma endregion
  317. #pragma region IEquatable
  318. Equatable(TextBlock, _block)
  319. #pragma endregion
  320. };
  321. public ref class TextPage : Generic::IEnumerable<TextBlock^> {
  322. public:
  323. property Box Bound {
  324. Box get() { return _page->mediabox; }
  325. }
  326. property TextBlock^ FirstBlock {
  327. TextBlock^ get() { return gcnew TextBlock(_page->first_block); }
  328. }
  329. property TextBlock^ LastBlock {
  330. TextBlock^ get() { return gcnew TextBlock(_page->last_block); }
  331. }
  332. String^ ToString() override {
  333. return Bound.ToString();
  334. }
  335. internal:
  336. TextPage(fz_stext_page* page) : _page(page) {};
  337. ~TextPage() {
  338. ReleaseHandle();
  339. }
  340. property fz_stext_page* Ptr {
  341. fz_stext_page* get() { return _page; }
  342. }
  343. private:
  344. fz_stext_page* _page;
  345. void ReleaseHandle() {
  346. fz_drop_stext_page(Context::Ptr, _page);
  347. _page = NULL;
  348. }
  349. public:
  350. virtual Generic::IEnumerator<MuPDF::TextBlock^>^ GetEnumerator() sealed = Generic::IEnumerable<MuPDF::TextBlock^>::GetEnumerator {
  351. return gcnew Enumerator<TextBlock, fz_stext_block>(_page->first_block, _page->last_block);
  352. }
  353. virtual System::Collections::IEnumerator^ GetEnumeratorBase() sealed = System::Collections::IEnumerable::GetEnumerator {
  354. return GetEnumerator();
  355. }
  356. };
  357. };
  358. #endif // !__TEXTPAGE