make_cppyy.py 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902
  1. import jlib
  2. import textwrap
  3. def make_cppyy(
  4. state_,
  5. build_dirs,
  6. generated,
  7. ):
  8. path = f'{build_dirs.dir_so}/mupdf_cppyy.py'
  9. jlib.log( 'Updating {path}')
  10. text = ''
  11. text += textwrap.dedent( """
  12. '''
  13. MuPDF Python bindings using cppyy: https://cppyy.readthedocs.io
  14. Cppyy generates bindings at runtime, so we don't need to build a .so like SWIG.
  15. However we still need the mupdf.so (MuPDF C API) and mupdfcpp.so (MuPDF C++
  16. API) libraries to be present and accessible via LD_LIBRARY_PATH.
  17. Usage:
  18. import mupdf_cppyy
  19. mupdf = mupdf_cppyy.cppyy.gbl.mupdf
  20. document = mupdf.Document(...)
  21. Requirements:
  22. Install cppyy; for example:
  23. python -m pip install cppyy
  24. '''
  25. import ctypes
  26. import inspect
  27. import os
  28. import re
  29. import sys
  30. import cppyy
  31. import cppyy.ll
  32. try:
  33. import jlib
  34. except ModuleNotFoundError:
  35. class jlib:
  36. @staticmethod
  37. def log( text):
  38. sys.stderr.write( f'{text}\\n')
  39. mupdf_dir = os.path.abspath( f'{__file__}/../../..')
  40. # pdf_annot_type is both an enum and a function (that returns
  41. # the enum type!).
  42. with open( f'{mupdf_dir}/include/mupdf/pdf/annot.h') as f:
  43. text = f.read()
  44. text, n = re.subn(
  45. '(enum pdf_annot_type pdf_annot_type[(]fz_context [*]ctx, pdf_annot [*]annot[)];)',
  46. '/*\\1*/',
  47. text,
  48. )
  49. assert n == 1, f'n={n}'
  50. # libmupdf and libmupdf.so also work here.
  51. if 0:
  52. print( f'$LD_LIBRARY_PATH={os.environ["LD_LIBRARY_PATH"]}', file=sys.stderr)
  53. ret = cppyy.load_library('mupdf')
  54. #jlib.log( 'after loading "mupdf": ret={ret=}')
  55. cppyy.load_library('mupdfcpp')
  56. cppyy.add_include_path( f'{mupdf_dir}/include')
  57. cppyy.add_include_path( f'{mupdf_dir}/platform/c++/include')
  58. # pdf_annot_type is both an enum and a function (that returns
  59. # the enum type!).
  60. with open( f'{mupdf_dir}/include/mupdf/pdf/annot.h') as f:
  61. text1 = f.read()
  62. text1, n = re.subn(
  63. '(enum pdf_annot_type pdf_annot_type[(]fz_context [*]ctx, pdf_annot [*]annot[)];)',
  64. '/* \\\\1 */',
  65. text1,
  66. )
  67. assert n == 1, f'n={n}'
  68. with open( 'foo_text1.h', 'w') as f:
  69. f.write( text1)
  70. # pdf_widget_type is both an enum and a function (that returns
  71. # the enum type!).
  72. with open( f'{mupdf_dir}/include/mupdf/pdf/form.h') as f:
  73. text2 = f.read()
  74. text2, n = re.subn(
  75. '(enum pdf_widget_type pdf_widget_type[(]fz_context [*]ctx, pdf_annot [*]widget[)];)',
  76. '/* \\\\1 */',
  77. text2,
  78. )
  79. assert n == 1, f'n={n}'
  80. with open( 'foo_text2.h', 'w') as f:
  81. f.write( text2)
  82. # Not sure why we need '#define FZ_ENABLE_ICC 1', but
  83. # otherwise classes.h doesn't see a definition of
  84. # fz_icc_profile. Presumably something to do with us manually
  85. # including our modified copy of include/mupdf/pdf/annot.h.
  86. #
  87. cppyy.cppdef( f'''
  88. #undef NDEBUG
  89. #define FZ_ENABLE_ICC 1
  90. {text1}
  91. {text2}
  92. #ifndef MUPDF_PDF_ANNOT_H
  93. #error MUPDF_PDF_ANNOT_H not defined
  94. #endif
  95. #include "mupdf/fitz/version.h"
  96. #include "mupdf/classes.h"
  97. #include "mupdf/classes2.h"
  98. #include "mupdf/functions.h"
  99. #include "mupdf/fitz.h"
  100. #include "mupdf/pdf.h"
  101. ''')
  102. cppyy.cppdef( f'''
  103. #ifndef MUPDF_PDF_ANNOT_H
  104. #error MUPDF_PDF_ANNOT_H not defined
  105. #endif
  106. ''')
  107. if os.environ.get( 'MUPDF_cppyy_sig_exceptions') == '1':
  108. jlib.log( 'calling cppyy.ll.set_signals_as_exception(True)')
  109. cppyy.ll.set_signals_as_exception(True)
  110. if 0:
  111. # Do some checks.
  112. try:
  113. cppyy.gbl.abort()
  114. except Exception as e:
  115. print( f'Ignoring test exception from abort(): {e}', file=sys.stderr)
  116. else:
  117. assert 0, 'No exception from cppyy.gbl.abort()'
  118. cppyy.cppdef('''
  119. void mupdf_check_assert()
  120. {
  121. assert( 0);
  122. }
  123. ''')
  124. cppyy.ll.set_signals_as_exception(True)
  125. print( 'Testing assert failure', file=sys.stderr)
  126. try:
  127. cppyy.gbl.mupdf_check_assert()
  128. except Exception as e:
  129. print( f'Ignoring test exception from assert(0): {e}', file=sys.stderr)
  130. print( 'Testing rect creation from null', file=sys.stderr)
  131. try:
  132. r = cppyy.gbl.mupdf.Rect( 0)
  133. except Exception as e:
  134. print( f'Ignoring exception from test rect creation from null e={e}', file=sys.stderr)
  135. except:
  136. print( '*** Non-Exception exception', file=sys.stderr)
  137. traceback.print_exc()
  138. else:
  139. print( f'*** No exception from test rect creation from null', file=sys.stderr)
  140. print( 'Finished testing rect creation from null', file=sys.stderr)
  141. #try:
  142. # cppyy.gbl.raise( SIGABRT)
  143. #except:
  144. # traceback.print_exc()
  145. #
  146. # Would be convenient to do:
  147. #
  148. # from cppyy.gbl.mupdf import *
  149. #
  150. # - but unfortunately this is not possible, e.g. see:
  151. #
  152. # https://cppyy.readthedocs.io/en/latest/misc.html#reduced-typing
  153. #
  154. # So instead it is suggested that users of this api do:
  155. #
  156. # import mupdf
  157. # mupdf = mupdf.cppyy.gbl.mupdf
  158. #
  159. # If access to mupdf.cppyy.gbl is required (e.g. to see globals that are not in
  160. # the C++ mupdf namespace), caller can additionally do:
  161. #
  162. # import cppyy
  163. # cppyy.gbl.std...
  164. #
  165. # We make various modifications of cppyy.gbl.mupdf to simplify usage.
  166. #
  167. #print( f'len(dir(cppyy.gbl))={len(dir(cppyy.gbl))}')
  168. #print( f'len(dir(cppyy.gbl.mupdf))={len(dir(cppyy.gbl.mupdf))}')
  169. # Find macros and import into cppyy.gbl.mupdf.
  170. paths = (
  171. f'{mupdf_dir}/include/mupdf/fitz/version.h',
  172. f'{mupdf_dir}/include/mupdf/ucdn.h',
  173. f'{mupdf_dir}/pdf/object.h',
  174. )
  175. for path in (
  176. f'{mupdf_dir}/include/mupdf/fitz/version.h',
  177. f'{mupdf_dir}/include/mupdf/ucdn.h',
  178. ):
  179. with open( path) as f:
  180. for line in f:
  181. m = re.match('^#define\\\\s([a-zA-Z_][a-zA-Z_0-9]+)\\\\s+([^\\\\s]*)\\\\s*$', line)
  182. if m:
  183. name = m.group(1)
  184. value = m.group(2)
  185. if value == '':
  186. value = 1
  187. else:
  188. value = eval( value)
  189. #print( f'mupdf_cppyy.py: Setting {name}={value!r}')
  190. setattr( cppyy.gbl.mupdf, name, value)
  191. # MuPDF enums are defined as C so are not in the mupdf
  192. # namespace. To mimic the SWIG mupdf bindings, we explicitly
  193. # copy them into cppyy.gbl.mupdf.
  194. #
  195. """)
  196. # Copy enums into mupdf namespace. We use generated.c_enums for this
  197. # because cppyy has a bug where enums are not visible for iteration in a
  198. # namespace - see: https://github.com/wlav/cppyy/issues/45
  199. #
  200. for enum_type, enum_names in generated.c_enums.items():
  201. for enum_name in enum_names:
  202. text += f'cppyy.gbl.mupdf.{enum_name} = cppyy.gbl.{enum_name}\n'
  203. # Add code for converting small integers into MuPDF's special pdf_obj*
  204. # values, and add these special enums to the mupdf namespace.
  205. text += textwrap.dedent( """
  206. cppyy.cppdef('''
  207. #include "mupdf/fitz.h"
  208. /* Casts an integer to a pdf_obj*. Used to convert SWIG's int
  209. values for PDF_ENUM_NAME_* into PdfObj's. */
  210. pdf_obj* obj_enum_to_obj(int n)
  211. {
  212. return (pdf_obj*) (intptr_t) n;
  213. }
  214. ''')
  215. """)
  216. for enum_type, enum_names in generated.c_enums.items():
  217. for enum_name in enum_names:
  218. if enum_name.startswith( 'PDF_ENUM_NAME_'):
  219. text += f'cppyy.gbl.mupdf.{enum_name} = cppyy.gbl.mupdf.PdfObj( cppyy.gbl.obj_enum_to_obj( cppyy.gbl.mupdf.{enum_name}))\n'
  220. # Auto-generated out-param wrappers.
  221. text += generated.cppyy_extra
  222. # Misc processing can be done directly in Python code.
  223. #
  224. text += textwrap.dedent( """
  225. # Import selected basic types into mupdf namespace.
  226. #
  227. cppyy.gbl.mupdf.fz_point = cppyy.gbl.fz_point
  228. cppyy.gbl.mupdf.fz_rect = cppyy.gbl.fz_rect
  229. cppyy.gbl.mupdf.fz_matrix = cppyy.gbl.fz_matrix
  230. cppyy.gbl.mupdf.fz_font_flags_t = cppyy.gbl.fz_font_flags_t
  231. cppyy.gbl.mupdf.fz_default_color_params = cppyy.gbl.fz_default_color_params
  232. # Override various functions so that, for example, functions with
  233. # out-parameters instead return tuples.
  234. #
  235. # cppyy doesn't like interpreting char name[32] as a string?
  236. cppyy.cppdef('''
  237. std::string mupdf_font_name(fz_font* font)
  238. {
  239. //std::cerr << __FUNCTION__ << ": font=" << font << " font->name=" << font->name << "\\\\n";
  240. return font->name;
  241. }
  242. ''')
  243. class getattr_path_raise: pass
  244. def getattr_path( path, default=getattr_path_raise):
  245. '''
  246. Like getattr() but resolves string path, splitting at '.'
  247. characters.
  248. '''
  249. if isinstance( path, str):
  250. path = path.split( '.')
  251. # Maybe we should use out caller's module?
  252. ret = sys.modules[ __name__]
  253. try:
  254. for subname in path:
  255. ret = getattr( ret, subname)
  256. except AttributeError:
  257. if default is getattr_path_raise:
  258. raise
  259. return default
  260. return ret
  261. def setattr_path( path, value):
  262. '''
  263. Like getattr() but resolves string path, splitting at '.'
  264. characters.
  265. '''
  266. if isinstance( path, str):
  267. path = path.split( '.')
  268. ns = getattr_path( path[:-1])
  269. setattr( ns, path[-1], value)
  270. assert getattr_path( 'cppyy') == cppyy
  271. assert getattr_path( 'cppyy.gbl.mupdf') == cppyy.gbl.mupdf
  272. def insert( *paths):
  273. '''
  274. Returns a decorator that copies the function into the specified
  275. name(s). We assert that each item in <path> does not already
  276. exist.
  277. '''
  278. class Anon: pass
  279. for path in paths:
  280. assert getattr_path( path, Anon) is Anon, f'path={path} already exists.'
  281. def decorator( fn):
  282. for path in paths:
  283. setattr_path( path, fn)
  284. return decorator
  285. def replace( *paths):
  286. '''
  287. Decorator that inserts a function into namespace(s), replacing
  288. the existing function(s). We assert that the namespace(s)
  289. already contains the specified name,
  290. '''
  291. def decorator( fn):
  292. class Anon: pass
  293. for path in paths:
  294. assert getattr_path( path, Anon) is not Anon, f'path does not exist: {path}'
  295. for path in paths:
  296. setattr_path( path, fn)
  297. return decorator
  298. def override( path, *paths_extra):
  299. '''
  300. Returns a decorator for <fn> which sets <path> and each item
  301. in <paths_extra> to <fn>. When <fn> is called, it is passed an
  302. additional <_original> arg set to the original <path>.
  303. '''
  304. def decorator( fn):
  305. fn_original = getattr_path( path)
  306. def fn2( *args, **kwargs):
  307. '''
  308. Call <fn>, passing additional <_original> arg.
  309. '''
  310. assert '_original' not in kwargs
  311. kwargs[ '_original'] = fn_original
  312. return fn( *args, **kwargs)
  313. setattr_path( path, fn2)
  314. for p in paths_extra:
  315. setattr_path( p, fn2)
  316. return fn2
  317. return decorator
  318. # A C++ fn that returns fz_buffer::data; our returned value seems
  319. # to work better than direct access in Python.
  320. #
  321. cppyy.cppdef(f'''
  322. namespace mupdf
  323. {{
  324. void* Buffer_data( fz_buffer* buffer)
  325. {{
  326. return buffer->data;
  327. }}
  328. }}
  329. ''')
  330. @replace( 'cppyy.gbl.mupdf.Buffer.buffer_storage', 'cppyy.gbl.mupdf.mfz_buffer_storage')
  331. def _( buffer):
  332. assert isinstance( buffer, cppyy.gbl.mupdf.Buffer)
  333. assert buffer.m_internal
  334. # Getting buffer.m_internal.data via Buffer_data() appears
  335. # to work better than using buffer.m_internal.data
  336. # directly. E.g. the latter fails when passed to
  337. # mfz_recognize_image_format().
  338. #
  339. d = cppyy.gbl.mupdf.Buffer_data( buffer.m_internal)
  340. return buffer.m_internal.len, d
  341. cppyy.cppdef('''
  342. std::string mupdf_raw_to_python_bytes( void* data, size_t size)
  343. {
  344. return std::string( (char*) data, size);
  345. }
  346. ''')
  347. @insert( 'cppyy.gbl.mupdf.raw_to_python_bytes')
  348. def _( data, size):
  349. '''
  350. Need to explicitly convert cppyy's std::string wrapper into
  351. a bytes, otherwise it defaults to a Python str.
  352. '''
  353. ret = cppyy.gbl.mupdf_raw_to_python_bytes( data, size)
  354. ret = bytes( ret)
  355. return ret
  356. # Support for converting a fz_buffer's contents into a Python
  357. # bytes.
  358. #
  359. # We do this by creating a std::string in C++, then in Python
  360. # converting the resulting class cppyy.gbl.std.string into a bytes.
  361. #
  362. # Not sure whether this conversion to bytes involves a second copy
  363. # of the data.
  364. #
  365. cppyy.cppdef( f'''
  366. namespace mupdf
  367. {{
  368. /* Returns std::string containing copy of buffer contents. */
  369. std::string buffer_to_string( const Buffer& buffer, bool clear)
  370. {{
  371. unsigned char* datap;
  372. size_t len = mupdf::mfz_buffer_storage( buffer, &datap);
  373. std::string ret = std::string( (char*) datap, len);
  374. if (clear)
  375. {{
  376. mupdf::mfz_clear_buffer(buffer);
  377. mupdf::mfz_trim_buffer(buffer);
  378. }}
  379. return ret;
  380. }}
  381. }}
  382. ''')
  383. @replace( 'cppyy.gbl.mupdf.mfz_buffer_extract', 'cppyy.gbl.mupdf.Buffer.buffer_extract')
  384. def _( buffer):
  385. s = cppyy.gbl.mupdf.buffer_to_string( buffer, clear=True)
  386. b = bytes( s)
  387. return b
  388. @insert( 'cppyy.gbl.mupdf.mfz_buffer_extract_copy', 'cppyy.gbl.mupdf.Buffer.buffer_extract_copy')
  389. def _( buffer):
  390. s = cppyy.gbl.mupdf.buffer_to_string( buffer, clear=False)
  391. b = bytes( s)
  392. return b
  393. # Python-friendly mfz_new_buffer_from_copied_data() taking a str.
  394. #
  395. cppyy.cppdef('''
  396. namespace mupdf
  397. {
  398. Buffer mfz_new_buffer_from_copied_data( const std::string& data)
  399. {
  400. /* Constructing a mupdf::Buffer from a char* ends
  401. up using fz_new_buffer_from_base64(). We want to
  402. use fz_new_buffer_from_data() which can be done by
  403. passing an unsigned char*. */
  404. return mupdf::mfz_new_buffer_from_copied_data(
  405. (const unsigned char*) data.c_str(),
  406. data.size()
  407. );
  408. }
  409. }
  410. ''')
  411. cppyy.gbl.mupdf.Buffer.new_buffer_from_copied_data = cppyy.gbl.mupdf.mfz_new_buffer_from_copied_data
  412. # Python-friendly alternative to ppdf_set_annot_color(), taking up
  413. # to 4 explicit color args.
  414. #
  415. cppyy.cppdef('''
  416. void mupdf_pdf_set_annot_color(
  417. mupdf::PdfAnnot& self,
  418. int n,
  419. float color0,
  420. float color1,
  421. float color2,
  422. float color3
  423. )
  424. {
  425. float color[] = { color0, color1, color2, color3 };
  426. return self.set_annot_color( n, color);
  427. }
  428. void mupdf_pdf_set_annot_interior_color(
  429. mupdf::PdfAnnot& self,
  430. int n,
  431. float color0,
  432. float color1,
  433. float color2,
  434. float color3
  435. )
  436. {
  437. float color[] = { color0, color1, color2, color3 };
  438. self.set_annot_interior_color( n, color);
  439. }
  440. void mupdf_mfz_fill_text(
  441. const mupdf::Device& dev,
  442. const mupdf::Text& text,
  443. mupdf::Matrix& ctm,
  444. const mupdf::Colorspace& colorspace,
  445. float color0,
  446. float color1,
  447. float color2,
  448. float color3,
  449. float alpha,
  450. mupdf::ColorParams& color_params
  451. )
  452. {
  453. float color[] = { color0, color1, color2, color3 };
  454. return mupdf::mfz_fill_text( dev, text, ctm, colorspace, color, alpha, color_params);
  455. }
  456. ''')
  457. def mupdf_make_colors( color):
  458. '''
  459. Returns (n, colors) where <colors> is a tuple with 4 items,
  460. the first <n> of which are from <color> and the rest are
  461. zero.
  462. '''
  463. if isinstance(color, float):
  464. color = color,
  465. assert isinstance( color, ( tuple, list))
  466. n = len( color)
  467. ret = tuple(color) + (4-n)*(0,)
  468. assert len( ret) == 4
  469. return n, ret
  470. @replace( 'cppyy.gbl.mupdf.mpdf_set_annot_color', 'cppyy.gbl.mupdf.PdfAnnot.set_annot_color')
  471. def _( pdf_annot, color):
  472. n, colors = mupdf_make_colors( color)
  473. return cppyy.gbl.mupdf_pdf_set_annot_color( pdf_annot, n, *colors)
  474. @replace( 'cppyy.gbl.mupdf.mpdf_set_annot_interior_color', 'cppyy.gbl.mupdf.PdfAnnot.set_annot_interior_color')
  475. def _( pdf_annot, color):
  476. n, colors = mupdf_make_colors( color)
  477. cppyy.gbl.mupdf_pdf_set_annot_interior_color( pdf_annot, n, *colors)
  478. @replace( 'cppyy.gbl.mupdf.mfz_fill_text', 'cppyy.gbl.mupdf.Device.fill_text')
  479. def _( dev, text, ctm, colorspace, color, alpha, color_params):
  480. _, colors = mupdf_make_colors( color)
  481. return cppyy.gbl.mupdf_mfz_fill_text( dev, text, ctm, colorspace, *colors, alpha, color_params)
  482. # Override cppyy.gbl.mupdf.Document.lookup_metadata() to return a
  483. # string or None if not found.
  484. #
  485. @override( 'cppyy.gbl.mupdf.lookup_metadata', 'cppyy.gbl.mupdf.Document.lookup_metadata')
  486. def _(self, key, _original):
  487. e = ctypes.c_int(0)
  488. ret = _original(self.m_internal, key, e)
  489. e = e.value
  490. if e < 0:
  491. return None
  492. # <ret> will be a cppyy.gbl.std.string, for which str()
  493. # returns something that looks like a 'bytes', so
  494. # explicitly convert to 'str'.
  495. ret = str( ret)
  496. return ret
  497. # Override cppyy.gbl.mupdf.parse_page_range() to distinguish
  498. # between returned const char* being null or empty string
  499. # - cppyy converts both to an empty string, which means
  500. # we can't distinguish between the last range (where
  501. # fz_parse_page_range() returns '') and beyond the last range
  502. # (where fz_parse_page_range() returns null).
  503. #
  504. # fz_parse_page_range() leaves the out-params unchanged when it
  505. # returns null, so we can detect whether null was returned by
  506. # initializing the out-params with special values that would never
  507. # be ordinarily be returned.
  508. #
  509. @override( 'cppyy.gbl.mupdf.parse_page_range', 'cppyy.gbl.mupdf.mfz_parse_page_range')
  510. def _(s, n, _original):
  511. a = ctypes.c_int(-1)
  512. b = ctypes.c_int(-1)
  513. s = _original(s, a, b, n)
  514. if a.value == -1 and b.value == -1:
  515. s = None
  516. return s, a.value, b.value
  517. # Provide native python implementation of cppyy.gbl.mupdf.format_output_path()
  518. # (-> fz_format_output_path). (The underlying C/C++ functions take a fixed-size
  519. # buffer for the output string so isn't useful for Python code.)
  520. #
  521. @replace( 'cppyy.gbl.mupdf.format_output_path', 'cppyy.gbl.mupdf.mfz_format_output_path')
  522. def _(format, page):
  523. m = re.search( '(%[0-9]*d)', format)
  524. if m:
  525. ret = format[ :m.start(1)] + str(page) + format[ m.end(1):]
  526. else:
  527. dot = format.rfind( '.')
  528. if dot < 0:
  529. dot = len( format)
  530. ret = format[:dot] + str(page) + format[dot:]
  531. return ret
  532. # Override cppyy.gbl.mupdf.Pixmap.n and cppyy.gbl.mupdf.Pixmap.alpha so
  533. # that they return int. (The underlying C++ functions return unsigned char
  534. # so cppyy's default bindings end up returning a python string which isn't
  535. # useful.)
  536. #
  537. @override( 'cppyy.gbl.mupdf.Pixmap.n')
  538. def _( self, _original):
  539. return ord( _original( self))
  540. @override( 'cppyy.gbl.mupdf.Pixmap.alpha')
  541. def _(self, _original):
  542. return ord( _original( self))
  543. # Override cppyy.gbl.mupdf.ppdf_clean_file() so that it takes a Python
  544. # container instead of (argc, argv).
  545. #
  546. @override( 'cppyy.gbl.mupdf.ppdf_clean_file', 'cppyy.gbl.mupdf.mpdf_clean_file')
  547. def _(infile, outfile, password, opts, argv, _original):
  548. a = 0
  549. if argv:
  550. a = (ctypes.c_char_p * len(argv))(*argv)
  551. a = ctypes.pointer(a)
  552. _original(infile, outfile, password, opts, len(argv), a)
  553. # Add cppyy.gbl.mupdf.mpdf_dict_getl() with Python variadic args.
  554. #
  555. @insert( 'cppyy.gbl.mupdf.mpdf_dict_getl', 'cppyy.gbl.mupdf.PdfObj.dict_getl')
  556. def _(obj, *tail):
  557. for key in tail:
  558. if not obj.m_internal:
  559. break
  560. obj = obj.dict_get(key)
  561. assert isinstance(obj, cppyy.gbl.mupdf.PdfObj)
  562. return obj
  563. # Add cppyy.gbl.mupdf.mpdf_dict_getl() with Python variadic args.
  564. #
  565. @insert( 'cppyy.gbl.mupdf.mpdf_dict_putl', 'cppyy.gbl.mupdf.PdfObj.dict_putl')
  566. def _(obj, val, *tail):
  567. if obj.is_indirect():
  568. obj = obj.resolve_indirect_chain()
  569. if not obj.is_dict():
  570. raise Exception(f'not a dict: {obj}')
  571. if not tail:
  572. return
  573. doc = obj.get_bound_document()
  574. for key in tail[:-1]:
  575. next_obj = obj.dict_get(key)
  576. if not next_obj.m_internal:
  577. # We have to create entries
  578. next_obj = doc.new_dict(1)
  579. obj.dict_put(key, next_obj)
  580. obj = next_obj
  581. key = tail[-1]
  582. obj.dict_put(key, val)
  583. # Raise exception if an attempt is made to call mpdf_dict_putl_drop.
  584. #
  585. @insert( 'cppyy.gbl.mpdf_dict_putl_drop', 'cppyy.gbl.mupdf.PdfObj.dict_putl_drop')
  586. def _(obj, *tail):
  587. raise Exception(
  588. 'mupdf.PdfObj.dict_putl_drop() is unsupported and unnecessary'
  589. ' in Python because reference counting is automatic.'
  590. ' Instead use mupdf.PdfObj.dict_putl()'
  591. )
  592. def ppdf_set_annot_color(annot, color):
  593. '''
  594. Python implementation of pdf_set_annot_color() using
  595. ppdf_set_annot_color2().
  596. '''
  597. if isinstance(color, float):
  598. ppdf_set_annot_color2(annot, 1, color, 0, 0, 0)
  599. elif len(color) == 1:
  600. ppdf_set_annot_color2(annot, 1, color[0], 0, 0, 0)
  601. elif len(color) == 2:
  602. ppdf_set_annot_color2(annot, 2, color[0], color[1], 0, 0)
  603. elif len(color) == 3:
  604. ppdf_set_annot_color2(annot, 3, color[0], color[1], color[2], 0)
  605. elif len(color) == 4:
  606. ppdf_set_annot_color2(annot, 4, color[0], color[1], color[2], color[3])
  607. else:
  608. raise Exception( f'Unexpected color should be float or list of 1-4 floats: {color}')
  609. # Python-friendly alternative to fz_runetochar().
  610. #
  611. cppyy.cppdef(f'''
  612. std::vector<unsigned char> mupdf_runetochar2(int rune)
  613. {{
  614. std::vector<unsigned char> buffer(10);
  615. int n = mupdf::runetochar((char*) &buffer[0], rune);
  616. assert(n < sizeof(buffer));
  617. buffer.resize(n);
  618. if (0)
  619. {{
  620. std::cerr << __FUNCTION__ << ": rune=" << rune << ":";
  621. for (auto i: buffer)
  622. {{
  623. std::cerr << ' ' << (int) i;
  624. }}
  625. std::cerr << "\\\\n";
  626. }}
  627. return buffer;
  628. }}
  629. ''')
  630. @insert( 'cppyy.gbl.mupdf.runetochar2', 'cppyy.gbl.mupdf.mfz_runetochar2')
  631. def mupdf_runetochar2( rune):
  632. vuc = cppyy.gbl.mupdf_runetochar2( rune)
  633. ret = bytearray()
  634. #jlib.log( '{vuc!r=}')
  635. for uc in vuc:
  636. #jlib.log( '{uc!r=}')
  637. ret.append( ord( uc))
  638. #jlib.log( '{ret!r=}')
  639. return ret
  640. # Patch mfz_text_language_from_string() to treat str=None as nullptr.
  641. #
  642. @override( 'cppyy.gbl.mupdf.mfz_text_language_from_string')
  643. def _( s, _original):
  644. if s is None:
  645. s = ctypes.c_char_p()
  646. return _original( s)
  647. # Python-friendly versions of fz_convert_color(), returning (dv0,
  648. # dv1, dv2, dv3).
  649. #
  650. cppyy.cppdef(f'''
  651. struct mupdf_convert_color2_v
  652. {{
  653. float v0;
  654. float v1;
  655. float v2;
  656. float v3;
  657. }};
  658. void mupdf_convert_color2(
  659. fz_colorspace* ss,
  660. const float* sv,
  661. fz_colorspace* ds,
  662. mupdf_convert_color2_v* dv,
  663. fz_colorspace* is,
  664. fz_color_params params
  665. )
  666. {{
  667. mupdf::convert_color(ss, sv, ds, &dv->v0, is, params);
  668. }}
  669. ''')
  670. @replace( 'cppyy.gbl.mupdf.convert_color')
  671. def _convert_color( ss, sv, ds, is_, params):
  672. # Note that <sv> should be a cppyy representation of a float*.
  673. dv = cppyy.gbl.mupdf_convert_color2_v()
  674. if is_ is None:
  675. is_ = cppyy.ll.cast[ 'fz_colorspace*']( 0)
  676. cppyy.gbl.mupdf_convert_color2( ss, sv, ds, dv, is_, params)
  677. return dv.v0, dv.v1, dv.v2, dv.v3
  678. cppyy.cppdef(f'''
  679. namespace mupdf
  680. {{
  681. std::vector<int> mfz_memrnd2(int length)
  682. {{
  683. std::vector<unsigned char> ret(length);
  684. mupdf::mfz_memrnd(&ret[0], length);
  685. /* Unlike SWIG, cppyy converts
  686. std::vector<unsigned char> into a string, not a
  687. list of integers. */
  688. std::vector<int> ret2( ret.begin(), ret.end());
  689. return ret2;
  690. }}
  691. }}
  692. ''')
  693. # Provide an overload for mfz_recognize_image_format(), because
  694. # the default unsigned char p[8] causes problems.
  695. #
  696. cppyy.cppdef(f'''
  697. namespace mupdf
  698. {{
  699. int mfz_recognize_image_format(const void* p)
  700. {{
  701. int ret = mfz_recognize_image_format( (unsigned char*) p);
  702. return ret;
  703. }}
  704. }}
  705. ''')
  706. # Wrap mupdf::Pixmap::md5_pixmap() and mupdf::Md5::md5_final2
  707. # to make them return a Python 'bytes' instance. The
  708. # C++ code returns std::vector<unsigned char> which
  709. # SWIG converts into something that can be trivially
  710. # converted to a Python 'bytes', but with cppyy it is a
  711. # cppyy.gbl.std.vector['unsigned char'] which gives error
  712. # "TypeError: 'str' object cannot be interpreted as an integer"
  713. # if used to construct a Python 'bytes'.
  714. #
  715. @override( 'cppyy.gbl.mupdf.Pixmap.md5_pixmap')
  716. def _( pixmap, _original):
  717. r = _original( pixmap)
  718. assert isinstance(r, cppyy.gbl.std.vector['unsigned char'])
  719. r = bytes( str( r), 'latin')
  720. return r
  721. @override( 'cppyy.gbl.mupdf.Md5.md5_final2')
  722. def _( md5, _original):
  723. r = _original( md5)
  724. assert isinstance(r, cppyy.gbl.std.vector['unsigned char'])
  725. r = bytes( str( r), 'latin')
  726. return r
  727. # Allow cppyy.gbl.mupdf.mfz_md5_update() to be called with a buffer.
  728. def mupdf_mfz_md5_update_buffer( md5, buffer):
  729. len_, data = buffer.buffer_storage()
  730. # <data> will be a void*.
  731. data = cppyy.ll.cast[ 'const unsigned char*']( data)
  732. return cppyy.gbl.mupdf.mfz_md5_update( md5, data, len_)
  733. cppyy.gbl.mupdf.mfz_md5_update_buffer = mupdf_mfz_md5_update_buffer
  734. # Make a version of mpdf_to_name() that returns a std::string
  735. # so that cppyy can wrap it, and make Python wrappers for
  736. # mupdf::mpdf_to_name() and mupdf::PdfObj::to_name() use this
  737. # new version.
  738. #
  739. # Otherwise cppyy fails with curious error "TypeError: function
  740. # takes exactly 5 arguments (1 given)".
  741. #
  742. cppyy.cppdef(f'''
  743. namespace mupdf
  744. {{
  745. std::string mpdf_to_name2(const PdfObj& obj)
  746. {{
  747. /* Convert const char* to std::string. */
  748. return mpdf_to_name( obj);
  749. }}
  750. }}
  751. ''')
  752. def mpdf_to_name( obj):
  753. return str( cppyy.gbl.mupdf.mpdf_to_name2( obj))
  754. cppyy.gbl.mupdf.mpdf_to_name = mpdf_to_name
  755. cppyy.gbl.mupdf.PdfObj.to_name = mpdf_to_name
  756. # Wrap mfz_new_font_from_*() to convert name=None to name=(const
  757. # char*) nullptr.
  758. #
  759. @override( 'cppyy.gbl.mupdf.mfz_new_font_from_buffer')
  760. def _( name, fontfile, index, use_glyph_bbox, _original):
  761. if name is None:
  762. name = ctypes.c_char_p()
  763. return _original( name, fontfile, index, use_glyph_bbox)
  764. @override( 'cppyy.gbl.mupdf.mfz_new_font_from_file')
  765. def _( name, fontfile, index, use_glyph_bbox, _original):
  766. if name is None:
  767. name = ctypes.c_char_p()
  768. return _original( name, fontfile, index, use_glyph_bbox)
  769. @override( 'cppyy.gbl.mupdf.mfz_new_font_from_memory')
  770. def _( name, data, len, index, use_glyph_bbox, _original):
  771. if name is None:
  772. name = ctypes.c_char_p()
  773. return _original( name, data, len, index, use_glyph_bbox)
  774. # String representation of a fz_font_flags_t, for debugging.
  775. #
  776. cppyy.cppdef(f'''
  777. std::string mupdf_mfz_font_flags_string( const fz_font_flags_t& ff)
  778. {{
  779. std::stringstream out;
  780. out << "{{"
  781. << " is_mono=" << ff.is_mono
  782. << " is_serif=" << ff.is_serif
  783. << " is_bold=" << ff.is_bold
  784. << " is_italic=" << ff.is_italic
  785. << " ft_substitute=" << ff.ft_substitute
  786. << " ft_stretch=" << ff.ft_stretch
  787. << " fake_bold=" << ff.fake_bold
  788. << " fake_italic=" << ff.fake_italic
  789. << " has_opentype=" << ff.has_opentype
  790. << " invalid_bbox=" << ff.invalid_bbox
  791. << " cjk=" << ff.cjk
  792. << " cjk_lang=" << ff.cjk_lang
  793. << "}}";
  794. return out.str();
  795. }}
  796. ''')
  797. # Direct access to fz_font_flags_t::ft_substitute for mupdfpy,
  798. # while cppyy doesn't handle bitfields correctly.
  799. #
  800. cppyy.cppdef(f'''
  801. int mupdf_mfz_font_flags_ft_substitute( const fz_font_flags_t& ff)
  802. {{
  803. return ff.ft_substitute;
  804. }}
  805. ''')
  806. # Allow mupdfpy to work - requires make_bookmark2() due to SWIG weirdness.
  807. #
  808. cppyy.gbl.mupdf.make_bookmark2 = cppyy.gbl.mupdf.make_bookmark
  809. cppyy.gbl.mupdf.lookup_bookmark2 = cppyy.gbl.mupdf.lookup_bookmark
  810. """)
  811. # Add auto-generate out-param wrappers - these modify fn wrappers to return
  812. # out-params as tuples.
  813. #
  814. #text += generated.cppyy_extra
  815. jlib.fs_ensure_parent_dir( path)
  816. jlib.fs_update( text, path)