mupdfwrap_test.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495
  1. #!/usr/bin/env python3
  2. '''
  3. Simple tests of the Python MuPDF API.
  4. '''
  5. import inspect
  6. import os
  7. import platform
  8. import sys
  9. if os.environ.get('MUPDF_PYTHON') in ('swig', None):
  10. # PYTHONPATH should have been set up to point to a build/shared-*/
  11. # directory containing mupdf.so generated by scripts/mupdfwrap.py and SWIG.
  12. import mupdf
  13. elif os.environ.get('MUPDF_PYTHON') == 'cppyy':
  14. sys.path.insert(0, os.path.abspath(f'{__file__}/../../platform/python'))
  15. import mupdf_cppyy
  16. del sys.path[0]
  17. mupdf = mupdf_cppyy.cppyy.gbl.mupdf
  18. else:
  19. raise Exception(f'Unrecognised $MUPDF_PYTHON: {os.environ.get("MUPDF_PYTHON")}')
  20. _log_prefix = ''
  21. def log(text):
  22. f = inspect.stack()[1]
  23. print(f'{f.filename}:{f.lineno} {_log_prefix}{text}', file=sys.stderr)
  24. sys.stderr.flush()
  25. def log_prefix_set(prefix):
  26. global _log_prefix
  27. _log_prefix = prefix
  28. g_test_n = 0
  29. g_mupdf_root = os.path.abspath('%s/../..' % __file__)
  30. def show_stext(document):
  31. '''
  32. Shows all available information about Stext blocks, lines and characters.
  33. '''
  34. for p in range(document.count_pages()):
  35. page = document.load_page(p)
  36. stextpage = mupdf.StextPage(page, mupdf.StextOptions())
  37. for block in stextpage:
  38. block_ = block.m_internal
  39. log(f'block: type={block_.type} bbox={block_.bbox}')
  40. for line in block:
  41. line_ = line.m_internal
  42. log(f' line: wmode={line_.wmode}'
  43. + f' dir={line_.dir}'
  44. + f' bbox={line_.bbox}'
  45. )
  46. for char in line:
  47. char_ = char.m_internal
  48. log(f' char: {chr(char_.c)!r} c={char_.c:4} color={char_.color}'
  49. + f' origin={char_.origin}'
  50. + f' quad={char_.quad}'
  51. + f' size={char_.size:6.2f}'
  52. + f' font=('
  53. + f'is_mono={char_.font.flags.is_mono}'
  54. + f' is_bold={char_.font.flags.is_bold}'
  55. + f' is_italic={char_.font.flags.is_italic}'
  56. + f' ft_substitute={char_.font.flags.ft_substitute}'
  57. + f' ft_stretch={char_.font.flags.ft_stretch}'
  58. + f' fake_bold={char_.font.flags.fake_bold}'
  59. + f' fake_italic={char_.font.flags.fake_italic}'
  60. + f' has_opentype={char_.font.flags.has_opentype}'
  61. + f' invalid_bbox={char_.font.flags.invalid_bbox}'
  62. + f' name={char_.font.name}'
  63. + f')'
  64. )
  65. def test_filter(path):
  66. if platform.system() == 'Windows':
  67. print( 'Not testing mupdf.PdfFilterOptions2 because known to fail on Windows.')
  68. return
  69. # pdf_sanitizer_filter_options.
  70. class MySanitizeFilterOptions( mupdf.PdfSanitizeFilterOptions2):
  71. def __init__( self):
  72. super().__init__()
  73. self.use_virtual_text_filter()
  74. self.state = 1
  75. def text_filter( self, ctx, ucsbuf, ucslen, trm, ctm, bbox):
  76. if 0:
  77. log( f'text_filter(): ctx={ctx} ucsbuf={ucsbuf} ucslen={ucslen} trm={trm} ctm={ctm} bbox={bbox}')
  78. # Remove every other item.
  79. self.state = 1 - self.state
  80. return self.state
  81. sanitize_filter_options = MySanitizeFilterOptions()
  82. # pdf_filter_factory.
  83. class MyPdfFilterFactory( mupdf.PdfFilterFactory2):
  84. def __init__( self, sopts):
  85. super().__init__()
  86. self.sopts = sopts
  87. self.use_virtual_filter()
  88. def filter(self, ctx, doc, chain, struct_parents, transform, options):
  89. return mupdf.ll_pdf_new_sanitize_filter( doc, chain, struct_parents, transform, options, self.sopts)
  90. def filter_bad(self, ctx, doc, chain, struct_parents, transform, options, extra_arg):
  91. return mupdf.ll_pdf_new_sanitize_filter( doc, chain, struct_parents, transform, options, self.sopts)
  92. filter_factory = MyPdfFilterFactory( sanitize_filter_options.internal())
  93. # pdf_filter_options.
  94. class MyFilterOptions( mupdf.PdfFilterOptions2):
  95. def __init__( self):
  96. super().__init__()
  97. self.recurse = 1
  98. self.instance_forms = 0
  99. self.ascii = 1
  100. filter_options = MyFilterOptions()
  101. filter_options.add_factory( filter_factory.internal())
  102. document = mupdf.PdfDocument(path)
  103. for p in range(document.pdf_count_pages()):
  104. page = document.pdf_load_page(p)
  105. log( f'Running document.pdf_filter_page_contents on page {p}')
  106. document.pdf_begin_operation('test filter')
  107. document.pdf_filter_page_contents(page, filter_options)
  108. document.pdf_end_operation()
  109. if 1:
  110. # Try again but with a broken filter_factory callback method, and check
  111. # we get an appropriate exception. This checks that the SWIG Director
  112. # exception-handling code is working.
  113. #
  114. filter_factory.filter = filter_factory.filter_bad
  115. page = document.pdf_load_page(0)
  116. document.pdf_begin_operation('test filter')
  117. try:
  118. document.pdf_filter_page_contents(page, filter_options)
  119. except Exception as e:
  120. e_expected_text = "filter_bad() missing 1 required positional argument: 'extra_arg'"
  121. if e_expected_text not in str(e):
  122. raise Exception(f'Error does not contain expected text: {e_expected_text}') from e
  123. finally:
  124. document.pdf_end_operation()
  125. if 1:
  126. document.pdf_save_document('mupdf_test-out0.pdf', mupdf.PdfWriteOptions())
  127. def test_install_load_system_font(path):
  128. '''
  129. Very basic test of mupdf.fz_install_load_system_font_funcs(). We check
  130. that the fonts returned by our python callback is returned if we ask for a
  131. non-existent font.
  132. We also render `path` as a PNG with/without our font override. This isn't
  133. particularly useful, but if `path` contained references to unknown fonts,
  134. it would give different results.
  135. '''
  136. print(f'test_install_load_system_font()')
  137. def make_png(infix=''):
  138. document = mupdf.FzDocument(path)
  139. pixmap = mupdf.FzPixmap(document, 0, mupdf.FzMatrix(), mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB), 0)
  140. path_out = f'{path}{infix}.png'
  141. pixmap.fz_save_pixmap_as_png(path_out)
  142. print(f'Have created: {path_out}.')
  143. make_png()
  144. trace = list()
  145. replacement_font = mupdf.fz_new_font_from_file(
  146. None,
  147. os.path.abspath(f'{__file__}/../../resources/fonts/urw/NimbusRoman-BoldItalic.cff'),
  148. 0,
  149. 0,
  150. )
  151. assert replacement_font.m_internal
  152. print(f'{replacement_font.m_internal.name=} {replacement_font.m_internal.glyph_count=}')
  153. def font_f(name, bold, italic, needs_exact_metrics):
  154. trace.append((name, bold, italic, needs_exact_metrics))
  155. print(f'font_f(): Looking for font: {name=} {bold=} {italic=} {needs_exact_metrics=}.')
  156. # Always return `replacement_font`.
  157. return replacement_font
  158. def f_cjk(name, ordering, serif):
  159. trace.append((name, ordering, serif))
  160. print(f'f_cjk(): Looking for font: {name=} {ordering=} {serif=}.')
  161. return None
  162. def f_fallback(script, language, serif, bold, italic):
  163. trace.append((script, language, serif, bold, italic))
  164. print(f'f_fallback(): looking for font: {script=} {language=} {serif=} {bold=} {italic=}.')
  165. return None
  166. mupdf.fz_install_load_system_font_funcs(font_f, f_cjk, f_fallback)
  167. # Check that asking for any font returns `replacement_font`.
  168. font = mupdf.fz_load_system_font("some-font-name", 0, 0, 0)
  169. assert isinstance(font, mupdf.FzFont)
  170. assert trace == [
  171. ('some-font-name', 0, 0, 0),
  172. ], f'Incorrect {trace=}.'
  173. assert font.m_internal
  174. print(f'{font.m_internal.name=} {font.m_internal.glyph_count=}')
  175. assert font.m_internal.name == replacement_font.m_internal.name
  176. assert font.m_internal.glyph_count == replacement_font.m_internal.glyph_count
  177. make_png('-replace-font')
  178. # Restore default behaviour.
  179. mupdf.fz_install_load_system_font_funcs()
  180. font = mupdf.fz_load_system_font("some-font-name", 0, 0, 0)
  181. assert not font.m_internal
  182. def test(path):
  183. '''
  184. Runs various mupdf operations on <path>, which is assumed to be a file that
  185. mupdf can open.
  186. '''
  187. log(f'testing path={path}')
  188. assert os.path.isfile(path)
  189. global g_test_n
  190. g_test_n += 1
  191. test_install_load_system_font(path)
  192. # See notes in wrap/swig.py:build_swig() about buffer_extract() and
  193. # buffer_storage().
  194. #
  195. assert getattr(mupdf.FzBuffer, 'fz_buffer_storage_raw', None) is None
  196. assert getattr(mupdf.FzBuffer, 'fz_buffer_storage')
  197. assert getattr(mupdf.FzBuffer, 'fz_buffer_extract')
  198. assert getattr(mupdf.FzBuffer, 'fz_buffer_extract_copy')
  199. # Test that we get the expected Python exception instance and text.
  200. document = mupdf.FzDocument(path)
  201. try:
  202. mupdf.fz_load_page(document, 99999999)
  203. except mupdf.FzErrorArgument as e:
  204. log(f'{type(e)=} {str(e)=} {repr(e)=}.')
  205. log(f'{e.what()=}.')
  206. expected = 'code=4: invalid page number: 100000000'
  207. assert str(e) == expected and e.what() == expected, (
  208. f'Incorrect exception text:\n'
  209. f' {str(e)=}\n'
  210. f' {e.what()=}\n'
  211. f' {expected=}'
  212. )
  213. except Exception as e:
  214. assert 0, f'Incorrect exception {type(e)=} {e=}.'
  215. else:
  216. assert 0, f'No expected exception.'
  217. # Test SWIG Director wrapping of pdf_filter_options:
  218. #
  219. test_filter(path)
  220. # Test operations using functions:
  221. #
  222. log('Testing functions.')
  223. log(f' Opening: %s' % path)
  224. document = mupdf.fz_open_document(path)
  225. log(f' mupdf.fz_needs_password(document)={mupdf.fz_needs_password(document)}')
  226. log(f' mupdf.fz_count_pages(document)={mupdf.fz_count_pages(document)}')
  227. log(f' mupdf.fz_document_output_intent(document)={mupdf.fz_document_output_intent(document)}')
  228. # Test operations using classes:
  229. #
  230. log(f'Testing classes')
  231. document = mupdf.FzDocument(path)
  232. log(f'Have created mupdf.FzDocument for {path}')
  233. log(f'document.fz_needs_password()={document.fz_needs_password()}')
  234. log(f'document.fz_count_pages()={document.fz_count_pages()}')
  235. if 0:
  236. log(f'stext info:')
  237. show_stext(document)
  238. for k in (
  239. 'format',
  240. 'encryption',
  241. 'info:Author',
  242. 'info:Title',
  243. 'info:Creator',
  244. 'info:Producer',
  245. 'qwerty',
  246. ):
  247. v = document.fz_lookup_metadata(k)
  248. log(f'document.fz_lookup_metadata() k={k} returned v={v!r}')
  249. if k == 'qwerty':
  250. assert v is None, f'v={v!r}'
  251. else:
  252. pass
  253. zoom = 10
  254. scale = mupdf.FzMatrix.fz_scale(zoom/100., zoom/100.)
  255. page_number = 0
  256. log(f'Have created scale: a={scale.a} b={scale.b} c={scale.c} d={scale.d} e={scale.e} f={scale.f}')
  257. colorspace = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
  258. log(f'colorspace.m_internal.key_storable.storable.refs={colorspace.m_internal.key_storable.storable.refs!r}')
  259. if 0:
  260. c = colorspace.fz_clamp_color([3.14])
  261. log('colorspace.clamp_color returned c={c}')
  262. pixmap = mupdf.FzPixmap(document, page_number, scale, colorspace, 0)
  263. log(f'Have created pixmap: {pixmap.m_internal.w} {pixmap.m_internal.h} {pixmap.m_internal.stride} {pixmap.m_internal.n}')
  264. filename = f'mupdf_test-out1-{g_test_n}.png'
  265. pixmap.fz_save_pixmap_as_png(filename)
  266. log(f'Have created {filename} using pixmap.save_pixmap_as_png().')
  267. # Print image data in ascii PPM format. Copied from
  268. # mupdf/docs/examples/example.c.
  269. #
  270. samples = pixmap.samples()
  271. stride = pixmap.stride()
  272. n = pixmap.n()
  273. filename = f'mupdf_test-out2-{g_test_n}.ppm'
  274. with open(filename, 'w') as f:
  275. f.write('P3\n')
  276. f.write('%s %s\n' % (pixmap.m_internal.w, pixmap.m_internal.h))
  277. f.write('255\n')
  278. for y in range(0, pixmap.m_internal.h):
  279. for x in range(pixmap.m_internal.w):
  280. if x:
  281. f.write(' ')
  282. offset = y * stride + x * n
  283. if hasattr(mupdf, 'bytes_getitem'):
  284. # swig
  285. f.write('%3d %3d %3d' % (
  286. mupdf.bytes_getitem(samples, offset + 0),
  287. mupdf.bytes_getitem(samples, offset + 1),
  288. mupdf.bytes_getitem(samples, offset + 2),
  289. ))
  290. else:
  291. # cppyy
  292. f.write('%3d %3d %3d' % (
  293. samples[offset + 0],
  294. samples[offset + 1],
  295. samples[offset + 2],
  296. ))
  297. f.write('\n')
  298. log(f'Have created {filename} by scanning pixmap.')
  299. # Generate .png and but create Pixmap from Page instead of from Document.
  300. #
  301. page = mupdf.FzPage(document, 0)
  302. separations = page.fz_page_separations()
  303. log(f'page_separations() returned {"true" if separations else "false"}')
  304. pixmap = mupdf.FzPixmap(page, scale, colorspace, 0)
  305. filename = f'mupdf_test-out3-{g_test_n}.png'
  306. pixmap.fz_save_pixmap_as_png(filename)
  307. log(f'Have created {filename} using pixmap.fz_save_pixmap_as_png()')
  308. # Show links
  309. log(f'Links.')
  310. page = mupdf.FzPage(document, 0)
  311. link = mupdf.fz_load_links(page);
  312. log(f'{link}')
  313. if link:
  314. for i in link:
  315. log(f'{i}')
  316. # Check we can iterate over Link's, by creating one manually.
  317. #
  318. link = mupdf.FzLink(mupdf.FzRect(0, 0, 1, 1), "hello")
  319. log(f'items in <link> are:')
  320. for i in link:
  321. log(f' {i.m_internal.refs} {i.m_internal.uri}')
  322. # Check iteration over Outlines. We do depth-first iteration.
  323. #
  324. log(f'Outlines.')
  325. def olog(text):
  326. if 0:
  327. log(text)
  328. num_outline_items = 0
  329. depth = 0
  330. it = mupdf.FzOutlineIterator(document)
  331. while 1:
  332. item = it.fz_outline_iterator_item()
  333. olog(f'depth={depth} valid={item.valid()}')
  334. if item.valid():
  335. log(f'{" "*depth*4}uri={item.uri()} is_open={item.is_open()} title={item.title()}')
  336. num_outline_items += 1
  337. else:
  338. olog(f'{" "*depth*4}<null>')
  339. r = it.fz_outline_iterator_down()
  340. olog(f'depth={depth} down => {r}')
  341. if r >= 0:
  342. depth += 1
  343. if r < 0:
  344. r = it.fz_outline_iterator_next()
  345. olog(f'depth={depth} next => {r}')
  346. assert r
  347. if r:
  348. # No more items at current depth, so repeatedly go up until we
  349. # can go right.
  350. end = 0
  351. while 1:
  352. r = it.fz_outline_iterator_up()
  353. olog(f'depth={depth} up => {r}')
  354. if r < 0:
  355. # We are at EOF. Need to break out of top-level loop.
  356. end = 1
  357. break
  358. depth -= 1
  359. r = it.fz_outline_iterator_next()
  360. olog(f'depth={depth} next => {r}')
  361. if r == 0:
  362. # There are items at this level.
  363. break
  364. if end:
  365. break
  366. log(f'num_outline_items={num_outline_items}')
  367. # Check iteration over StextPage.
  368. #
  369. log(f'StextPage.')
  370. stext_options = mupdf.FzStextOptions(0)
  371. page_num = 40
  372. try:
  373. stext_page = mupdf.FzStextPage(document, page_num, stext_options)
  374. except Exception:
  375. log(f'no page_num={page_num}')
  376. else:
  377. device_stext = mupdf.FzDevice(stext_page, stext_options)
  378. matrix = mupdf.FzMatrix()
  379. page = mupdf.FzPage(document, 0)
  380. cookie = mupdf.FzCookie()
  381. page.fz_run_page(device_stext, matrix, cookie)
  382. log(f' stext_page is:')
  383. for block in stext_page:
  384. log(f' block:')
  385. for line in block:
  386. line_text = ''
  387. for char in line:
  388. line_text += chr(char.m_internal.c)
  389. log(f' {line_text}')
  390. device_stext.fz_close_device()
  391. # Check fz_search_page2().
  392. items = mupdf.fz_search_page2(document, 0, "compression", 20)
  393. print(f'{len(items)=}')
  394. for item in items:
  395. print(f' {item.mark=} {item.quad=}')
  396. # Check copy-constructor.
  397. log(f'Checking copy-constructor')
  398. document2 = mupdf.FzDocument(document)
  399. del document
  400. page = mupdf.FzPage(document2, 0)
  401. scale = mupdf.FzMatrix()
  402. pixmap = mupdf.FzPixmap(page, scale, colorspace, 0)
  403. pixmap.fz_save_pixmap_as_png('mupdf_test-out3.png')
  404. stdout = mupdf.FzOutput(mupdf.FzOutput.Fixed_STDOUT)
  405. log(f'{type(stdout)} {stdout.m_internal.state}')
  406. mediabox = page.fz_bound_page()
  407. out = mupdf.FzDocumentWriter(filename, 'png', '', mupdf.FzDocumentWriter.FormatPathType_DOCUMENT)
  408. dev = out.fz_begin_page(mediabox)
  409. page.fz_run_page(dev, mupdf.FzMatrix(mupdf.fz_identity), mupdf.FzCookie())
  410. out.fz_end_page()
  411. # Check out-params are converted into python return value.
  412. bitmap = mupdf.FzBitmap(10, 20, 8, 72, 72)
  413. bitmap_details = bitmap.fz_bitmap_details()
  414. log(f'{bitmap_details}')
  415. assert list(bitmap_details) == [10, 20, 8, 12], f'bitmap_details={bitmap_details!r}'
  416. log(f'finished test of %s' % path)
  417. if __name__ == '__main__':
  418. print(f'{mupdf.Py_LIMITED_API=}', flush=1)
  419. paths = sys.argv[1:]
  420. if not paths:
  421. paths = [
  422. f'{g_mupdf_root}/thirdparty/zlib/zlib.3.pdf',
  423. ]
  424. # Run test() on all the .pdf files in the mupdf repository.
  425. #
  426. for path in paths:
  427. log_prefix_set(f'{os.path.relpath(path, g_mupdf_root)}: ')
  428. try:
  429. test(path)
  430. finally:
  431. log_prefix_set('')
  432. log(f'finished')