setup.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. #!/usr/bin/env python3
  2. '''
  3. Installation script for MuPDF Python bindings, using scripts/pipcl.py.
  4. Notes:
  5. When building an sdist (e.g. with 'pip sdist'), we use clang-python to
  6. generate C++ source which is then included in the sdist.
  7. This allows wheels to be built from an sdist without requiring clang-python
  8. to be installed.
  9. Internal testing only - environmental variables:
  10. MUPDF_SETUP_BUILD_DIR
  11. Overrides the default build directory.
  12. MUPDF_SETUP_USE_CLANG_PYTHON
  13. Affects whether we use clang-python when building.
  14. If set, must be '0' or '1', and we override the default and do not
  15. ('0') / do ('1') use clang-python to generate C++ source code from
  16. MuPDF headers.
  17. If we are an sdist we default to not re-generating C++ - the generated
  18. files will be already available in platform/c++/. Otherwise we default
  19. to generating C++ source code.
  20. MUPDF_SETUP_USE_SWIG
  21. If set, must be '0' or '1', and we do not ('0') / do ('1') attempt to
  22. run swig.
  23. '''
  24. import os
  25. import platform
  26. import re
  27. import subprocess
  28. import sys
  29. import time
  30. def log(text=''):
  31. for line in text.split('\n'):
  32. print(f'mupdf:setup.py: {line}')
  33. sys.stdout.flush()
  34. def cache(function):
  35. '''
  36. Simple (and probably unnecessary) caching decorator.
  37. '''
  38. cache = {}
  39. def wrapper(*args):
  40. if not args in cache:
  41. cache[args] = function()
  42. return cache[args]
  43. return wrapper
  44. @cache
  45. def root_dir():
  46. return os.path.dirname(os.path.abspath(__file__))
  47. @cache
  48. def windows():
  49. s = platform.system()
  50. return s == 'Windows' or s.startswith('CYGWIN')
  51. @cache
  52. def macos():
  53. s = platform.system()
  54. return s == 'Darwin'
  55. @cache
  56. def openbsd():
  57. s = platform.system()
  58. return s == 'OpenBSD'
  59. @cache
  60. def msys2():
  61. return platform.system().startswith('MSYS_NT-')
  62. @cache
  63. def build_dir():
  64. # This is x86/x64-specific.
  65. #
  66. # We generate 32 or 64-bit binaries to match whatever Python we
  67. # are running under.
  68. #
  69. ret = os.environ.get('MUPDF_SETUP_BUILD_DIR')
  70. if ret is None:
  71. cpu = 'x32' if sys.maxsize == 2**31 - 1 else 'x64'
  72. python_version = '.'.join(platform.python_version().split('.')[:2])
  73. ret = f'{root_dir()}/build/shared-release-{cpu}-py{python_version}'
  74. return ret
  75. @cache
  76. def in_sdist():
  77. return os.path.exists(f'{root_dir()}/PKG-INFO')
  78. sys.path.append(f'{root_dir()}/scripts')
  79. import pipcl
  80. @cache
  81. def mupdf_version():
  82. '''
  83. Returns version string.
  84. If $MUPDF_SETUP_VERSION is set we use it directly, asserting that it starts
  85. with the version string defined in include/mupdf/fitz/version.h.
  86. Otherwise if we are in an sdist ('PKG-INFO' exists) we use its
  87. version. We assert that this starts with the base version in
  88. include/mupdf/fitz/version.h.
  89. Otherwise we generate a version string by appending the current date and
  90. time to the base version in include/mupdf/fitz/version.h. For example
  91. '1.18.0.20210330.1800'.
  92. '''
  93. return mupdf_version_internal()
  94. def mupdf_version_internal(t_tuple=None):
  95. '''
  96. Return version number, with doctest check for broken behaviour with leading
  97. zeros.
  98. >>> t0str = '2024-06-06-00:00'
  99. >>> t0tuple = time.strptime(t0str, '%Y-%m-%d-%H:%M')
  100. >>> v = mupdf_version_internal(t0tuple)
  101. >>> print(v, file=sys.stderr)
  102. >>> assert v.endswith('.202406060000')
  103. '''
  104. with open(f'{root_dir()}/include/mupdf/fitz/version.h') as f:
  105. text = f.read()
  106. m = re.search('\n#define FZ_VERSION "([^"]+)"\n', text)
  107. assert m
  108. base_version = m.group(1)
  109. # If MUPDF_SETUP_VERSION exists, use it.
  110. #
  111. ret = os.environ.get('MUPDF_SETUP_VERSION')
  112. if ret:
  113. log(f'Using version from $MUPDF_SETUP_VERSION: {ret}')
  114. assert ret.startswith(base_version)
  115. return ret
  116. # If we are in an sdist, so use the version from the PKG-INFO file.
  117. #
  118. if in_sdist():
  119. items = pipcl.parse_pkg_info('PKG-INFO')
  120. assert items['Name'] == 'mupdf'
  121. ret = items['Version']
  122. #log(f'Using version from PKG-INFO: {ret}')
  123. assert ret.startswith(base_version)
  124. return ret
  125. # If we get here, we are in a source tree.
  126. #
  127. # We use the MuPDF version with a unique(ish) suffix based on the current
  128. # date and time, so we can make multiple Python releases without requiring
  129. # an increment to the MuPDF version.
  130. #
  131. # This also allows us to easily experiment on test.pypi.org.
  132. #
  133. # We have to avoid the time component(s) containing `.0` as this is
  134. # prohibited by PEP-440.
  135. #
  136. if t_tuple is None:
  137. t_tuple = time.localtime()
  138. tt = time.strftime(".%Y%m%d%H%M", t_tuple)
  139. tail = tt.replace('.0', '.')
  140. ret = base_version + tail
  141. #log(f'Have created version number: {ret}')
  142. pipcl._assert_version_pep_440(ret)
  143. return ret
  144. def git_info():
  145. '''
  146. Returns (current, origin, diff):
  147. current: git id from 'git show'.
  148. origin: git id from 'git show origin'.
  149. diff: diff relative to current.
  150. '''
  151. def get_id(command):
  152. text = subprocess.check_output(command, shell=True, cwd=root_dir())
  153. text = text.decode('utf8')
  154. text = text.split('\n', 1)[0]
  155. text = text.split(' ', 1)[0]
  156. return text
  157. current = get_id('git show --pretty=oneline')
  158. origin = get_id('git show --pretty=oneline origin')
  159. diff = subprocess.check_output(f'cd {root_dir()} && git diff', shell=True).decode('utf8')
  160. return current, origin, diff
  161. def get_flag(name, default):
  162. '''
  163. name:
  164. Name of environmental variable.
  165. default:
  166. Value to return if <name> undefined.
  167. Returns False if name is '0', True if name is '1', <default> if
  168. undefined. Otherwise assert fails.
  169. '''
  170. value = os.environ.get(name)
  171. if value is None:
  172. ret = default
  173. elif value == '0':
  174. ret = False
  175. elif value == '1':
  176. ret = True
  177. else:
  178. assert 0, f'If set, ${name} must be "0" or "1", but is: {value!r}'
  179. log(f'name={name} default={default} value={value} ret={ret}')
  180. return ret
  181. # pipcl Callbacks.
  182. #
  183. def sdist():
  184. '''
  185. pipcl callback. We run './scripts/mupdfwrap.py -b 0' to create C++ files
  186. etc using clang-python, and return these generated files plus all files
  187. known to git. [This allows sdists to be used to generate wheels etc on
  188. machines without clang-python.]
  189. '''
  190. assert os.path.exists(f'{root_dir()}/.git'), f'Cannot make sdist because not a git checkout: {root_dir()}'
  191. # Create 'git-info' file containing git ids that identify this tree. For
  192. # the moment this is a simple text format, but we could possibly use pickle
  193. # instead, depending on whether we want to include more information, e.g.
  194. # diff relative to origin.
  195. #
  196. git_id, git_id_origin, git_diff = git_info()
  197. with open(f'{root_dir()}/git-info', 'w') as f:
  198. f.write(f'git-id: {git_id}\n')
  199. f.write(f'git-id-origin: {git_id_origin}\n')
  200. f.write(f'git-diff:\n{git_diff}\n')
  201. paths = pipcl.git_items( root_dir(), submodules=True)
  202. # Strip out some large test directories.
  203. i = 0
  204. while i < len( paths):
  205. path = paths[i]
  206. remove = False
  207. if (0
  208. or path.startswith( 'thirdparty/harfbuzz/test/')
  209. or path.startswith( 'thirdparty/tesseract/test/')
  210. or path.startswith( 'thirdparty/extract/test/')
  211. ):
  212. remove = True
  213. if remove:
  214. #log( f'Excluding: {path}')
  215. del paths[i]
  216. else:
  217. i += 1
  218. # Build C++ files and SWIG C code for inclusion in sdist, so that it can be
  219. # used on systems without clang-python or SWIG.
  220. #
  221. use_clang_python = get_flag('MUPDF_SETUP_USE_CLANG_PYTHON', True)
  222. use_swig = get_flag('MUPDF_SETUP_USE_SWIG', True)
  223. b = ''
  224. if use_clang_python:
  225. b += '0'
  226. if use_swig:
  227. b += '2'
  228. command = '' if os.getcwd() == root_dir() else f'cd {os.path.relpath(root_dir())} && '
  229. command += f'{sys.executable} ./scripts/mupdfwrap.py -d {build_dir()} -b "{b}"'
  230. log(f'Running: {command}')
  231. subprocess.check_call(command, shell=True)
  232. paths += [
  233. 'build/shared-release/mupdf.py',
  234. 'git-info',
  235. 'platform/c++/generated.pickle',
  236. 'platform/c++/implementation/classes.cpp',
  237. 'platform/c++/implementation/classes2.cpp',
  238. 'platform/c++/implementation/exceptions.cpp',
  239. 'platform/c++/implementation/functions.cpp',
  240. 'platform/c++/implementation/internal.cpp',
  241. 'platform/c++/include/mupdf/classes.h',
  242. 'platform/c++/include/mupdf/classes2.h',
  243. 'platform/c++/include/mupdf/exceptions.h',
  244. 'platform/c++/include/mupdf/functions.h',
  245. 'platform/c++/include/mupdf/internal.h',
  246. 'platform/c++/windows_mupdf.def',
  247. 'platform/python/mupdfcpp_swig.i.cpp',
  248. ]
  249. return paths
  250. def build():
  251. '''
  252. pipcl callback. Build MuPDF C, C++ and Python libraries and return list of
  253. created files.
  254. '''
  255. # If we are an sdist, default to not trying to run clang-python - the
  256. # generated files will already exist, and installing/using clang-python
  257. # might be tricky.
  258. #
  259. use_clang_python = get_flag('MUPDF_SETUP_USE_CLANG_PYTHON', not in_sdist())
  260. use_swig = get_flag('MUPDF_SETUP_USE_SWIG', True)
  261. b = ''
  262. if not windows():
  263. b = 'm' # Build C library.
  264. if use_clang_python:
  265. b += '0' # Build C++ source.
  266. b += '1' # Build C++ library (also contains C library on Windows).
  267. if use_swig:
  268. b += '2' # Build SWIG-generated source.
  269. b += '3' # Build SWIG library _mupdf.so.
  270. command = '' if root_dir() == os.getcwd() else f'cd {os.path.relpath(root_dir())} && '
  271. command += (
  272. f'"{sys.executable}" ./scripts/mupdfwrap.py'
  273. f' -d {build_dir()}'
  274. f' -b {b}'
  275. )
  276. do_build = os.environ.get('MUPDF_SETUP_DO_BUILD')
  277. if do_build == '0':
  278. # This is a hack for testing.
  279. log(f'Not doing build because $MUPDF_SETUP_DO_BUILD={do_build}')
  280. else:
  281. log(f'build(): Building MuPDF C, C++ and Python libraries with: {command}')
  282. subprocess.check_call(command, shell=True)
  283. # Return generated files to install or copy into wheel.
  284. #
  285. if windows():
  286. infix = '' if sys.maxsize == 2**31 - 1 else '64'
  287. names = [
  288. f'{build_dir()}/mupdfcpp{infix}.dll', # C and C++.
  289. f'{build_dir()}/_mupdf.pyd', # Python internals.
  290. f'{build_dir()}/mupdf.py', # Python.
  291. ]
  292. elif macos():
  293. log( f'Contents of {build_dir()} are:')
  294. for leaf in os.listdir(build_dir()):
  295. log( f' {leaf}')
  296. names = [
  297. f'{build_dir()}/libmupdf.dylib', # C.
  298. f'{build_dir()}/libmupdfcpp.so', # C++.
  299. f'{build_dir()}/_mupdf.so', # Python internals.
  300. f'{build_dir()}/mupdf.py', # Python.
  301. ]
  302. else:
  303. names = [
  304. pipcl.get_soname(f'{build_dir()}/libmupdf.so'), # C.
  305. pipcl.get_soname(f'{build_dir()}/libmupdfcpp.so'), # C++.
  306. f'{build_dir()}/_mupdf.so', # Python internals.
  307. f'{build_dir()}/mupdf.py', # Python.
  308. ]
  309. paths = []
  310. for name in names:
  311. paths.append((name, ''))
  312. log(f'build(): returning: {paths}')
  313. return paths
  314. def clean(all_):
  315. if all_:
  316. return [
  317. 'build',
  318. 'platform/win32/Release',
  319. 'platform/win32/ReleaseDLL',
  320. 'platform/win32/Win32',
  321. 'platform/win32/x64',
  322. ]
  323. else:
  324. # Ideally we would return selected directories in platform/win32/ if on
  325. # Windows, but that would get a little involved.
  326. #
  327. return build_dir()
  328. # Setup pipcl.
  329. #
  330. description = """
  331. Summary
  332. -------
  333. * Python bindings for the MuPDF PDF library.
  334. * A python module called ``mupdf``.
  335. * Generated from the MuPDF C++ API, which is itself generated from the MuPDF C API.
  336. * Provides Python functions that wrap most ``fz_`` and ``pdf_`` functions.
  337. * Provides Python classes that wrap most ``fz_`` and ``pdf_`` structs.
  338. * Class methods provide access to most of the underlying C API functions (except for functions that don't take struct args such as ``fz_strlcpy()``).
  339. * MuPDF's ``setjmp``/``longjmp`` exceptions are converted to Python exceptions.
  340. * Functions and methods do not take ``fz_context`` arguments. (Automatically-generated per-thread contexts are used internally.)
  341. * Wrapper classes automatically handle reference counting of the underlying structs (with internal calls to ``fz_keep_*()`` and ``fz_drop_*()``).
  342. * Support for MuPDF function pointers with SWIG Director classes, allowing MuPDF to call Python callbacks.
  343. * Provides a small number of extensions beyond the basic C API:
  344. * Some generated classes have extra support for iteration.
  345. * Some custom class methods and constructors.
  346. * Simple 'POD' structs have ``__str__()`` methods, for example ``mupdf.Rect`` is represented like: ``(x0=90.51 y0=160.65 x1=501.39 y1=215.6)``.
  347. Example usage
  348. -------------
  349. Minimal Python code that uses the ``mupdf`` module:
  350. ::
  351. import mupdf
  352. document = mupdf.Document('foo.pdf')
  353. A simple example Python test script (run by ``scripts/mupdfwrap.py -t``) is:
  354. * ``scripts/mupdfwrap_test.py``
  355. More detailed usage of the Python API can be found in:
  356. * ``scripts/mutool.py``
  357. * ``scripts/mutool_draw.py``
  358. Here is some example code that shows all available information about document's Stext blocks, lines and characters:
  359. ::
  360. #!/usr/bin/env python3
  361. import mupdf
  362. def show_stext(document):
  363. '''
  364. Shows all available information about Stext blocks, lines and characters.
  365. '''
  366. for p in range(document.count_pages()):
  367. page = document.load_page(p)
  368. stextpage = mupdf.StextPage(page, mupdf.StextOptions())
  369. for block in stextpage:
  370. block_ = block.m_internal
  371. log(f'block: type={block_.type} bbox={block_.bbox}')
  372. for line in block:
  373. line_ = line.m_internal
  374. log(f' line: wmode={line_.wmode}'
  375. + f' dir={line_.dir}'
  376. + f' bbox={line_.bbox}'
  377. )
  378. for char in line:
  379. char_ = char.m_internal
  380. log(f' char: {chr(char_.c)!r} c={char_.c:4} color={char_.color}'
  381. + f' origin={char_.origin}'
  382. + f' quad={char_.quad}'
  383. + f' size={char_.size:6.2f}'
  384. + f' font=('
  385. + f'is_mono={char_.font.flags.is_mono}'
  386. + f' is_bold={char_.font.flags.is_bold}'
  387. + f' is_italic={char_.font.flags.is_italic}'
  388. + f' ft_substitute={char_.font.flags.ft_substitute}'
  389. + f' ft_stretch={char_.font.flags.ft_stretch}'
  390. + f' fake_bold={char_.font.flags.fake_bold}'
  391. + f' fake_italic={char_.font.flags.fake_italic}'
  392. + f' has_opentype={char_.font.flags.has_opentype}'
  393. + f' invalid_bbox={char_.font.flags.invalid_bbox}'
  394. + f' name={char_.font.name}'
  395. + f')'
  396. )
  397. document = mupdf.Document('foo.pdf')
  398. show_stext(document)
  399. More information
  400. ----------------
  401. https://mupdf.com/r/C-and-Python-APIs
  402. """
  403. with open(f'{root_dir()}/COPYING') as f:
  404. license = f.read()
  405. mupdf_package = pipcl.Package(
  406. name = 'mupdf',
  407. version = mupdf_version(),
  408. root = root_dir(),
  409. summary = 'Python bindings for MuPDF library.',
  410. description = description,
  411. classifier = [
  412. 'Development Status :: 4 - Beta',
  413. 'Intended Audience :: Developers',
  414. 'License :: OSI Approved :: GNU Affero General Public License v3',
  415. 'Programming Language :: Python :: 3',
  416. ],
  417. author = 'Artifex Software, Inc.',
  418. author_email = 'support@artifex.com',
  419. home_page = 'https://mupdf.com/',
  420. project_url = [
  421. ('Documentation, https://mupdf.com/r/C-and-Python-APIs/'),
  422. ('Source, https://git.ghostscript.com/?p=mupdf.git'),
  423. ('Tracker, https://bugs.ghostscript.com/'),
  424. ],
  425. keywords = 'PDF',
  426. platform = None,
  427. license = license,
  428. fn_build = build,
  429. fn_clean = clean,
  430. fn_sdist = sdist,
  431. )
  432. # Things to allow us to function as a PIP-517 backend:
  433. #
  434. def build_wheel( wheel_directory, config_settings=None, metadata_directory=None):
  435. return mupdf_package.build_wheel(
  436. wheel_directory,
  437. config_settings,
  438. metadata_directory,
  439. )
  440. def build_sdist( sdist_directory, config_settings=None):
  441. return mupdf_package.build_sdist(
  442. sdist_directory,
  443. config_settings,
  444. )
  445. def get_requires_for_build_wheel(config_settings=None):
  446. '''
  447. Adds to pyproject.toml:[build-system]:requires, allowing programmatic
  448. control over what packages we require.
  449. '''
  450. ret = list()
  451. ret.append('setuptools')
  452. if openbsd():
  453. #print(f'OpenBSD: libclang not available via pip; assuming `pkg_add py3-llvm`.')
  454. pass
  455. elif macos() and platform.machine() == 'arm64':
  456. #print(
  457. # f'MacOS/arm64: forcing use of libclang 16.0.6 because 17.0.6'
  458. # f' and 18.1.1 are known to fail with:'
  459. # f' `clang.cindex.TranslationUnitLoadError: Error parsing translation unit.`'
  460. # )
  461. ret.append('libclang==16.0.6')
  462. else:
  463. ret.append('libclang')
  464. if msys2():
  465. #print(f'msys2: pip install of swig does not build; assuming `pacman -S swig`.')
  466. pass
  467. elif openbsd():
  468. #print(f'OpenBSD: pip install of swig does not build; assuming `pkg_add swig`.')
  469. pass
  470. else:
  471. ret.append( 'swig')
  472. return ret
  473. # Allow us to be used as a pre-PIP-517 setup.py script.
  474. #
  475. if __name__ == '__main__':
  476. mupdf_package.handle_argv(sys.argv)