| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384 |
- #!/usr/bin/env python3
- '''
- Intended to behave exactly like mutool, but uses the mupdf Python => C++ =>
- mupdf.so wrappers.
- The code is intended to be similar to the mutool C code, to simplify
- comparison.
- '''
- import getopt
- import os
- import sys
- import textwrap
# Select which mupdf Python binding backs the name `mupdf`, according to the
# MUPDF_PYTHON environment variable.
if os.environ.get('MUPDF_PYTHON') == 'cppyy':
    # cppyy binding: mupdf_cppyy is imported from platform/python underneath
    # the parent of this file's directory. That directory is at the front of
    # sys.path only for the duration of the import.
    sys.path.insert(0, os.path.abspath(f'{__file__}/../../platform/python'))
    import mupdf_cppyy
    sys.path.pop(0)
    mupdf = mupdf_cppyy.cppyy.gbl.mupdf
elif os.environ.get('MUPDF_PYTHON') in (None, 'swig'):
    # Default (variable unset) or explicit 'swig': PYTHONPATH should have been
    # set up to point to a build/shared-*/ directory containing mupdf.so
    # generated by scripts/mupdfwrap.py and SWIG.
    import mupdf
else:
    raise Exception(f'Unrecognised $MUPDF_PYTHON: {os.environ.get("MUPDF_PYTHON")}')
def usage():
    '''
    Prints a summary of the available mutool.py commands to stdout.

    Does not exit; callers decide what to do next.
    '''
    commands = (
            ('clean', 'rewrite pdf file'),
            ('convert', 'convert document'),
            ('trace', 'trace device calls'),
            ('draw', 'convert document'),
            )
    # The leading and trailing empty items reproduce the blank line before the
    # text and the newline that terminates it.
    lines = ['', 'usage: mutool.py <command> [options]']
    lines.extend( f'\t{name}\t-- {summary}' for name, summary in commands)
    lines.append( '')
    print( '\n'.join( lines))
- # Things for clean
- #
- def clean_usage():
- print(textwrap.dedent(
- f'''
- usage: mutool clean [options] input.pdf [output.pdf] [pages]
- \t-p -\tpassword
- \t-g\tgarbage collect unused objects
- \t-gg\tin addition to -g compact xref table
- \t-ggg\tin addition to -gg merge duplicate objects
- \t-gggg\tin addition to -ggg check streams for duplication
- \t-l\tlinearize PDF
- \t-D\tsave file without encryption
- \t-E -\tsave file with new encryption (rc4-40, rc4-128, aes-128, or aes-256)
- \t-O -\towner password (only if encrypting)
- \t-U -\tuser password (only if encrypting)
- \t-P -\tpermission flags (only if encrypting)
- \t-a\tascii hex encode binary streams
- \t-d\tdecompress streams
- \t-z\tdeflate uncompressed streams
- \t-f\tcompress font streams
- \t-i\tcompress image streams
- \t-c\tclean content streams
- \t-s\tsanitize content streams
- \t-A\tcreate appearance streams for annotations
- \t-AA\trecreate appearance streams for annotations
- \tpages\tcomma separated list of page numbers and ranges
- '''
- ))
- sys.exit(1)
def encrypt_method_from_string( name):
    '''
    Maps an encryption method name, as given to `mutool clean -E`, to the
    corresponding mupdf.PDF_ENCRYPT_* value.

    Returns mupdf.PDF_ENCRYPT_UNKNOWN if <name> is not one of 'rc4-40',
    'rc4-128', 'aes-128' or 'aes-256' (the names listed by clean_usage()).
    '''
    # Attribute names are resolved on mupdf at call time, so a binding that
    # lacks one of them only fails when that method is actually requested.
    # NOTE(review): assumes the binding exposes the PDF_ENCRYPT_* enum values
    # as attributes of `mupdf` - confirm for the cppyy binding.
    methods = {
            'rc4-40': 'PDF_ENCRYPT_RC4_40',
            'rc4-128': 'PDF_ENCRYPT_RC4_128',
            'aes-128': 'PDF_ENCRYPT_AES_128',
            'aes-256': 'PDF_ENCRYPT_AES_256',
            }
    return getattr( mupdf, methods.get( name, 'PDF_ENCRYPT_UNKNOWN'))


def clean( argv):
    '''
    Implements `mutool clean`: rewrites a PDF file with
    mupdf.pdf_clean_file().

    argv:
        Arguments following the `clean` command word: options (see
        clean_usage()), the input file, optionally an output file, then any
        remaining arguments, which are passed unchanged to
        mupdf.pdf_clean_file(). The argument after the input file is taken
        as the output file only if it contains '.pdf' (case-insensitively);
        otherwise the output file is 'out.pdf'.

    Returns True if mupdf.pdf_clean_file() raised an exception (the error is
    printed), otherwise False.

    Prints usage and exits with status 1 if the options cannot be parsed or
    no input file is given.
    '''
    outfile = 'out.pdf'
    password = ''
    opts = mupdf.PdfCleanOptions()
    # NOTE(review): garbage collection starts at level 1 even when no -g is
    # given; confirm this is intended.
    opts.write.do_garbage += 1

    try:
        items, argv = getopt.getopt( argv, 'adfgilp:sczDAE:O:U:P:')
    except getopt.GetoptError:
        # Unknown option or missing option argument.
        clean_usage()

    # Repeated flags (e.g. -gg, -AA) arrive as repeated items, so each
    # occurrence bumps the corresponding level.
    for option, value in items:
        if option == '-p': password = value
        elif option == '-d': opts.write.do_decompress += 1
        elif option == '-z': opts.write.do_compress += 1
        elif option == '-f': opts.write.do_compress_fonts += 1
        elif option == '-i': opts.write.do_compress_images += 1
        elif option == '-a': opts.write.do_ascii += 1
        elif option == '-g': opts.write.do_garbage += 1
        elif option == '-l': opts.write.do_linear += 1
        elif option == '-c': opts.write.do_clean += 1
        elif option == '-s': opts.write.do_sanitize += 1
        elif option == '-A': opts.write.do_appearance += 1
        elif option == '-D': opts.write.do_encrypt = mupdf.PDF_ENCRYPT_NONE
        elif option == '-E': opts.write.do_encrypt = encrypt_method_from_string( value)
        elif option == '-P': opts.write.permissions = int( value)
        elif option == '-O': opts.write.opwd_utf8 = value[:128]
        elif option == '-U': opts.write.upwd_utf8 = value[:128]
        else:
            clean_usage()

    # Pretty-print when the output is meant to be human-readable (ascii or
    # decompressed) and is not being recompressed.
    if (opts.write.do_ascii or opts.write.do_decompress) and not opts.write.do_compress:
        opts.write.do_pretty = 1

    if not argv:
        clean_usage()

    infile = argv.pop( 0)
    if argv and '.pdf' in argv[0].lower():
        outfile = argv.pop( 0)

    failed = False
    try:
        mupdf.pdf_clean_file( infile, outfile, password, opts, argv)
    except Exception as e:
        print( f'mupdf.pdf_clean_file() failed: {e}')
        failed = True
        if 0:
            # Enable for debugging.
            import traceback
            traceback.print_exc()
    return failed
- # Things for draw.
- #
# The `draw` command is implemented in the separate mutool_draw module. It is
# bound to the module-level name `draw` here so that main(), which looks
# commands up by name in this module, can find it.
import mutool_draw
draw = mutool_draw.draw
- # Things for convert.
- #
def convert_usage():
    '''
    Prints the command-line help for `mutool convert` to stdout, followed by
    the usage text mupdf provides for each set of output options, then exits
    the process with status 1.
    '''
    header = textwrap.dedent(
            f'''
            mutool convert version {mupdf.FZ_VERSION}
            Usage: mutool convert [options] file [pages]
            \t-p -\tpassword
            \t-A -\tnumber of bits of antialiasing (0 to 8)
            \t-W -\tpage width for EPUB layout
            \t-H -\tpage height for EPUB layout
            \t-S -\tfont size for EPUB layout
            \t-U -\tfile name of user stylesheet for EPUB layout
            \t-X\tdisable document styles for EPUB layout
            \t-o -\toutput file name (%d for page number)
            \t-F -\toutput format (default inferred from output file name)
            \t\t\traster: cbz, png, pnm, pgm, ppm, pam, pbm, pkm.
            \t\t\tprint-raster: pcl, pclm, ps, pwg.
            \t\t\tvector: pdf, svg.
            \t\t\ttext: html, xhtml, text, stext.
            \t-O -\tcomma separated list of options for output format
            \tpages\tcomma separated list of page ranges (N=last page)
            '''
            )
    print( header)
    # Each attribute is fetched immediately before it is printed, in this
    # order.
    for name in (
            'fz_draw_options_usage',
            'fz_pcl_write_options_usage',
            'fz_pclm_write_options_usage',
            'fz_pwg_write_options_usage',
            'fz_stext_options_usage',
            'fz_pdf_write_options_usage',
            'fz_svg_write_options_usage',
            ):
        print( getattr( mupdf, name))
    sys.exit( 1)
def convert_runpage( doc, number, out):
    '''
    Renders one page of <doc> into the document writer <out>.

    doc:
        Document to load the page from.
    number:
        1-based page number; the page is loaded with index number-1.
    out:
        Document writer; the page is run between its fz_begin_page() and
        fz_end_page() calls.
    '''
    page = mupdf.FzPage( doc, number - 1)
    # The output page is given the same bounds as the source page.
    device = out.fz_begin_page( page.fz_bound_page())
    identity = mupdf.FzMatrix( mupdf.fz_identity)
    page.fz_run_page( device, identity, mupdf.FzCookie())
    out.fz_end_page()
def convert_runrange( doc, count, range_, out):
    '''
    Converts every page selected by a page-range specification.

    doc:
        Document to take pages from.
    count:
        Number of pages in <doc>; passed to mupdf.fz_parse_page_range().
    range_:
        Page-range text, e.g. '1-N'. It is consumed piece by piece by
        mupdf.fz_parse_page_range(), which as used here returns
        (remaining_text, start, end), with remaining_text being None once
        nothing is left to parse.
    out:
        Document writer passed on to convert_runpage().

    start and end are both treated as inclusive page numbers. When end is
    less than start the pages are visited in descending order.
    '''
    while 1:
        range_, start, end = mupdf.fz_parse_page_range( range_, count)
        if range_ is None:
            break
        step = +1 if end >= start else -1
        # end + step makes the end page part of the iteration, and gives
        # exactly one iteration when start == end.
        for number in range( start, end + step, step):
            convert_runpage( doc, number, out)
def convert( argv):
    '''
    Implements `mutool convert`: writes pages from one or more input
    documents to a single document writer.

    argv:
        Arguments following the `convert` command word: options (see
        convert_usage()) followed by input files. Each input file may be
        followed by a page-range argument, recognised with
        mupdf.fz_is_page_range(); without one, '1-N' is used.

    Prints usage and exits with status 1 if the options cannot be parsed, if
    no input file is given, or if neither -o nor -F is given.

    Raises Exception if a document needs a password and <password> does not
    authenticate.
    '''
    # input options
    password = ''
    alphabits = 8
    layout_w = mupdf.FZ_DEFAULT_LAYOUT_W
    layout_h = mupdf.FZ_DEFAULT_LAYOUT_H
    layout_em = mupdf.FZ_DEFAULT_LAYOUT_EM
    layout_css = None
    layout_use_doc_css = 1

    # output options
    output = None
    format_ = None
    options = ''

    try:
        items, argv = getopt.getopt( argv, 'p:A:W:H:S:U:Xo:F:O:')
    except getopt.GetoptError:
        # Unknown option or missing option argument.
        convert_usage()

    for option, value in items:
        if option == '-p': password = value
        elif option == '-A': alphabits = int( value)
        elif option == '-W': layout_w = float( value)
        elif option == '-H': layout_h = float( value)
        elif option == '-S': layout_em = float( value)
        elif option == '-U': layout_css = value
        elif option == '-X': layout_use_doc_css = 0
        elif option == '-o': output = value
        elif option == '-F': format_ = value
        elif option == '-O': options = value
        else:
            convert_usage()

    if not argv or (not format_ and not output):
        convert_usage()

    mupdf.fz_set_aa_level( alphabits)
    if layout_css:
        # NOTE(review): assumes FzBuffer(<path>) holds the contents of the
        # stylesheet file - confirm against the binding.
        buf = mupdf.FzBuffer( layout_css)
        mupdf.fz_set_user_css( buf.string_from_buffer())
    mupdf.fz_set_use_document_css( layout_use_doc_css)

    if format_:
        out = mupdf.FzDocumentWriter( output, format_, options)
    else:
        # NOTE(review): without -F a PDF writer is always used, although the
        # help text says the format is inferred from the output file name -
        # confirm which is intended.
        out = mupdf.FzDocumentWriter( output, options, mupdf.FzDocumentWriter.OutputType_PDF)

    # A while loop rather than a for loop, because a page-range argument
    # following a file name is consumed together with that file name.
    i = 0
    while i < len( argv):
        arg = argv[i]
        doc = mupdf.FzDocument( arg)
        if doc.fz_needs_password():
            if not doc.fz_authenticate_password( password):
                raise Exception( f'cannot authenticate password: {arg}')
        doc.fz_layout_document( layout_w, layout_h, layout_em)
        count = doc.fz_count_pages()

        range_ = '1-N'
        if i + 1 < len( argv) and mupdf.fz_is_page_range( argv[i+1]):
            i += 1
            range_ = argv[i]
        convert_runrange( doc, count, range_, out)
        i += 1

    out.fz_close_document_writer()
- # Things for trace.
- #
def trace_usage():
    '''
    Prints the command-line help for `mutool trace` to stdout, then exits the
    process with status 1.
    '''
    # The leading and trailing empty items reproduce the blank line before the
    # text and the newline that terminates it.
    lines = (
            '',
            'Usage: mutool trace [options] file [pages]',
            '\t-p -\tpassword',
            '\t-W -\tpage width for EPUB layout',
            '\t-H -\tpage height for EPUB layout',
            '\t-S -\tfont size for EPUB layout',
            '\t-U -\tfile name of user stylesheet for EPUB layout',
            '\t-X\tdisable document styles for EPUB layout',
            '\t-d\tuse display list',
            '\tpages\tcomma separated list of page numbers and ranges',
            '',
            )
    print( '\n'.join( lines))
    sys.exit( 1)
def trace_runpage( use_display_list, doc, number):
    '''
    Traces the device calls for one page, wrapped in a <page> element.

    use_display_list:
        If true, the page is first captured in a display list, which is
        then run on the device; otherwise the page is run directly.
    doc:
        Document to load the page from.
    number:
        1-based page number; the page is loaded with index number-1.
    '''
    page = mupdf.FzPage( doc, number-1)
    bounds = page.fz_bound_page()
    print( f'<page number="{number}" mediabox="{bounds.x0} {bounds.y0} {bounds.x1} {bounds.y1}">')

    # NOTE(review): the <page> tags are written with print() while the device
    # calls go through a mupdf FzOutput on stdout; presumably these are
    # buffered separately, so their relative order may not be preserved when
    # stdout is not a terminal - confirm.
    stdout_output = mupdf.FzOutput( mupdf.FzOutput.Fixed_STDOUT)
    device = mupdf.FzDevice( stdout_output)
    if not use_display_list:
        page.fz_run_page( device, mupdf.FzMatrix(mupdf.fz_identity), mupdf.FzCookie())
    else:
        display_list = mupdf.FzDisplayList( page)
        display_list.fz_run_display_list(
                device,
                mupdf.FzMatrix(mupdf.fz_identity),
                mupdf.FzRect(mupdf.fz_infinite_rect),
                mupdf.FzCookie(),
                )
    stdout_output.fz_close_output()
    print( '</page>')
def trace_runrange( use_display_list, doc, count, range_):
    '''
    Traces every page selected by a page-range specification.

    use_display_list:
        Passed on to trace_runpage().
    doc:
        Document to take pages from.
    count:
        Number of pages in <doc>; passed to mupdf.fz_parse_page_range().
    range_:
        Page-range text, e.g. '1-N'. It is consumed piece by piece by
        mupdf.fz_parse_page_range(), which as used here returns
        (remaining_text, start, end), with remaining_text being None once
        nothing is left to parse.

    start and end are both treated as inclusive page numbers. When end is
    less than start the pages are visited in descending order.

    Nothing other than the per-page trace is written to stdout, so the
    output stays a clean sequence of <page> elements.
    '''
    while 1:
        range_, start, end = mupdf.fz_parse_page_range( range_, count)
        if range_ is None:
            break
        step = +1 if end >= start else -1
        # end + step makes the end page part of the iteration, and gives
        # exactly one iteration when start == end.
        for number in range( start, end + step, step):
            trace_runpage( use_display_list, doc, number)
def trace( argv):
    '''
    Implements `mutool trace`: prints the device calls made when running
    the pages of one or more documents, each document wrapped in a
    <document> element.

    argv:
        Arguments following the `trace` command word: options (see
        trace_usage()) followed by input files. Each input file may be
        followed by a page-range argument, recognised with
        mupdf.fz_is_page_range(); without one, '1-N' is used.

    Prints usage and exits with status 1 if the options cannot be parsed or
    no input file is given.

    Raises Exception if a document needs a password and <password> does not
    authenticate.
    '''
    password = ''
    layout_w = mupdf.FZ_DEFAULT_LAYOUT_W
    layout_h = mupdf.FZ_DEFAULT_LAYOUT_H
    layout_em = mupdf.FZ_DEFAULT_LAYOUT_EM
    layout_css = None
    layout_use_doc_css = 1
    use_display_list = 0

    # Options are parsed with getopt, like clean() and convert(), so that a
    # missing option argument or an option-only command line ends in the
    # usage message instead of an IndexError.
    try:
        items, argv = getopt.getopt( argv, 'p:W:H:S:U:Xd')
    except getopt.GetoptError:
        trace_usage()

    for option, value in items:
        if option == '-p': password = value
        elif option == '-W': layout_w = float( value)
        elif option == '-H': layout_h = float( value)
        elif option == '-S': layout_em = float( value)
        elif option == '-U': layout_css = value
        elif option == '-X': layout_use_doc_css = 0
        elif option == '-d': use_display_list = 1
        else:
            trace_usage()

    if not argv:
        trace_usage()

    if layout_css:
        # NOTE(review): assumes FzBuffer(<path>) holds the contents of the
        # stylesheet file - confirm against the binding.
        buffer_ = mupdf.FzBuffer( layout_css)
        mupdf.fz_set_user_css( buffer_.string_from_buffer())
    mupdf.fz_set_use_document_css( layout_use_doc_css)

    # A while loop rather than a for loop over range(): a page-range argument
    # following a file name must be skipped, and rebinding the loop variable
    # of a for loop would not skip it.
    i = 0
    while i < len( argv):
        arg = argv[i]
        doc = mupdf.FzDocument( arg)
        if doc.fz_needs_password():
            if not doc.fz_authenticate_password( password):
                raise Exception( f'cannot authenticate password: {arg}')
        doc.fz_layout_document( layout_w, layout_h, layout_em)
        print( f'<document filename="{arg}">')
        count = doc.fz_count_pages()

        range_ = '1-N'
        if i + 1 < len( argv) and mupdf.fz_is_page_range( argv[i+1]):
            i += 1
            range_ = argv[i]
        trace_runrange( use_display_list, doc, count, range_)
        print( '</document>')
        i += 1
def main( argv):
    '''
    Dispatches to the function implementing the requested command.

    argv:
        Full command line: argv[0] is the program name, argv[1] the command
        and argv[2:] the arguments passed to the command function.

    Returns whatever the command function returns.

    Prints usage and exits with status 1 if no command is given, or if the
    command is not one of clean, convert, draw or trace.
    '''
    # Only the commands listed by usage() may be dispatched; looking up
    # arbitrary names would also resolve imported modules and helper
    # functions that are not commands.
    commands = ('clean', 'convert', 'draw', 'trace')

    if len( argv) < 2:
        usage()
        sys.exit( 1)

    name = argv[1]
    # The function is looked up by name at call time, in this module.
    fn = globals().get( name) if name in commands else None
    if not callable( fn):
        print( f'cannot find {name}')
        usage()
        sys.exit( 1)
    return fn( argv[2:])
if __name__ == '__main__':
    # Run the requested command and use its return value as the exit status.
    # sys.exit() raises SystemExit, which the handler below does not catch,
    # so only genuine errors reach it.
    try:
        sys.exit( main( sys.argv))
    except Exception as e:
        if 0:   # Enable when debugging.
            sys.stdout.flush()
            sys.stderr.flush()
            print( f'Exception: {e}')
            sys.stdout.flush()
        # Propagate the error so the interpreter reports it in the usual way.
        raise
|