parse.py 36 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983
  1. '''
  2. Support for accessing parse tree for MuPDF headers.
  3. '''
  4. import os
  5. import sys
  6. import time
  7. import jlib
  8. try:
  9. import clang
  10. except ImportError as e:
  11. jlib.log( 'Warning, could not import clang: {e}')
  12. clang = None
  13. from . import classes
  14. from . import cpp
  15. from . import state
  16. from . import util
  17. def get_extras(tu, type_):
  18. '''
  19. Returns (cursor, typename, extras):
  20. cursor: for base type.
  21. typename:
  22. extras: None or from classes.classextras.
  23. '''
  24. base_type = get_base_type( type_)
  25. base_type_cursor = base_type.get_declaration()
  26. base_typename = get_base_typename( base_type)
  27. extras = classes.classextras.get( tu, base_typename)
  28. return base_type_cursor, base_typename, extras
  29. def fileline( cursor):
  30. '''
  31. Returns <file>:<line> from cursor.location.
  32. '''
  33. f = cursor.location.file
  34. filename = os.path.relpath( f.name) if f else ''
  35. return f'{filename}:{cursor.location.line}'
  36. def prefix( name):
  37. if name.startswith( 'fz_'):
  38. return 'fz_'
  39. if name.startswith( 'pdf_'):
  40. return 'pdf_'
  41. assert 0, f'unrecognised prefix (not fz_ or pdf_) in name={name}'
  42. def get_fz_extras( tu, fzname):
  43. '''
  44. Finds ClassExtra for <fzname>, coping if <fzname> starts with 'const ' or
  45. 'struct '. Returns None if not found.
  46. '''
  47. fzname = util.clip( fzname, 'const ')
  48. fzname = util.clip( fzname, 'struct ')
  49. ce = classes.classextras.get( tu, fzname)
  50. return ce
  51. def get_children(cursor):
  52. '''
  53. Like cursor.get_children() but recurses into cursors with
  54. clang.cindex.CursorKind.UNEXPOSED_DECL which picks up top-level items
  55. marked with `extern "C"`, and clang.cindex.CursorKind.LINKAGE_SPEC which
  56. picks up items inside `extern "C" {...}`.
  57. '''
  58. verbose = 0
  59. for cursor in cursor.get_children():
  60. #verbose = state.state_.show_details( cursor.spelling)
  61. #verbose = 1
  62. if cursor.kind == clang.cindex.CursorKind.UNEXPOSED_DECL:
  63. # Things tagged with `extern "C" appear to be within this
  64. # cursor.
  65. for cursor2 in cursor.get_children():
  66. if verbose and cursor.spelling:
  67. jlib.log( '{cursor.spelling=}')
  68. yield cursor2
  69. elif cursor.kind == clang.cindex.CursorKind.LINKAGE_SPEC:
  70. # extern "C" {...}
  71. for cursor2 in cursor.get_children():
  72. if verbose and cursor.spelling:
  73. jlib.log( '{cursor.spelling=}')
  74. yield cursor2
  75. else:
  76. if verbose and cursor.spelling:
  77. jlib.log( '{cursor.spelling=}')
  78. yield cursor
  79. def get_members( type_or_cursor, include_empty=False):
  80. '''
  81. Yields cursor for each member. Uses whichever of
  82. clang.cindex.Cursor.get_children() or clang.cindex.Type.get_fields() works.
  83. Args:
  84. type_or_cursor:
  85. .
  86. include_empty:
  87. If false (the default), we first try
  88. clang.cindex.Cursor.get_children(), but ignore items for which
  89. .spelling==''. If resulting list is empty, we instead use
  90. clang.cindex.Type.get_fields().
  91. Otherwise, we return list of items from
  92. clang.cindex.Cursor.get_children(), regardless of whether they
  93. have .spelling==''. This allows finding of non-typedef enums, for
  94. example.
  95. '''
  96. if isinstance( type_or_cursor, clang.cindex.Type):
  97. cursor = type_or_cursor.get_declaration()
  98. elif isinstance( type_or_cursor, clang.cindex.Cursor):
  99. cursor = type_or_cursor
  100. else:
  101. assert 0
  102. if cursor.type.kind in (state.clang.cindex.TypeKind.TYPEDEF, state.clang.cindex.TypeKind.ELABORATED):
  103. cursor2 = cursor.underlying_typedef_type.get_declaration()
  104. else:
  105. cursor2 = cursor
  106. if 0:
  107. # Diagnostics to show the difference between
  108. # clang.cindex.Cursor.get_children() and
  109. # clang.cindex.Type.get_fields().
  110. #
  111. # For example it looks like clang.cindex.Cursor.get_children() can
  112. # return an extra item with .spelling=='' for 'union {...} u;'.
  113. #
  114. ret_cursor = list()
  115. ret_cursor_no_empty = list()
  116. ret_type = list()
  117. for cursor3 in cursor2.get_children():
  118. item = (cursor3.spelling, cursor3.location.file.name, cursor3.location.line)
  119. ret_cursor.append( item)
  120. if cursor3.spelling:
  121. ret_cursor_no_empty.append( item)
  122. for cursor3 in cursor.type.get_canonical().get_fields():
  123. ret_type.append( (cursor3.spelling, cursor3.location.file.name, cursor3.location.line))
  124. ret_cursor.sort()
  125. ret_type.sort()
  126. ret_cursor_no_empty.sort()
  127. if (not ret_cursor_no_empty) and ret_type:
  128. jlib.log( 'ret_type and not ret_cursor_no_empty:')
  129. for i in ret_type:
  130. jlib.log( ' ret_type: {i}')
  131. if 0 and ret_cursor != ret_type:
  132. jlib.log('get_children() != get_fields():')
  133. for i in ret_cursor:
  134. jlib.log( ' ret_cursor: {i}')
  135. for i in ret_type:
  136. jlib.log( ' ret_type: {i}')
  137. ret = list()
  138. for cursor3 in cursor2.get_children():
  139. if include_empty or cursor3.spelling:
  140. ret.append(cursor3)
  141. if not ret:
  142. type_ = cursor.type.get_canonical()
  143. for cursor3 in type_.get_fields():
  144. ret.append( cursor3)
  145. for i in ret:
  146. yield i
  147. def get_field0( type_):
  148. '''
  149. Returns cursor for first field in <type_> or None if <type_> has no fields.
  150. '''
  151. verbose = state.state_.show_details( type_.spelling)
  152. for cursor in get_members(type_):
  153. return cursor
  154. get_base_type_cache = dict()
  155. def get_base_type( type_):
  156. '''
  157. Repeatedly dereferences pointer and returns the ultimate type.
  158. '''
  159. # Caching reduces time from to 0.24s to 0.1s.
  160. key = type_.spelling
  161. ret = get_base_type_cache.get( key)
  162. if ret is None:
  163. while 1:
  164. type_ = state.get_name_canonical( type_)
  165. if type_.kind != clang.cindex.TypeKind.POINTER:
  166. break
  167. type_ = type_.get_pointee()
  168. ret = type_
  169. get_base_type_cache[ key] = ret
  170. return ret
  171. def get_base_typename( type_):
  172. '''
  173. Follows pointer to get ultimate type, and returns its name, with any
  174. leading 'struct ' or 'const ' removed.
  175. '''
  176. type_ = get_base_type( type_)
  177. ret = type_.spelling
  178. ret = util.clip( ret, 'const ')
  179. ret = util.clip( ret, 'struct ')
  180. return ret
  181. def is_double_pointer( type_):
  182. '''
  183. Returns true if <type_> is double pointer.
  184. '''
  185. type_ = state.get_name_canonical( type_)
  186. if type_.kind == clang.cindex.TypeKind.POINTER:
  187. type_ = state.get_name_canonical( type_.get_pointee())
  188. if type_.kind == clang.cindex.TypeKind.POINTER:
  189. return True
  190. has_refs_cache = dict()
  191. def has_refs( tu, type_):
  192. '''
  193. Returns (offset, bits) if <type_> has a 'refs' member, otherwise False.
  194. offset:
  195. Byte offset of 'refs' or name of 'refs' for use with offsetof(),
  196. e.g. 'super.refs'.
  197. bits:
  198. Size of 'refs' in bits. Will be -1 if there is no simple .refs
  199. member (e.g. fz_xml).
  200. '''
  201. type0 = type_
  202. type_ = type_.get_canonical()
  203. key = type_.spelling
  204. key = util.clip(key, 'struct ')
  205. verbose = state.state_.show_details( key)
  206. ret = has_refs_cache.get( key, None)
  207. if ret is None:
  208. ret = False
  209. if verbose:
  210. jlib.log( 'Analysing {type0.spelling=} {type_.spelling=} {key=}')
  211. for prefix in (
  212. 'fz_',
  213. 'pdf_',
  214. ):
  215. if verbose:
  216. jlib.log( '{type_.spelling=} {prefix=}')
  217. if key.startswith( prefix):
  218. if verbose:
  219. jlib.log( 'Type is a fz_ or pdf_ struct: {key=}')
  220. keep_name = f'{prefix}keep_{key[len(prefix):]}'
  221. keep_fn_cursor = state.state_.find_function( tu, keep_name, method=False)
  222. if verbose:
  223. jlib.log( '{keep_name=} {keep_fn_cursor=}')
  224. if keep_fn_cursor:
  225. if verbose:
  226. jlib.log( 'There is a keep() fn for this type so it uses reference counting: {keep_name=}')
  227. base_type_cursor = get_base_type( type_).get_declaration()
  228. if base_type_cursor.is_definition():
  229. if verbose:
  230. jlib.log( 'Type definition is available so we look for .refs member: {key=} {type_.spelling=} {fileline(base_type_cursor)=}')
  231. if verbose:
  232. jlib.log('type_.get_fields()')
  233. for cursor in get_members(type_):
  234. jlib.log(' {cursor.spelling=}')
  235. jlib.log('base_type_cursor.get_children()')
  236. for cursor in base_type_cursor.get_children():
  237. jlib.log(' {cursor.spelling=}')
  238. jlib.log('.')
  239. for cursor in get_members(type_):
  240. name = cursor.spelling
  241. type2 = state.get_name_canonical( cursor.type)
  242. if verbose:
  243. jlib.log( '{name=} {type2.spelling=}')
  244. if name == 'refs' and type2.spelling == 'int':
  245. ret = 'refs', 32
  246. break
  247. if name == 'storable' and type2.spelling in ('struct fz_storable', 'fz_storable'):
  248. ret = 'storable.refs', 32
  249. break
  250. else:
  251. if 0:
  252. jlib.log('Definition is not available for {key=}'
  253. ' because {base_type_cursor.spelling=} .is_definition()'
  254. ' returns false.'
  255. ' base_type_cursor.location={fileline(base_type_cursor)}'
  256. )
  257. if not ret:
  258. if verbose:
  259. jlib.log(
  260. '{type_.spelling=}: Cannot find .refs member or we only have forward'
  261. ' declaration, so have to hard-code the size and offset'
  262. ' of the refs member.'
  263. )
  264. if base_type_cursor.is_definition():
  265. if key == 'pdf_document':
  266. ret = 'super.refs', 32
  267. elif key == 'pdf_page':
  268. ret = 'super.refs', 32
  269. elif key == 'fz_pixmap':
  270. ret = 'storable.refs', 32
  271. elif key in (
  272. 'fz_colorspace',
  273. 'fz_image',
  274. ):
  275. return 'key_storable.storable.refs', 32
  276. elif key == 'pdf_cmap':
  277. return 'storable.refs', 32
  278. else:
  279. #jlib.log( 'No definition available, i.e. forward decl only.')
  280. if key == 'pdf_obj':
  281. ret = 0, 16
  282. elif key == 'fz_path':
  283. ret = 0, 8
  284. elif key in (
  285. 'fz_separations',
  286. 'fz_halftone',
  287. 'pdf_annot',
  288. 'pdf_graft_map',
  289. ):
  290. # Forward decl, first member is 'int regs;'.
  291. return 0, 32
  292. elif key in (
  293. 'fz_display_list',
  294. 'fz_glyph',
  295. 'fz_jbig2_globals',
  296. 'pdf_function',
  297. ):
  298. # Forward decl, first member is 'fz_storable storable;'.
  299. return 0, 32
  300. elif key == 'fz_xml':
  301. # This only has a simple .refs member if the
  302. # .up member is null, so we don't attempt to
  303. # use it, by returning size=-1.
  304. ret = 0, -1
  305. if ret is None:
  306. # Need to hard-code info for this type.
  307. assert 0, jlib.expand_nv(
  308. '{key=} has {keep_name}() fn but is forward decl or we cannot find .refs,'
  309. ' and we have no hard-coded info about size and offset of .regs.'
  310. ' {type0.spelling=} {type_.spelling=} {base_type_cursor.spelling}'
  311. )
  312. assert ret, (
  313. f'{key} has {keep_name}() but have not found size/location of .refs member.'
  314. f' {type_.spelling=}'
  315. f' {base_type_cursor.spelling=}'
  316. f': {fileline(base_type_cursor)}'
  317. )
  318. if type_.spelling in (
  319. 'struct fz_document',
  320. 'struct fz_buffer',
  321. ):
  322. assert ret
  323. #jlib.log('Populating has_refs_cache with {key=} {ret=}')
  324. has_refs_cache[ key] = ret
  325. return ret
  326. def get_value( item, name):
  327. '''
  328. Enhanced wrapper for getattr().
  329. We call ourselves recursively if name contains one or more '.'. If name
  330. ends with (), makes fn call to get value.
  331. '''
  332. if not name:
  333. return item
  334. dot = name.find( '.')
  335. if dot >= 0:
  336. item_sub = get_value( item, name[:dot])
  337. return get_value( item_sub, name[dot+1:])
  338. if name.endswith('()'):
  339. value = getattr( item, name[:-2])
  340. assert callable(value)
  341. return value()
  342. return getattr( item, name)
  343. def get_list( item, *names):
  344. '''
  345. Uses get_value() to find values of specified fields in <item>.
  346. Returns list of (name,value) pairs.
  347. '''
  348. ret = []
  349. for name in names:
  350. value = get_value( item, name)
  351. ret.append((name, value))
  352. return ret
  353. def get_text( item, prefix, sep, *names):
  354. '''
  355. Returns text describing <names> elements of <item>.
  356. '''
  357. ret = []
  358. for name, value in get_list( item, *names):
  359. ret.append( f'{name}={value}')
  360. return prefix + sep.join( ret)
  361. def dump_ast( cursor, out=None, depth=0):
  362. cleanup = lambda: None
  363. if out is None:
  364. out = sys.stdout
  365. if isinstance(out, str):
  366. out = open(out, 'w')
  367. cleanup = lambda : out.close()
  368. try:
  369. indent = depth*4*' '
  370. for cursor2 in cursor.get_children():
  371. def or_none(f):
  372. try:
  373. return f()
  374. except Exception:
  375. return
  376. result = or_none( cursor2.type.get_result)
  377. type_ = cursor2.type
  378. type_canonical = or_none( cursor2.type.get_canonical)
  379. text = indent
  380. text += jlib.log_text(
  381. '{cursor2.kind=}'
  382. ' {cursor2.displayname=}'
  383. ' {cursor2.spelling=}'
  384. ' {cursor2.linkage=}'
  385. ' {cursor2.is_definition()=}'
  386. )
  387. if result:
  388. text += jlib.log_text(' {result.spelling=}')
  389. if type_:
  390. text += jlib.log_text(' {type_.spelling=}')
  391. if type_canonical:
  392. text += jlib.log_text(' {type_canonical.spelling=}')
  393. text += '\n'
  394. if callable(out):
  395. out( text)
  396. else:
  397. out.write(text)
  398. dump_ast( cursor2, out, depth+1)
  399. finally:
  400. cleanup()
  401. def show_ast( filename, includes):
  402. jlib.log('Parsing {filename=}')
  403. index = clang.cindex.Index.create()
  404. args = []
  405. for include in includes:
  406. args += ['-I', include]
  407. tu = index.parse( filename,
  408. args = args,
  409. )
  410. dump_ast( tu.cursor)
  411. class Arg:
  412. '''
  413. Information about a function argument.
  414. .cursor:
  415. Cursor for the argument.
  416. .name:
  417. Arg name, or an invented name if none was present.
  418. .separator:
  419. '' for first returned argument, ', ' for the rest.
  420. .alt:
  421. Cursor for underlying fz_ struct type if <arg> is a pointer to or
  422. ref/value of a fz_ struct type that we wrap. Else None.
  423. .out_param:
  424. True if this looks like an out-parameter, e.g. alt is set and
  425. double pointer, or arg is pointer other than to char.
  426. .name_python:
  427. Same as .name or .name+'_' if .name is a Python keyword.
  428. .name_csharp:
  429. Same as .name or .name+'_' if .name is a C# keyword.
  430. '''
  431. def __init__(self, cursor, name, separator, alt, out_param):
  432. self.cursor = cursor
  433. self.name = name
  434. self.separator = separator
  435. self.alt = alt
  436. self.out_param = out_param
  437. if name in ('in', 'is'):
  438. self.name_python = f'{name}_'
  439. else:
  440. self.name_python = name
  441. self.name_csharp = f'{name}_' if name in ('out', 'is', 'in', 'params') else name
  442. def __str__(self):
  443. return f'Arg(name={self.name} alt={"true" if self.alt else "false"} out_param={self.out_param})'
  444. get_args_cache = dict()
  445. def get_args( tu, cursor, include_fz_context=False, skip_first_alt=False, verbose=False):
  446. '''
  447. Yields Arg instance for each arg of the function at <cursor>.
  448. Args:
  449. tu:
  450. A clang.cindex.TranslationUnit instance.
  451. cursor:
  452. Clang cursor for the function.
  453. include_fz_context:
  454. If false, we skip args that are 'struct fz_context*'
  455. skip_first_alt:
  456. If true, we skip the first arg with .alt set.
  457. verbose:
  458. .
  459. '''
  460. # We are called a few times for each function, and the calculations we do
  461. # are slow, so we cache the returned items. E.g. this reduces total time of
  462. # --build 0 from 3.5s to 2.1s.
  463. #
  464. if verbose:
  465. jlib.log( '## Looking at args of {cursor.spelling=}')
  466. key = tu, cursor.location.file, cursor.location.line, include_fz_context, skip_first_alt
  467. ret = get_args_cache.get( key)
  468. if not verbose and state.state_.show_details(cursor.spelling):
  469. verbose = True
  470. if ret is None:
  471. if verbose:
  472. jlib.log( '## Looking at args of {cursor.spelling=}')
  473. ret = []
  474. i = 0
  475. i_alt = 0
  476. separator = ''
  477. for arg_cursor in cursor.get_arguments():
  478. if verbose:
  479. jlib.log('{arg_cursor.kind=} {arg_cursor.spelling=}')
  480. assert arg_cursor.kind == clang.cindex.CursorKind.PARM_DECL
  481. if not include_fz_context and is_pointer_to( arg_cursor.type, 'fz_context'):
  482. # Omit this arg because our generated mupdf_*() wrapping functions
  483. # use internalContextGet() to get a context.
  484. continue
  485. name = arg_cursor.spelling or f'arg_{i}'
  486. if 0 and name == 'stmofsp':
  487. verbose = True
  488. alt = None
  489. out_param = False
  490. base_type_cursor, base_typename, extras = get_extras( tu, arg_cursor.type)
  491. if verbose:
  492. jlib.log( 'Looking at arg. {extras=}')
  493. if extras:
  494. if verbose:
  495. jlib.log( '{extras.opaque=} {base_type_cursor.kind=} {base_type_cursor.is_definition()=}')
  496. if extras.opaque:
  497. # E.g. we don't have access to definition of fz_separation,
  498. # but it is marked in classes.classextras with opaque=true,
  499. # so there will be a wrapper class.
  500. alt = base_type_cursor
  501. elif (1
  502. and base_type_cursor.kind == clang.cindex.CursorKind.STRUCT_DECL
  503. #and base_type_cursor.is_definition()
  504. ):
  505. alt = base_type_cursor
  506. if verbose:
  507. jlib.log( '{arg_cursor.type.spelling=} {base_typename=} {arg_cursor.type.kind=} {get_base_typename(arg_cursor.type)=}')
  508. jlib.log( '{get_base_type(arg_cursor.type).kind=}')
  509. if alt:
  510. if is_double_pointer( arg_cursor.type):
  511. out_param = True
  512. elif get_base_typename( arg_cursor.type) in ('char', 'unsigned char', 'signed char', 'void', 'FILE'):
  513. if is_double_pointer( arg_cursor.type):
  514. if verbose:
  515. jlib.log( 'setting outparam: {cursor.spelling=} {arg_cursor.type=}')
  516. if cursor.spelling == 'pdf_clean_file':
  517. # Don't mark char** argv as out-param, which will also
  518. # allow us to tell swig to convert python lists into
  519. # (argc,char**) pair.
  520. pass
  521. else:
  522. if verbose:
  523. jlib.log('setting out_param to true')
  524. out_param = True
  525. elif ( base_typename.startswith( ('fz_', 'pdf_'))
  526. and get_base_type(arg_cursor.type).kind != clang.cindex.TypeKind.ENUM
  527. ):
  528. # Pointer to fz_ struct is not usually an out-param.
  529. if verbose:
  530. jlib.log(
  531. 'not out-param because pointer to struct:'
  532. ' arg is: {arg_cursor.displayname=}'
  533. ' {base_typename.spelling=}'
  534. ' {extras}'
  535. ' {arg_cursor.type.kind=}'
  536. )
  537. elif arg_cursor.type.kind == clang.cindex.TypeKind.POINTER:
  538. pointee = arg_cursor.type.get_pointee()
  539. if verbose:
  540. jlib.log( 'clang.cindex.TypeKind.POINTER')
  541. if state.get_name_canonical( pointee).kind == clang.cindex.TypeKind.FUNCTIONPROTO:
  542. # Don't mark function-pointer args as out-params.
  543. if verbose:
  544. jlib.log( 'clang.cindex.TypeKind.FUNCTIONPROTO')
  545. elif pointee.is_const_qualified():
  546. if verbose:
  547. jlib.log( 'is_const_qualified()')
  548. elif pointee.spelling == 'FILE':
  549. pass
  550. else:
  551. if verbose:
  552. jlib.log( 'setting out_param = True')
  553. out_param = True
  554. if alt:
  555. i_alt += 1
  556. i += 1
  557. if alt and skip_first_alt and i_alt == 1:
  558. continue
  559. arg = Arg(arg_cursor, name, separator, alt, out_param)
  560. ret.append(arg)
  561. if verbose:
  562. jlib.log( 'Appending {arg=}')
  563. separator = ', '
  564. get_args_cache[ key] = ret
  565. for arg in ret:
  566. yield arg
  567. def fn_has_struct_args( tu, cursor):
  568. '''
  569. Returns true if fn at <cursor> takes any fz_* struct args.
  570. '''
  571. for arg in get_args( tu, cursor):
  572. if arg.alt:
  573. return True
  574. def get_first_arg( tu, cursor):
  575. '''
  576. Returns (arg, n), where <arg> is from get_args() for first argument (or
  577. None if no arguments), and <n> is number of arguments.
  578. '''
  579. n = 0
  580. ret = None
  581. for arg in get_args( tu, cursor):
  582. if n == 0:
  583. ret = arg
  584. n += 1
  585. return ret, n
  586. is_cache = dict()
  587. def is_( type_, type2):
  588. key = type_.spelling, type2
  589. ret = is_cache.get( key)
  590. if ret is None:
  591. d = cpp.declaration_text( type_, '', top_level='')
  592. d = util.clip( d, 'const ')
  593. d = util.clip( d, 'struct ')
  594. d = d.strip()
  595. ret = (d == type2)
  596. is_cache[ key] = ret
  597. return ret
  598. is_pointer_to_cache = dict()
  599. def is_pointer_to( type_, destination, verbose=False):
  600. '''
  601. Returns true if <type> is a pointer to <destination>.
  602. We do this using text for <destination>, rather than a clang.cindex.Type
  603. or clang.cindex.Cursor, so that we can represent base types such as int or
  604. char without having clang parse system headers. This involves stripping any
  605. initial 'struct ' text.
  606. Also, clang's representation of mupdf's varying use of typedef, struct and
  607. forward-declarations is rather difficult to work with directly.
  608. type_:
  609. A clang.cindex.Type.
  610. destination:
  611. Text typename.
  612. '''
  613. # Use cache - reduces time from 0.6s to 0.2.
  614. #
  615. key = type_.spelling, destination
  616. ret = is_pointer_to_cache.get( key)
  617. if verbose or ret is None:
  618. assert isinstance( type_, clang.cindex.Type)
  619. if verbose: jlib.log( '{type_.spelling=}')
  620. ret = None
  621. destination = util.clip( destination, 'struct ')
  622. if type_.kind == clang.cindex.TypeKind.POINTER:
  623. pointee = type_.get_pointee()
  624. if verbose: jlib.log('{pointee.spelling=}')
  625. d = cpp.declaration_text( pointee, '', top_level='', verbose=verbose)
  626. d = util.clip( d, 'const ')
  627. d = util.clip( d, 'struct ')
  628. if verbose:
  629. jlib.log( '{destination=} {type_.get_pointee().kind=} {type_.get_pointee().spelling=} {state.get_name_canonical( type_.get_pointee()).spelling=}')
  630. ret = d.strip() == destination or d.strip() == f'const {destination}'
  631. is_pointer_to_cache[ key] = ret
  632. return ret
  633. def is_pointer_to_pointer_to( type_, destination, verbose=False):
  634. if verbose:
  635. jlib.log( '{type_.spelling=}')
  636. if type_.kind != clang.cindex.TypeKind.POINTER:
  637. return False
  638. pointee = type_.get_pointee()
  639. return is_pointer_to( pointee, destination, verbose=verbose)
  640. class MethodExcludeReason_VARIADIC:
  641. pass
  642. class MethodExcludeReason_OMIT_CLASS:
  643. pass
  644. class MethodExcludeReason_NO_EXTRAS:
  645. pass
  646. class MethodExcludeReason_NO_RAW_CONSTRUCTOR:
  647. pass
  648. class MethodExcludeReason_NOT_COPYABLE:
  649. pass
  650. class MethodExcludeReason_NO_WRAPPER_CLASS:
  651. pass
  652. class MethodExcludeReason_ENUM:
  653. pass
  654. class MethodExcludeReason_FIRST_ARG_NOT_STRUCT:
  655. pass
  656. # Maps from <structname> to list of functions satisfying conditions specified
  657. # by find_wrappable_function_with_arg0_type() below.
  658. #
  659. find_wrappable_function_with_arg0_type_cache = None
  660. # Maps from fnname to list of strings, each string being a description of why
  661. # this fn is not suitable for wrapping by class method.
  662. #
  663. find_wrappable_function_with_arg0_type_excluded_cache = None
  664. # Maps from function name to the class that has a method that wraps this
  665. # function.
  666. #
  667. fnname_to_method_structname = dict()
  668. def find_wrappable_function_with_arg0_type_cache_populate( tu):
  669. '''
  670. Populates caches with wrappable functions.
  671. '''
  672. global find_wrappable_function_with_arg0_type_cache
  673. global find_wrappable_function_with_arg0_type_excluded_cache
  674. if find_wrappable_function_with_arg0_type_cache:
  675. return
  676. t0 = time.time()
  677. find_wrappable_function_with_arg0_type_cache = dict()
  678. find_wrappable_function_with_arg0_type_excluded_cache = dict()
  679. for fnname, cursor in state.state_.find_functions_starting_with( tu, ('fz_', 'pdf_'), method=True):
  680. exclude_reasons = []
  681. if fnname.startswith( 'fz_drop_') or fnname.startswith( 'fz_keep_'):
  682. continue
  683. if fnname.startswith( 'pdf_drop_') or fnname.startswith( 'pdf_keep_'):
  684. continue
  685. if cursor.type.is_function_variadic():
  686. exclude_reasons.append(
  687. (
  688. MethodExcludeReason_VARIADIC,
  689. 'function is variadic',
  690. ))
  691. # Look at resulttype.
  692. #
  693. result_type = cursor.type.get_result()
  694. if result_type.kind == clang.cindex.TypeKind.POINTER:
  695. result_type = result_type.get_pointee()
  696. result_type_name = state.get_name_canonical( result_type)
  697. result_type_name = util.clip( result_type.spelling, 'struct ')
  698. if result_type_name.startswith( ('fz_', 'pdf_')):
  699. if result_type.kind == clang.cindex.TypeKind.TYPEDEF:
  700. result_cursor = result_type.get_declaration()
  701. result_type = result_cursor.underlying_typedef_type
  702. if result_type.kind == state.clang.cindex.TypeKind.ELABORATED:
  703. result_type_extras = get_fz_extras( tu, result_type_name)
  704. if not result_type_extras:
  705. exclude_reasons.append(
  706. (
  707. MethodExcludeReason_NO_EXTRAS,
  708. f'no extras defined for result_type={result_type_name}.'
  709. ))
  710. else:
  711. if not result_type_extras.constructor_raw:
  712. exclude_reasons.append(
  713. (
  714. MethodExcludeReason_NO_RAW_CONSTRUCTOR,
  715. f'wrapper for result_type={result_type_name} does not have raw constructor.',
  716. ))
  717. if not result_type_extras.copyable:
  718. exclude_reasons.append(
  719. (
  720. MethodExcludeReason_NOT_COPYABLE,
  721. f'wrapper for result_type={result_type_name} is not copyable.',
  722. ))
  723. # Look at args
  724. #
  725. i = 0
  726. arg0_cursor = None
  727. for arg in get_args( tu, cursor):
  728. base_typename = get_base_typename( arg.cursor.type)
  729. if not arg.alt and base_typename.startswith( ('fz_', 'pdf_')):
  730. t_canonical = state.get_name_canonical( arg.cursor.type)
  731. if t_canonical.kind == clang.cindex.TypeKind.ENUM:
  732. # We don't (yet) wrap fz_* enums, but for now at least we
  733. # still wrap functions that take fz_* enum parameters -
  734. # callers will have to use the fz_* type.
  735. #
  736. # For example this is required by mutool_draw.py because
  737. # mudraw.c calls fz_set_separation_behavior().
  738. #
  739. jlib.logx(
  740. 'not excluding {fnname=} with enum fz_ param:'
  741. ' {arg.cursor.spelling=}'
  742. ' {arg.cursor.type.kind}'
  743. ' {state.get_name_canonical(arg.cursor.type).kind=}'
  744. )
  745. elif t_canonical.kind == clang.cindex.TypeKind.POINTER:
  746. pass
  747. else:
  748. exclude_reasons.append(
  749. (
  750. MethodExcludeReason_NO_WRAPPER_CLASS,
  751. f'no wrapper class for arg i={i}:'
  752. f' {state.get_name_canonical( arg.cursor.type).spelling}'
  753. f' {state.get_name_canonical(arg.cursor.type).kind}'
  754. ,
  755. ))
  756. if i == 0:
  757. if arg.alt:
  758. arg0_cursor = arg.alt
  759. else:
  760. exclude_reasons.append(
  761. (
  762. MethodExcludeReason_FIRST_ARG_NOT_STRUCT,
  763. 'first arg is not fz_* struct',
  764. ))
  765. i += 1
  766. if exclude_reasons:
  767. find_wrappable_function_with_arg0_type_excluded_cache[ fnname] = exclude_reasons
  768. #if fnname == 'fz_load_outline': # lgtm [py/unreachable-statement]
  769. if state.state_.show_details(fnname):
  770. jlib.log( 'Excluding {fnname=} from possible class methods because:')
  771. for i in exclude_reasons:
  772. jlib.log( ' {i}')
  773. else:
  774. if i > 0:
  775. # <fnname> is ok to wrap.
  776. arg0 = state.get_name_canonical( arg0_cursor.type).spelling
  777. arg0 = util.clip( arg0, 'struct ')
  778. #jlib.log( '=== Adding to {arg0=}: {fnname=}. {len(fnname_to_method_structname)=}')
  779. items = find_wrappable_function_with_arg0_type_cache.setdefault( arg0, [])
  780. items.append( fnname)
  781. fnname_to_method_structname[ fnname] = arg0
  782. jlib.log1( f'populating find_wrappable_function_with_arg0_type_cache took {time.time()-t0:.2f}s')
  783. def find_wrappable_function_with_arg0_type( tu, structname):
  784. '''
  785. Return list of fz_*() function names which could be wrapped as a method of
  786. our wrapper class for <structname>.
  787. The functions whose names we return, satisfy all of the following:
  788. First non-context param is <structname> (by reference, pointer or value).
  789. If return type is a fz_* struct (by reference, pointer or value), the
  790. corresponding wrapper class has a raw constructor.
  791. '''
  792. find_wrappable_function_with_arg0_type_cache_populate( tu)
  793. ret = find_wrappable_function_with_arg0_type_cache.get( structname, [])
  794. if state.state_.show_details(structname):
  795. jlib.log('{structname=}: {len(ret)=}:')
  796. for i in ret:
  797. jlib.log(' {i}')
  798. return ret
  799. find_struct_cache = None
  800. def find_class_for_wrappable_function( fn_name):
  801. '''
  802. If <fn_name>'s first arg is a struct and our wrapper class for this struct
  803. has a method that wraps <fn_name>, return name of wrapper class.
  804. Otherwise return None.
  805. '''
  806. return fnname_to_method_structname.get( fn_name)
  807. def find_struct( tu, structname, require_definition=True):
  808. '''
  809. Finds definition of struct.
  810. fixme: actually finds definition of anything, doesn't have to be a struct.
  811. Args:
  812. tu:
  813. Translation unit.
  814. structname:
  815. Name of struct to find.
  816. require_definition:
  817. Only return cursor if it is for definition of structure.
  818. Returns cursor for definition or None.
  819. '''
  820. verbose = state.state_.show_details( structname)
  821. verbose = False
  822. if verbose:
  823. jlib.log( '{=structname}')
  824. structname = util.clip( structname, ('const ', 'struct ')) # Remove any 'struct ' prefix.
  825. if verbose:
  826. jlib.log( '{=structname}')
  827. global find_struct_cache
  828. if find_struct_cache is None:
  829. find_struct_cache = dict()
  830. for cursor in get_children( tu.cursor):
  831. already = find_struct_cache.get( cursor.spelling)
  832. if already is None:
  833. find_struct_cache[ cursor.spelling] = cursor
  834. elif cursor.is_definition() and not already.is_definition():
  835. find_struct_cache[ cursor.spelling] = cursor
  836. ret = find_struct_cache.get( structname)
  837. if verbose:
  838. jlib.log( '{=ret}')
  839. if not ret:
  840. return
  841. if verbose:
  842. jlib.log( '{=require_definition ret.is_definition()}')
  843. if require_definition and not ret.is_definition():
  844. return
  845. return ret
  846. def find_name( cursor, name, nest=0):
  847. '''
  848. Returns cursor for specified name within <cursor>, or None if not found.
  849. name:
  850. Name to search for. Can contain '.' characters; we look for each
  851. element in turn, calling ourselves recursively.
  852. cursor:
  853. Item to search.
  854. '''
  855. assert cursor.spelling != ''
  856. if cursor.spelling == '':
  857. # Anonymous item; this seems to occur for (non-anonymous) unions.
  858. #
  859. # We recurse into children directly.
  860. #
  861. for c in get_members(cursor):
  862. ret = find_name_internal( c, name, nest+1)
  863. if ret:
  864. return ret
  865. d = name.find( '.')
  866. if d >= 0:
  867. head, tail = name[:d], name[d+1:]
  868. # Look for first element then for remaining.
  869. c = find_name( cursor, head, nest+1)
  870. if not c:
  871. return
  872. ret = find_name( c, tail, nest+2)
  873. return ret
  874. for c in get_members(cursor):
  875. if c.spelling == '':
  876. ret = find_name( c, name, nest+1)
  877. if ret:
  878. return ret
  879. if c.spelling == name:
  880. return c