clang_array_check.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. # ---
  2. # * Licensed under the Apache License, Version 2.0 (the "License");
  3. # * you may not use this file except in compliance with the License.
  4. # * You may obtain a copy of the License at
  5. # *
  6. # * http://www.apache.org/licenses/LICENSE-2.0
  7. # ---
  8. # by Campbell Barton
  9. """
  10. Invocation:
  11. export CLANG_BIND_DIR="/dsk/src/llvm/tools/clang/bindings/python"
  12. export CLANG_LIB_DIR="/opt/llvm/lib"
  13. python2 clang_array_check.py somefile.c -DSOME_DEFINE -I/some/include
  14. ... defines and includes are optional
  15. """
# Delay parsing function declarations until we need them: when enabled, the
# declaration cursor itself is cached and only converted into an argument-size
# dict the first time that function is looked up.
USE_LAZY_INIT = True
# When enabled, report any difference between the size of the array passed and
# the size declared; otherwise only report arrays smaller than declared.
USE_EXACT_COMPARE = False
  19. # -----------------------------------------------------------------------------
  20. # predefined function/arg sizes, handy sometimes, but not complete...
  21. defs_precalc = {
  22. "glColor3bv": {0: 3},
  23. "glColor4bv": {0: 4},
  24. "glColor3ubv": {0: 3},
  25. "glColor4ubv": {0: 4},
  26. "glColor4usv": {0: 3},
  27. "glColor4usv": {0: 4},
  28. "glColor3fv": {0: 3},
  29. "glColor4fv": {0: 4},
  30. "glColor3dv": {0: 3},
  31. "glColor4dv": {0: 4},
  32. "glVertex2fv": {0: 2},
  33. "glVertex3fv": {0: 3},
  34. "glVertex4fv": {0: 4},
  35. "glEvalCoord1fv": {0: 1},
  36. "glEvalCoord1dv": {0: 1},
  37. "glEvalCoord2fv": {0: 2},
  38. "glEvalCoord2dv": {0: 2},
  39. "glRasterPos2dv": {0: 2},
  40. "glRasterPos3dv": {0: 3},
  41. "glRasterPos4dv": {0: 4},
  42. "glRasterPos2fv": {0: 2},
  43. "glRasterPos3fv": {0: 3},
  44. "glRasterPos4fv": {0: 4},
  45. "glRasterPos2sv": {0: 2},
  46. "glRasterPos3sv": {0: 3},
  47. "glRasterPos4sv": {0: 4},
  48. "glTexCoord2fv": {0: 2},
  49. "glTexCoord3fv": {0: 3},
  50. "glTexCoord4fv": {0: 4},
  51. "glTexCoord2dv": {0: 2},
  52. "glTexCoord3dv": {0: 3},
  53. "glTexCoord4dv": {0: 4},
  54. "glNormal3fv": {0: 3},
  55. "glNormal3dv": {0: 3},
  56. "glNormal3bv": {0: 3},
  57. "glNormal3iv": {0: 3},
  58. "glNormal3sv": {0: 3},
  59. }
  60. # -----------------------------------------------------------------------------
  61. import sys
  62. if 0:
  63. # Examples with LLVM as the root dir: '/dsk/src/llvm'
  64. # path containing 'clang/__init__.py'
  65. CLANG_BIND_DIR = "/dsk/src/llvm/tools/clang/bindings/python"
  66. # path containing libclang.so
  67. CLANG_LIB_DIR = "/opt/llvm/lib"
  68. else:
  69. import os
  70. CLANG_BIND_DIR = os.environ.get("CLANG_BIND_DIR")
  71. CLANG_LIB_DIR = os.environ.get("CLANG_LIB_DIR")
  72. if CLANG_BIND_DIR is None:
  73. print("$CLANG_BIND_DIR python binding dir not set")
  74. if CLANG_LIB_DIR is None:
  75. print("$CLANG_LIB_DIR clang lib dir not set")
  76. sys.path.append(CLANG_BIND_DIR)
  77. import clang
  78. import clang.cindex
  79. from clang.cindex import (CursorKind,
  80. TypeKind,
  81. TokenKind)
  82. clang.cindex.Config.set_library_path(CLANG_LIB_DIR)
  83. index = clang.cindex.Index.create()
  84. args = sys.argv[2:]
  85. # print(args)
  86. tu = index.parse(sys.argv[1], args)
  87. # print('Translation unit: %s' % tu.spelling)
  88. filepath = tu.spelling
  89. # -----------------------------------------------------------------------------
  90. def function_parm_wash_tokens(parm):
  91. # print(parm.kind)
  92. assert parm.kind in (CursorKind.PARM_DECL,
  93. CursorKind.VAR_DECL, # XXX, double check this
  94. CursorKind.FIELD_DECL,
  95. )
  96. """
  97. Return tolens without trailing commands and 'const'
  98. """
  99. tokens = [t for t in parm.get_tokens()]
  100. if not tokens:
  101. return tokens
  102. # if tokens[-1].kind == To
  103. # remove trailing char
  104. if tokens[-1].kind == TokenKind.PUNCTUATION:
  105. if tokens[-1].spelling in (",", ")", ";"):
  106. tokens.pop()
  107. # else:
  108. # print(tokens[-1].spelling)
  109. t_new = []
  110. for t in tokens:
  111. t_kind = t.kind
  112. t_spelling = t.spelling
  113. ok = True
  114. if t_kind == TokenKind.KEYWORD:
  115. if t_spelling in ("const", "restrict", "volatile"):
  116. ok = False
  117. elif t_spelling.startswith("__"):
  118. ok = False # __restrict
  119. elif t_kind in (TokenKind.COMMENT, ):
  120. ok = False
  121. # Use these
  122. elif t_kind in (TokenKind.LITERAL,
  123. TokenKind.PUNCTUATION,
  124. TokenKind.IDENTIFIER):
  125. # use but ignore
  126. pass
  127. else:
  128. print("Unknown!", t_kind, t_spelling)
  129. # if its OK we will add
  130. if ok:
  131. t_new.append(t)
  132. return t_new
  133. def parm_size(node_child):
  134. tokens = function_parm_wash_tokens(node_child)
  135. # print(" ".join([t.spelling for t in tokens]))
  136. # NOT PERFECT CODE, EXTRACT SIZE FROM TOKENS
  137. if len(tokens) >= 3: # foo [ 1 ]
  138. if ((tokens[-3].kind == TokenKind.PUNCTUATION and tokens[-3].spelling == "[") and
  139. (tokens[-2].kind == TokenKind.LITERAL and tokens[-2].spelling.isdigit()) and
  140. (tokens[-1].kind == TokenKind.PUNCTUATION and tokens[-1].spelling == "]")):
  141. # ---
  142. return int(tokens[-2].spelling)
  143. return -1
  144. def function_get_arg_sizes(node):
  145. # Return a dict if (index: size) items
  146. # {arg_indx: arg_array_size, ... ]
  147. arg_sizes = {}
  148. if 1: # node.spelling == "BM_vert_create", for debugging
  149. node_parms = [node_child for node_child in node.get_children()
  150. if node_child.kind == CursorKind.PARM_DECL]
  151. for i, node_child in enumerate(node_parms):
  152. # print(node_child.kind, node_child.spelling)
  153. # print(node_child.type.kind, node_child.spelling)
  154. if node_child.type.kind == TypeKind.CONSTANTARRAY:
  155. pointee = node_child.type.get_pointee()
  156. size = parm_size(node_child)
  157. if size != -1:
  158. arg_sizes[i] = size
  159. return arg_sizes
  160. # -----------------------------------------------------------------------------
  161. _defs = {}
  162. def lookup_function_size_def(func_id):
  163. if USE_LAZY_INIT:
  164. result = _defs.get(func_id, {})
  165. if type(result) != dict:
  166. result = _defs[func_id] = function_get_arg_sizes(result)
  167. return result
  168. else:
  169. return _defs.get(func_id, {})
  170. # -----------------------------------------------------------------------------
  171. def file_check_arg_sizes(tu):
  172. # main checking function
  173. def validate_arg_size(node):
  174. """
  175. Loop over args and validate sizes for args we KNOW the size of.
  176. """
  177. assert node.kind == CursorKind.CALL_EXPR
  178. if 0:
  179. print("---",
  180. " <~> ".join(
  181. [" ".join([t.spelling for t in C.get_tokens()])
  182. for C in node.get_children()]
  183. ))
  184. # print(node.location)
  185. # first child is the function call, skip that.
  186. children = list(node.get_children())
  187. if not children:
  188. return # XXX, look into this, happens on C++
  189. func = children[0]
  190. # get the func declaration!
  191. # works but we can better scan for functions ahead of time.
  192. if 0:
  193. func_dec = func.get_definition()
  194. if func_dec:
  195. print("FD", " ".join([t.spelling for t in func_dec.get_tokens()]))
  196. else:
  197. # HRMP'f - why does this fail?
  198. print("AA", " ".join([t.spelling for t in node.get_tokens()]))
  199. else:
  200. args_size_definition = () # dummy
  201. # get the key
  202. tok = list(func.get_tokens())
  203. if tok:
  204. func_id = tok[0].spelling
  205. args_size_definition = lookup_function_size_def(func_id)
  206. if not args_size_definition:
  207. return
  208. children = children[1:]
  209. for i, node_child in enumerate(children):
  210. children = list(node_child.get_children())
  211. # skip if we dont have an index...
  212. size_def = args_size_definition.get(i, -1)
  213. if size_def == -1:
  214. continue
  215. # print([c.kind for c in children])
  216. # print(" ".join([t.spelling for t in node_child.get_tokens()]))
  217. if len(children) == 1:
  218. arg = children[0]
  219. if arg.kind in (CursorKind.DECL_REF_EXPR,
  220. CursorKind.UNEXPOSED_EXPR):
  221. if arg.type.kind == TypeKind.CONSTANTARRAY:
  222. dec = arg.get_definition()
  223. if dec:
  224. size = parm_size(dec)
  225. # size == 0 is for 'float *a'
  226. if size != -1 and size != 0:
  227. # nice print!
  228. if 0:
  229. print("".join([t.spelling for t in func.get_tokens()]),
  230. i,
  231. " ".join([t.spelling for t in dec.get_tokens()]))
  232. # testing
  233. # size_def = 100
  234. if size != 1:
  235. if USE_EXACT_COMPARE:
  236. # is_err = (size != size_def) and (size != 4 and size_def != 3)
  237. is_err = (size != size_def)
  238. else:
  239. is_err = (size < size_def)
  240. if is_err:
  241. location = node.location
  242. # if "math_color_inline.c" not in str(location.file):
  243. if 1:
  244. print("%s:%d:%d: argument %d is size %d, should be %d (from %s)" %
  245. (location.file,
  246. location.line,
  247. location.column,
  248. i + 1, size, size_def,
  249. filepath # always the same but useful when running threaded
  250. ))
  251. # we dont really care what we are looking at, just scan entire file for
  252. # function calls.
  253. def recursive_func_call_check(node):
  254. if node.kind == CursorKind.CALL_EXPR:
  255. validate_arg_size(node)
  256. for c in node.get_children():
  257. recursive_func_call_check(c)
  258. recursive_func_call_check(tu.cursor)
  259. # -- first pass, cache function definitions sizes
  260. # PRINT FUNC DEFINES
  261. def recursive_arg_sizes(node, ):
  262. # print(node.kind, node.spelling)
  263. if node.kind == CursorKind.FUNCTION_DECL:
  264. if USE_LAZY_INIT:
  265. args_sizes = node
  266. else:
  267. args_sizes = function_get_arg_sizes(node)
  268. # if args_sizes:
  269. # print(node.spelling, args_sizes)
  270. _defs[node.spelling] = args_sizes
  271. # print("adding", node.spelling)
  272. for c in node.get_children():
  273. recursive_arg_sizes(c)
# First pass: cache function sizes for every function declaration found in
# the translation unit.
recursive_arg_sizes(tu.cursor)
# Merge in the predefined sizes, these replace any cached entry with the
# same function name.
_defs.update(defs_precalc)
# --- second pass, check against def's
# Every call found in the file is validated against the entries in '_defs'.
file_check_arg_sizes(tu)