OverCG.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. import re
  2. import argparse
  3. import linecache
  4. import subprocess
  5. def generateCallGraph(programIR=None, withExternalNode=False):
  6. '''
  7. This function is used to generate the call graph of a c program from it's
  8. LLVM IR. The basic idea of this function is the pointer analysis of the identifiers.
  9. If an identifier in a function A points to a function B (directly or indirectly), I define
  10. that this function A calls function B.
  11. programIR: the readabel LLVM IR, that is, the file ended with '.ll'.
  12. withExternalNode: if this is true, an external node will be added to the call graph,
  13. such that there is an edge from this external node to the node in
  14. the all graph.
  15. return: the call graph as a string using dot formate, so the user can use networkx
  16. or pygraphviz to analysis it.
  17. '''
  18. lines = linecache.getlines(programIR)
  19. #first, get the identifiers of functions
  20. #the functions defined by user are started with "define", while the functions
  21. #of other libraries are started with "declare"
  22. funcId = [] #the identifiers of functions
  23. globalIdRef = {} #the relationship among Ids
  24. IdInFunc = {} #identifiers in function
  25. #in LLVM IR, the identifer of a function is treated as a global identifer
  26. #that can be recognised by the following regular expression
  27. globalIdPattern = re.compile(r'[@][-a-zA-Z$._][-a-zA-Z$._0-9]*')
  28. #get all identifiers and the identifiers in the function
  29. for lcount in range(len(lines)):
  30. line = lines[lcount]
  31. if line.startswith("define"):
  32. fId = globalIdPattern.search(line)
  33. fIdStr = fId.group()
  34. if not fIdStr.startswith("@llvm"):
  35. funcId.append(fIdStr)
  36. funcBody = ""
  37. while not line.startswith("}"):
  38. funcBody += line
  39. lcount += 1
  40. line = lines[lcount]
  41. funcBody += line
  42. allIds = globalIdPattern.findall(funcBody)
  43. Ids = []
  44. for item in allIds:
  45. if not item.startswith("@llvm"):
  46. Ids.append(item)
  47. Ids.remove(fIdStr)
  48. IdInFunc[fIdStr] = Ids
  49. elif line.startswith("declare"):
  50. fId = globalIdPattern.search(line)
  51. fIdStr = fId.group()
  52. if not fIdStr.startswith("@llvm"):
  53. funcId.append(fIdStr)
  54. elif line.startswith("@"):
  55. line = line.split(" = ")
  56. lid = globalIdPattern.search(line[0]).group()
  57. rid = globalIdPattern.findall(line[1])
  58. globalIdRef[lid] = rid
  59. else:
  60. pass
  61. linecache.clearcache()
  62. #---------------------------------------------------------
  63. #analyze the reference relationship among identifiers to know
  64. #the call relationship
  65. funcCall = {}
  66. for func in IdInFunc.keys():
  67. call = []
  68. for Id in IdInFunc[func]:
  69. if Id in funcId:
  70. call.append(Id)
  71. elif Id in globalIdRef.keys():
  72. for _Id in globalIdRef[Id]:
  73. if _Id in funcId:
  74. call.append(_Id)
  75. funcCall[func] = call
  76. #------------------------------------------------------------
  77. #generate the dot graph
  78. dotgraph = "digraph \"Call Graph\"{\n"
  79. dotgraph += "label=\"Call Graph\";\n"
  80. callSet = []
  81. for func in funcCall.keys():
  82. if func not in callSet:
  83. callSet.append(func)
  84. for call in funcCall[func]:
  85. if call not in callSet:
  86. callSet.append(call)
  87. for func in set(callSet):
  88. dotgraph += "Node" + func.replace("@","").replace(".","") + " [shape=record, label=\"{" + func.lstrip("@") + "}\"];\n"
  89. if withExternalNode:
  90. dotgraph += "Node" + "External" + " [shape=record, label=\"{" + "external node" + "}\"];\n"
  91. for func in set(callSet):
  92. dotgraph += "Node" + "External" + " -> " + "Node" + func.replace("@","").replace(".","") +";\n"
  93. for func in funcCall.keys():
  94. if func in callSet:
  95. for call in funcCall[func]:
  96. dotgraph += "Node" + func.replace("@","").replace(".","") + " -> " + "Node" + call.replace("@","").replace(".","") +";\n"
  97. dotgraph += "}\n"
  98. return dotgraph
  99. if __name__ == '__main__':
  100. paraser = argparse.ArgumentParser()
  101. paraser.add_argument("-i", '--input', help='the input file')
  102. paraser.add_argument("-o", '--output', default="callgraph.dot",help='the output file')
  103. paraser.add_argument("-e", '--externalNode', action='store_true', default=False, help='make call graph with external node')
  104. paraser.add_argument("-s",'--show', action='store_true', default=False, help='show the call graph using xdot')
  105. args = paraser.parse_args()
  106. dotgraph = ""
  107. if not args.input:
  108. paraser.print_help()
  109. else:
  110. dotgraph = generateCallGraph(programIR=args.input, withExternalNode=args.externalNode)
  111. with open(args.output, "w") as fp:
  112. fp.write(dotgraph)
  113. if args.show:
  114. cmd = "xdot " + args.output
  115. subprocess.run(cmd, shell=True, stderr=subprocess.STDOUT)