gen-callgraph.sh 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. #!/bin/bash
  2. # gen-callgraph
  3. # -- A script to generate call graph from elf binary
  4. # Copyright (C) 2011 onlyuser <mailto:onlyuser@gmail.com>
  5. #
  6. # This program is free software: you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation, either version 3 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. CMD=`basename $0`
  19. show_help()
  20. {
  21. echo "Usage: $CMD <BINARY> [DEBUG={0*/1}] | dot -Tpng -ocallgraph.png"
  22. }
  23. if [ $# -ne 1 -a $# -ne 2 ]; then
  24. echo "Fail! -- Expecting 1 or 2 arguments! ==> $@"
  25. show_help
  26. exit 1
  27. fi
  28. if [ -z "`which readelf`" ]; then
  29. echo "Error: Requires \"readelf\""
  30. exit 1
  31. fi
  32. if [ -z "`which objdump`" ]; then
  33. echo "Error: Requires \"objdump\""
  34. exit 1
  35. fi
  36. if [ -z "`which c++filt`" ]; then
  37. echo "Error: Requires \"c++filt\""
  38. exit 1
  39. fi
  40. if [ -z "`which dot`" ]; then
  41. echo "Error: Requires \"dot\""
  42. exit 1
  43. fi
  44. EXEC=$1
  45. DEBUG=$2
  46. if [ ! -f "$EXEC" ]; then
  47. echo "Error: $EXEC doesn't exist!"
  48. exit 1
  49. fi
  50. if [ -z "$DEBUG" ]; then
  51. DEBUG=0
  52. fi
  53. trap "unset FUNC_PAIR_ARRAY ASM_CMD_HASHMAP UNIQ_FUNC" EXIT
  54. #readelf $EXEC --all
  55. GEN_SYM_FILE_CMD="readelf $EXEC --headers --symbols"
  56. #http://stackoverflow.com/questions/1737095/how-do-i-disassemble-raw-x86-code
  57. #http://stackoverflow.com/questions/19071461/disassemble-raw-x64-machine-code
  58. #objdump -D -b binary -mi386 -Maddr16,data16 $EXEC
  59. GEN_ASM_FILE_CMD="objdump -D -b binary -mi386:x86-64 $EXEC"
  60. if [ "$DEBUG" == 1 ]; then
  61. echo "readelf command: $GEN_SYM_FILE_CMD" 1>&2
  62. echo "objdump command: $GEN_ASM_FILE_CMD" 1>&2
  63. echo "" 1>&2
  64. fi
  65. SYM_FILE_CONTENTS="`$GEN_SYM_FILE_CMD`"
  66. ASM_FILE_CONTENTS="`$GEN_ASM_FILE_CMD`"
  67. if [ "$DEBUG" == 1 ]; then
  68. DEBUG_SYM_FILE="`mktemp`"
  69. DEBUG_ASM_FILE="`mktemp`"
  70. #trap "rm $DEBUG_SYM_FILE $DEBUG_ASM_FILE" EXIT
  71. echo "$SYM_FILE_CONTENTS" > $DEBUG_SYM_FILE
  72. echo "$ASM_FILE_CONTENTS" > $DEBUG_ASM_FILE
  73. echo "Cached readelf output: $DEBUG_SYM_FILE" 1>&2
  74. echo "Cached objdump output: $DEBUG_ASM_FILE" 1>&2
  75. echo "" 1>&2
  76. fi
  77. ENTRY_POINT_LINE="`echo \"$SYM_FILE_CONTENTS\" | grep \"Entry point address:\"`"
  78. ENTRY_POINT_ADDR="`echo \"$ENTRY_POINT_LINE\" | cut -d':' -f2 | tr -d ' ' | sed 's/^0x4[0]*//g'`"
  79. declare -a FUNC_PAIR_ARRAY
  80. echo "Analyzing symbol table.. (Step 1 of 4)" 1>&2
  81. FOUND_SYMTAB=0
  82. n="`echo \"$SYM_FILE_CONTENTS\" | wc -l`"
  83. i=0
  84. FUNC_COUNT=0
  85. while read SYM_FILE_LINE; do
  86. PROGRESS=$(( $i * 100 / $n ))
  87. printf "\r$PROGRESS%%" 1>&2
  88. if [ "$FOUND_SYMTAB" == 0 ]; then
  89. if [[ "$SYM_FILE_LINE" =~ "Symbol table '.symtab'" ]]; then
  90. FOUND_SYMTAB=1
  91. else
  92. continue
  93. fi
  94. fi
  95. SYM_TUPLE="`echo \"$SYM_FILE_LINE\" | sed 's/[ ]\+/ /g'`"
  96. if [ "`echo \"$SYM_TUPLE\" | cut -d' ' -f4`" == "FUNC" ] &&
  97. [ "`echo \"$SYM_TUPLE\" | cut -d' ' -f5`" != "LOCAL" ] &&
  98. [ "`echo \"$SYM_TUPLE\" | cut -d' ' -f7`" != "UND" ];
  99. then
  100. FUNC_PAIR="`echo \"$SYM_TUPLE\" | cut -d' ' -f2,8 | sed 's/^00000000004[0]*//g'`"
  101. FUNC_ADDR="`echo \"$FUNC_PAIR\" | cut -d' ' -f1`"
  102. FUNC_ADDR="`printf \"%08x\" 0x$FUNC_ADDR`"
  103. FUNC_NAME="`echo \"$FUNC_PAIR\" | cut -d' ' -f2`"
  104. FUNC_PAIR_ARRAY[$FUNC_COUNT]="$FUNC_ADDR $FUNC_NAME"
  105. FUNC_COUNT=$(( $FUNC_COUNT + 1 ))
  106. fi
  107. i=$(( $i + 1 ))
  108. done <<< "$SYM_FILE_CONTENTS"
  109. echo -e "\r100%" 1>&2
  110. if [ "$FOUND_SYMTAB" == 0 ]; then
  111. echo "Error: Can't find symtab section in \"$EXEC\"."
  112. exit
  113. fi
  114. IFS=$'\n'; SORTED_FUNC_PAIR_ARRAY=($(sort <<< "${FUNC_PAIR_ARRAY[*]}")); unset IFS
  115. SORTED_FUNC_PAIR_LIST="`printf \"%s\n\" \"${SORTED_FUNC_PAIR_ARRAY[@]}\"`"
  116. if [ "$DEBUG" == 1 ]; then
  117. DEBUG_FUNC_PAIR_FILE="`mktemp`"
  118. echo "$SORTED_FUNC_PAIR_LIST" > $DEBUG_FUNC_PAIR_FILE
  119. echo "Generated function address pairs: $DEBUG_FUNC_PAIR_FILE" 1>&2
  120. echo "" 1>&2
  121. fi
  122. declare -A ASM_CMD_HASHMAP
  123. TAB=`printf '\t'`
  124. echo "Analyzing disassembly.. (Step 2 of 4)" 1>&2
  125. n="`echo \"$ASM_FILE_CONTENTS\" | wc -l`"
  126. i=0
  127. while read ASM_FILE_LINE; do
  128. PROGRESS=$(( $i * 100 / $n ))
  129. printf "\r$PROGRESS%%" 1>&2
  130. REGEX="^[ ]*([a-h0-9]*):$TAB(.*)$TAB(.*)"
  131. if ! [[ "$ASM_FILE_LINE" =~ $REGEX ]]; then
  132. i=$(( $i + 1 ))
  133. continue
  134. fi
  135. ASM_FILE_LINE_ADDR="${BASH_REMATCH[1]}"
  136. ASM_FILE_LINE_CMD="${BASH_REMATCH[3]}"
  137. ASM_CMD_HASHMAP[$ASM_FILE_LINE_ADDR]="$i:$ASM_FILE_LINE_CMD"
  138. i=$(( $i + 1 ))
  139. done <<< "$ASM_FILE_CONTENTS"
  140. echo -e "\r100%" 1>&2
  141. echo "digraph `basename $EXEC | sed 's/\./_/g'` {"
  142. echo "rankdir=LR;"
  143. echo "node [shape=ellipse];"
  144. declare -A UNIQ_FUNC
  145. echo "Generating nodes.. (Step 3 of 4)" 1>&2
  146. for i in `seq 0 $(( $FUNC_COUNT - 1 ))`; do
  147. PROGRESS=$(( $i * 100 / $FUNC_COUNT ))
  148. printf "\r$PROGRESS%%" 1>&2
  149. FUNC_PAIR=${SORTED_FUNC_PAIR_ARRAY[$i]}
  150. # comment this continue statement to include unconnected single nodes
  151. continue
  152. FUNC_ADDR="`echo \"$FUNC_PAIR\" | cut -d' ' -f1`"
  153. FUNC_ADDR="`printf \"%x\" 0x$FUNC_ADDR`"
  154. if [ -n "${UNIQ_FUNC[$FUNC_ADDR]}" ]; then
  155. continue
  156. fi
  157. UNIQ_FUNC[$FUNC_ADDR]="1"
  158. FUNC_NAME="`echo \"$FUNC_PAIR\" | cut -d' ' -f2`"
  159. FUNC_NAME_DEMANGLED="`echo $FUNC_NAME | c++filt`"
  160. if [ "$FUNC_ADDR" == "$ENTRY_POINT_ADDR" ]; then
  161. SHAPE_SPEC_STR=", shape=\"box\""
  162. else
  163. SHAPE_SPEC_STR=""
  164. fi
  165. echo "$FUNC_NAME [label=\"0x$FUNC_ADDR: $FUNC_NAME_DEMANGLED\"$SHAPE_SPEC_STR];"
  166. done
  167. echo -e "\r100%" 1>&2
  168. echo "Generating edges.. (Step 4 of 4)" 1>&2
  169. for i in `seq 0 $(( $FUNC_COUNT - 1 ))`; do
  170. PROGRESS=$(( $i * 100 / $FUNC_COUNT ))
  171. printf "\r$PROGRESS%%" 1>&2
  172. FUNC_PAIR=${SORTED_FUNC_PAIR_ARRAY[$i]}
  173. FUNC_ADDR="`echo \"$FUNC_PAIR\" | cut -d' ' -f1`"
  174. FUNC_ADDR="`printf \"%x\" 0x$FUNC_ADDR`"
  175. FUNC_NAME="`echo \"$FUNC_PAIR\" | cut -d' ' -f2`"
  176. FUNC_ASM_LINE_NO="`echo ${ASM_CMD_HASHMAP[$FUNC_ADDR]} | cut -d':' -f1`"
  177. if [ -z "$FUNC_ASM_LINE_NO" ]; then
  178. i=$(( $i + 1 ))
  179. continue
  180. fi
  181. NEXT_FUNC_INDEX=$(( $i + 1 ))
  182. NEXT_FUNC_PAIR=${SORTED_FUNC_PAIR_ARRAY[$NEXT_FUNC_INDEX]}
  183. if [ -z "$NEXT_FUNC_PAIR" ]; then
  184. i=$(( $i + 1 ))
  185. continue
  186. fi
  187. NEXT_FUNC_ADDR="`echo \"$NEXT_FUNC_PAIR\" | cut -d' ' -f1`"
  188. NEXT_FUNC_ADDR="`printf \"%x\" 0x$NEXT_FUNC_ADDR`"
  189. NEXT_FUNC_NAME="`echo \"$NEXT_FUNC_PAIR\" | cut -d' ' -f2`"
  190. NEXT_FUNC_ASM_LINE_NO="`echo ${ASM_CMD_HASHMAP[$NEXT_FUNC_ADDR]} | cut -d':' -f1`"
  191. if [ -z "$NEXT_FUNC_ASM_LINE_NO" ]; then
  192. i=$(( $i + 1 ))
  193. continue
  194. fi
  195. FUNC_ASM_LAST_LINE_NO=$(( $NEXT_FUNC_ASM_LINE_NO - 1 ))
  196. FUNC_ASM_BODY_LEN=$(( $NEXT_FUNC_ASM_LINE_NO - $FUNC_ASM_LINE_NO ))
  197. FUNC_ASM_BODY="`echo \"$ASM_FILE_CONTENTS\" | head -$FUNC_ASM_LAST_LINE_NO | tail -$FUNC_ASM_BODY_LEN`"
  198. CALLEE_ASM_LINES_LIST="`echo \"$FUNC_ASM_BODY\" | egrep '\<callq\>|\<jmpq\>|\<jmp\>|\<je\>|\<jne\>|\<jg\>|\<jge\>|\<jl\>|\<jle\>'`"
  199. if [ -z "$CALLEE_ASM_LINES_LIST" ]; then
  200. i=$(( $i + 1 ))
  201. continue
  202. fi
  203. while read -r CALLEE_ASM_LINE; do
  204. CALLEE_ADDR_PART="`echo \"$CALLEE_ASM_LINE\" | cut -d$'\t' -f1`"
  205. CALL_ADDR="`echo \"$CALLEE_ADDR_PART\" | cut -d':' -f1`"
  206. CALLEE_CMD="`echo \"$CALLEE_ASM_LINE\" | cut -d$'\t' -f3`"
  207. CALLEE_ADDR="`echo \"$CALLEE_CMD\" | sed 's/callq[ ]\+0x\([^ ]\+\)/\1/g' | sed 's/j[^ ]\+[ ]\+0x\([^ ]\+\)/\1/g'`"
  208. CALLEE_NAME="`echo \"$SORTED_FUNC_PAIR_LIST\" | grep \"^[0]*$CALLEE_ADDR\" | head -1 | cut -d' ' -f2`"
  209. if [ -z "$CALLEE_NAME" ]; then
  210. continue
  211. fi
  212. echo "$FUNC_NAME -> $CALLEE_NAME [label=\"0x$CALL_ADDR\"]"
  213. done <<< "$CALLEE_ASM_LINES_LIST"
  214. done
  215. echo -e "\r100%" 1>&2
  216. echo "}"
  217. echo "Done!" 1>&2