123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264 |
- #!/bin/bash
- # gen-callgraph
- # -- A script to generate call graph from elf binary
- # Copyright (C) 2011 onlyuser <mailto:onlyuser@gmail.com>
- #
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- CMD=`basename $0`
- show_help()
- {
- echo "Usage: $CMD <BINARY> [DEBUG={0*/1}] | dot -Tpng -ocallgraph.png"
- }
- if [ $# -ne 1 -a $# -ne 2 ]; then
- echo "Fail! -- Expecting 1 or 2 arguments! ==> $@"
- show_help
- exit 1
- fi
- if [ -z "`which readelf`" ]; then
- echo "Error: Requires \"readelf\""
- exit 1
- fi
- if [ -z "`which objdump`" ]; then
- echo "Error: Requires \"objdump\""
- exit 1
- fi
- if [ -z "`which c++filt`" ]; then
- echo "Error: Requires \"c++filt\""
- exit 1
- fi
- if [ -z "`which dot`" ]; then
- echo "Error: Requires \"dot\""
- exit 1
- fi
- EXEC=$1
- DEBUG=$2
- if [ ! -f "$EXEC" ]; then
- echo "Error: $EXEC doesn't exist!"
- exit 1
- fi
- if [ -z "$DEBUG" ]; then
- DEBUG=0
- fi
- trap "unset FUNC_PAIR_ARRAY ASM_CMD_HASHMAP UNIQ_FUNC" EXIT
- #readelf $EXEC --all
- GEN_SYM_FILE_CMD="readelf $EXEC --headers --symbols"
- #http://stackoverflow.com/questions/1737095/how-do-i-disassemble-raw-x86-code
- #http://stackoverflow.com/questions/19071461/disassemble-raw-x64-machine-code
- #objdump -D -b binary -mi386 -Maddr16,data16 $EXEC
- GEN_ASM_FILE_CMD="objdump -D -b binary -mi386:x86-64 $EXEC"
- if [ "$DEBUG" == 1 ]; then
- echo "readelf command: $GEN_SYM_FILE_CMD" 1>&2
- echo "objdump command: $GEN_ASM_FILE_CMD" 1>&2
- echo "" 1>&2
- fi
- SYM_FILE_CONTENTS="`$GEN_SYM_FILE_CMD`"
- ASM_FILE_CONTENTS="`$GEN_ASM_FILE_CMD`"
- if [ "$DEBUG" == 1 ]; then
- DEBUG_SYM_FILE="`mktemp`"
- DEBUG_ASM_FILE="`mktemp`"
- #trap "rm $DEBUG_SYM_FILE $DEBUG_ASM_FILE" EXIT
- echo "$SYM_FILE_CONTENTS" > $DEBUG_SYM_FILE
- echo "$ASM_FILE_CONTENTS" > $DEBUG_ASM_FILE
- echo "Cached readelf output: $DEBUG_SYM_FILE" 1>&2
- echo "Cached objdump output: $DEBUG_ASM_FILE" 1>&2
- echo "" 1>&2
- fi
- ENTRY_POINT_LINE="`echo \"$SYM_FILE_CONTENTS\" | grep \"Entry point address:\"`"
- ENTRY_POINT_ADDR="`echo \"$ENTRY_POINT_LINE\" | cut -d':' -f2 | tr -d ' ' | sed 's/^0x4[0]*//g'`"
- declare -a FUNC_PAIR_ARRAY
- echo "Analyzing symbol table.. (Step 1 of 4)" 1>&2
- FOUND_SYMTAB=0
- n="`echo \"$SYM_FILE_CONTENTS\" | wc -l`"
- i=0
- FUNC_COUNT=0
- while read SYM_FILE_LINE; do
- PROGRESS=$(( $i * 100 / $n ))
- printf "\r$PROGRESS%%" 1>&2
- if [ "$FOUND_SYMTAB" == 0 ]; then
- if [[ "$SYM_FILE_LINE" =~ "Symbol table '.symtab'" ]]; then
- FOUND_SYMTAB=1
- else
- continue
- fi
- fi
- SYM_TUPLE="`echo \"$SYM_FILE_LINE\" | sed 's/[ ]\+/ /g'`"
- if [ "`echo \"$SYM_TUPLE\" | cut -d' ' -f4`" == "FUNC" ] &&
- [ "`echo \"$SYM_TUPLE\" | cut -d' ' -f5`" != "LOCAL" ] &&
- [ "`echo \"$SYM_TUPLE\" | cut -d' ' -f7`" != "UND" ];
- then
- FUNC_PAIR="`echo \"$SYM_TUPLE\" | cut -d' ' -f2,8 | sed 's/^00000000004[0]*//g'`"
- FUNC_ADDR="`echo \"$FUNC_PAIR\" | cut -d' ' -f1`"
- FUNC_ADDR="`printf \"%08x\" 0x$FUNC_ADDR`"
- FUNC_NAME="`echo \"$FUNC_PAIR\" | cut -d' ' -f2`"
- FUNC_PAIR_ARRAY[$FUNC_COUNT]="$FUNC_ADDR $FUNC_NAME"
- FUNC_COUNT=$(( $FUNC_COUNT + 1 ))
- fi
- i=$(( $i + 1 ))
- done <<< "$SYM_FILE_CONTENTS"
- echo -e "\r100%" 1>&2
- if [ "$FOUND_SYMTAB" == 0 ]; then
- echo "Error: Can't find symtab section in \"$EXEC\"."
- exit
- fi
- IFS=$'\n'; SORTED_FUNC_PAIR_ARRAY=($(sort <<< "${FUNC_PAIR_ARRAY[*]}")); unset IFS
- SORTED_FUNC_PAIR_LIST="`printf \"%s\n\" \"${SORTED_FUNC_PAIR_ARRAY[@]}\"`"
- if [ "$DEBUG" == 1 ]; then
- DEBUG_FUNC_PAIR_FILE="`mktemp`"
- echo "$SORTED_FUNC_PAIR_LIST" > $DEBUG_FUNC_PAIR_FILE
- echo "Generated function address pairs: $DEBUG_FUNC_PAIR_FILE" 1>&2
- echo "" 1>&2
- fi
- declare -A ASM_CMD_HASHMAP
- TAB=`printf '\t'`
- echo "Analyzing disassembly.. (Step 2 of 4)" 1>&2
- n="`echo \"$ASM_FILE_CONTENTS\" | wc -l`"
- i=0
- while read ASM_FILE_LINE; do
- PROGRESS=$(( $i * 100 / $n ))
- printf "\r$PROGRESS%%" 1>&2
- REGEX="^[ ]*([a-h0-9]*):$TAB(.*)$TAB(.*)"
- if ! [[ "$ASM_FILE_LINE" =~ $REGEX ]]; then
- i=$(( $i + 1 ))
- continue
- fi
- ASM_FILE_LINE_ADDR="${BASH_REMATCH[1]}"
- ASM_FILE_LINE_CMD="${BASH_REMATCH[3]}"
- ASM_CMD_HASHMAP[$ASM_FILE_LINE_ADDR]="$i:$ASM_FILE_LINE_CMD"
- i=$(( $i + 1 ))
- done <<< "$ASM_FILE_CONTENTS"
- echo -e "\r100%" 1>&2
- echo "digraph `basename $EXEC | sed 's/\./_/g'` {"
- echo "rankdir=LR;"
- echo "node [shape=ellipse];"
- declare -A UNIQ_FUNC
- echo "Generating nodes.. (Step 3 of 4)" 1>&2
- for i in `seq 0 $(( $FUNC_COUNT - 1 ))`; do
- PROGRESS=$(( $i * 100 / $FUNC_COUNT ))
- printf "\r$PROGRESS%%" 1>&2
- FUNC_PAIR=${SORTED_FUNC_PAIR_ARRAY[$i]}
- # comment this continue statement to include unconnected single nodes
- continue
- FUNC_ADDR="`echo \"$FUNC_PAIR\" | cut -d' ' -f1`"
- FUNC_ADDR="`printf \"%x\" 0x$FUNC_ADDR`"
- if [ -n "${UNIQ_FUNC[$FUNC_ADDR]}" ]; then
- continue
- fi
- UNIQ_FUNC[$FUNC_ADDR]="1"
- FUNC_NAME="`echo \"$FUNC_PAIR\" | cut -d' ' -f2`"
- FUNC_NAME_DEMANGLED="`echo $FUNC_NAME | c++filt`"
- if [ "$FUNC_ADDR" == "$ENTRY_POINT_ADDR" ]; then
- SHAPE_SPEC_STR=", shape=\"box\""
- else
- SHAPE_SPEC_STR=""
- fi
- echo "$FUNC_NAME [label=\"0x$FUNC_ADDR: $FUNC_NAME_DEMANGLED\"$SHAPE_SPEC_STR];"
- done
- echo -e "\r100%" 1>&2
- echo "Generating edges.. (Step 4 of 4)" 1>&2
- for i in `seq 0 $(( $FUNC_COUNT - 1 ))`; do
- PROGRESS=$(( $i * 100 / $FUNC_COUNT ))
- printf "\r$PROGRESS%%" 1>&2
- FUNC_PAIR=${SORTED_FUNC_PAIR_ARRAY[$i]}
- FUNC_ADDR="`echo \"$FUNC_PAIR\" | cut -d' ' -f1`"
- FUNC_ADDR="`printf \"%x\" 0x$FUNC_ADDR`"
- FUNC_NAME="`echo \"$FUNC_PAIR\" | cut -d' ' -f2`"
- FUNC_ASM_LINE_NO="`echo ${ASM_CMD_HASHMAP[$FUNC_ADDR]} | cut -d':' -f1`"
- if [ -z "$FUNC_ASM_LINE_NO" ]; then
- i=$(( $i + 1 ))
- continue
- fi
- NEXT_FUNC_INDEX=$(( $i + 1 ))
- NEXT_FUNC_PAIR=${SORTED_FUNC_PAIR_ARRAY[$NEXT_FUNC_INDEX]}
- if [ -z "$NEXT_FUNC_PAIR" ]; then
- i=$(( $i + 1 ))
- continue
- fi
- NEXT_FUNC_ADDR="`echo \"$NEXT_FUNC_PAIR\" | cut -d' ' -f1`"
- NEXT_FUNC_ADDR="`printf \"%x\" 0x$NEXT_FUNC_ADDR`"
- NEXT_FUNC_NAME="`echo \"$NEXT_FUNC_PAIR\" | cut -d' ' -f2`"
- NEXT_FUNC_ASM_LINE_NO="`echo ${ASM_CMD_HASHMAP[$NEXT_FUNC_ADDR]} | cut -d':' -f1`"
- if [ -z "$NEXT_FUNC_ASM_LINE_NO" ]; then
- i=$(( $i + 1 ))
- continue
- fi
- FUNC_ASM_LAST_LINE_NO=$(( $NEXT_FUNC_ASM_LINE_NO - 1 ))
- FUNC_ASM_BODY_LEN=$(( $NEXT_FUNC_ASM_LINE_NO - $FUNC_ASM_LINE_NO ))
- FUNC_ASM_BODY="`echo \"$ASM_FILE_CONTENTS\" | head -$FUNC_ASM_LAST_LINE_NO | tail -$FUNC_ASM_BODY_LEN`"
- CALLEE_ASM_LINES_LIST="`echo \"$FUNC_ASM_BODY\" | egrep '\<callq\>|\<jmpq\>|\<jmp\>|\<je\>|\<jne\>|\<jg\>|\<jge\>|\<jl\>|\<jle\>'`"
- if [ -z "$CALLEE_ASM_LINES_LIST" ]; then
- i=$(( $i + 1 ))
- continue
- fi
- while read -r CALLEE_ASM_LINE; do
- CALLEE_ADDR_PART="`echo \"$CALLEE_ASM_LINE\" | cut -d$'\t' -f1`"
- CALL_ADDR="`echo \"$CALLEE_ADDR_PART\" | cut -d':' -f1`"
- CALLEE_CMD="`echo \"$CALLEE_ASM_LINE\" | cut -d$'\t' -f3`"
- CALLEE_ADDR="`echo \"$CALLEE_CMD\" | sed 's/callq[ ]\+0x\([^ ]\+\)/\1/g' | sed 's/j[^ ]\+[ ]\+0x\([^ ]\+\)/\1/g'`"
- CALLEE_NAME="`echo \"$SORTED_FUNC_PAIR_LIST\" | grep \"^[0]*$CALLEE_ADDR\" | head -1 | cut -d' ' -f2`"
- if [ -z "$CALLEE_NAME" ]; then
- continue
- fi
- echo "$FUNC_NAME -> $CALLEE_NAME [label=\"0x$CALL_ADDR\"]"
- done <<< "$CALLEE_ASM_LINES_LIST"
- done
- echo -e "\r100%" 1>&2
- echo "}"
- echo "Done!" 1>&2
|