objcg.peg 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. # objdump language lexer+parser to use with packcc parser generator
  2. %prefix "objgv"
  3. %source{
  4. /*
  5. This is an objdump output parser created using the packcc parser tool
  6. This is used to generate a callgraph of a binary file
  7. Compile a test program with symbols like this
  8. gcc -g -O0 test.c -o test
  9. Create an assembly listing using objdump like this
  10. objdump -d -j .text --demangle=auto --show-raw-insn test >out.txt
  11. To create a callgraph
  12. cat out.txt |./objcg
  13. To see all possible binary formats objdump supports use
  14. objdump --help
  15. This is used with x86_64 elf binary on Linux
  16. For other type of binary formats the objdump command must be different
  17. For other type of binary formats the peg file must be edited
  18. Then generate new parser using
  19. packcc objcg.peg
  20. The peg scans for function starts, which look like this
  21. 0000000000001b4a <sfg_init>:
  22. The peg scans for function calls, which look like this
  23. 1418: e8 2d 07 00 00 callq 1b4a <sfg_init>
  24. This is an indirect call
  25. 2eb3: ff d0 callq *%rax
  26. This is a call to glibc
  27. 1104: ff 15 d6 de 00 00 callq *0xded6(%rip) # efe0 <__libc_start_main@GLIBC_2.2.5>
  28. This is a call to an external library function
  29. 4140: e8 3b cf ff ff callq 1080 <calloc@plt>
  30. When the binary is stripped with strip -s only the calls to lib routines are available
  31. */
  32. #include <stdio.h>
  33. #include <string.h>
  /* set to 1 to get parse debug info */
  static int debug = 0;
  /* event labels, indexed by the event code passed to PCC_DEBUG */
  static const char *dbg_str[] = { "Evaluating rule", "Matched rule", "Abandoning rule" };
  /*
   * Parser trace hook: when `debug` is non-zero, print one line per parser
   * event (indented by nesting level) and flush it immediately.
   *
   * The body is wrapped in do { } while (0) so the macro expands to exactly
   * one statement: the flush only happens when a trace line was actually
   * written, and the macro stays correct inside an unbraced if/else.
   */
  #define PCC_DEBUG(event, rule, level, pos, buffer, length) \
      do { \
          if (debug) { \
              fprintf(stdout, "%*s%s %s @%d [%.*s]\n", (int)((level) * 2), "", dbg_str[event], (rule), (int)(pos), (int)(length), (buffer)); \
              fflush(stdout); \
          } \
      } while (0)
  /* NOTE: To guarantee the output order, stderr, which can lead a race condition with stdout, is not used. */
  /* line counter; starts at 1 and is incremented by the grammar's endofline rule */
  static int linenr = 1;
  /* name of the most recently seen function entry; set by the grammar's fdname action */
  static char *curfname = "";
  42. static void edgeto (char *str)
  43. {
  44. char *p0=NULL;
  45. char *p1=NULL;
  46. char *p2 = NULL;
  47. char *p=NULL;
  48. if(str==NULL) { return; }
  49. if(strlen(str)==0) { return; }
  50. /* if this is allowed the calls to libs are included. here is a option. */
  51. if (strchr(str,'@')) { return; }
  52. p0 = strchr(str,'<');
  53. if (p0==NULL) { return; }
  54. p1 = strstr (str,"callq");
  55. if (p1 == NULL) { return; }
  56. p2 = strstr (str, "@plt");
  57. if (p2) { return; }
  58. printf (" \"%s\" -> \"",curfname);
  59. p = strchr (str,'<');
  60. p++;
  61. while (*p)
  62. {
  63. if (*p == '>') { break ; }
  64. if(*p =='+') { break; }
  65. fputc (*p, stdout);
  66. p++;
  67. }
  68. printf("\";\n");
  69. return;
  70. }
  71. }
  # start of input
  file <- lines* !.
  lines <- line endofline
  # a line is either a function entry or anything else;
  # only the "anything else" case is handed to edgeto() to look for a call
  line <- funcdef
        / (!endofline .)* { edgeto ($0); }
  # function entry looks like this
  # 0000000000001b4a <sfg_init>:
  # fdaddr and fdname stop at a line end, so a function entry can never
  # swallow preceding lines and every line end is counted by endofline
  funcdef <- fdaddr _ '<' fdname '>:'
  fdaddr <- (!'<' !endofline .)*
  fdname <- (!'>' !endofline .)* { curfname = strdup($0); printf (" /* at function %s() */\n",$0); }
  # call looks like this
  # 1418: e8 2d 07 00 00 callq 1b4a <sfg_init>
  # 2eb3: ff d0 callq *%rax
  # 1104: ff 15 d6 de 00 00 callq *0xded6(%rip) # efe0 <__libc_start_main@GLIBC_2.2.5>
  # 4140: e8 3b cf ff ff callq 1080 <calloc@plt>
  #calldef <- callopc 'callq' _ calladr endofline
  #callopc <- (!'callq' .)*
  #calladr <- (!'<' .)* '<' callname '>'
  # / (!endofline .)*
  #callname <- callname1 '@plt'
  # / callname1 { edgeto ($0); }
  #callname1 <- (!'>' !'@' .)*
  _ <- space*
  space <- (' ' / '\t')
  # two-character line ends come first: PEG choice is ordered, so with '\n'
  # listed before '\n\r' the longer form could never match
  endofline <- ( '\r\n' / '\n\r' / '\n' / '\r' ) { linenr++; }
  97. %%
  98. int main() {
  99. objgv_context_t *ctx = objgv_create(NULL);
  100. printf ("/* generated callgraph from binary by objcg.peg */\ndigraph objcg {\n");
  101. while (objgv_parse(ctx, NULL)){;}
  102. objgv_destroy(ctx);
  103. printf (" /* %d lines parsed */\n}\n",linenr);
  104. return 0;
  105. }