objlibcg.peg 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. # objdump language lexer+parser to use with packcc parser generator
  2. %prefix "objlibcg"
  3. %source{
  4. /*
  5. This is an objdump output parser created using the packcc parser tool
  6. This is used to generate a callgraph of a binary file with only calls to library routines
  7. Compile a test program with symbols like this
  8. gcc -g -O0 test.c -o test
  9. Create an assembly listing using objdump like this
  10. objdump -d -j .text --demangle=auto --show-raw-insn test >out.txt
  11. To create a callgraph
  12. cat out.txt |./objcg
  13. To see all possible binary formats objdump supports use
  14. objdump --help
  15. This is used with x86_64 elf binary on Linux
  16. For other type of binary formats the objdump command must be different
  17. For other type of binary formats the peg file must be edited
  18. Then generate new parser using
  19. packcc objlibcg.peg
  20. The peg scans for a function start, which looks like this
  21. 0000000000001b4a <sfg_init>:
  22. The peg scans for a function call, which looks like this
  23. 1418: e8 2d 07 00 00 callq 1b4a <sfg_init>
  24. This is an indirect call
  25. 2eb3: ff d0 callq *%rax
  26. This is a call to glibc
  27. 1104: ff 15 d6 de 00 00 callq *0xded6(%rip) # efe0 <__libc_start_main@GLIBC_2.2.5>
  28. This is a call to an external library function
  29. 4140: e8 3b cf ff ff callq 1080 <calloc@plt>
  30. When the binary is stripped with strip -s only the calls to lib routines are available
  31. */
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  /* set to 1 to get parse debug info */
  static int debug = 0;

  /* human readable labels, indexed by the event argument handed to PCC_DEBUG */
  static const char *dbg_str[] = { "Evaluating rule", "Matched rule", "Abandoning rule" };

  /*
   * Parser trace hook. When debug is non-zero it prints one line per event,
   * indented by two columns per nesting level, followed by the input text
   * (length bytes of buffer) the event refers to, and flushes stdout.
   *
   * The expansion is wrapped in do { } while (0) so that it is one statement:
   * the flush stays under the debug test and the macro is safe inside an
   * unbraced if/else. Every argument is parenthesized before use.
   */
  #define PCC_DEBUG(event, rule, level, pos, buffer, length) \
    do { \
      if (debug) { \
        fprintf(stdout, "%*s%s %s @%d [%.*s]\n", (int)((level) * 2), "", \
                dbg_str[(event)], (rule), (int)(pos), (int)(length), (buffer)); \
        fflush(stdout); \
      } \
    } while (0)
  /* NOTE: To guarantee the output order, stderr, which can lead to a race condition with stdout, is not used. */
  /* number of the input line currently being parsed; starts at 1 */
  static int linenr = 1;

  /* name of the function whose body is being scanned; used as the edge source */
  static char *curfname = "";

  /*
   * Return 1 when line contains a call mnemonic as a word of its own, i.e.
   * "call" or "callq" preceded by a blank (or the start of the line) and
   * followed by a blank. Otherwise return 0.
   *
   * Both spellings are accepted because some objdump versions print "call"
   * without the "q" suffix. The word check keeps symbol names that merely
   * contain the letters (for example <calloc@plt> after a jmp) from counting.
   */
  static int has_call_mnemonic (const char *line)
  {
    const char *hit;

    for (hit = strstr (line, "call"); hit != NULL; hit = strstr (hit + 4, "call")) {
      const char *after = hit + 4;
      int starts_word = (hit == line) || (hit[-1] == ' ') || (hit[-1] == '\t');

      if (*after == 'q') {
        after++;
      }
      if (starts_word && (*after == ' ' || *after == '\t')) {
        return 1;
      }
    }
    return 0;
  }

  /*
   * Print one graph edge  "curfname" -> "callee";  for a disassembly line that
   * calls a library routine.
   *
   * str is one input line without its line terminator; NULL and "" are ignored.
   * A line produces an edge only when it
   *   - contains '@' (as in name@plt or name@GLIBC_x.y),
   *   - contains '<', which starts the symbol name, and
   *   - contains a call mnemonic.
   * The callee is the text after the first '<' up to the first '@', '>' or '+'.
   * Targets ending in @plt are deliberately kept; they are the library calls
   * this tool is meant to report.
   */
  static void edgeto (const char *str)
  {
    const char *name;
    size_t namelen;

    if (str == NULL || *str == '\0') {
      return;
    }
    /* lines without '@' are calls to local routines and are not reported */
    if (strchr (str, '@') == NULL) {
      return;
    }
    name = strchr (str, '<');
    if (name == NULL) {
      return;
    }
    if (!has_call_mnemonic (str)) {
      return;
    }
    name++;
    namelen = strcspn (name, "@>+");
    printf (" \"%s\" -> \"%.*s\";\n", curfname, (int) namelen, name);
  }
  73. }
  # start of input: any number of lines followed by end of input
  file <- lines* !.

  # one line plus its terminator; $0 is the matched text including the line ending.
  # The debug echo goes to stdout, like the PCC_DEBUG trace, so that the two
  # outputs keep their order (the prologue notes that stderr is avoided for this).
  lines <- line endofline { if (debug) { fputs($0, stdout); fflush(stdout); } linenr++; }

  # a line is either a function header or any other text; other text is passed
  # to edgeto(), which prints an edge only for lines that call a library routine
  line <- funcdef
        / (!endofline .)* { edgeto ($0); }
  # function entry looks like this
  # 0000000000001b4a <sfg_init>:
  funcdef <- fdaddr _ '<' fdname '>:'

  # address column: everything before the first '<' on the current line.
  # The !endofline guard keeps the match from running on into following lines,
  # which would merge several input lines into one and skew the line count.
  fdaddr <- (!'<' !endofline .)*

  # symbol name: everything before the first '>' on the current line
  fdname <- (!'>' !endofline .)* {
      /* heap copy of the current function name; the previous copy is released here */
      static char *owned = NULL;
      free(owned);
      owned = strdup($0);
      /* fall back to an empty name if strdup() fails, so curfname is never NULL */
      curfname = (owned != NULL) ? owned : "";
      printf (" /* at function %s() */\n",$0);
  }
  84. # call looks like this
  85. # 1418: e8 2d 07 00 00 callq 1b4a <sfg_init>
  86. # 2eb3: ff d0 callq *%rax
  87. # 1104: ff 15 d6 de 00 00 callq *0xded6(%rip) # efe0 <__libc_start_main@GLIBC_2.2.5>
  88. # 4140: e8 3b cf ff ff callq 1080 <calloc@plt>
  89. #calldef <- callopc 'callq' _ calladr endofline
  90. #callopc <- (!'callq' .)*
  91. #calladr <- (!'<' .)* '<' callname '>'
  92. # / (!endofline .)*
  93. #callname <- callname1 '@plt'
  94. # / callname1 { edgeto ($0); }
  95. #callname1 <- (!'>' !'@' .)*
  # optional run of blanks
  _ <- space*

  # a single blank: space or tab
  space <- (' ' / '\t')

  # line terminator. PEG choice is ordered, so both two-character forms are
  # listed before the single characters; with '\n' in front of '\n\r' the
  # latter could never match and "\n\r" was counted as two line endings.
  endofline <- ( '\r\n' / '\n\r' / '\n' / '\r' )
  99. %%
  100. int main() {
  101. objlibcg_context_t *ctx = objlibcg_create(NULL);
  102. printf ("/* generated callgraph from binary by objlibcg.peg */\ndigraph objlibcg {\n");
  103. while (objlibcg_parse(ctx, NULL)){;}
  104. objlibcg_destroy(ctx);
  105. printf (" /* %d lines parsed */\n}\n",linenr);
  106. return 0;
  107. }