Disassembler.pm 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. # Disassembler.pm
  2. #
  3. # Copyright (c) 1996 Malcolm Beattie
  4. # Copyright (c) 2008,2009,2010,2011,2012 Reini Urban
  5. #
  6. # You may distribute under the terms of either the GNU General Public
  7. # License or the Artistic License, as specified in the README file.
  8. $B::Disassembler::VERSION = '1.13';
  9. package B::Disassembler::BytecodeStream;
  10. use FileHandle;
  11. use Carp;
  12. use Config qw(%Config);
  13. use B qw(cstring cast_I32);
  14. @ISA = qw(FileHandle);
  15. sub readn {
  16. my ( $fh, $len ) = @_;
  17. my $data;
  18. read( $fh, $data, $len );
  19. croak "reached EOF while reading $len bytes" unless length($data) == $len;
  20. return $data;
  21. }
  22. sub GET_U8 {
  23. my $fh = shift;
  24. my $c = $fh->getc;
  25. croak "reached EOF while reading U8" unless defined($c);
  26. return ord($c);
  27. }
  28. sub GET_U16 {
  29. my $fh = shift;
  30. my $str = $fh->readn(2);
  31. croak "reached EOF while reading U16" unless length($str) == 2;
  32. # Todo: check byteorder
  33. return unpack( "S", $str );
  34. }
  35. sub GET_NV {
  36. my $fh = shift;
  37. my ( $str, $c );
  38. while ( defined( $c = $fh->getc ) && $c ne "\0" ) {
  39. $str .= $c;
  40. }
  41. croak "reached EOF while reading double" unless defined($c);
  42. return $str;
  43. }
  44. sub GET_U32 {
  45. my $fh = shift;
  46. my $str = $fh->readn(4);
  47. croak "reached EOF while reading U32" unless length($str) == 4;
  48. # Todo: check byteorder
  49. return unpack( "L", $str );
  50. }
  51. sub GET_I32 {
  52. my $fh = shift;
  53. my $str = $fh->readn(4);
  54. croak "reached EOF while reading I32" unless length($str) == 4;
  55. # Todo: check byteorder
  56. return unpack( "l", $str );
  57. }
  58. sub GET_objindex {
  59. my $fh = shift;
  60. my $str = $fh->readn(4);
  61. croak "reached EOF while reading objindex" unless length($str) == 4;
  62. # Todo: check byteorder
  63. return unpack( "L", $str );
  64. }
  65. sub GET_opindex {
  66. my $fh = shift;
  67. my $str = $fh->readn(4);
  68. croak "reached EOF while reading opindex" unless length($str) == 4;
  69. # Todo: check byteorder
  70. return unpack( "L", $str );
  71. }
  72. sub GET_svindex {
  73. my $fh = shift;
  74. my $str = $fh->readn(4);
  75. croak "reached EOF while reading svindex" unless length($str) == 4;
  76. # Todo: check byteorder
  77. return unpack( "L", $str );
  78. }
  79. sub GET_pvindex {
  80. my $fh = shift;
  81. my $str = $fh->readn(4);
  82. croak "reached EOF while reading pvindex" unless length($str) == 4;
  83. # Todo: check byteorder
  84. return unpack( "L", $str );
  85. }
  86. sub GET_hekindex {
  87. my $fh = shift;
  88. my $str = $fh->readn(4);
  89. croak "reached EOF while reading hekindex" unless length($str) == 4;
  90. # Todo: check byteorder
  91. return unpack( "L", $str );
  92. }
  93. sub GET_strconst {
  94. my $fh = shift;
  95. my ( $str, $c );
  96. $str = '';
  97. while ( defined( $c = $fh->getc ) && $c ne "\0" ) {
  98. $str .= $c;
  99. }
  100. croak "reached EOF while reading strconst" unless defined($c);
  101. return cstring($str);
  102. }
  103. sub GET_pvcontents { }
  104. sub GET_PV {
  105. my $fh = shift;
  106. my $str;
  107. my $len = $fh->GET_U32;
  108. if ($len) {
  109. read( $fh, $str, $len );
  110. croak "reached EOF while reading PV" unless length($str) == $len;
  111. return cstring($str);
  112. }
  113. else {
  114. return '""';
  115. }
  116. }
  117. sub GET_comment_t {
  118. my $fh = shift;
  119. my ( $str, $c );
  120. while ( defined( $c = $fh->getc ) && $c ne "\n" ) {
  121. $str .= $c;
  122. }
  123. croak "reached EOF while reading comment" unless defined($c);
  124. return cstring($str);
  125. }
  126. sub GET_double {
  127. my $fh = shift;
  128. my ( $str, $c );
  129. while ( defined( $c = $fh->getc ) && $c ne "\0" ) {
  130. $str .= $c;
  131. }
  132. croak "reached EOF while reading double" unless defined($c);
  133. return $str;
  134. }
  135. sub GET_none { }
  136. sub GET_op_tr_array {
  137. my $fh = shift;
  138. my $len = unpack "S", $fh->readn(2);
  139. my @ary = unpack "S*", $fh->readn( $len * 2 );
  140. return join( ",", $len, @ary );
  141. }
  142. sub GET_IV64 {
  143. my $fh = shift;
  144. my $str = $fh->readn(8);
  145. croak "reached EOF while reading I32" unless length($str) == 8;
  146. # Todo: check byteorder
  147. my $i = unpack( "q", $str );
  148. return $i > 8 ? sprintf "0x%09llx", $i : $i;
  149. }
  150. sub GET_IV {
  151. # Check the header settings, not the current settings.
  152. $B::Disassembler::ivsize == 4 ? &GET_I32 : &GET_IV64;
  153. # $Config{ivsize} == 4 ? &GET_I32 : &GET_IV64;
  154. }
  155. sub GET_PADOFFSET {
  156. # Check the header settings, not the current settings.
  157. $B::Disassembler::ptrsize == 8 ? &GET_IV64 : &GET_U32;
  158. # $Config{ptrsize} == 8 ? &GET_IV64 : &GET_U32;
  159. }
  160. sub GET_long {
  161. # Check the header settings, not the current settings.
  162. # B::Disassembler::ivsize or longsize if ge xxx?
  163. if ($B::Disassembler::longsize) {
  164. return $B::Disassembler::longsize == 8 ? &GET_IV64 : &GET_U32;
  165. } else {
  166. # return $Config{longsize} == 8 ? &GET_IV64 : &GET_U32;
  167. return $B::Disassembler::ivsize == 8 ? &GET_IV64 : &GET_U32;
  168. }
  169. }
  170. sub GET_pmflags {
  171. my $fh = shift;
  172. my $size = 2;
  173. if ($B::Disassembler::blversion ge '"0.07"') {
  174. if ($B::Disassembler::perlversion ge '"5.013"') {
  175. return $fh->GET_U32;
  176. }
  177. }
  178. return $fh->GET_U16;
  179. }
  180. package B::Disassembler;
  181. use Exporter;
  182. @ISA = qw(Exporter);
  183. @EXPORT_OK = qw(disassemble_fh get_header print_insn print_insn_bare @opname);
  184. use Carp;
  185. use strict;
  186. use B::Asmdata qw(%insn_data @insn_name);
  187. use Opcode qw(opset_to_ops full_opset);
  188. use Config qw(%Config);
  189. use B::Concise;
  190. BEGIN {
  191. if ( $] < 5.009 ) {
  192. B::Asmdata->import(qw(@specialsv_name));
  193. }
  194. else {
  195. B->import(qw(@specialsv_name));
  196. }
  197. }
  198. my $ix;
  199. my $opname;
  200. our @opname = opset_to_ops(full_opset);
  201. our (
  202. $magic, $archname, $blversion, $ivsize,
  203. $ptrsize, $longsize, $byteorder, $archflag, $perlversion
  204. );
  205. # >=5.12
  206. our @svnames = ("NULL"); # 0
  207. push @svnames, "BIND" if $] >= 5.009 and $] < 5.019002; # 1
  208. push @svnames, ("IV", "NV"); # 2,3
  209. push @svnames, "RV" if $] < 5.011; #
  210. push @svnames, "PV";
  211. push @svnames, "INVLIST" if $] >= 5.019002; # 4
  212. push @svnames, ("PVIV", "PVNV", "PVMG"); # 4-7
  213. push @svnames, "BM" if $] < 5.009;
  214. push @svnames, "REGEXP" if $] >= 5.011; # 8
  215. push @svnames, "GV" if $] >= 5.009; # 9
  216. push @svnames, ("PVLV", "AV", "HV", "CV"); # 10-13
  217. push @svnames, "GV" if $] < 5.009;
  218. push @svnames, ("FM", "IO"); # 14,15
  219. sub dis_header($) {
  220. my ($fh) = @_;
  221. my $str = $fh->readn(3);
  222. if ($str eq '#! ') {
  223. $str .= $fh->GET_comment_t;
  224. $str .= $fh->GET_comment_t;
  225. $magic = $fh->GET_U32;
  226. } else {
  227. $str .= $fh->readn(1);
  228. $magic = unpack( "L", $str );
  229. }
  230. warn("bad magic") if $magic != 0x43424c50;
  231. $archname = $fh->GET_strconst();
  232. $blversion = $fh->GET_strconst();
  233. $ivsize = $fh->GET_U32();
  234. $ptrsize = $fh->GET_U32();
  235. if ( $blversion ge '"0.06_03"' ) {
  236. $longsize = $fh->GET_U32();
  237. }
  238. if ( $blversion gt '"0.06"' or $blversion eq '"0.04"' ) {
  239. $byteorder = $fh->GET_strconst();
  240. }
  241. if ( $blversion ge '"0.06_05"' ) {
  242. $archflag = $fh->GET_U16();
  243. }
  244. if ( $blversion ge '"0.06_06"' ) {
  245. $perlversion = $fh->GET_strconst();
  246. }
  247. }
  248. sub get_header() {
  249. my @result = (
  250. $magic, $archname, $blversion, $ivsize,
  251. $ptrsize, $byteorder, $longsize, $archflag,
  252. $perlversion
  253. );
  254. if (wantarray) {
  255. return @result;
  256. }
  257. else {
  258. my $hash = {
  259. magic => $magic,
  260. archname => $archname,
  261. blversion => $blversion,
  262. ivsize => $ivsize,
  263. ptrsize => $ptrsize,
  264. };
  265. for (qw(magic archname blversion ivsize ptrsize byteorder
  266. longsize archflag perlversion))
  267. {
  268. $hash->{$_} = $$_ if defined $$_;
  269. }
  270. return $hash;
  271. }
  272. }
  273. sub print_insn {
  274. my ( $insn, $arg, $comment ) = @_;
  275. undef $comment unless $comment;
  276. if ( defined($arg) ) {
  277. # threaded or unthreaded
  278. if ( $insn eq 'newopx' or $insn eq 'ldop' and $] > 5.007) {
  279. my $type = $arg >> 7;
  280. my $size = $arg - ( $type << 7 );
  281. $arg .= sprintf( " \t# size:%d, type:%d %s", $size, $type) if $comment;
  282. $opname = $opname[$type];
  283. printf "\n# [%s %d]\n", $opname, $ix++;
  284. }
  285. elsif ( !$comment ) {
  286. ;
  287. }
  288. elsif ( $insn eq 'comment' ) {
  289. $arg .= "comment $arg";
  290. $arg .= " \t#" . $comment if $comment ne '1';
  291. }
  292. elsif ( $insn eq 'stpv' ) {
  293. $arg .= "\t# " . $comment if $comment ne '1';
  294. printf "# -%s- %d\n", 'PV', $ix++;
  295. }
  296. elsif ( $insn eq 'newsvx' ) {
  297. my $type = $arg & 0xff; # SVTYPEMASK
  298. $arg .= sprintf("\t# type=%d,flags=0x%x", $type, $arg);
  299. $arg .= $comment if $comment ne '1';
  300. printf "\n# [%s %d]\n", $svnames[$type], $ix++;
  301. }
  302. elsif ( $insn eq 'newpadlx' ) {
  303. $arg .= "\t# " . $comment if $comment ne '1';
  304. printf "\n# [%s %d]\n", "PADLIST", $ix++;
  305. }
  306. elsif ( $insn eq 'newpadnlx' ) {
  307. $arg .= "\t# " . $comment if $comment ne '1';
  308. printf "\n# [%s %d]\n", "PADNAMELIST", $ix++;
  309. }
  310. elsif ( $insn eq 'newpadnx' ) {
  311. $arg .= "\t# " . $comment if $comment ne '1';
  312. printf "\n# [%s %d]\n", "PADNAME", $ix++;
  313. }
  314. elsif ( $insn eq 'gv_stashpvx' ) {
  315. $arg .= "\t# " . $comment if $comment ne '1';
  316. printf "\n# [%s %d]\n", "STASH", $ix++;
  317. }
  318. elsif ( $insn eq 'ldspecsvx' ) {
  319. $arg .= "\t# $specialsv_name[$arg]";
  320. $arg .= $comment if $comment ne '1';
  321. printf "\n# [%s %d]\n", "SPECIAL", $ix++;
  322. }
  323. elsif ( $insn eq 'ldsv' ) {
  324. $arg .= "\t# " . $comment if $comment ne '1';
  325. printf "# -%s-\n", 'GP/AV/HV/NULL/MG';
  326. }
  327. elsif ( $insn eq 'gv_fetchpvx' ) {
  328. $arg .= "\t# " . $comment if $comment ne '1';
  329. printf "\n# [%s %d]\n", 'GV', $ix++;
  330. }
  331. elsif ( $insn eq 'sv_magic' ) {
  332. $arg .= sprintf( "\t# '%s'", chr($arg) );
  333. }
  334. elsif ( $insn =~ /_flags/ ) {
  335. my $f = $arg;
  336. $arg .= sprintf( "\t# 0x%x", $f ) if $comment;
  337. $arg .= " ".B::Concise::op_flags($f) if $insn eq 'op_flags' and $comment;
  338. }
  339. elsif ( $comment and $insn eq 'op_private' ) {
  340. my $f = $arg;
  341. $arg .= sprintf( "\t# 0x%x", $f );
  342. $arg .= " ".B::Concise::private_flags($opname, $f);
  343. }
  344. elsif ( $insn eq 'op_type' and $] < 5.007 ) {
  345. my $type = $arg;
  346. $arg .= sprintf( "\t# [ %s ]", $opname[$type] );
  347. }
  348. else {
  349. $arg .= "\t# " . $comment if $comment ne '1';
  350. }
  351. printf "%s %s\n", $insn, $arg;
  352. }
  353. else {
  354. $insn .= "\t# " . $comment if $comment ne '1';
  355. print $insn, "\n";
  356. }
  357. }
  358. sub print_insn_bare {
  359. my ( $insn, $arg ) = @_;
  360. if ( defined($arg) ) {
  361. printf "%s %s\n", $insn, $arg;
  362. }
  363. else {
  364. print $insn, "\n";
  365. }
  366. }
  367. sub disassemble_fh {
  368. my $fh = shift;
  369. my $out = shift;
  370. my $verbose = shift;
  371. my ( $c, $getmeth, $insn, $arg );
  372. $ix = 1;
  373. bless $fh, "B::Disassembler::BytecodeStream";
  374. dis_header($fh);
  375. if ($verbose) {
  376. printf "#magic 0x%x\n", $magic; #0x43424c50
  377. printf "#archname %s\n", $archname;
  378. printf "#blversion %s\n", $blversion;
  379. printf "#ivsize %d\n", $ivsize;
  380. printf "#ptrsize %d\n", $ptrsize;
  381. printf "#byteorder %s\n", $byteorder if $byteorder;
  382. printf "#longsize %d\n", $longsize if $longsize;
  383. printf "#archflag %d\n", $archflag if defined $archflag;
  384. printf "#perlversion %s\n", $perlversion if $perlversion;
  385. print "\n";
  386. }
  387. while ( defined( $c = $fh->getc ) ) {
  388. $c = ord($c);
  389. $insn = $insn_name[$c];
  390. if ( !defined($insn) || $insn eq "unused" ) {
  391. my $pos = $fh->tell - 1;
  392. warn "Illegal instruction code $c at stream offset $pos.\n";
  393. }
  394. $getmeth = $insn_data{$insn}->[2];
  395. #warn "EOF at $insn $getmeth" if $fh->eof();
  396. $arg = $fh->$getmeth();
  397. if ( defined($arg) ) {
  398. &$out( $insn, $arg, $verbose );
  399. }
  400. else {
  401. &$out( $insn, undef, $verbose );
  402. }
  403. }
  404. }
  405. 1;
  406. __END__
  407. =head1 NAME
  408. B::Disassembler - Disassemble Perl bytecode
  409. =head1 SYNOPSIS
  410. use Disassembler qw(print_insn);
  411. my $fh = new FileHandle "<$ARGV[0]";
  412. disassemble_fh($fh, \&print_insn);
  413. =head1 DESCRIPTION
  414. disassemble_fh takes an filehandle with bytecode and a printer function.
  415. The printer function gets three arguments: insn, arg (optional) and the comment.
  416. See F<lib/B/Disassembler.pm> and F<scripts/disassemble>.
  417. =head1 disassemble_fh (filehandle, printer_coderef, [ verbose ])
  418. disassemble_fh takes an filehandle with bytecode and a printer coderef.
  419. Two default printer functions are provided:
  420. print_insn print_insn_bare
  421. =head1 print_insn
  422. Callback function for disassemble_fh, which gets three arguments from
  423. the disassembler. insn (a string), arg (a string or number or undef)
  424. and the comment (an optional string).
  425. This supports the new behaviour in F<scripts/disassemble>. It prints
  426. each insn and optional argument with some additional comments, which
  427. looks similar to B::Assembler with option -S (commented source).
  428. =head1 print_insn_bare
  429. This is the same as the old behaviour of scripts/disassemble. It
  430. prints each insn and optional argument without any comments. Line per
  431. line.
  432. =head1 get_header
  433. Returns the .plc header as array of
  434. ( magic, archname, blversion, ivsize, ptrsize,
  435. byteorder, longsize, archflag, perlversion )
  436. in ARRAY context, or in SCALAR context the array from above as named hash.
  437. B<magic> is always "PLBC". "PLJC" is reserved for JIT'ted code also
  438. loaded via ByteLoader.
  439. B<archname> is the archname string and is in the ByteLoader up to 0.06
  440. checked strictly. Starting with ByteLoader 0.06_05 platform
  441. compatibility is implemented by checking the $archflag, and doing
  442. byteorder swapping for same length longsize, and adjusting the ivsize
  443. and ptrsize.
  444. B<blversion> is the ByteLoader version from the header as string.
  445. Up to ByteLoader 0.06 this version must have matched exactly, since 0.07
  446. earlier ByteLoader versions are also accepted in the ByteLoader.
  447. B<ivsize> matches $Config{ivsize} of the assembling perl.
  448. A number, 4 or 8 only supported.
  449. B<ptrsize> matches $Config{ptrsize} of the assembling perl.
  450. A number, 4 or 8 only supported.
  451. B<longsize> is $Config{longsize} of the assembling perl.
  452. A number, 4 or 8.
  453. Only since blversion 0.06_03.
  454. B<byteorder> is a string of "0x12345678" on big-endian or "0x78563412" (?)
  455. on little-endian machines. The beginning "0x" is stripped for compatibility
  456. with intermediate ByteLoader versions, i.e. 5.6.1 to 5.8.0,
  457. Added with blversion 0.06_03, and also with blversion 0.04.
  458. See L<BcVersions>
  459. B<archflag> is a bitmask of opcode platform-dependencies.
  460. Currently used:
  461. bit 1 for USE_ITHREADS
  462. bit 2 for MULTIPLICITY
  463. Added with blversion 0.06_05.
  464. B<perlversion> $] of the perl which produced this bytecode as string.
  465. Added with blversion 0.06_06.
  466. =head1 AUTHORS
  467. Malcolm Beattie C<MICB at cpan.org> I<(retired)>,
  468. Reini Urban C<perl-compiler@googlegroups.com> since 2008.
  469. =cut
  470. # Local Variables:
  471. # mode: cperl
  472. # cperl-indent-level: 2
  473. # fill-column: 100
  474. # End:
  475. # vim: expandtab shiftwidth=2: