123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554 |
- # Disassembler.pm
- #
- # Copyright (c) 1996 Malcolm Beattie
- # Copyright (c) 2008,2009,2010,2011,2012 Reini Urban
- #
- # You may distribute under the terms of either the GNU General Public
- # License or the Artistic License, as specified in the README file.
- $B::Disassembler::VERSION = '1.13';
- package B::Disassembler::BytecodeStream;
- use FileHandle;
- use Carp;
- use Config qw(%Config);
- use B qw(cstring cast_I32);
- @ISA = qw(FileHandle);
- sub readn {
- my ( $fh, $len ) = @_;
- my $data;
- read( $fh, $data, $len );
- croak "reached EOF while reading $len bytes" unless length($data) == $len;
- return $data;
- }
- sub GET_U8 {
- my $fh = shift;
- my $c = $fh->getc;
- croak "reached EOF while reading U8" unless defined($c);
- return ord($c);
- }
- sub GET_U16 {
- my $fh = shift;
- my $str = $fh->readn(2);
- croak "reached EOF while reading U16" unless length($str) == 2;
- # Todo: check byteorder
- return unpack( "S", $str );
- }
- sub GET_NV {
- my $fh = shift;
- my ( $str, $c );
- while ( defined( $c = $fh->getc ) && $c ne "\0" ) {
- $str .= $c;
- }
- croak "reached EOF while reading double" unless defined($c);
- return $str;
- }
- sub GET_U32 {
- my $fh = shift;
- my $str = $fh->readn(4);
- croak "reached EOF while reading U32" unless length($str) == 4;
- # Todo: check byteorder
- return unpack( "L", $str );
- }
- sub GET_I32 {
- my $fh = shift;
- my $str = $fh->readn(4);
- croak "reached EOF while reading I32" unless length($str) == 4;
- # Todo: check byteorder
- return unpack( "l", $str );
- }
- sub GET_objindex {
- my $fh = shift;
- my $str = $fh->readn(4);
- croak "reached EOF while reading objindex" unless length($str) == 4;
- # Todo: check byteorder
- return unpack( "L", $str );
- }
- sub GET_opindex {
- my $fh = shift;
- my $str = $fh->readn(4);
- croak "reached EOF while reading opindex" unless length($str) == 4;
- # Todo: check byteorder
- return unpack( "L", $str );
- }
- sub GET_svindex {
- my $fh = shift;
- my $str = $fh->readn(4);
- croak "reached EOF while reading svindex" unless length($str) == 4;
- # Todo: check byteorder
- return unpack( "L", $str );
- }
- sub GET_pvindex {
- my $fh = shift;
- my $str = $fh->readn(4);
- croak "reached EOF while reading pvindex" unless length($str) == 4;
- # Todo: check byteorder
- return unpack( "L", $str );
- }
- sub GET_hekindex {
- my $fh = shift;
- my $str = $fh->readn(4);
- croak "reached EOF while reading hekindex" unless length($str) == 4;
- # Todo: check byteorder
- return unpack( "L", $str );
- }
- sub GET_strconst {
- my $fh = shift;
- my ( $str, $c );
- $str = '';
- while ( defined( $c = $fh->getc ) && $c ne "\0" ) {
- $str .= $c;
- }
- croak "reached EOF while reading strconst" unless defined($c);
- return cstring($str);
- }
- sub GET_pvcontents { }
- sub GET_PV {
- my $fh = shift;
- my $str;
- my $len = $fh->GET_U32;
- if ($len) {
- read( $fh, $str, $len );
- croak "reached EOF while reading PV" unless length($str) == $len;
- return cstring($str);
- }
- else {
- return '""';
- }
- }
- sub GET_comment_t {
- my $fh = shift;
- my ( $str, $c );
- while ( defined( $c = $fh->getc ) && $c ne "\n" ) {
- $str .= $c;
- }
- croak "reached EOF while reading comment" unless defined($c);
- return cstring($str);
- }
- sub GET_double {
- my $fh = shift;
- my ( $str, $c );
- while ( defined( $c = $fh->getc ) && $c ne "\0" ) {
- $str .= $c;
- }
- croak "reached EOF while reading double" unless defined($c);
- return $str;
- }
- sub GET_none { }
- sub GET_op_tr_array {
- my $fh = shift;
- my $len = unpack "S", $fh->readn(2);
- my @ary = unpack "S*", $fh->readn( $len * 2 );
- return join( ",", $len, @ary );
- }
- sub GET_IV64 {
- my $fh = shift;
- my $str = $fh->readn(8);
- croak "reached EOF while reading I32" unless length($str) == 8;
- # Todo: check byteorder
- my $i = unpack( "q", $str );
- return $i > 8 ? sprintf "0x%09llx", $i : $i;
- }
- sub GET_IV {
- # Check the header settings, not the current settings.
- $B::Disassembler::ivsize == 4 ? &GET_I32 : &GET_IV64;
- # $Config{ivsize} == 4 ? &GET_I32 : &GET_IV64;
- }
- sub GET_PADOFFSET {
- # Check the header settings, not the current settings.
- $B::Disassembler::ptrsize == 8 ? &GET_IV64 : &GET_U32;
- # $Config{ptrsize} == 8 ? &GET_IV64 : &GET_U32;
- }
- sub GET_long {
- # Check the header settings, not the current settings.
- # B::Disassembler::ivsize or longsize if ge xxx?
- if ($B::Disassembler::longsize) {
- return $B::Disassembler::longsize == 8 ? &GET_IV64 : &GET_U32;
- } else {
- # return $Config{longsize} == 8 ? &GET_IV64 : &GET_U32;
- return $B::Disassembler::ivsize == 8 ? &GET_IV64 : &GET_U32;
- }
- }
- sub GET_pmflags {
- my $fh = shift;
- my $size = 2;
- if ($B::Disassembler::blversion ge '"0.07"') {
- if ($B::Disassembler::perlversion ge '"5.013"') {
- return $fh->GET_U32;
- }
- }
- return $fh->GET_U16;
- }
- package B::Disassembler;
- use Exporter;
- @ISA = qw(Exporter);
- @EXPORT_OK = qw(disassemble_fh get_header print_insn print_insn_bare @opname);
- use Carp;
- use strict;
- use B::Asmdata qw(%insn_data @insn_name);
- use Opcode qw(opset_to_ops full_opset);
- use Config qw(%Config);
- use B::Concise;
- BEGIN {
- if ( $] < 5.009 ) {
- B::Asmdata->import(qw(@specialsv_name));
- }
- else {
- B->import(qw(@specialsv_name));
- }
- }
- my $ix;
- my $opname;
- our @opname = opset_to_ops(full_opset);
- our (
- $magic, $archname, $blversion, $ivsize,
- $ptrsize, $longsize, $byteorder, $archflag, $perlversion
- );
- # >=5.12
- our @svnames = ("NULL"); # 0
- push @svnames, "BIND" if $] >= 5.009 and $] < 5.019002; # 1
- push @svnames, ("IV", "NV"); # 2,3
- push @svnames, "RV" if $] < 5.011; #
- push @svnames, "PV";
- push @svnames, "INVLIST" if $] >= 5.019002; # 4
- push @svnames, ("PVIV", "PVNV", "PVMG"); # 4-7
- push @svnames, "BM" if $] < 5.009;
- push @svnames, "REGEXP" if $] >= 5.011; # 8
- push @svnames, "GV" if $] >= 5.009; # 9
- push @svnames, ("PVLV", "AV", "HV", "CV"); # 10-13
- push @svnames, "GV" if $] < 5.009;
- push @svnames, ("FM", "IO"); # 14,15
- sub dis_header($) {
- my ($fh) = @_;
- my $str = $fh->readn(3);
- if ($str eq '#! ') {
- $str .= $fh->GET_comment_t;
- $str .= $fh->GET_comment_t;
- $magic = $fh->GET_U32;
- } else {
- $str .= $fh->readn(1);
- $magic = unpack( "L", $str );
- }
- warn("bad magic") if $magic != 0x43424c50;
- $archname = $fh->GET_strconst();
- $blversion = $fh->GET_strconst();
- $ivsize = $fh->GET_U32();
- $ptrsize = $fh->GET_U32();
- if ( $blversion ge '"0.06_03"' ) {
- $longsize = $fh->GET_U32();
- }
- if ( $blversion gt '"0.06"' or $blversion eq '"0.04"' ) {
- $byteorder = $fh->GET_strconst();
- }
- if ( $blversion ge '"0.06_05"' ) {
- $archflag = $fh->GET_U16();
- }
- if ( $blversion ge '"0.06_06"' ) {
- $perlversion = $fh->GET_strconst();
- }
- }
- sub get_header() {
- my @result = (
- $magic, $archname, $blversion, $ivsize,
- $ptrsize, $byteorder, $longsize, $archflag,
- $perlversion
- );
- if (wantarray) {
- return @result;
- }
- else {
- my $hash = {
- magic => $magic,
- archname => $archname,
- blversion => $blversion,
- ivsize => $ivsize,
- ptrsize => $ptrsize,
- };
- for (qw(magic archname blversion ivsize ptrsize byteorder
- longsize archflag perlversion))
- {
- $hash->{$_} = $$_ if defined $$_;
- }
- return $hash;
- }
- }
- sub print_insn {
- my ( $insn, $arg, $comment ) = @_;
- undef $comment unless $comment;
- if ( defined($arg) ) {
- # threaded or unthreaded
- if ( $insn eq 'newopx' or $insn eq 'ldop' and $] > 5.007) {
- my $type = $arg >> 7;
- my $size = $arg - ( $type << 7 );
- $arg .= sprintf( " \t# size:%d, type:%d %s", $size, $type) if $comment;
- $opname = $opname[$type];
- printf "\n# [%s %d]\n", $opname, $ix++;
- }
- elsif ( !$comment ) {
- ;
- }
- elsif ( $insn eq 'comment' ) {
- $arg .= "comment $arg";
- $arg .= " \t#" . $comment if $comment ne '1';
- }
- elsif ( $insn eq 'stpv' ) {
- $arg .= "\t# " . $comment if $comment ne '1';
- printf "# -%s- %d\n", 'PV', $ix++;
- }
- elsif ( $insn eq 'newsvx' ) {
- my $type = $arg & 0xff; # SVTYPEMASK
- $arg .= sprintf("\t# type=%d,flags=0x%x", $type, $arg);
- $arg .= $comment if $comment ne '1';
- printf "\n# [%s %d]\n", $svnames[$type], $ix++;
- }
- elsif ( $insn eq 'newpadlx' ) {
- $arg .= "\t# " . $comment if $comment ne '1';
- printf "\n# [%s %d]\n", "PADLIST", $ix++;
- }
- elsif ( $insn eq 'newpadnlx' ) {
- $arg .= "\t# " . $comment if $comment ne '1';
- printf "\n# [%s %d]\n", "PADNAMELIST", $ix++;
- }
- elsif ( $insn eq 'newpadnx' ) {
- $arg .= "\t# " . $comment if $comment ne '1';
- printf "\n# [%s %d]\n", "PADNAME", $ix++;
- }
- elsif ( $insn eq 'gv_stashpvx' ) {
- $arg .= "\t# " . $comment if $comment ne '1';
- printf "\n# [%s %d]\n", "STASH", $ix++;
- }
- elsif ( $insn eq 'ldspecsvx' ) {
- $arg .= "\t# $specialsv_name[$arg]";
- $arg .= $comment if $comment ne '1';
- printf "\n# [%s %d]\n", "SPECIAL", $ix++;
- }
- elsif ( $insn eq 'ldsv' ) {
- $arg .= "\t# " . $comment if $comment ne '1';
- printf "# -%s-\n", 'GP/AV/HV/NULL/MG';
- }
- elsif ( $insn eq 'gv_fetchpvx' ) {
- $arg .= "\t# " . $comment if $comment ne '1';
- printf "\n# [%s %d]\n", 'GV', $ix++;
- }
- elsif ( $insn eq 'sv_magic' ) {
- $arg .= sprintf( "\t# '%s'", chr($arg) );
- }
- elsif ( $insn =~ /_flags/ ) {
- my $f = $arg;
- $arg .= sprintf( "\t# 0x%x", $f ) if $comment;
- $arg .= " ".B::Concise::op_flags($f) if $insn eq 'op_flags' and $comment;
- }
- elsif ( $comment and $insn eq 'op_private' ) {
- my $f = $arg;
- $arg .= sprintf( "\t# 0x%x", $f );
- $arg .= " ".B::Concise::private_flags($opname, $f);
- }
- elsif ( $insn eq 'op_type' and $] < 5.007 ) {
- my $type = $arg;
- $arg .= sprintf( "\t# [ %s ]", $opname[$type] );
- }
- else {
- $arg .= "\t# " . $comment if $comment ne '1';
- }
- printf "%s %s\n", $insn, $arg;
- }
- else {
- $insn .= "\t# " . $comment if $comment ne '1';
- print $insn, "\n";
- }
- }
- sub print_insn_bare {
- my ( $insn, $arg ) = @_;
- if ( defined($arg) ) {
- printf "%s %s\n", $insn, $arg;
- }
- else {
- print $insn, "\n";
- }
- }
- sub disassemble_fh {
- my $fh = shift;
- my $out = shift;
- my $verbose = shift;
- my ( $c, $getmeth, $insn, $arg );
- $ix = 1;
- bless $fh, "B::Disassembler::BytecodeStream";
- dis_header($fh);
- if ($verbose) {
- printf "#magic 0x%x\n", $magic; #0x43424c50
- printf "#archname %s\n", $archname;
- printf "#blversion %s\n", $blversion;
- printf "#ivsize %d\n", $ivsize;
- printf "#ptrsize %d\n", $ptrsize;
- printf "#byteorder %s\n", $byteorder if $byteorder;
- printf "#longsize %d\n", $longsize if $longsize;
- printf "#archflag %d\n", $archflag if defined $archflag;
- printf "#perlversion %s\n", $perlversion if $perlversion;
- print "\n";
- }
- while ( defined( $c = $fh->getc ) ) {
- $c = ord($c);
- $insn = $insn_name[$c];
- if ( !defined($insn) || $insn eq "unused" ) {
- my $pos = $fh->tell - 1;
- warn "Illegal instruction code $c at stream offset $pos.\n";
- }
- $getmeth = $insn_data{$insn}->[2];
- #warn "EOF at $insn $getmeth" if $fh->eof();
- $arg = $fh->$getmeth();
- if ( defined($arg) ) {
- &$out( $insn, $arg, $verbose );
- }
- else {
- &$out( $insn, undef, $verbose );
- }
- }
- }
- 1;
- __END__
- =head1 NAME
- B::Disassembler - Disassemble Perl bytecode
- =head1 SYNOPSIS
- use Disassembler qw(print_insn);
- my $fh = new FileHandle "<$ARGV[0]";
- disassemble_fh($fh, \&print_insn);
- =head1 DESCRIPTION
- disassemble_fh takes an filehandle with bytecode and a printer function.
- The printer function gets three arguments: insn, arg (optional) and the comment.
- See F<lib/B/Disassembler.pm> and F<scripts/disassemble>.
- =head1 disassemble_fh (filehandle, printer_coderef, [ verbose ])
- disassemble_fh takes an filehandle with bytecode and a printer coderef.
- Two default printer functions are provided:
- print_insn print_insn_bare
- =head1 print_insn
- Callback function for disassemble_fh, which gets three arguments from
- the disassembler. insn (a string), arg (a string or number or undef)
- and the comment (an optional string).
- This supports the new behaviour in F<scripts/disassemble>. It prints
- each insn and optional argument with some additional comments, which
- looks similar to B::Assembler with option -S (commented source).
- =head1 print_insn_bare
- This is the same as the old behaviour of scripts/disassemble. It
- prints each insn and optional argument without any comments. Line per
- line.
- =head1 get_header
- Returns the .plc header as array of
- ( magic, archname, blversion, ivsize, ptrsize,
- byteorder, longsize, archflag, perlversion )
- in ARRAY context, or in SCALAR context the array from above as named hash.
- B<magic> is always "PLBC". "PLJC" is reserved for JIT'ted code also
- loaded via ByteLoader.
- B<archname> is the archname string and is in the ByteLoader up to 0.06
- checked strictly. Starting with ByteLoader 0.06_05 platform
- compatibility is implemented by checking the $archflag, and doing
- byteorder swapping for same length longsize, and adjusting the ivsize
- and ptrsize.
- B<blversion> is the ByteLoader version from the header as string.
- Up to ByteLoader 0.06 this version must have matched exactly, since 0.07
- earlier ByteLoader versions are also accepted in the ByteLoader.
- B<ivsize> matches $Config{ivsize} of the assembling perl.
- A number, 4 or 8 only supported.
- B<ptrsize> matches $Config{ptrsize} of the assembling perl.
- A number, 4 or 8 only supported.
- B<longsize> is $Config{longsize} of the assembling perl.
- A number, 4 or 8.
- Only since blversion 0.06_03.
- B<byteorder> is a string of "0x12345678" on big-endian or "0x78563412" (?)
- on little-endian machines. The beginning "0x" is stripped for compatibility
- with intermediate ByteLoader versions, i.e. 5.6.1 to 5.8.0,
- Added with blversion 0.06_03, and also with blversion 0.04.
- See L<BcVersions>
- B<archflag> is a bitmask of opcode platform-dependencies.
- Currently used:
- bit 1 for USE_ITHREADS
- bit 2 for MULTIPLICITY
- Added with blversion 0.06_05.
- B<perlversion> $] of the perl which produced this bytecode as string.
- Added with blversion 0.06_06.
- =head1 AUTHORS
- Malcolm Beattie C<MICB at cpan.org> I<(retired)>,
- Reini Urban C<perl-compiler@googlegroups.com> since 2008.
- =cut
- # Local Variables:
- # mode: cperl
- # cperl-indent-level: 2
- # fill-column: 100
- # End:
- # vim: expandtab shiftwidth=2:
|