Assembler.pm 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. # Assembler.pm
  2. #
  3. # Copyright (c) 1996 Malcolm Beattie
  4. # Copyright (c) 2008,2009,2010,2011,2012 Reini Urban
  5. #
  6. # You may distribute under the terms of either the GNU General Public
  7. # License or the Artistic License, as specified in the README file.
  8. package B::Assembler;
  9. use Exporter;
  10. use B qw(ppname);
  11. use B::Asmdata qw(%insn_data @insn_name);
  12. use Config qw(%Config);
  13. require ByteLoader; # we just need its $VERSION
  14. no warnings; # XXX
  15. @ISA = qw(Exporter);
  16. @EXPORT_OK = qw(assemble_fh newasm endasm assemble asm maxopix maxsvix);
  17. $VERSION = '1.11';
  18. use strict;
  19. my %opnumber;
  20. my ( $i, $opname );
  21. for ( $i = 0 ; defined( $opname = ppname($i) ) ; $i++ ) {
  22. $opnumber{$opname} = $i;
  23. }
  24. my ( $linenum, $errors, $out ); # global state, set up by newasm
  25. sub error {
  26. my $str = shift;
  27. warn "$linenum: $str\n";
  28. $errors++;
  29. }
  30. my $debug = 0;
  31. sub debug { $debug = shift }
  32. my $quiet = 0;
  33. sub quiet { $quiet = shift }
  34. my ( $maxopix, $maxsvix ) = ( 0xffffffff, 0xffffffff );
  35. sub maxopix { $maxopix = shift }
  36. sub maxsvix { $maxsvix = shift }
  37. sub limcheck($$$$) {
  38. my ( $val, $lo, $hi, $loc ) = @_;
  39. if ( $val < $lo || $hi < $val ) {
  40. error "argument for $loc outside [$lo, $hi]: $val";
  41. $val = $hi;
  42. }
  43. return $val;
  44. }
  45. #
  46. # First define all the data conversion subs to which Asmdata will refer
  47. #
  48. sub B::Asmdata::PUT_U8 {
  49. my $arg = shift;
  50. my $c = uncstring($arg);
  51. if ( defined($c) ) {
  52. if ( length($c) != 1 ) {
  53. error "argument for U8 is too long: $c";
  54. $c = substr( $c, 0, 1 );
  55. }
  56. }
  57. else {
  58. $arg = limcheck( $arg, 0, 0xff, 'U8' );
  59. $c = chr($arg);
  60. }
  61. return $c;
  62. }
  63. sub B::Asmdata::PUT_U16 {
  64. my $arg = limcheck( $_[0], 0, 0xffff, 'U16' );
  65. pack( "S", $arg );
  66. }
  67. sub B::Asmdata::PUT_U32 {
  68. my $arg = limcheck( $_[0], 0, 0xffffffff, 'U32' );
  69. pack( "L", $arg );
  70. }
  71. sub B::Asmdata::PUT_I32 {
  72. my $arg = limcheck( $_[0], -0x80000000, 0x7fffffff, 'I32' );
  73. pack( "l", $arg );
  74. }
  75. sub B::Asmdata::PUT_NV {
  76. sprintf( "%s\0", $_[0] );
  77. } # "%lf" looses precision and pack('d',...)
  78. # may not even be portable between compilers
  79. sub B::Asmdata::PUT_objindex { # could allow names here
  80. my $maxidx = $_[1] || 0xffffffff;
  81. my $what = $_[2] || 'ix';
  82. my $arg = limcheck( $_[0], 0, $maxidx, $what );
  83. pack( "L", $arg );
  84. }
  85. sub B::Asmdata::PUT_svindex { B::Asmdata::PUT_objindex( @_, $maxsvix, 'svix' ) }
  86. sub B::Asmdata::PUT_opindex { B::Asmdata::PUT_objindex( @_, $maxopix, 'opix' ) }
  87. sub B::Asmdata::PUT_pvindex { B::Asmdata::PUT_objindex( @_, $maxsvix, 'pvix' ) }
  88. sub B::Asmdata::PUT_hekindex { B::Asmdata::PUT_objindex( @_ ) }
  89. sub B::Asmdata::PUT_strconst {
  90. my $arg = shift;
  91. my $str = uncstring($arg);
  92. if ( !defined($str) ) {
  93. my @callstack = caller(1);
  94. error "bad string constant: '$arg', called from ".$callstack[3]
  95. ." line:".$callstack[2];
  96. $str = '';
  97. }
  98. if ( $str =~ s/\0//g ) {
  99. error "string constant argument contains NUL: $arg";
  100. $str = '';
  101. }
  102. return $str . "\0";
  103. }
  104. sub B::Asmdata::PUT_pvcontents {
  105. my $arg = shift;
  106. error "extraneous argument: $arg" if defined $arg;
  107. return "";
  108. }
  109. sub B::Asmdata::PUT_PV {
  110. my $arg = shift;
  111. my $str = uncstring($arg);
  112. if ( !defined($str) ) {
  113. error "bad string argument: $arg";
  114. $str = '';
  115. }
  116. return pack( "L", length($str) ) . $str;
  117. }
  118. sub B::Asmdata::PUT_comment_t {
  119. my $arg = shift;
  120. $arg = uncstring($arg);
  121. error "bad string argument: $arg" unless defined($arg);
  122. if ( $arg =~ s/\n//g ) {
  123. error "comment argument contains linefeed: $arg";
  124. }
  125. return $arg . "\n";
  126. }
  127. sub B::Asmdata::PUT_double { sprintf( "%s\0", $_[0] ) } # see PUT_NV above
  128. sub B::Asmdata::PUT_none {
  129. my $arg = shift;
  130. error "extraneous argument: $arg" if defined $arg;
  131. return "";
  132. }
  133. sub B::Asmdata::PUT_op_tr_array {
  134. my @ary = split /\s*,\s*/, shift;
  135. return pack "S*", @ary;
  136. }
  137. sub B::Asmdata::PUT_IV64 {
  138. return pack "Q", shift;
  139. }
  140. sub B::Asmdata::PUT_IV {
  141. $Config{ivsize} == 4 ? &B::Asmdata::PUT_I32 : &B::Asmdata::PUT_IV64;
  142. }
  143. sub B::Asmdata::PUT_PADOFFSET {
  144. $Config{ptrsize} == 8 ? &B::Asmdata::PUT_IV64 : &B::Asmdata::PUT_U32;
  145. }
  146. sub B::Asmdata::PUT_long {
  147. $Config{longsize} == 8 ? &B::Asmdata::PUT_IV64 : &B::Asmdata::PUT_U32;
  148. }
  149. sub B::Asmdata::PUT_svtype {
  150. $Config{longsize} == 8 ? &B::Asmdata::PUT_IV64 : &B::Asmdata::PUT_U32;
  151. }
  152. sub B::Asmdata::PUT_pmflags {
  153. return ($] < 5.013) ? B::Asmdata::PUT_U16(@_) : B::Asmdata::PUT_U32(@_);
  154. }
  155. my %unesc = (
  156. n => "\n",
  157. r => "\r",
  158. t => "\t",
  159. a => "\a",
  160. b => "\b",
  161. f => "\f",
  162. v => "\013"
  163. );
  164. sub uncstring {
  165. my $s = shift;
  166. $s =~ s/^"// and $s =~ s/"$// or return undef;
  167. $s =~ s/\\(\d\d\d|.)/length($1) == 3 ? chr(oct($1)) : ($unesc{$1}||$1)/eg;
  168. return $s;
  169. }
  170. sub strip_comments {
  171. my $stmt = shift;
  172. # Comments only allowed in instructions which don't take string arguments
  173. # Treat string as a single line so .* eats \n characters.
  174. my $line = $stmt;
  175. $stmt =~ s{
  176. ^\s* # Ignore leading whitespace
  177. (
  178. [^"]* # A double quote '"' indicates a string argument. If we
  179. # find a double quote, the match fails and we strip nothing.
  180. )
  181. \s*\# # Any amount of whitespace plus the comment marker...
  182. \s*(.*)$ # ...which carries on to end-of-string.
  183. }{$1}sx; # Keep only the instruction and optional argument.
  184. return ($stmt) if $line eq $stmt;
  185. $stmt =~ m{
  186. ^\s*
  187. (
  188. [^"]*
  189. )
  190. \s*\#
  191. \s*(.*)$
  192. }sx; # Keep only the instruction and optional argument.
  193. my ( $line, $comment ) = ( $1, $2 );
  194. # $line =~ s/\t$// if $comment;
  195. return ( $line, $comment );
  196. }
  197. # create the ByteCode header:
  198. # magic, archname, ByteLoader $VERSION, ivsize, ptrsize, longsize, byteorder,
  199. # archflag, perlversion
  200. # byteorder is strconst, not U32 because of varying size issues (?)
  201. # archflag: bit 1: useithreads, bit 2: multiplicity
  202. # perlversion for the bytecode translation.
  203. sub gen_header {
  204. my $header = gen_header_hash();
  205. my $string = "";
  206. $string .= B::Asmdata::PUT_U32( $header->{magic} );
  207. $string .= B::Asmdata::PUT_strconst( '"' . $header->{archname} . '"' );
  208. $string .= B::Asmdata::PUT_strconst( '"' . $header->{blversion} . '"' );
  209. $string .= B::Asmdata::PUT_U32( $header->{ivsize} );
  210. $string .= B::Asmdata::PUT_U32( $header->{ptrsize} );
  211. if ( exists $header->{longsize} ) {
  212. $string .= B::Asmdata::PUT_U32( $header->{longsize} );
  213. }
  214. $string .= B::Asmdata::PUT_strconst( sprintf(qq["0x%s"], $header->{byteorder} ));
  215. if ( exists $header->{archflag} ) {
  216. $string .= B::Asmdata::PUT_U16( $header->{archflag} );
  217. }
  218. if ( exists $header->{perlversion} ) {
  219. $string .= B::Asmdata::PUT_strconst( '"' . $header->{perlversion} . '"');
  220. }
  221. $string;
  222. }
  223. # Calculate the ByteCode header values:
  224. # magic, archname, ByteLoader $VERSION, ivsize, ptrsize, longsize, byteorder
  225. # archflag, perlversion
  226. # nvtype is irrelevant (floats are stored as strings)
  227. # byteorder is strconst, not U32 because of varying size issues (?)
  228. # archflag: bit 1: useithreads, bit 2: multiplicity
  229. # perlversion for the bytecode translation.
  230. sub gen_header_hash {
  231. my $header = {};
  232. my $blversion = "$ByteLoader::VERSION";
  233. #if ($] < 5.009 and $blversion eq '0.06_01') {
  234. # $blversion = '0.06';# fake the old backwards compatible version
  235. #}
  236. $header->{magic} = 0x43424c50;
  237. $header->{archname} = $Config{archname};
  238. $header->{blversion} = $blversion;
  239. $header->{ivsize} = $Config{ivsize};
  240. $header->{ptrsize} = $Config{ptrsize};
  241. if ( $blversion ge "0.06_03" ) {
  242. $header->{longsize} = $Config{longsize};
  243. }
  244. my $byteorder = $Config{byteorder};
  245. if ($] < 5.007) {
  246. # until 5.6 the $Config{byteorder} was dependent on ivsize, which was wrong. we need longsize.
  247. my $t = $Config{ivtype};
  248. my $s = $Config{longsize};
  249. my $f = $t eq 'long' ? 'L!' : $s == 8 ? 'Q': 'I';
  250. if ($s == 4 || $s == 8) {
  251. my $i = 0;
  252. foreach my $c (reverse(2..$s)) { $i |= ord($c); $i <<= 8 }
  253. $i |= ord(1);
  254. $byteorder = join('', unpack('a'x$s, pack($f, $i)));
  255. } else {
  256. $byteorder = '?'x$s;
  257. }
  258. }
  259. $header->{byteorder} = $byteorder;
  260. if ( $blversion ge "0.06_05" ) {
  261. my $archflag = 0;
  262. $archflag += 1 if $Config{useithreads};
  263. $archflag += 2 if $Config{usemultiplicity};
  264. $header->{archflag} = $archflag;
  265. }
  266. if ( $blversion ge "0.06_06" ) {
  267. $header->{perlversion} = $];
  268. }
  269. $header;
  270. }
  271. sub parse_statement {
  272. my $stmt = shift;
  273. my ( $insn, $arg ) = $stmt =~ m{
  274. ^\s* # allow (but ignore) leading whitespace
  275. (.*?) # Ignore -S op groups. Instruction continues up until...
  276. (?: # ...an optional whitespace+argument group
  277. \s+ # first whitespace.
  278. (.*) # The argument is all the rest (newlines included).
  279. )?$ # anchor at end-of-line
  280. }sx;
  281. if ( defined($arg) ) {
  282. if ( $arg =~ s/^0x(?=[0-9a-fA-F]+$)// ) {
  283. $arg = hex($arg);
  284. }
  285. elsif ( $arg =~ s/^0(?=[0-7]+$)// ) {
  286. $arg = oct($arg);
  287. }
  288. elsif ( $arg =~ /^pp_/ ) {
  289. $arg =~ s/\s*$//; # strip trailing whitespace
  290. my $opnum = $opnumber{$arg};
  291. if ( defined($opnum) ) {
  292. $arg = $opnum;
  293. }
  294. else {
  295. # TODO: ignore [op] from O=Bytecode,-S
  296. error qq(No such op type "$arg");
  297. $arg = 0;
  298. }
  299. }
  300. }
  301. return ( $insn, $arg );
  302. }
  303. sub assemble_insn {
  304. my ( $insn, $arg ) = @_;
  305. my $data = $insn_data{$insn};
  306. if ( defined($data) ) {
  307. my ( $bytecode, $putsub ) = @{$data}[ 0, 1 ];
  308. error qq(unsupported instruction "$insn") unless $putsub;
  309. return "" unless $putsub;
  310. my $argcode = &$putsub($arg);
  311. return chr($bytecode) . $argcode;
  312. }
  313. else {
  314. error qq(no such instruction "$insn");
  315. return "";
  316. }
  317. }
  318. sub assemble_fh {
  319. my ( $fh, $out ) = @_;
  320. my $line;
  321. my $asm = newasm($out);
  322. while ( $line = <$fh> ) {
  323. assemble($line);
  324. }
  325. endasm();
  326. }
  327. sub newasm {
  328. my ($outsub) = @_;
  329. die "Invalid printing routine for B::Assembler\n"
  330. unless ref $outsub eq 'CODE';
  331. die <<EOD if ref $out;
  332. Can't have multiple byteassembly sessions at once!
  333. (perhaps you forgot an endasm()?)
  334. EOD
  335. $linenum = $errors = 0;
  336. $out = $outsub;
  337. $out->( gen_header() );
  338. }
  339. sub endasm {
  340. if ($errors) {
  341. die "There were $errors assembly errors\n";
  342. }
  343. $linenum = $errors = $out = 0;
  344. }
  345. ### interface via whole line, and optional comments
  346. sub assemble {
  347. my ($line) = @_;
  348. my ( $insn, $arg, $comment );
  349. $linenum++;
  350. chomp $line;
  351. $line =~ s/\cM$//;
  352. if ($debug) {
  353. my $quotedline = $line;
  354. $quotedline =~ s/\\/\\\\/g;
  355. $quotedline =~ s/"/\\"/g;
  356. $out->( assemble_insn( "comment", qq("$quotedline") ) );
  357. }
  358. ( $line, $comment ) = strip_comments($line);
  359. if ($line) {
  360. ( $insn, $arg ) = parse_statement($line);
  361. if ($debug and !$comment and $insn =~ /_flags/) {
  362. $comment = sprintf("0x%x", $arg);
  363. }
  364. $out->( assemble_insn( $insn, $arg, $comment ) );
  365. if ($debug) {
  366. $out->( assemble_insn( "nop", undef ) );
  367. }
  368. }
  369. elsif ( $debug and $comment ) {
  370. $out->( assemble_insn( "nop", undef, $comment ) );
  371. }
  372. }
  373. ### temporary workaround
  374. ### interface via 2-3 args
  375. sub asm ($;$$) {
  376. return if $_[0] =~ /\s*\W/;
  377. if ( defined $_[1] ) {
  378. return
  379. if $_[1] eq "0"
  380. and $_[0] !~ /^(?:ldsv|stsv|newsvx?|newpadlx|av_pushx?|av_extend|xav_flags)$/;
  381. return if $_[1] eq "1" and $]>5.007 and $_[0] =~ /^(?:sv_refcnt)$/;
  382. }
  383. my ( $insn, $arg, $comment ) = @_;
  384. if ($] < 5.007) {
  385. if ($insn eq "newsvx") {
  386. $arg = $arg & 0xff; # sv not SVt_NULL
  387. $insn = "newsv";
  388. # XXX but this needs stsv tix-1 also
  389. } elsif ($insn eq "newopx") {
  390. $insn = "newop";
  391. } elsif ($insn eq "av_pushx") {
  392. $insn = "av_push";
  393. } elsif ($insn eq "ldspecsvx") {
  394. $insn = "ldspecsv";
  395. } elsif ($insn eq "gv_stashpvx") {
  396. $insn = "gv_stashpv";
  397. } elsif ($insn eq "gv_fetchpvx") {
  398. $insn = "gv_fetchpv";
  399. } elsif ($insn eq "main_cv") {
  400. return;
  401. }
  402. }
  403. $out->( assemble_insn( $insn, $arg, $comment ) );
  404. $linenum++;
  405. # assemble "@_";
  406. }
  407. 1;
  408. __END__
  409. =head1 NAME
  410. B::Assembler - Assemble Perl bytecode
  411. =head1 SYNOPSIS
  412. perl -MO=Bytecode,-S,-omy.asm my.pl
  413. assemble my.asm > my.plc
  414. use B::Assembler qw(newasm endasm assemble);
  415. newasm(\&printsub); # sets up for assembly
  416. assemble($buf); # assembles one line
  417. asm(opcode, arg, [comment]);
  418. endasm(); # closes down
  419. use B::Assembler qw(assemble_fh);
  420. assemble_fh($fh, \&printsub); # assemble everything in $fh
  421. =head1 DESCRIPTION
  422. B::Bytecode helper module.
  423. =head1 AUTHORS
  424. Malcolm Beattie C<MICB at cpan.org> I<(1996, retired)>,
  425. Per-statement interface by Benjamin Stuhl C<sho_pi@hotmail.com>,
  426. Reini Urban C<perl-compiler@googlegroups.com> I(2008-)
  427. =cut
  428. # Local Variables:
  429. # mode: cperl
  430. # cperl-indent-level: 2
  431. # fill-column: 100
  432. # End:
  433. # vim: expandtab shiftwidth=2: