assembler.t 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. #!./perl -w
  2. =pod
  3. =head1 TEST FOR B::Assembler.pm AND B::Disassembler.pm
  4. =head2 Description
  5. The general idea is to test by assembling a choice set of assembler
  6. instructions, then disassemble them, and check that we've completed the
  7. round trip. Also, error checking of Assembler.pm is tested by feeding
  8. it assorted errors.
  9. Since Assembler.pm likes to assemble a file, we comply by writing a
  10. text file. This file contains three sections:
  11. testing operand categories
  12. use each opcode
  13. erronous assembler instructions
  14. An "operand category" is identified by the suffix of the PUT_/GET_
  15. subroutines as shown in the C<%Asmdata::insn_data> initialization, e.g.
  16. opcode C<ldsv> has operand category C<svindex>:
  17. insn_data{ldsv} = [1, \&PUT_svindex, "GET_svindex"];
  18. Because Disassembler.pm also assumes input from a file, we write the
  19. resulting object code to a file. And disassembled output is written to
  20. yet another text file which is then compared to the original input.
  21. (Erronous assembler instructions still generate code, but this is not
  22. written to the object file; therefore disassembly bails out at the first
  23. instruction in error.)
  24. All files are kept in memory by using TIEHASH.
  25. =head2 Caveats
  26. An error where Assembler.pm and Disassembler.pm agree but Assembler.pm
  27. generates invalid object code will not be detected.
  28. Due to the way this test has been set up, failure of a single test
  29. could cause all subsequent tests to fail as well: After an unexpected
  30. assembler error no output is written, and disassembled lines will be
  31. out of sync for all lines thereafter.
  32. Not all possibilities for writing a valid operand value can be tested
  33. because disassembly results in a uniform representation.
  34. =head2 Maintenance
  35. New opcodes are added automatically.
  36. A new operand category will cause this program to die ("no operand list
  37. for XXX"). The cure is to add suitable entries to C<%goodlist> and
  38. C<%badlist>. (Since the data in Asmdata.pm is autogenerated, it may also
  39. happen that the corresponding assembly or disassembly subroutine is
  40. missing.) Note that an empty array as a C<%goodlist> entry means that
  41. opcodes of the operand category do not take an operand (and therefore the
  42. corresponding entry in C<%badlist> should have one). An C<undef> entry
  43. in C<%badlist> means that any value is acceptable (and thus there is no
  44. way to cause an error).
  45. Set C<$dbg> to debug this test.
  46. B::Disassembler was enhanced to add comments about some insn.
  47. The additional third verbose argument for easier roundtrip checking
  48. is ignored.
  49. =cut
  50. package VirtFile;
  51. use strict;
  52. # Note: This is NOT a general purpose package. It implements
  53. # sequential text and binary file i/o in a rather simple form.
  54. sub TIEHANDLE($;$){
  55. my( $class, $data ) = @_;
  56. my $obj = { data => defined( $data ) ? $data : '',
  57. pos => 0 };
  58. return bless( $obj, $class );
  59. }
  60. sub PRINT($@){
  61. my( $self ) = shift;
  62. $self->{data} .= join( '', @_ );
  63. }
  64. sub WRITE($$;$$){
  65. my( $self, $buf, $len, $offset ) = @_;
  66. unless( defined( $len ) ){
  67. $len = length( $buf );
  68. $offset = 0;
  69. }
  70. unless( defined( $offset ) ){
  71. $offset = 0;
  72. }
  73. $self->{data} .= substr( $buf, $offset, $len );
  74. return $len;
  75. }
  76. sub GETC($){
  77. my( $self ) = @_;
  78. return undef() if $self->{pos} >= length( $self->{data} );
  79. return substr( $self->{data}, $self->{pos}++, 1 );
  80. }
  81. sub READLINE($){
  82. my( $self ) = @_;
  83. return undef() if $self->{pos} >= length( $self->{data} );
  84. # Todo; strip comments and empty lines
  85. my $lfpos = index( $self->{data}, "\n", $self->{pos} );
  86. if( $lfpos < 0 ){
  87. $lfpos = length( $self->{data} );
  88. }
  89. my $pos = $self->{pos};
  90. $self->{pos} = $lfpos + 1;
  91. return substr( $self->{data}, $pos, $self->{pos} - $pos );
  92. }
  93. sub READ($@){
  94. my $self = shift();
  95. my $bufref = \$_[0];
  96. my( undef, $len, $offset ) = @_;
  97. if( $offset ){
  98. die( "offset beyond end of buffer\n" )
  99. if ! defined( $$bufref ) || $offset > length( $$bufref );
  100. } else {
  101. $$bufref = '';
  102. $offset = 0;
  103. }
  104. my $remlen = length( $self->{data} ) - $self->{pos};
  105. $len = $remlen if $remlen < $len;
  106. return 0 unless $len;
  107. substr( $$bufref, $offset, $len ) =
  108. substr( $self->{data}, $self->{pos}, $len );
  109. $self->{pos} += $len;
  110. return $len;
  111. }
  112. sub TELL($){
  113. my $self = shift();
  114. return $self->{pos};
  115. }
  116. sub CLOSE($){
  117. my( $self ) = @_;
  118. $self->{pos} = 0;
  119. }
  120. 1;
  121. package main;
  122. use strict;
  123. use Test::More;
  124. use Config qw(%Config);
  125. BEGIN {
  126. if ($ENV{PERL_CORE} and ($Config{'extensions'} !~ /\bB\b/) ){
  127. print "1..0 # Skip -- Perl configured without B module\n";
  128. exit 0;
  129. }
  130. if ($ENV{PERL_CORE} and ($Config{'extensions'} !~ /\bByteLoader\b/) ){
  131. print "1..0 # Skip -- Perl configured without ByteLoader module\n";
  132. exit 0;
  133. }
  134. if ($] < 5.007 ){
  135. print "1..0 # Skip -- use the CORE Perl assembler instead, which cannot be tested like this.\n";
  136. exit 0;
  137. }
  138. }
  139. use B::Asmdata qw( %insn_data );
  140. use B::Assembler qw( &assemble_fh );
  141. use B::Disassembler qw( &disassemble_fh &get_header );
  142. my( %opsByType, @code2name );
  143. my( $lineno, $dbg, $firstbadline, @descr );
  144. $dbg = 0; # debug switch
  145. # $SIG{__WARN__} handler to catch Assembler error messages
  146. #
  147. my $warnmsg;
  148. sub catchwarn($){
  149. $warnmsg = $_[0];
  150. print "# error: $warnmsg\n" if $dbg;
  151. }
  152. # Callback for writing assembled bytes. This is where we check
  153. # that we do get an error.
  154. #
  155. sub putobj($){
  156. if( ++$lineno >= $firstbadline ){
  157. ok( $warnmsg && $warnmsg =~ /^\d+:\s/, $descr[$lineno] );
  158. undef( $warnmsg );
  159. } else {
  160. my $l = syswrite( OBJ, $_[0] );
  161. }
  162. }
  163. # Callback for writing a disassembled statement.
  164. # Fixed to support the new optional verbose argument, which we ignore here.
  165. sub putdis(@){
  166. my ($insn, $arg, $verbose) = @_;
  167. my $line = defined($arg) ? "$insn $arg" : $insn;
  168. ++$lineno;
  169. print DIS "$line\n";
  170. if ($dbg) {
  171. $verbose = 0 unless $verbose;
  172. printf ("# %5d %s verbose:%d\n", $lineno, $line, $verbose);
  173. }
  174. }
  175. # Generate assembler instructions from a hash of operand types: each
  176. # existing entry contains a list of good or bad operand values. The
  177. # corresponding opcodes can be found in %opsByType.
  178. #
  179. sub gen_type($$$){
  180. my( $href, $descref, $text ) = @_;
  181. for my $odt ( sort( keys( %opsByType ) ) ){
  182. my $opcode = $opsByType{$odt}->[0];
  183. my $sel = $odt;
  184. $sel =~ s/^GET_//;
  185. die( "no operand list for $sel\n" ) unless exists( $href->{$sel} );
  186. if( defined( $href->{$sel} ) ){
  187. if( @{$href->{$sel}} ){
  188. for my $od ( @{$href->{$sel}} ){
  189. ++$lineno;
  190. $descref->[$lineno] = "$text: $code2name[$opcode] $od";
  191. print ASM "$code2name[$opcode] $od\n";
  192. printf "# %5d %s %s\n", $lineno, $code2name[$opcode], $od if $dbg;
  193. }
  194. } else {
  195. ++$lineno;
  196. $descref->[$lineno] = "$text: $code2name[$opcode]";
  197. print ASM "$code2name[$opcode]\n";
  198. printf "# %5d %s\n", $lineno, $code2name[$opcode] if $dbg;
  199. }
  200. }
  201. }
  202. }
  203. # Interesting operand values
  204. #
  205. my %goodlist = (
  206. comment_t => [ '"a comment"' ], # no \n
  207. none => [],
  208. svindex => [ 0x7fffffff, 0 ],
  209. opindex => [ 0x7fffffff, 0 ],
  210. pvindex => [ 0x7fffffff, 0 ],
  211. hekindex => [ 0x7fffffff, 0 ],
  212. pmflags => ($] < 5.013 ? [ 0xff, 0 ] : [ 0xffffffff, 0 ]),
  213. U32 => [ 0xffffffff, 0 ],
  214. U8 => [ 0xff, 0 ],
  215. PV => [ '""', '"a string"', ],
  216. I32 => [ -0x80000000, 0x7fffffff ],
  217. IV64 => [ '0x000000000', '0x0ffffffff', '0x000000001' ], # disass formats 0x%09x
  218. IV => $Config{ivsize} == 4 ?
  219. [ -0x80000000, 0x7fffffff ] :
  220. [ '0x000000000', '0x0ffffffff', '0x000000001' ],
  221. NV => [ 1.23456789E3 ],
  222. U16 => [ 0xffff, 0 ],
  223. pvcontents => [],
  224. strconst => [ '""', '"another string"' ], # no NUL
  225. op_tr_array => [ join( ',', 256, 0..255 ) ],
  226. PADOFFSET => undef,
  227. long => undef,
  228. );
  229. # Erronous operand values
  230. #
  231. my %badlist = (
  232. comment_t => [ '"multi-line\ncomment"' ], # no \n
  233. none => [ '"spurious arg"' ],
  234. svindex => [ 0xffffffff * 2, -1 ],
  235. opindex => [ 0xffffffff * 2, -2 ],
  236. pvindex => [ 0xffffffff * 2, -3 ],
  237. hekindex => [ 0xffffffff * 2, -4 ],
  238. pmflags => ($] < 5.013 ? [ 0x5ffff, -5 ] : [ 0xffffffff * 2, -5 ]),
  239. U32 => [ 0xffffffff * 2, -5 ],
  240. U16 => [ 0x5ffff, -5 ],
  241. U8 => [ 0x6ff, -6 ],
  242. PV => [ 'no quote"' ],
  243. I32 => [ -0x80000001, 0x80000000 ],
  244. IV64 => undef, # PUT_IV64 doesn't check - no integrity there
  245. IV => $Config{ivsize} == 4 ?
  246. [ -0x80000001, 0x80000000 ] : undef,
  247. NV => undef, # PUT_NV accepts anything - it shouldn't, real-ly
  248. pvcontents => [ '"spurious arg"' ],
  249. strconst => [ 'no quote"', '"with NUL '."\0".' char"' ], # no NUL
  250. op_tr_array => undef, # op_pv_tr is no longer exactly 256 shorts
  251. PADOFFSET => undef,
  252. long => undef,
  253. );
  254. # Determine all operand types from %Asmdata::insn_data
  255. #
  256. for my $opname ( keys( %insn_data ) ){
  257. my ( $opcode, $put, $getname ) = @{$insn_data{$opname}};
  258. push( @{$opsByType{$getname}}, $opcode ) if $put;
  259. $code2name[$opcode] = $opname if $put;
  260. }
  261. # Write instruction(s) for correct operand values each operand type class
  262. #
  263. $lineno = 0;
  264. tie( *ASM, 'VirtFile' );
  265. gen_type( \%goodlist, \@descr, 'round trip' );
  266. # Write one instruction for each opcode.
  267. #
  268. for my $opcode ( 0..$#code2name ){
  269. next unless defined( $code2name[$opcode] );
  270. my $sel = $insn_data{$code2name[$opcode]}->[2];
  271. $sel =~ s/^GET_//;
  272. die( "no operand list for $sel\n" ) unless exists( $goodlist{$sel} );
  273. if( defined( $goodlist{$sel} ) ){
  274. ++$lineno;
  275. if( @{$goodlist{$sel}} ){
  276. my $od = $goodlist{$sel}[0];
  277. $descr[$lineno] = "round trip: $code2name[$opcode] $od";
  278. print ASM "$code2name[$opcode] $od\n";
  279. printf "# %5d %s %s\n", $lineno, $code2name[$opcode], $od if $dbg;
  280. } else {
  281. $descr[$lineno] = "round trip: $code2name[$opcode]";
  282. print ASM "$code2name[$opcode]\n";
  283. printf "# %5d %s\n", $lineno, $code2name[$opcode] if $dbg;
  284. }
  285. }
  286. }
  287. # Write instruction(s) for incorrect operand values each operand type class
  288. #
  289. $firstbadline = $lineno + 1;
  290. gen_type( \%badlist, \@descr, 'asm error' );
  291. # invalid opcode is an odd-man-out ;-)
  292. #
  293. ++$lineno;
  294. $descr[$lineno] = "asm error: Gollum";
  295. print ASM "Gollum\n";
  296. printf "# %5d %s\n", $lineno, 'Gollum' if $dbg;
  297. close( ASM );
  298. # Now that we have defined all of our tests: plan
  299. #
  300. plan( tests => $lineno );
  301. print "# firstbadline=$firstbadline\n" if $dbg;
  302. # assemble (guard against warnings and death from assembly errors)
  303. #
  304. $SIG{'__WARN__'} = \&catchwarn;
  305. $lineno = -1; # account for the assembly header
  306. tie( *OBJ, 'VirtFile' );
  307. eval { assemble_fh( \*ASM, \&putobj ); };
  308. print "# eval: $@" if $dbg;
  309. close( ASM );
  310. close( OBJ );
  311. $SIG{'__WARN__'} = 'DEFAULT';
  312. # disassemble
  313. #
  314. print "# --- disassembling ---\n" if $dbg;
  315. $lineno = 0;
  316. tie( *DIS, 'VirtFile' );
  317. disassemble_fh( \*OBJ, \&putdis );
  318. close( OBJ );
  319. close( DIS );
  320. # get header (for debugging only)
  321. #
  322. if( $dbg ){
  323. my( $magic, $archname, $blversion, $ivsize, $ptrsize, $byteorder, $longsize, $archflag ) =
  324. get_header();
  325. printf "# Magic: 0x%08x\n", $magic;
  326. print "# Architecture: $archname\n";
  327. print "# Byteloader V: $blversion\n";
  328. print "# ivsize: $ivsize\n";
  329. print "# ptrsize: $ptrsize\n";
  330. print "# longsize: $longsize\n";
  331. printf "# Byteorder: $byteorder\n";
  332. print "# archflag: $archflag\n";
  333. }
  334. # check by comparing files line by line
  335. #
  336. print "# --- checking ---\n" if $dbg;
  337. $lineno = 0;
  338. our( $asmline, $disline );
  339. while( defined( $asmline = <ASM> ) ){
  340. $disline = <DIS>;
  341. ++$lineno;
  342. last if $lineno eq $firstbadline; # bail out where errors begin
  343. if ($asmline ne $disline) {
  344. # $asmline might be hex, if < 8 it will be disassembled as int
  345. my ($op, $n) = $asmline =~ /(\w+) (0x\d+)/;
  346. if ($n =~ /^0x/ and hex($n) < 8) {
  347. $asmline = "$op ".hex($n)."\n";
  348. }
  349. }
  350. ok( $asmline eq $disline, $descr[$lineno] );
  351. printf "# %5d %s# = %s\n", $lineno, $asmline, $disline if $dbg;
  352. }
  353. close( ASM );
  354. close( DIS );
  355. __END__