parse_structinfo.pl 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. #!/usr/bin/perl
  2. # $OpenBSD: parse_structinfo.pl,v 1.3 2015/04/29 06:06:38 guenther Exp $
  3. #
  4. # Copyright (c) 2009 Miodrag Vallat.
  5. # Copyright (c) 2013 Philip Guenther.
  6. #
  7. # Permission to use, copy, modify, and distribute this software for any
  8. # purpose with or without fee is hereby granted, provided that the above
  9. # copyright notice and this permission notice appear in all copies.
  10. #
  11. # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  12. # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  13. # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  14. # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  15. # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  16. # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  17. # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  18. #
  19. # This ugly script parses the output of objdump -g in order to extract
  20. # structure layout information, to be used by ddb.
  21. #
  # The output of this script is the following static data:
  # - for each struct:
  #   - its name
  #   - its size
  #   - its first and last field indexes
  # - for each field:
  #   - its name
  #   - the index of the struct it is a member of
  #   - its offset and size (individual element size if an array)
  #   - the number of elements in the array (1 if not)
  # This allows fast struct -> field information retrieval.
  33. #
  # To retrieve information from a field size or offset, we also build
  # the following reverse arrays:
  # - for each offset, in ascending order, a variable length list of field
  #   indexes.
  # - for each size, in ascending order, a variable length list of field
  #   indexes (collected, but currently not written to the output).
  40. #
  41. # The compromise here is that I want to minimize linear searches. Memory
  42. # use is considered secondary, hence the back `pointer' to the struct in the
  43. # fields array.
  44. use strict;
  45. use warnings;
  46. use integer;
  47. use IO::File;
  48. use constant MAX_COLUMN => 72;
  # Parser state.  $depth is the brace nesting level inside the struct that
  # is currently being recorded (0 when not inside one), $ignore counts the
  # open braces of constructs met outside a recorded struct, and
  # $cur_struct is the struct being collected.
  my ($depth, $ignore) = (0, 0);
  my $cur_struct;

  # Largest field offset, field size and struct size seen so far; they
  # decide whether u_short is wide enough for the generated tables.
  my ($max_offs, $max_fsize, $max_ssize) = (0, 0, 0);

  # Variables used in generating the raw, textual output
  my $txt;          # IO::File to write to
  my (@id2struct,   # mapping of objdump's struct ids to @structs idxes
      @subfield);   # list of subfields to dump at the end
  # %strings counts how many times each literal string has been added;
  # @strings_by_len files every distinct string under its length.
  my %strings;
  my @strings_by_len;

  # Record one more use of $string.  The first time a string is seen it is
  # also appended to the @strings_by_len bucket for its length.
  sub add_string
  {
      my ($string) = @_;
      my $first_time = !$strings{$string}++;
      if ($first_time) {
          my $length = length $string;
          push @{ $strings_by_len[$length] }, $string;
      }
  }
  # Structs that ended up with at least one field, in completion order.
  my @structs;

  # Reverse lookups: offset (resp. size) => list of indexes into @fields.
  my (%offs_to_fields, %size_to_fields);

  # Every field recorded so far.  Entry 0 is an all-zero placeholder, so a
  # real field never has index 0.
  my @fields = ( { name => "", offs => 0, size => 0, items => 0, struct => 0 } );

  # Append one field of the struct currently being parsed to @fields and
  # index it by offset and by size.
  #   $name  - field name
  #   $offs  - offset of the field within its struct
  #   $size  - size of one element
  #   $items - number of elements; 1 is used when this is undefined
  #   $id    - objdump id of the field's struct type, when there is one
  # The field is attributed to the struct that will be pushed next onto
  # @structs.  When the textual dump is open the field is also written to
  # it and, if $id is defined, queued in @subfield for later expansion.
  sub new_field
  {
      my ($name, $offs, $size, $items, $id) = @_;
      $items = 1 unless defined $items;

      add_string($name);

      my %field = (
          name   => $name,
          offs   => $offs,
          size   => $size,
          items  => $items,
          struct => scalar(@structs),
      );
      push @fields, \%field;
      my $index = $#fields;

      if ($offs > $max_offs) {
          $max_offs = $offs;
      }
      if ($size > $max_fsize) {
          $max_fsize = $size;
      }

      push @{ $offs_to_fields{$offs} }, $index;
      push @{ $size_to_fields{$size} }, $index;

      if ($txt) {
          my $owner = $cur_struct->{name};
          raw($offs, $size * $items, $owner, $name);
          push @subfield, [ $owner, $name, $offs, $id ] if defined $id;
      }
  }
  # Generate textual output for those who are ddb challenged.
  # Failing to create the file is not fatal: a warning is issued and $txt
  # stays false, which switches the textual dump off.
  $txt = IO::File->new("db_structinfo.txt", "w");
  warn "$0: unable to create db_structinfo.txt: $!" unless $txt;

  # Write one tab-separated row of the textual dump: offset, size, the
  # offset just past the member (offset + size), struct name, member name.
  # raw() does not check $txt itself; callers only use it when the dump
  # file is open.
  sub raw {
      my ($offs, $size, $struct, $member) = @_;
      my @columns = ($offs, $size, $offs + $size, $struct, $member);
      $txt->print(join("\t", @columns), "\n");
  }

  # Column headings of the textual dump.
  if ($txt) {
      $txt->print(join("\t", qw(offset size next struct member)), "\n");
  }
  # Main parse loop: read the objdump -g output line by line, record every
  # top-level struct that has at least one field, and hand each of its
  # direct members to new_field().
  #
  # Changes from the previous version of this loop:
  # - a struct's objdump id is entered into @id2struct only when the struct
  #   is actually pushed onto @structs.  It used to be entered when the
  #   struct started, so the id of an empty (discarded) struct ended up
  #   pointing at whichever struct was pushed next;
  # - pointer members now pass an undefined $id to new_field().  They used
  #   to pass a defined empty string (the value of !$pointer), which queued
  #   them in @subfield and made them look up $id2struct[0] later on;
  # - an array whose dimensions multiply to 0 no longer dies with
  #   "Illegal division by zero"; its element size is reported as 0.

  # objdump id of the struct currently held in $cur_struct
  my $cur_struct_id;

  while (<>) {
      chomp;    # strip record separator

      # Start of a top-level struct definition.
      if (m!^struct (\w+) \{ /\* size (\d+) id (\d+) !) {
          $depth = 1;
          $cur_struct = {
              name     => $1,
              size     => $2,
              fieldmin => scalar(@fields),
          };
          $cur_struct_id = $3;
          next;
      }

      # End of a top-level definition.
      if (/^};/) {
          if ($depth == 0) {
              # closes a construct we were not recording
              $ignore--;
              next;
          }
          $depth = 0;
          if (scalar(@fields) == $cur_struct->{fieldmin}) {
              # empty struct, ignore it (and leave its id unmapped)
              undef $cur_struct;
              next;
          }
          $cur_struct->{fieldmax} = $#fields;
          add_string($cur_struct->{name});
          $max_ssize = $cur_struct->{size}
              if $cur_struct->{size} > $max_ssize;
          # Only now is the index in @structs certain.
          $id2struct[$cur_struct_id] = scalar(@structs);
          push @structs, $cur_struct;
          next;
      }

      next if /\{.*\}/;    # single line enum

      if (/\{/) {
          # subcomponent
          if ($depth) {
              $depth++;
          }
          else {
              $ignore++;
          }
          next;
      }

      if (/\}/) {
          if ($ignore) {
              $ignore--;
              next;
          }
          $depth--;
          next if $depth != 1;
          # FALL THROUGH: the closing line of a nested component also
          # declares the member of the enclosing struct
      }

      # Everything below deals with member lines only.
      my ($bitsize, $bitpos) = /bitsize (\d+), bitpos (\d+)/
          or next;
      next if $ignore;
      next if $depth != 1;

      # Bitfields are a PITA... From a ddb point of view, we can't really
      # access storage units smaller than a byte.
      # So we'll report all bitfields as having size 0, and the
      # rounded down byte position where they start.
      # (The divisions are integer divisions: "use integer" is in effect
      # for the whole file.)
      my $cursize = ($bitsize % 8) ? 0 : ($bitsize / 8);
      my $curoffs = $bitpos / 8;

      # Try and gather the field name.
      # The most common case: not a function pointer or array
      if (m!\s(\**)(\w+);\s/\* bitsize!) {
          my ($stars, $name) = ($1, $2);
          # check for a struct id to match up; pointers never get one
          my ($id) = $stars eq "" ? m!/\* id (\d+) \*/.*;! : ();
          new_field($name, $curoffs, $cursize, 1, $id);
          next;
      }

      # How about a function pointer?
      if (m!\s\**\(\*+(\w+)\) \(/\* unknown \*/\);\s/\* bitsize!) {
          new_field($1, $curoffs, $cursize);
          next;
      }

      # Maybe it's an array
      if (m!\s(\**)([][:\w]+);\s/\* bitsize!) {
          my ($stars, $name) = ($1, $2);
          my $items = 1;
          # strip every dimension off the name, multiplying them up
          while ($name =~ s/\[(\d+)\]:\w+//) {
              $items *= $1;
          }
          # check for a struct id to match up; pointers never get one
          my ($id) = $stars eq "" ? m!/\* id (\d+) \*/.*;! : ();
          # a zero-length array has no meaningful element size
          my $elem_size = $items ? $cursize / $items : 0;
          new_field($name, $curoffs, $elem_size, $items, $id);
          next;
      }

      # skip any anonymous unions {
      next if m!\}; /\*!;

      # Should be nothing left
      print STDERR "unknown member type: $_\n";
  }
  # Do all the subfield processing: for every queued member whose type is a
  # struct we recorded, write one extra row per field of that struct to the
  # textual dump, named "member.field" and offset by the member's offset.
  # XXX Should recurse into subsub...fields?
  for my $entry (@subfield) {
      my ($outer, $name, $base, $id) = @$entry;

      # We don't remember unions. No point in doing so
      my $sidx = $id2struct[$id];
      defined $sidx or next;

      my $inner = $structs[$sidx];
      for my $fidx ($inner->{fieldmin} .. $inner->{fieldmax}) {
          my $member = $fields[$fidx];
          my $where  = $base + $member->{offs};
          my $bytes  = $member->{size} * $member->{items};
          raw($where, $bytes, $outer, "$name.$member->{name}");
      }
  }
  # Pick a type for ddb_field_off: u_short is enough as long as no field
  # offset, field size or struct size exceeds 65535; otherwise u_int is
  # used and a note is written to STDERR.
  my $needs_wide = grep { $_ > 65535 } $max_offs, $max_fsize, $max_ssize;
  my $f_type = $needs_wide ? "u_int" : "u_short";
  print STDERR "Using u_int for struct/field sizes and offsets\n"
      if $needs_wide;

  # Fixed preamble of the generated C file: includes, the ddb_field_off
  # typedef and the four table struct declarations.
  print <<"END_OF_HEADER";
/*
* THIS IS A GENERATED FILE. DO NOT EDIT!
*/
#include <sys/param.h>
#include <sys/types.h>
typedef $f_type ddb_field_off;
struct ddb_struct_info {
u_short name;
ddb_field_off size;
u_short fmin, fmax;
};
struct ddb_field_info {
u_short name;
u_short sidx;
ddb_field_off offs;
ddb_field_off size;
u_short nitems;
};
struct ddb_field_offsets {
ddb_field_off offs;
u_short list;
};
struct ddb_field_sizes {
ddb_field_off size;
u_short list;
};
END_OF_HEADER
  # Emit ddb_structfield_strings: every distinct name, NUL separated, in one
  # C string literal that starts with a NUL so that offset 0 is the empty
  # string.  Names are placed longest first; a shorter name that is a
  # suffix of an already placed one is not emitted again but mapped to an
  # offset inside the longer name.
  #
  # %string_to_offset maps each name to its offset in the table and $soff
  # is the offset the next emitted name would get.
  #
  # The literal is assembled first and printed in one go, so the
  # declaration is complete even when no name was collected at all (the
  # old incremental printing produced a lone `";' in that case).
  my $prefix = qq(static const char ddb_structfield_strings[] =\n\t"\\0);
  my %string_to_offset = ( "" => 0 );
  my $soff = 1;
  my @emitted;    # names written to the table, in table order
  for (my $len = $#strings_by_len; $len > 0; $len--) {
      foreach my $string (@{ $strings_by_len[$len] || [] }) {
          next if exists $string_to_offset{$string};
          my $off = $string_to_offset{$string} = $soff;
          $soff += $len + 1;    # for the NUL
          push @emitted, $string;

          # check for suffixes that are also strings
          for my $o (1 .. $len - 1) {
              my $sstr = substr($string, $o);
              next unless exists $strings{$sstr};
              next if exists $string_to_offset{$sstr};
              $string_to_offset{$sstr} = $off + $o;
              #print STDERR "found $sstr inside $string\n";
          }
      }
  }
  # Each name ends its own source line: NUL, closing quote, newline, tab,
  # opening quote.
  print $prefix, join(qq(\\0"\n\t"), @emitted), qq(";\n);
  # Translate a name into the offset recorded for it in %string_to_offset.
  # Dies when the name has no recorded offset.
  sub resolve_string
  {
      my ($string) = @_;
      exists $string_to_offset{$string}
          or die "no mapping for $string";
      return $string_to_offset{$string};
  }
  # Check for overflow and, if so, print some stats.  String offsets and
  # struct and field indexes are all emitted as u_short, so give up with
  # exit status 1 as soon as one of them would not fit.
  if (grep { $_ > 65535 } $soff, scalar(@structs), scalar(@fields)) {
      my ($nstructs, $nfields) = (scalar(@structs), scalar(@fields));
      print STDERR join("\n",
          "ERROR: value out of range of u_short Time to change types?",
          "max string offset: $soff",
          "max field offset: $max_offs",
          "max field size: $max_fsize",
          "max struct size: $max_ssize",
          "number of structs: $nstructs",
          "number of fields: $nfields",
      ), "\n";
      exit 1;
  }
  # Struct table: one row per struct with the string-table offset of its
  # name, its size and its first and last field indexes.
  print "#define NSTRUCT " . @structs . "\n";
  print "static const struct ddb_struct_info ddb_struct_info[NSTRUCT] = {\n";
  for my $struct (@structs) {
      my @row = (
          resolve_string($struct->{name}),
          $struct->{size},
          $struct->{fieldmin},
          $struct->{fieldmax},
      );
      print "\t{ " . join(", ", @row) . " },\n";
  }
  print "};\n\n";

  # Field table: one row per field with the string-table offset of its
  # name, the index of its struct, its offset, its size and its number of
  # items.
  print "#define NFIELD " . @fields . "\n";
  print "static const struct ddb_field_info ddb_field_info[NFIELD] = {\n";
  for my $field (@fields) {
      my @row = (
          resolve_string($field->{name}),
          $field->{struct},
          $field->{offs},
          $field->{size},
          $field->{items},
      );
      print "\t{ " . join(", ", @row) . " },\n";
  }
  print "};\n\n";
  # Given a mapping from values to fields that have that value, generate
  # two C arrays: one containing lists of fields with each value, in order;
  # the other indexing into that one for each value.  I.e., to get the
  # fields that have a given value, find the value in the second array and
  # then iterate from where that points into the first array until you hit
  # an entry with field==0.
  #
  #   $prefix - "offset" or "size"; used to build the names
  #             ddb_fields_by_<prefix> and ddb_field_<prefix>s
  #   $map    - hash ref: value => array ref of field indexes
  #   $max    - name of the #define that receives the number of values
  #
  # The start position of each list is emitted into the `list' member of
  # struct ddb_field_<prefix>s, which is declared u_short in the generated
  # header.  Those positions can pass 65535 even when the number of fields
  # does not, because every list carries an extra terminating 0.  Both
  # arrays are therefore computed before anything is printed, and the sub
  # dies without output if a start position does not fit.
  sub print_reverse_mapping
  {
      my ($prefix, $map, $max) = @_;

      my @heads;      # [ value, start position of its list ]
      my @entries;    # all lists back to back, each terminated by 0
      foreach my $val (sort { $a <=> $b } keys %$map) {
          my $start = scalar(@entries);
          die "ddb_field_${prefix}s: list index $start is out of range"
              . " of u_short\n"
              if $start > 65535;
          push @heads, [ $val, $start ];
          push @entries, @{ $map->{$val} }, 0;
      }

      # first array: ten entries per line
      print "static const ddb_field_off ddb_fields_by_${prefix}[] = {";
      my $w = 0;
      foreach my $field (@entries) {
          print( ($w++ % 10) == 0 ? "\n\t" : " ", $field, ",");
      }
      print "\n};\n\n";

      # second array: one { value, start position } pair per value
      print "#define $max ", scalar(@heads), "\n";
      print "static const struct ddb_field_${prefix}s",
          " ddb_field_${prefix}s[$max] = {\n";
      foreach my $h (@heads) {
          print "\t{ $h->[0], $h->[1] },\n";
      }
      print "};\n";
  }
  # reverse arrays: only the offset -> fields mapping is emitted
  print_reverse_mapping(offset => \%offs_to_fields, 'NOFFS');
  print "\n";

  # The size->field mapping isn't used by ddb currently, so don't output it
  # print_reverse_mapping("size", \%size_to_fields, "NSIZES");