texi-elements-by-size 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. #! /usr/bin/env perl
  2. # texi-elements-by-size -- dump list of elements based on words or line counts.
  3. # Also serves as an example of using the Texinfo::Parser module,
  4. # including the usual per-format options.
  5. #
  6. # Copyright 2012, 2013, 2014, 2015, 2016 Free Software Foundation, Inc.
  7. #
  8. # This program is free software; you can redistribute it and/or modify
  9. # it under the terms of the GNU General Public License as published by
  10. # the Free Software Foundation; either version 3 of the License,
  11. # or (at your option) any later version.
  12. #
  13. # This program is distributed in the hope that it will be useful,
  14. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. # GNU General Public License for more details.
  17. #
  18. # You should have received a copy of the GNU General Public License
  19. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  20. #
  21. # Original author: Patrice Dumas <pertusus@free.fr>
  22. use strict;
  23. use Config; # to determine the path separator
  24. use Getopt::Long qw(GetOptions);
  25. Getopt::Long::Configure("gnu_getopt");
  # Make the Texinfo Perl modules loadable when this script is run straight
  # from a Texinfo source checkout.  If that is not relevant, it is probably
  # best to simply assume all the needed packages are in the Perl include
  # path.
  BEGIN {
    # Directory holding this script.  The substitution strips the last path
    # component of $0; when $0 has no directory part at all (for instance
    # "perl texi-elements-by-size") nothing is stripped, so fall back to the
    # current directory instead of using the script name as a directory.
    my $mydir = $0;
    $mydir = '.' unless ($mydir =~ s,/[^/]*$,,);

    # find tp relative to $0
    my $txi_libdir = "$mydir/../tp";
    unshift (@INC, $txi_libdir);

    # Additional module directories searched below tp/maintain/lib.
    my @txi_maint_dirs = qw(Text-Unidecode Unicode-EastAsianWidth libintl-perl);
    unshift (@INC, map { "$txi_libdir/maintain/lib/$_/lib" } @txi_maint_dirs );
  }
  39. use Texinfo::Parser;
  40. use Texinfo::Structuring;
  41. use Texinfo::Convert::TextContent;
  # Version string shown by --version; it embeds the parser module version.
  my $my_version = "0.1 (TP $Texinfo::Parser::VERSION)";

  # Name this script was called under: directory part and a trailing ".pl"
  # removed.
  my $real_command_name = $0;
  $real_command_name =~ s/.*\///;
  $real_command_name =~ s/\.pl$//;

  # Separator between the directories of a path list; ':' when the Perl
  # configuration does not give one.  The quoted form is safe inside a regex.
  my $path_separator
    = defined($Config{'path_sep'}) ? $Config{'path_sep'} : ':';
  my $quoted_path_separator = quotemeta($path_separator);

  # Command-line flags, all off until the options are parsed.
  my ($force, $use_sections, $count_words, $no_warn) = (0, 0, 0, 0);
  # Translation hook: a stand-in until real i18n is added.  It hands its
  # single argument back unchanged.
  sub __($) {
    my ($message) = @_;
    return $message;
  }
  # The counts are made from the Info output.
  my $format = 'info';

  # Directories specified on the command line (filled in by -I and -P).
  my @include_dirs;
  my @prepend_dirs;

  # Options handed to the parser.  'expanded_formats' starts out with the
  # output format and is edited by the --ifFORMAT / --no-ifFORMAT options;
  # 'values' holds the variables given with -D.  A 'gettext' entry pointing
  # at __ is not set for now.
  my $parser_default_options = {};
  $parser_default_options->{'expanded_formats'} = [ $format ];
  $parser_default_options->{'values'} = {};
  # Turn the processing of one conditional format on or off.
  #   $region  format name, for example 'html' or 'tex'.
  #   $set     true: add $region to the parser 'expanded_formats' list unless
  #            it is already there; false: remove every occurrence of it.
  #            An undefined value counts as true.
  sub set_expansion($$) {
    my ($region, $set) = @_;
    $set = 1 unless (defined($set));

    # Work on the list stored in the parser options itself.
    my $formats = \@{$parser_default_options->{'expanded_formats'}};
    if (!$set) {
      @$formats = grep {$_ ne $region} @$formats;
    } elsif (!grep {$_ eq $region} @$formats) {
      push @$formats, $region;
    }
  }
  # Parse the command line.  Simple flags are stored straight into their
  # variables; the other options run a small callback, which Getopt::Long
  # calls with the option name in $_[0] and the option value in $_[1].
  # The script exits with status 1 when option parsing fails.
  my $result_options = Getopt::Long::GetOptions (
    'help|h' => sub { print help(); exit 0; },
    'version|V' => sub {
      print "$real_command_name $my_version\n\n";
      printf __("Copyright (C) %s Free Software Foundation, Inc.\n"
        ."License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n"
        ."This is free software: you are free to change and redistribute it.\n"
        ."There is NO WARRANTY, to the extent permitted by law.\n"), "2016";
      exit 0;
    },
    'force' => \$force,
    # --ifFORMAT / --no-ifFORMAT: $_[1] is 1 for the plain form and 0 for
    # the negated one.
    'ifhtml!' => sub { set_expansion('html', $_[1]); },
    'ifinfo!' => sub { set_expansion('info', $_[1]); },
    'ifxml!' => sub { set_expansion('xml', $_[1]); },
    'ifdocbook!' => sub { set_expansion('docbook', $_[1]); },
    'iftex!' => sub { set_expansion('tex', $_[1]); },
    'ifplaintext!' => sub { set_expansion('plaintext', $_[1]); },
    'use-sections!' => \$use_sections,
    'count-words!' => \$count_words,
    'no-warn' => \$no_warn,
    # -D VAR defines VAR (with the value 1), -U VAR removes it again.
    'D=s' => sub { $parser_default_options->{'values'}->{$_[1]} = 1; },
    'U=s' => sub { delete $parser_default_options->{'values'}->{$_[1]}; },
    # -I and -P take one directory or a list joined with the path separator.
    'I=s' => sub {
      push @include_dirs, split(/$quoted_path_separator/, $_[1]); },
    'P=s' => sub {
      unshift @prepend_dirs, split(/$quoted_path_separator/, $_[1]); },
    # Accepted and ignored.  The handler used to call set_from_cmdline(),
    # which this script never defines, so giving --number-sections or
    # --no-number-sections died with an "Undefined subroutine" error.
    # No code in this script reads a NUMBER_SECTIONS setting, so there is
    # nothing to record.
    'number-sections!' => sub { },
  );
  exit 1 if (!$result_options);
  # The input files are whatever is left on the command line once the
  # options have been parsed.
  my @input_files = @ARGV;
  # use STDIN if not a tty, like makeinfo does
  @input_files = ('-') if (!scalar(@input_files) and !-t STDIN);
  die sprintf(__("%s: missing file argument.\n"), $real_command_name)
     .sprintf(__("Try `%s --help' for more information.\n"), $real_command_name)
       unless (scalar(@input_files) >= 1);

  # Only the first file is processed.
  my $input_file_name = shift @input_files;

  # Report the files that are left over.  The names are passed as a %s
  # argument instead of being interpolated into the format string, so a
  # '%' in a file name cannot corrupt the message, and the file that is
  # actually used is no longer listed as superfluous.
  if (scalar(@input_files)) {
    warn sprintf(__("%s: superfluous file arguments: %s\n"),
                 $real_command_name, join(' ', @input_files));
  }
  # Build and return the complete --help text as one string.  Each part goes
  # through __() so that it can be translated later on; the parts are joined
  # with one empty line between them.
  sub help() {
    my @parts;

    push @parts,
      sprintf(__("Usage: %s [OPTION]... TEXINFO-FILE...\n"), $real_command_name);

    push @parts, __(
      "Write to standard output a list of Texinfo elements (nodes or sections)\n"
      ."sorted by the number of lines (or words) they contain,\n"
      ."after translation to Info format.\n");

    push @parts, __(
      "General Options:\n"
      ."--count-words count words instead of lines.\n"
      ."--force keep going even if Texinfo file parsing fails.\n"
      ."--help display this help and exit.\n"
      ."--no-warn suppress warnings (but not errors).\n"
      ."--use-sections use sections as elements instead of nodes.\n"
      ."--version display version information and exit.\n");

    push @parts, __(
      "Input file options:\n"
      ."-D VAR define the variable VAR, as with \@set.\n"
      ."-I DIR append DIR to the \@include search path.\n"
      ."-P DIR prepend DIR to the \@include search path.\n"
      ."-U VAR undefine the variable VAR, as with \@clear.\n");

    push @parts, __(
      "Conditional processing in input:\n"
      ."--ifdocbook process \@ifdocbook and \@docbook.\n"
      ."--ifhtml process \@ifhtml and \@html.\n"
      ."--ifinfo process \@ifinfo.\n"
      ."--ifplaintext process \@ifplaintext.\n"
      ."--iftex process \@iftex and \@tex.\n"
      ."--ifxml process \@ifxml and \@xml.\n"
      ."--no-ifdocbook do not process \@ifdocbook and \@docbook text.\n"
      ."--no-ifhtml do not process \@ifhtml and \@html text.\n"
      ."--no-ifinfo do not process \@ifinfo text.\n"
      ."--no-ifplaintext do not process \@ifplaintext text.\n"
      ."--no-iftex do not process \@iftex and \@tex text.\n"
      ."--no-ifxml do not process \@ifxml and \@xml text.\n"
      ."Also, for the --no-ifFORMAT options, do process \@ifnotFORMAT text.\n");

    return join("\n", @parts);
  }
  # Leave with exit status 1 when $error_count is true, unless --force was
  # given.  In every other case return to the caller.
  sub _exit($) {
    my ($error_count) = @_;
    return if ($force or !$error_count);
    exit (1);
  }
  # Print the messages collected by $self and update the error count.
  #   $self         object with an errors() method returning a reference to
  #                 the list of messages and the number of new errors.
  #   $error_count  number of errors counted so far.
  # Messages of type 'error' are always printed with warn; the other ones
  # only when --no-warn was not given.  _exit() is then called with the new
  # total, and that total is returned when _exit() comes back.
  sub handle_errors($$) {
    my ($self, $error_count) = @_;

    my ($errors, $new_error_count) = $self->errors();
    if ($new_error_count) {
      $error_count += $new_error_count;
    }

    for my $message (@$errors) {
      next if ($no_warn and $message->{'type'} ne 'error');
      warn $message->{'error_line'};
    }

    _exit($error_count);
    return $error_count;
  }
  # Directory part of the input file name, trailing slash included, or '.'
  # when the name contains no slash.
  my $input_directory = ($input_file_name =~ /(.*\/)/) ? $1 : '.';

  # Shallow copy of the defaults; the include directories are added below.
  my $parser_options = { %$parser_default_options };

  # Order of the include directories: the -P directories, then '.', then
  # the directory of the input file (when it is not '.'), then the -I
  # directories.
  my @prepended_include_directories = ('.');
  if ($input_directory ne '.') {
    push @prepended_include_directories, $input_directory;
  }
  $parser_options->{'include_directories'}
    = [ @prepend_dirs, @prepended_include_directories, @include_dirs ];

  my $error_count = 0;
  my $parser = Texinfo::Parser::parser($parser_options);
  my $tree = $parser->parse_texi_file($input_file_name);
  unless (defined($tree)) {
    # No tree at all: print what the parser collected and give up.  The
    # exit status is 1 here whether or not --force was given.
    handle_errors($parser, $error_count);
    exit (1);
  }
  # NOTE(review): messages recorded by the parser when it does return a
  # tree are never printed or counted here -- confirm this is intended.

  # Count the elements and print the formatted result.
  my $converter_options = { 'parser' => $parser };
  my $converter = Texinfo::Convert::TextContent->converter($converter_options);
  my ($sorted_name_counts_array, $formatted_result)
    = $converter->sort_element_counts($tree, $use_sections, $count_words);
  print STDOUT $formatted_result;
  exit (0);