nodenormalization.t 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  # Strictures first.  Warnings are enabled as well so that mistakes in this
  # test script itself (undefined values, typos, ...) get reported.
  use strict;
  use warnings;

  # Runs at compile time, before the Texinfo modules are use'd further down.
  # NOTE(review): presumably this sets up the module search path relative to
  # this file ('updirs' => 2); confirm against Texinfo::ModulePath.
  BEGIN {
    require Texinfo::ModulePath;
    Texinfo::ModulePath::init(undef, undef, 'updirs' => 2);
  }

  # Declare the plan at import time: this script runs exactly 8 tests.
  use Test::More tests => 8;
  8. use Texinfo::Convert::NodeNameNormalization qw(normalize_node transliterate_texinfo);
  9. use Texinfo::Parser;
  10. use Data::Dumper;
  # Currently, tests check that NodeNameNormalization does not break with
  # complete Texinfo trees, not that the output is correct.

  # Source directory prefix taken from the 'srcdir' environment variable.
  # When set, any run of trailing slashes is replaced by a single '/' so the
  # value can be prepended directly to a relative path; otherwise it is empty.
  my $srcdir = $ENV{'srcdir'};
  if (defined($srcdir)) {
    $srcdir =~ s{/*$}{/};
  } else {
    $srcdir = '';
  }

  # Select the 'gettext_pp' Locale::Messages backend and bind the translation
  # domain to the test locales directory.  The domain name is kept in one
  # variable instead of being repeated as a literal.
  my $strings_textdomain = 'texinfo_document';
  Locale::Messages->select_package('gettext_pp');
  Locale::Messages::bindtextdomain($strings_textdomain, 't/locales');

  # Parser shared by all the tests in this script.
  my $parser = Texinfo::Parser::parser({
    'TEST' => 1,
    'include_directories' => [
      't/include_dir/',
      't/include/',
      $srcdir.'t/include/',
    ],
    'expanded_formats' => ['html', 'tex'],
  });
  # Smoke check only: parse and normalize a line mixing @noindent,
  # @titlefont, @anchor, @footnote and @exdent.  The result is not compared
  # with anything; the calls merely have to complete.
  my $invalid_line
    = '@noindent Text @titlefont{in titlefont} @anchor{in anchor}@footnote{footnote} @exdent exdent';
  my $tree = $parser->parse_texi_text($invalid_line);
  # Debugging aid: print STDERR Data::Dumper->Dump([$tree]);
  my $normalized_invalid = normalize_node($tree);
  # Debugging aid: print STDERR "Invalid: $normalized_invalid\n";
  # Misc commands are ignored by the normalization, which implies that @node
  # and sectioning command contents are ignored too: the expected result for
  # this input is the empty string.
  my $node_texi = <<'END_NODE_TEXI';
@node Top
in node
@top top section
in top section
END_NODE_TEXI
  my $node_tree = $parser->parse_texi_text($node_texi);
  my $normalized_node = normalize_node($node_tree);
  is($normalized_node, '', 'node ignored');
  # Try on a full manual.  The @node and sectioning commands are commented
  # out in the manual text since they are ignored by the normalization.
  my $texinfo_manual = '@setfilename toto.info
@definfoenclose some, ;, ;
@documentlanguage fr
@settitle test manual
@copying
Your rights
@sp 2
Here
@end copying
@paragraphindent 6
@c @node Top
@c @top top @~e
@insertcopying
@noindent
Para. @LaTeX{}, @sc{@AA{} bbb}. @image{unknown,,,}. @ref{index}
@anchor{anchor}. @abbr{ABR, expl}. @abbr{ABR}.
@~@@. @some{infoenclosed}. @today{}.
@cindex index
@html
in html
@end html
@multitable {a@TeX{}} {b}
@item gg @tab hhh
@end multitable
@example
in example
@vtable @emph
@item item
@itemx itemx
in vtable.
@end vtable
@deffn a b {c} d e f
in deffn
@end deffn
@exdent exdented
@end example
@flushright
right
@end flushright
@float Theor@`eme, label
in float
@math{a \frac{a}{b} @\ @minus{}}
@caption{in caption}
@end float
@menu
* index::
@end menu
@c @node index
@c @appendix appendix
@printindex cp
@heading list of floats
@listoffloats Theor@`eme
@bye
';
  my $manual_tree = $parser->parse_texi_text($texinfo_manual);

  # Round trip: converting the parsed tree back to Texinfo has to give the
  # input text again.  The converted text is the value under test ("got"),
  # the original manual is the reference ("expected"), so that failure
  # diagnostics are labelled correctly.
  my $check_texinfo = Texinfo::Convert::Texinfo::convert($manual_tree);
  is($check_texinfo, $texinfo_manual, 'check manual parsing');
  # Debugging aid: print STDERR Data::Dumper->Dump([$manual_tree]);

  # The normalization of the whole tree must consist of word characters and
  # dashes only.  like() reports the offending string when the match fails.
  my $normalized_manual = normalize_node($manual_tree);
  # Debugging aid: print STDERR "Manual: $normalized_manual\n";
  like($normalized_manual, qr/^[\w\-]+$/, 'normalized tree is a valid id');
  # Now test some node normalizations, on a single line that combines accent
  # commands, symbol commands, no-brace commands and ASCII punctuation.
  my $texi_line = 'A @sc{sc} accents @"i @"{@dotless{i}} @`{@=E} @l{} @,{@\'C} @={@,{@~{n}}} @v{@\'{r}} @={@~{@dotless{i}}} @"y @dotless{i} @dotless{j} @,{C} @ogonek{E} @udotaccent{a} @tieaccent{a} @dotaccent{a} characters @l{} @exclamdown{} @aa{} @oe{} @comma{} @error{} @today{} @dots{} @enddots{} no brace commands @@ @: @. @ @* @} signs -- --- `` \'\' !_"#$%&\'()*+-. /;<=>?[\\]^_`|~';
  my $line_tree = $parser->parse_texi_text($texi_line);

  # Node name normalization of the line.
  my $normalized_line = normalize_node($line_tree);
  is(
    $normalized_line,
    'A-SC-accents-_00ef-_00ef-_1e14-_0142-_1e08-_0146_0303_0304-_0155_030c-_0129_0304-_00ff-_0131-j-_00c7-_0118-_1ea1-a_0361-_0227-characters-_0142-_00a1-_00e5-_0153-_002c-error_002d_002d_003e--_2026-_002e_002e_002e-no-brace-commands-_0040--_002e-----_007d-signs-_002d_002d-_002d_002d_002d-_0060_0060-_0027_0027-_0021_005f_0022_0023_0024_0025_0026_0027_0028_0029_002a_002b_002d_002e-_002f_003b_003c_003d_003e_003f_005b_005c_005d_005e_005f_0060_007c_007e',
    'normalized complex line'
  );

  # Transliteration of the same tree with the default settings.
  my $transliterated_line = transliterate_texinfo($line_tree);
  is(
    $transliterated_line,
    'A-SC-accents-i-i-E-l-C-n-r-i-y-i-j-C-E-a-a-a-characters-l-_00a1-aa-oe-_002c-error_002d_002d_003e--_2026-_002e_002e_002e-no-brace-commands-_0040--_002e-----_007d-signs-_002d_002d-_002d_002d_002d-_0060_0060-_0027_0027-_0021_005f_0022_0023_0024_0025_0026_0027_0028_0029_002a_002b_002d_002e-_002f_003b_003c_003d_003e_003f_005b_005c_005d_005e_005f_0060_007c_007e',
    'transliterated complex line'
  );

  # Transliteration with a true second argument.
  # NOTE(review): going by the test name, this presumably turns off the use
  # of unidecode; confirm against transliterate_texinfo.
  my $transliterated_line_no_unidecode = transliterate_texinfo($line_tree, 1);
  is(
    $transliterated_line_no_unidecode,
    'A-SC-accents-i-i-_1e14-l-_1e08-n-r-i-y-_0131-j-C-E-a-a-a-characters-l-_00a1-aa-oe-_002c-error_002d_002d_003e--_2026-_002e_002e_002e-no-brace-commands-_0040--_002e-----_007d-signs-_002d_002d-_002d_002d_002d-_0060_0060-_0027_0027-_0021_005f_0022_0023_0024_0025_0026_0027_0028_0029_002a_002b_002d_002e-_002f_003b_003c_003d_003e_003f_005b_005c_005d_005e_005f_0060_007c_007e',
    'transliterated complex line no unidecode'
  );
  # Normalization of spellings of the Top node name.  Each entry is
  # [Texinfo input, expected normalized name, test description]; the cases
  # run in the order listed.
  my @top_node_cases = (
    [' tOp', 'Top',  'normalize Top node'],
    ['TOP ', 'TOP-', 'normalize Top node followed by spaces'],
  );
  foreach my $top_node_case (@top_node_cases) {
    my ($case_text, $case_expected, $case_name) = @$top_node_case;
    my $case_tree = $parser->parse_texi_text($case_text);
    my $case_normalized = normalize_node($case_tree);
    is($case_normalized, $case_expected, $case_name);
  }