accents.t 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. use strict;
  2. BEGIN {
  3. require Texinfo::ModulePath;
  4. Texinfo::ModulePath::init(undef, undef, 'updirs' => 2);
  5. }
  6. use Test::More;
  7. BEGIN { plan tests => 63; }
  8. use Texinfo::Convert::Text;
  9. use Texinfo::Convert::Converter;
  10. use Texinfo::Convert::HTML;
  11. use Texinfo::Parser;
  12. ok(1, "modules loading");
  # Run Texinfo::Common::find_innermost_accent_contents() on the first element
  # of the tree parsed from a Texinfo snippet and check what it returns.
  #
  # Takes a single array reference: [ $texi, $name, $reference ].
  #   $texi      - Texinfo source to parse (parser context 'preformatted')
  #   $name      - label; the test is named 'innermost ' . $name
  #   $reference - expected string: the text conversion of the returned
  #                contents, followed by the 'cmdname' of each entry of the
  #                returned commands stack, all joined with '|'
  #
  # When $reference is undefined no test is run; the computed string is
  # printed on STDERR instead.
  sub test_accent_stack ($)
  {
    my $test = shift;
    my ($texi, $name, $reference) = @$test;

    my $parser = Texinfo::Parser::parser({'context' => 'preformatted'});
    my $tree = $parser->parse_texi_text($texi);
    my ($contents, $commands_stack) =
      Texinfo::Common::find_innermost_accent_contents($tree->{'contents'}->[0]);
    my $text = Texinfo::Convert::Text::convert({'contents' => $contents});
    my $got = join('|', $text, map {$_->{'cmdname'}} @$commands_stack);

    if (defined($reference)) {
      # is() rather than ok(... eq ...): on failure the got/expected strings
      # are reported, as in the other checks of this file.
      is ($got, $reference, 'innermost '.$name);
    } else {
      print STDERR $got."\n";
    }
  }
  # Cases for test_accent_stack(), one array reference per case:
  # [ Texinfo source, test name, expected "text|command|command..." string ].
  my @accent_stack_cases = (
    ['@~e', 'simple', 'e|~'],
    ['@~{@dotless{i}}','dotless','i|~|dotless'],
    # the source contains a @c comment terminated by a newline
    ['@~{@c comment' . "\n" . 'e}', 'comment', 'e|~'],
    ['@~{@@}','no_brace_command', '@|~'],
    ['@~{@TeX{}}','brace_no_arg_command', 'TeX|~'],
    ['@~{@TeX{}@^{a@dotless{i}}}','text_and_accent', 'i|~|^|dotless'],
    ['@~{@^{a}@ringaccent b}}','two_accents', 'a|~|^'],
  );

  foreach my $accent_stack_case (@accent_stack_cases) {
    test_accent_stack($accent_stack_case);
  }
  # Describe a string by the code points of its characters.
  #
  # Returns a two-element list:
  #   - the decimal code points joined with '-'
  #   - the same code points as lowercase hexadecimal, zero-padded to at
  #     least four digits, joined with '-'
  # Both elements are empty strings for an empty argument.
  sub ord_hex_string($)
  {
    my $string = shift;
    my @code_points = map { ord($_) } split('', $string);
    my $decimal = join('-', @code_points);
    my $hexadecimal = join('-', map { sprintf("%04x", $_) } @code_points);
    return ($decimal, $hexadecimal);
  }
  # Convert one accent construct through four code paths and compare each
  # result with its reference.
  #
  # Takes a single array reference:
  #   [ $texi, $name, $reference, $reference_xml, $reference_xml_entity,
  #     $reference_unicode ]
  #   $texi                 - Texinfo source to parse
  #   $name                 - base name of the tests
  #   $reference            - expected eight_bit_accents() result for
  #                           'iso-8859-1', compared after encoding the
  #                           result to iso-8859-1
  #   $reference_xml        - expected xml_accents() result with
  #                           USE_NUMERIC_ENTITY set to 0
  #   $reference_xml_entity - expected xml_accents() result with
  #                           USE_NUMERIC_ENTITY set to 1
  #   $reference_unicode    - expected unicode_accents() result
  #
  # For each reference that is undefined, the corresponding test is not run
  # and the computed value is printed on STDERR instead.
  sub test_enable_encoding ($)
  {
    my $test = shift;
    my ($texi, $name, $reference, $reference_xml, $reference_xml_entity,
        $reference_unicode) = @$test;

    my $parser = Texinfo::Parser::parser({'context' => 'preformatted'});
    my $text_root = $parser->parse_texi_text($texi);
    my $tree = $text_root->{'contents'}->[0];

    # 8-bit (latin1) conversion, with the ASCII fallback.
    my ($contents, $commands_stack) =
      Texinfo::Common::find_innermost_accent_contents($tree);
    my $text = Texinfo::Convert::Text::convert({'contents' => $contents});
    my $result =
      Texinfo::Convert::Unicode::eight_bit_accents(undef, $text, $commands_stack,
        'iso-8859-1', \&Texinfo::Convert::Text::ascii_accent_fallback);

    # XML conversion, first with USE_NUMERIC_ENTITY off, then on.
    my $html_converter = Texinfo::Convert::HTML->converter();
    $html_converter->{'conf'}->{'USE_NUMERIC_ENTITY'} = 0;
    my $result_xml = Texinfo::Convert::Converter::xml_accents($html_converter,
                                                              $tree);
    $html_converter->{'conf'}->{'USE_NUMERIC_ENTITY'} = 1;
    my $result_xml_entity
      = Texinfo::Convert::Converter::xml_accents($html_converter, $tree);

    # Unicode conversion; the innermost contents are looked up again and
    # converted with utf-8 enabled.
    ($contents, $commands_stack) =
      Texinfo::Common::find_innermost_accent_contents($tree);
    $text = Texinfo::Convert::Text::convert({'contents' => $contents},
                                            {'enabled_encoding' => 'utf-8'});
    my $result_unicode = Texinfo::Convert::Unicode::unicode_accents(undef, $text,
              $commands_stack, \&Texinfo::Convert::Text::ascii_accent_fallback);

    if (defined($reference)) {
      is (Encode::encode('iso-8859-1', $result), $reference, $name);
    } else {
      my ($ord) = ord_hex_string($result);
      print STDERR "$name ($ord)--> utf8: ".Encode::encode('utf8', $result).
            " latin1: ".Encode::encode('iso-8859-1', $result)."\n";
    }

    if (defined($reference_xml)) {
      is ($result_xml, $reference_xml, "$name xml");
    } else {
      print STDERR "$name xml: $result_xml\n";
    }

    if (defined($reference_xml_entity)) {
      # Named differently from the plain xml check above, so that a failure
      # can be attributed to the right conversion.
      is ($result_xml_entity, $reference_xml_entity, "$name xml entity");
    } else {
      print STDERR "$name xml entity: $result_xml_entity\n";
    }

    if (defined($reference_unicode)) {
      is ($result_unicode, $reference_unicode, "$name unicode");
    } else {
      my ($ord, $hex) = ord_hex_string($result);
      my ($ord_unicode, $hex_unicode) = ord_hex_string($result_unicode);
      print STDERR "$name ($ord/$hex)--> result utf8: ".Encode::encode('utf8', $result).
            " ($ord_unicode/$hex_unicode)--> unicode: ".Encode::encode('utf8', $result_unicode)."\n";
    }
  }
  # Build a string from hexadecimal code points: each argument is a string of
  # hexadecimal digits, turned into the character with that code point; the
  # characters are concatenated in argument order.  Without arguments the
  # result is the empty string.
  sub chrx(@)
  {
    return join('', map { chr(hex($_)) } @_);
  }
  # Cases for test_enable_encoding(), one array reference per case:
  # [ Texinfo source, test name, expected latin1 result, expected xml result,
  #   expected xml result with numeric entities, expected unicode result ].
  # some come from encodings/weird_accents.texi
  my @enable_encoding_cases = (
    ['@~e', 'no 8bit encoding',
       "e~", 'e~', '&#7869;', chrx('1ebd')],
    ['@~n', 'simple encoding',
       chr(241), '&ntilde;', '&ntilde;', chrx('00f1')],
    ['@~{n}' , 'brace encoding',
       chr(241), '&ntilde;', '&ntilde;', chrx('00f1')],
    ['@^{@dotless{i}}', 'dotless',
       chr(238), '&icirc;', '&icirc;', chrx('00ee')],
    ['@~{@dotless{i}}', 'no 8bit dotless',
       'i~', 'i~', '&#297;', chrx('0129')],
    ['@={@~{@dotless{i}}}', 'no 8 cplx dotless',
       'i~=', 'i~=', '&#297;=', chrx('0129','0304')],
    ['@={@^{@dotless{i}}}', 'complex dotless',
       chr(238).'=', '&icirc;=', '&icirc;=', chrx('00ee','0304')],
    ['@={@,{@~{n}}}', 'complex encoding',
       chr(241).',=', '&ntilde;,=', '&ntilde;,=', chrx('0146','0303','0304')],
    ['@udotaccent{r}', 'udotaccent',
       '.r', '.r', '&#7771;', chrx('1e5b')],
    ['@={@ubaraccent{a}}', 'complex ubaraccent',
       'a_=', 'a_=', 'a_=', chrx('0101','0332')],
    ['@^{@udotaccent{@`r}}', 'complex udotaccent',
       '.r`^', '.r`^', '.r`^', chrx('1e5b','0300','0302')],
    ['@v{@\'{r}}', 'utf8 possible inside',
       'r\'<', 'r\'&lt;', '&#341;&lt;', chrx('0155','030c')],
    ['@={@code{@\'{@`{r}}}}', 'command in accent',
       '=', '=', '=', chrx('0304')],
  );

  foreach my $enable_encoding_case (@enable_encoding_cases) {
    test_enable_encoding($enable_encoding_case);
  }
  # Direct checks of Texinfo::Convert::Text::convert() with the
  # 'enabled_encoding' option, on trees returned by parse_texi_line().
  my $circumflex_e_tree = Texinfo::Parser::parse_texi_line(undef, '@^e');
  my $converted = Texinfo::Convert::Text::convert($circumflex_e_tree,
                                         {'enabled_encoding' => 'utf-8'});
  is ($converted, "\x{00EA}", 'enable encoding @^e');

  # @aa{} is parsed once and converted with each encoding in turn; the same
  # character is expected in both cases.
  my $aa_tree = Texinfo::Parser::parse_texi_line(undef, '@aa{}');
  foreach my $aa_check (['utf-8', 'enable encoding @aa{}'],
                        ['iso-8859-1', 'enable encoding latin1 @aa{}']) {
    my ($encoding, $description) = @$aa_check;
    $converted = Texinfo::Convert::Text::convert($aa_tree,
                                         {'enabled_encoding' => $encoding});
    is ($converted, "\x{00E5}", $description);
  }

  # Debugging helpers, left disabled:
  #print STDERR "$converted\n";
  #print STDERR "`$converted'\n".ord($converted)."\n".sprintf("%x\n",ord($converted));
  #print STDERR "".Encode::encode('utf8', "\x{00E5}\n");

  1;