# encodings.pl 1.6 KB
  # encodings.pl - Download IANA text and compute alias list.
  # Assumes you are running this program from gnu/gcj/convert/.
  # Output suitable for direct inclusion in IOConverter.java.
  #
  # Usage: perl encodings.pl [FILE]
  #   FILE is a text file made of "Name:" / "Alias:" records separated by
  #   blank lines.  With no argument (or an empty one) the file
  #   ./character-sets is used, and is fetched with wget first if it does
  #   not already exist.
  #
  # One line of the form
  #    hash.put ("<lower-cased name or alias>", "<our name>");
  # is printed to standard output for every name or alias that belongs to
  # an entry of %map, sorted by key.

  use strict;
  use warnings;

  # Map IANA canonical names onto our canonical names.
  # NOTE(review): the IANA registry appears to spell the last two entries
  # 'UTF-16LE' / 'UTF-16BE'.  If so, these keys never match a "Name:" line
  # and only contribute their own lower-cased spelling to the output.
  # Confirm before changing them, since that alters the generated table.
  my %map = (
      'ANSI_X3.4-1968' => 'ASCII',
      'ISO_8859-1:1987' => '8859_1',
      'UTF-8' => 'UTF8',
      'Shift_JIS' => 'SJIS',
      'Extended_UNIX_Code_Packed_Format_for_Japanese' => 'EUCJIS',
      'UTF16-LE' => 'UnicodeLittle',
      'UTF16-BE' => 'UnicodeBig'
  );

  # Pick the input file: an explicit non-empty argument wins, otherwise
  # fall back to ./character-sets and download it when it is missing.
  my $file;
  if (! defined $ARGV[0] || $ARGV[0] eq '')
  {
      $file = 'character-sets';
      if (! -f $file)
      {
          # Too painful to figure out how to get Perl to do it.
          my $url = 'http://www.iana.org/assignments/character-sets';
          # List form: no shell is involved, and a failed download is
          # reported here instead of as a confusing open() error below.
          system('wget', '-o', '.wget-log', $url) == 0
              or die "couldn't fetch $url with wget (status $?)\n";
      }
  }
  else
  {
      $file = $ARGV[0];
  }

  # Lower-cased name or alias => our canonical name.
  my %output;

  # Include canonical names in the output.
  foreach my $key (keys %map)
  {
      $output{lc ($key)} = $map{$key};
  }

  # Three-argument open: the file name is never interpreted as a mode.
  open (my $input, '<', $file) or die "couldn't open $file: $!";

  my $body = 0;       # Becomes true at the first "Name:" line.
  my $current = '';   # Our name for the record being read; '' if unmapped.

  while (my $line = <$input>)
  {
      # chomp, not chop: a final line without a newline must keep its
      # last character.
      chomp $line;

      # Everything before the first "Name:" line is preamble; skip it.
      $body = 1 if $line =~ /^Name:/;
      next unless $body;

      # A blank line ends the current record.
      if ($line =~ /^$/)
      {
          $current = '';
          next;
      }

      # Only the first two whitespace-separated fields matter.  A line
      # that starts with whitespace yields an empty first field and so
      # matches neither record type below.
      my ($type, $name) = split (/\s+/, $line);
      next unless defined $type;
      $name = '' unless defined $name;

      # Encoding names are case-insensitive. We do all processing on
      # the lower-case form.
      my $lower = lc ($name);

      if ($type eq 'Name:')
      {
          # Start a new record; it is of interest only if %map knows it.
          $current = exists $map{$name} ? $map{$name} : '';
          if ($current)
          {
              $output{$lower} = $current;
          }
      }
      elsif ($type eq 'Alias:')
      {
          # The IANA list has some ugliness.
          if ($name ne '' && $lower ne 'none' && $current)
          {
              $output{$lower} = $current;
          }
      }
  }
  close ($input);

  foreach my $key (sort keys %output)
  {
      print " hash.put (\"$key\", \"$output{$key}\");\n";
  }