123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400 |
- #! /usr/local/bin/perl -w
- # vim: tabstop=4
- # vim: syntax=perl
- use strict;
- use Test;
- BEGIN {
- plan tests => 7;
- }
- use Locale::Recode;
- sub int2utf8;
- my $local2ucs = {};
- my $ucs2local = {};
- while (<DATA>) {
- my ($code, $ucs, undef) = map { oct $_ } split /\s+/, $_;
- $local2ucs->{$code} = $ucs;
- $ucs2local->{$ucs} = $code unless $ucs == 0xfffd;
- }
- my $cd_int = Locale::Recode->new (from => 'INIS-CYRILLIC',
- to => 'INTERNAL');
- ok !$cd_int->getError;
- my $cd_utf8 = Locale::Recode->new (from => 'INIS-CYRILLIC',
- to => 'UTF-8');
- ok !$cd_utf8->getError;
- my $cd_rev = Locale::Recode->new (from => 'INTERNAL',
- to => 'INIS-CYRILLIC');
- ok !$cd_rev->getError;
- # Convert into internal representation.
- my $result_int = 1;
- while (my ($code, $ucs) = each %$local2ucs) {
- my $outbuf = chr $code;
- my $result = $cd_int->recode ($outbuf);
- unless ($result && $outbuf->[0] == $ucs) {
- $result_int = 0;
- last;
- }
- }
- ok $result_int;
- # Convert to UTF-8.
- my $result_utf8 = 1;
- while (my ($code, $ucs) = each %$local2ucs) {
- my $outbuf = chr $code;
- my $result = $cd_utf8->recode ($outbuf);
- unless ($result && $outbuf eq int2utf8 $ucs) {
- $result_utf8 = 0;
- last;
- }
- }
- ok $result_utf8;
- # Convert from internal representation.
- my $result_rev = 1;
- while (my ($ucs, $code) = each %$ucs2local) {
- my $outbuf = [ $ucs ];
- my $result = $cd_rev->recode ($outbuf);
- unless ($result && $code == ord $outbuf) {
- $result_int = 0;
- last;
- }
- }
- ok $result_int;
- # Check handling of unknown characters.
- my $test_string1 = [ unpack 'c*', ' Supergirl ' ];
- $test_string1->[0] = 0xad0be;
- $test_string1->[-1] = 0xbeefbabe;
- my $test_string2 = [ unpack 'c*', 'Supergirl' ];
- my $unknown = "\x3f"; # Unknown character!
- $cd_rev = Locale::Recode->new (from => 'INTERNAL',
- to => 'INIS-CYRILLIC',
- )
- && $cd_rev->recode ($test_string1)
- && $cd_rev->recode ($test_string2)
- && ($test_string2 = $unknown . $test_string2 . $unknown);
- ok $test_string1 eq $test_string2;
- sub int2utf8
- {
- my $ucs4 = shift;
-
- if ($ucs4 <= 0x7f) {
- return chr $ucs4;
- } elsif ($ucs4 <= 0x7ff) {
- return pack ("C2",
- (0xc0 | (($ucs4 >> 6) & 0x1f)),
- (0x80 | ($ucs4 & 0x3f)));
- } elsif ($ucs4 <= 0xffff) {
- return pack ("C3",
- (0xe0 | (($ucs4 >> 12) & 0xf)),
- (0x80 | (($ucs4 >> 6) & 0x3f)),
- (0x80 | ($ucs4 & 0x3f)));
- } elsif ($ucs4 <= 0x1fffff) {
- return pack ("C4",
- (0xf0 | (($ucs4 >> 18) & 0x7)),
- (0x80 | (($ucs4 >> 12) & 0x3f)),
- (0x80 | (($ucs4 >> 6) & 0x3f)),
- (0x80 | ($ucs4 & 0x3f)));
- } elsif ($ucs4 <= 0x3ffffff) {
- return pack ("C5",
- (0xf0 | (($ucs4 >> 24) & 0x3)),
- (0x80 | (($ucs4 >> 18) & 0x3f)),
- (0x80 | (($ucs4 >> 12) & 0x3f)),
- (0x80 | (($ucs4 >> 6) & 0x3f)),
- (0x80 | ($ucs4 & 0x3f)));
- } else {
- return pack ("C6",
- (0xf0 | (($ucs4 >> 30) & 0x3)),
- (0x80 | (($ucs4 >> 24) & 0x1)),
- (0x80 | (($ucs4 >> 18) & 0x3f)),
- (0x80 | (($ucs4 >> 12) & 0x3f)),
- (0x80 | (($ucs4 >> 6) & 0x3f)),
- (0x80 | ($ucs4 & 0x3f)));
- }
- }
- #Local Variables:
- #mode: perl
- #perl-indent-level: 4
- #perl-continued-statement-offset: 4
- #perl-continued-brace-offset: 0
- #perl-brace-offset: -4
- #perl-brace-imaginary-offset: 0
- #perl-label-offset: -4
- #tab-width: 4
- #End:
- __DATA__
- 0x00 0x0000
- 0x01 0x0001
- 0x02 0x0002
- 0x03 0x0003
- 0x04 0x0004
- 0x05 0x0005
- 0x06 0x0006
- 0x07 0x0007
- 0x08 0x0008
- 0x09 0x0009
- 0x0a 0x000a
- 0x0b 0x000b
- 0x0c 0x000c
- 0x0d 0x000d
- 0x0e 0x000e
- 0x0f 0x000f
- 0x10 0x0010
- 0x11 0x0011
- 0x12 0x0012
- 0x13 0x0013
- 0x14 0x0014
- 0x15 0x0015
- 0x16 0x0016
- 0x17 0x0017
- 0x18 0x0018
- 0x19 0x0019
- 0x1a 0x001a
- 0x1b 0x001b
- 0x1c 0x001c
- 0x1d 0x001d
- 0x1e 0x001e
- 0x1f 0x001f
- 0x20 0x0020
- 0x2c 0xfffd
- 0x2c 0xfffd
- 0x2c 0xfffd
- 0x2c 0xfffd
- 0x2c 0xfffd
- 0x2c 0xfffd
- 0x2c 0xfffd
- 0x2c 0xfffd
- 0x2c 0xfffd
- 0x2c 0xfffd
- 0x2c 0xfffd
- 0x2c 0x221a
- 0x2e 0xfffd
- 0x2e 0x2192
- 0x2f 0x222b
- 0x30 0x03b1
- 0x31 0x03b2
- 0x32 0x03b3
- 0x33 0x03b4
- 0x34 0x03a3
- 0x35 0x03bc
- 0x36 0x03bd
- 0x37 0x03c9
- 0x38 0x03c0
- 0x39 0x039e
- 0x3a 0x0394
- 0x3b 0x039b
- 0x3c 0x03a9
- 0x3d 0x042a
- 0x3e 0x207b
- 0x3f 0x207a
- 0x40 0x044e
- 0x41 0x0430
- 0x42 0x0431
- 0x43 0x0446
- 0x44 0x0434
- 0x45 0x0435
- 0x46 0x0444
- 0x47 0x0433
- 0x48 0x0445
- 0x49 0x0438
- 0x4a 0x0439
- 0x4b 0x043a
- 0x4c 0x043b
- 0x4d 0x043c
- 0x4e 0x043d
- 0x4f 0x043e
- 0x50 0x043f
- 0x51 0x044f
- 0x52 0x0440
- 0x53 0x0441
- 0x54 0x0442
- 0x55 0x0443
- 0x56 0x0436
- 0x57 0x0432
- 0x58 0x044c
- 0x59 0x044b
- 0x5a 0x0437
- 0x5b 0x0448
- 0x5c 0x044d
- 0x5d 0x0449
- 0x5e 0x0447
- 0x5f 0x044a
- 0x60 0x042e
- 0x61 0x0410
- 0x62 0x0411
- 0x63 0x0426
- 0x64 0x0414
- 0x65 0x0415
- 0x66 0x0424
- 0x67 0x0413
- 0x68 0x0425
- 0x69 0x0418
- 0x6a 0x0419
- 0x6b 0x041a
- 0x6c 0x041b
- 0x6d 0x041c
- 0x6e 0x041d
- 0x6f 0x041e
- 0x70 0x041f
- 0x71 0x042f
- 0x72 0x0420
- 0x73 0x0421
- 0x74 0x0422
- 0x75 0x0423
- 0x76 0x0416
- 0x77 0x0412
- 0x78 0x042c
- 0x79 0x042b
- 0x7a 0x0417
- 0x7b 0x0428
- 0x7c 0x042d
- 0x7d 0x0429
- 0x7e 0x0427
- 0x7f 0x007f
- 0x80 0xfffd
- 0x81 0xfffd
- 0x82 0xfffd
- 0x83 0xfffd
- 0x84 0xfffd
- 0x85 0xfffd
- 0x86 0xfffd
- 0x87 0xfffd
- 0x88 0xfffd
- 0x89 0xfffd
- 0x8a 0xfffd
- 0x8b 0xfffd
- 0x8c 0xfffd
- 0x8d 0xfffd
- 0x8e 0xfffd
- 0x8f 0xfffd
- 0x90 0xfffd
- 0x91 0xfffd
- 0x92 0xfffd
- 0x93 0xfffd
- 0x94 0xfffd
- 0x95 0xfffd
- 0x96 0xfffd
- 0x97 0xfffd
- 0x98 0xfffd
- 0x99 0xfffd
- 0x9a 0xfffd
- 0x9b 0xfffd
- 0x9c 0xfffd
- 0x9d 0xfffd
- 0x9e 0xfffd
- 0x9f 0xfffd
- 0xa0 0xfffd
- 0xa1 0xfffd
- 0xa2 0xfffd
- 0xa3 0xfffd
- 0xa4 0xfffd
- 0xa5 0xfffd
- 0xa6 0xfffd
- 0xa7 0xfffd
- 0xa8 0xfffd
- 0xa9 0xfffd
- 0xaa 0xfffd
- 0xab 0xfffd
- 0xac 0xfffd
- 0xad 0xfffd
- 0xae 0xfffd
- 0xaf 0xfffd
- 0xb0 0xfffd
- 0xb1 0xfffd
- 0xb2 0xfffd
- 0xb3 0xfffd
- 0xb4 0xfffd
- 0xb5 0xfffd
- 0xb6 0xfffd
- 0xb7 0xfffd
- 0xb8 0xfffd
- 0xb9 0xfffd
- 0xba 0xfffd
- 0xbb 0xfffd
- 0xbc 0xfffd
- 0xbd 0xfffd
- 0xbe 0xfffd
- 0xbf 0xfffd
- 0xc0 0xfffd
- 0xc1 0xfffd
- 0xc2 0xfffd
- 0xc3 0xfffd
- 0xc4 0xfffd
- 0xc5 0xfffd
- 0xc6 0xfffd
- 0xc7 0xfffd
- 0xc8 0xfffd
- 0xc9 0xfffd
- 0xca 0xfffd
- 0xcb 0xfffd
- 0xcc 0xfffd
- 0xcd 0xfffd
- 0xce 0xfffd
- 0xcf 0xfffd
- 0xd0 0xfffd
- 0xd1 0xfffd
- 0xd2 0xfffd
- 0xd3 0xfffd
- 0xd4 0xfffd
- 0xd5 0xfffd
- 0xd6 0xfffd
- 0xd7 0xfffd
- 0xd8 0xfffd
- 0xd9 0xfffd
- 0xda 0xfffd
- 0xdb 0xfffd
- 0xdc 0xfffd
- 0xdd 0xfffd
- 0xde 0xfffd
- 0xdf 0xfffd
- 0xe0 0xfffd
- 0xe1 0xfffd
- 0xe2 0xfffd
- 0xe3 0xfffd
- 0xe4 0xfffd
- 0xe5 0xfffd
- 0xe6 0xfffd
- 0xe7 0xfffd
- 0xe8 0xfffd
- 0xe9 0xfffd
- 0xea 0xfffd
- 0xeb 0xfffd
- 0xec 0xfffd
- 0xed 0xfffd
- 0xee 0xfffd
- 0xef 0xfffd
- 0xf0 0xfffd
- 0xf1 0xfffd
- 0xf2 0xfffd
- 0xf3 0xfffd
- 0xf4 0xfffd
- 0xf5 0xfffd
- 0xf6 0xfffd
- 0xf7 0xfffd
- 0xf8 0xfffd
- 0xf9 0xfffd
- 0xfa 0xfffd
- 0xfb 0xfffd
- 0xfc 0xfffd
- 0xfd 0xfffd
- 0xfe 0xfffd
- 0xff 0xfffd
|