02iso-8859-11.t 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. #! /usr/local/bin/perl -w
  2. # vim: tabstop=4
  3. # vim: syntax=perl
  4. use strict;
  5. use Test;
  6. BEGIN {
  7. eval {
  8. require Encode;
  9. if ($Encode::VERSION < "1.78") {
  10. print "1..0 # Skip: Encode $Encode::VERSION has a " .
  11. "bug, please upgrade!\n";
  12. exit 0;
  13. }
  14. };
  15. plan tests => 7;
  16. }
  17. use Locale::Recode;
  18. sub int2utf8;
  19. my $local2ucs = {};
  20. my $ucs2local = {};
  21. while (<DATA>) {
  22. my ($code, $ucs, undef) = map { oct $_ } split /\s+/, $_;
  23. $local2ucs->{$code} = $ucs;
  24. $ucs2local->{$ucs} = $code unless $ucs == 0xfffd;
  25. }
  26. my $cd_int = Locale::Recode->new (from => 'ISO-8859-11',
  27. to => 'INTERNAL');
  28. ok !$cd_int->getError;
  29. my $cd_utf8 = Locale::Recode->new (from => 'ISO-8859-11',
  30. to => 'UTF-8');
  31. ok !$cd_utf8->getError;
  32. my $cd_rev = Locale::Recode->new (from => 'INTERNAL',
  33. to => 'ISO-8859-11');
  34. ok !$cd_rev->getError;
  35. # Convert into internal representation.
  36. my $result_int = 1;
  37. while (my ($code, $ucs) = each %$local2ucs) {
  38. my $outbuf = chr $code;
  39. my $result = $cd_int->recode ($outbuf);
  40. unless ($result && $outbuf->[0] == $ucs) {
  41. $result_int = 0;
  42. last;
  43. }
  44. }
  45. ok $result_int;
  46. # Convert to UTF-8.
  47. my $result_utf8 = 1;
  48. while (my ($code, $ucs) = each %$local2ucs) {
  49. my $outbuf = chr $code;
  50. my $result = $cd_utf8->recode ($outbuf);
  51. unless ($result && $outbuf eq int2utf8 $ucs) {
  52. $result_utf8 = 0;
  53. last;
  54. }
  55. }
  56. ok $result_utf8;
  57. # Convert from internal representation.
  58. my $result_rev = 1;
  59. while (my ($ucs, $code) = each %$ucs2local) {
  60. my $outbuf = [ $ucs ];
  61. my $result = $cd_rev->recode ($outbuf);
  62. unless ($result && $code == ord $outbuf) {
  63. $result_int = 0;
  64. last;
  65. }
  66. }
  67. ok $result_int;
  68. # Check handling of unknown characters.
  69. my $test_string1 = [ unpack 'c*', ' Supergirl ' ];
  70. $test_string1->[0] = 0xad0be;
  71. $test_string1->[-1] = 0xbeefbabe;
  72. my $test_string2 = [ unpack 'c*', 'Supergirl' ];
  73. my $unknown = "\x3f"; # Unknown character!
  74. $cd_rev = Locale::Recode->new (from => 'INTERNAL',
  75. to => 'ISO-8859-11',
  76. )
  77. && $cd_rev->recode ($test_string1)
  78. && $cd_rev->recode ($test_string2)
  79. && ($test_string2 = $unknown . $test_string2 . $unknown);
  80. ok $test_string1 eq $test_string2;
  81. sub int2utf8
  82. {
  83. my $ucs4 = shift;
  84. if ($ucs4 <= 0x7f) {
  85. return chr $ucs4;
  86. } elsif ($ucs4 <= 0x7ff) {
  87. return pack ("C2",
  88. (0xc0 | (($ucs4 >> 6) & 0x1f)),
  89. (0x80 | ($ucs4 & 0x3f)));
  90. } elsif ($ucs4 <= 0xffff) {
  91. return pack ("C3",
  92. (0xe0 | (($ucs4 >> 12) & 0xf)),
  93. (0x80 | (($ucs4 >> 6) & 0x3f)),
  94. (0x80 | ($ucs4 & 0x3f)));
  95. } elsif ($ucs4 <= 0x1fffff) {
  96. return pack ("C4",
  97. (0xf0 | (($ucs4 >> 18) & 0x7)),
  98. (0x80 | (($ucs4 >> 12) & 0x3f)),
  99. (0x80 | (($ucs4 >> 6) & 0x3f)),
  100. (0x80 | ($ucs4 & 0x3f)));
  101. } elsif ($ucs4 <= 0x3ffffff) {
  102. return pack ("C5",
  103. (0xf0 | (($ucs4 >> 24) & 0x3)),
  104. (0x80 | (($ucs4 >> 18) & 0x3f)),
  105. (0x80 | (($ucs4 >> 12) & 0x3f)),
  106. (0x80 | (($ucs4 >> 6) & 0x3f)),
  107. (0x80 | ($ucs4 & 0x3f)));
  108. } else {
  109. return pack ("C6",
  110. (0xf0 | (($ucs4 >> 30) & 0x3)),
  111. (0x80 | (($ucs4 >> 24) & 0x1)),
  112. (0x80 | (($ucs4 >> 18) & 0x3f)),
  113. (0x80 | (($ucs4 >> 12) & 0x3f)),
  114. (0x80 | (($ucs4 >> 6) & 0x3f)),
  115. (0x80 | ($ucs4 & 0x3f)));
  116. }
  117. }
  118. #Local Variables:
  119. #mode: perl
  120. #perl-indent-level: 4
  121. #perl-continued-statement-offset: 4
  122. #perl-continued-brace-offset: 0
  123. #perl-brace-offset: -4
  124. #perl-brace-imaginary-offset: 0
  125. #perl-label-offset: -4
  126. #tab-width: 4
  127. #End:
  128. __DATA__
  129. 0x00 0x0000
  130. 0x01 0x0001
  131. 0x02 0x0002
  132. 0x03 0x0003
  133. 0x04 0x0004
  134. 0x05 0x0005
  135. 0x06 0x0006
  136. 0x07 0x0007
  137. 0x08 0x0008
  138. 0x09 0x0009
  139. 0x0a 0x000a
  140. 0x0b 0x000b
  141. 0x0c 0x000c
  142. 0x0d 0x000d
  143. 0x0e 0x000e
  144. 0x0f 0x000f
  145. 0x10 0x0010
  146. 0x11 0x0011
  147. 0x12 0x0012
  148. 0x13 0x0013
  149. 0x14 0x0014
  150. 0x15 0x0015
  151. 0x16 0x0016
  152. 0x17 0x0017
  153. 0x18 0x0018
  154. 0x19 0x0019
  155. 0x1a 0x001a
  156. 0x1b 0x001b
  157. 0x1c 0x001c
  158. 0x1d 0x001d
  159. 0x1e 0x001e
  160. 0x1f 0x001f
  161. 0x20 0x0020
  162. 0x21 0x0021
  163. 0x22 0x0022
  164. 0x23 0x0023
  165. 0x24 0x0024
  166. 0x25 0x0025
  167. 0x26 0x0026
  168. 0x27 0x0027
  169. 0x28 0x0028
  170. 0x29 0x0029
  171. 0x2a 0x002a
  172. 0x2b 0x002b
  173. 0x2c 0x002c
  174. 0x2d 0x002d
  175. 0x2e 0x002e
  176. 0x2f 0x002f
  177. 0x30 0x0030
  178. 0x31 0x0031
  179. 0x32 0x0032
  180. 0x33 0x0033
  181. 0x34 0x0034
  182. 0x35 0x0035
  183. 0x36 0x0036
  184. 0x37 0x0037
  185. 0x38 0x0038
  186. 0x39 0x0039
  187. 0x3a 0x003a
  188. 0x3b 0x003b
  189. 0x3c 0x003c
  190. 0x3d 0x003d
  191. 0x3e 0x003e
  192. 0x3f 0x003f
  193. 0x40 0x0040
  194. 0x41 0x0041
  195. 0x42 0x0042
  196. 0x43 0x0043
  197. 0x44 0x0044
  198. 0x45 0x0045
  199. 0x46 0x0046
  200. 0x47 0x0047
  201. 0x48 0x0048
  202. 0x49 0x0049
  203. 0x4a 0x004a
  204. 0x4b 0x004b
  205. 0x4c 0x004c
  206. 0x4d 0x004d
  207. 0x4e 0x004e
  208. 0x4f 0x004f
  209. 0x50 0x0050
  210. 0x51 0x0051
  211. 0x52 0x0052
  212. 0x53 0x0053
  213. 0x54 0x0054
  214. 0x55 0x0055
  215. 0x56 0x0056
  216. 0x57 0x0057
  217. 0x58 0x0058
  218. 0x59 0x0059
  219. 0x5a 0x005a
  220. 0x5b 0x005b
  221. 0x5c 0x005c
  222. 0x5d 0x005d
  223. 0x5e 0x005e
  224. 0x5f 0x005f
  225. 0x60 0x0060
  226. 0x61 0x0061
  227. 0x62 0x0062
  228. 0x63 0x0063
  229. 0x64 0x0064
  230. 0x65 0x0065
  231. 0x66 0x0066
  232. 0x67 0x0067
  233. 0x68 0x0068
  234. 0x69 0x0069
  235. 0x6a 0x006a
  236. 0x6b 0x006b
  237. 0x6c 0x006c
  238. 0x6d 0x006d
  239. 0x6e 0x006e
  240. 0x6f 0x006f
  241. 0x70 0x0070
  242. 0x71 0x0071
  243. 0x72 0x0072
  244. 0x73 0x0073
  245. 0x74 0x0074
  246. 0x75 0x0075
  247. 0x76 0x0076
  248. 0x77 0x0077
  249. 0x78 0x0078
  250. 0x79 0x0079
  251. 0x7a 0x007a
  252. 0x7b 0x007b
  253. 0x7c 0x007c
  254. 0x7d 0x007d
  255. 0x7e 0x007e
  256. 0x7f 0x007f
  257. 0xa0 0x00a0
  258. 0xa1 0x0e01
  259. 0xa2 0x0e02
  260. 0xa3 0x0e03
  261. 0xa4 0x0e04
  262. 0xa5 0x0e05
  263. 0xa6 0x0e06
  264. 0xa7 0x0e07
  265. 0xa8 0x0e08
  266. 0xa9 0x0e09
  267. 0xaa 0x0e0a
  268. 0xab 0x0e0b
  269. 0xac 0x0e0c
  270. 0xad 0x0e0d
  271. 0xae 0x0e0e
  272. 0xaf 0x0e0f
  273. 0xb0 0x0e10
  274. 0xb1 0x0e11
  275. 0xb2 0x0e12
  276. 0xb3 0x0e13
  277. 0xb4 0x0e14
  278. 0xb5 0x0e15
  279. 0xb6 0x0e16
  280. 0xb7 0x0e17
  281. 0xb8 0x0e18
  282. 0xb9 0x0e19
  283. 0xba 0x0e1a
  284. 0xbb 0x0e1b
  285. 0xbc 0x0e1c
  286. 0xbd 0x0e1d
  287. 0xbe 0x0e1e
  288. 0xbf 0x0e1f
  289. 0xc0 0x0e20
  290. 0xc1 0x0e21
  291. 0xc2 0x0e22
  292. 0xc3 0x0e23
  293. 0xc4 0x0e24
  294. 0xc5 0x0e25
  295. 0xc6 0x0e26
  296. 0xc7 0x0e27
  297. 0xc8 0x0e28
  298. 0xc9 0x0e29
  299. 0xca 0x0e2a
  300. 0xcb 0x0e2b
  301. 0xcc 0x0e2c
  302. 0xcd 0x0e2d
  303. 0xce 0x0e2e
  304. 0xcf 0x0e2f
  305. 0xd0 0x0e30
  306. 0xd1 0x0e31
  307. 0xd2 0x0e32
  308. 0xd3 0x0e33
  309. 0xd4 0x0e34
  310. 0xd5 0x0e35
  311. 0xd6 0x0e36
  312. 0xd7 0x0e37
  313. 0xd8 0x0e38
  314. 0xd9 0x0e39
  315. 0xda 0x0e3a
  316. 0xdb 0xfffd
  317. 0xdc 0xfffd
  318. 0xdd 0xfffd
  319. 0xde 0xfffd
  320. 0xdf 0x0e3f
  321. 0xe0 0x0e40
  322. 0xe1 0x0e41
  323. 0xe2 0x0e42
  324. 0xe3 0x0e43
  325. 0xe4 0x0e44
  326. 0xe5 0x0e45
  327. 0xe6 0x0e46
  328. 0xe7 0x0e47
  329. 0xe8 0x0e48
  330. 0xe9 0x0e49
  331. 0xea 0x0e4a
  332. 0xeb 0x0e4b
  333. 0xec 0x0e4c
  334. 0xed 0x0e4d
  335. 0xee 0x0e4e
  336. 0xef 0x0e4f
  337. 0xf0 0x0e50
  338. 0xf1 0x0e51
  339. 0xf2 0x0e52
  340. 0xf3 0x0e53
  341. 0xf4 0x0e54
  342. 0xf5 0x0e55
  343. 0xf6 0x0e56
  344. 0xf7 0x0e57
  345. 0xf8 0x0e58
  346. 0xf9 0x0e59
  347. 0xfa 0x0e5a
  348. 0xfb 0x0e5b
  349. 0xfc 0xfffd
  350. 0xfd 0xfffd
  351. 0xfe 0xfffd
  352. 0xff 0xfffd