02ibm273.t 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. #! /usr/local/bin/perl -w
  2. # vim: tabstop=4
  3. # vim: syntax=perl
  4. use strict;
  5. use Test;
  6. BEGIN {
  7. plan tests => 7;
  8. }
  9. use Locale::Recode;
  # Forward declaration: lets the helper defined near the end of this file
  # be called without parentheses before its definition is seen.
  sub int2utf8;

  # Lookup tables built from the mapping that follows __DATA__:
  #   $local2ucs  maps an IBM273 code to its UCS code point,
  #   $ucs2local  maps a UCS code point back to its IBM273 code.
  my $local2ucs = {};
  my $ucs2local = {};

  while (<DATA>) {
      # Fields are separated by whitespace and written as numeric literals
      # that oct() converts; only the first two fields are used.
      my @fields = map { oct $_ } split /\s+/, $_;
      my ($code, $ucs) = @fields;

      $local2ucs->{$code} = $ucs;

      # Entries pointing at 0xfffd are kept out of the reverse table.
      next if $ucs == 0xfffd;
      $ucs2local->{$ucs} = $code;
  }
  # Converters under test, one per direction.  Each constructor call is
  # followed immediately by a check that the new object reports no error.
  my @to_internal   = (from => 'IBM273',   to => 'INTERNAL');
  my @to_utf8       = (from => 'IBM273',   to => 'UTF-8');
  my @from_internal = (from => 'INTERNAL', to => 'IBM273');

  my $cd_int = Locale::Recode->new (@to_internal);
  ok (!$cd_int->getError);

  my $cd_utf8 = Locale::Recode->new (@to_utf8);
  ok (!$cd_utf8->getError);

  my $cd_rev = Locale::Recode->new (@from_internal);
  ok (!$cd_rev->getError);
  # Convert into internal representation.
  #
  # Every IBM273 code in the table is recoded on its own.  The test expects
  # recode to replace the buffer in place with an array reference whose
  # first element is the UCS code point listed for that code.
  #
  # The table is walked with keys() instead of each(): leaving an each()
  # loop early via "last" keeps the hash iterator in the middle, so a later
  # each() over the same hash would silently skip the entries already seen.
  my $result_int = 1;
  for my $code (sort { $a <=> $b } keys %$local2ucs) {
      my $ucs = $local2ucs->{$code};
      my $outbuf = chr $code;
      my $result = $cd_int->recode ($outbuf);
      unless ($result && $outbuf->[0] == $ucs) {
          # Stop at the first mismatch; one failure fails the whole test.
          $result_int = 0;
          last;
      }
  }
  ok $result_int;
  # Convert to UTF-8.
  #
  # Every IBM273 code in the table is recoded on its own and the resulting
  # buffer is compared with the UTF-8 encoding of the expected code point,
  # as produced by int2utf8.
  #
  # keys() is used instead of each() so that the walk always starts at the
  # beginning of the table: each() would resume wherever an earlier,
  # prematurely left each() loop over the same hash had stopped.
  my $result_utf8 = 1;
  for my $code (sort { $a <=> $b } keys %$local2ucs) {
      my $ucs = $local2ucs->{$code};
      my $outbuf = chr $code;
      my $result = $cd_utf8->recode ($outbuf);
      unless ($result && $outbuf eq int2utf8 ($ucs)) {
          # Stop at the first mismatch; one failure fails the whole test.
          $result_utf8 = 0;
          last;
      }
  }
  ok $result_utf8;
  # Convert from internal representation.
  #
  # Every UCS code point in the reverse table is handed to the converter as
  # a one-element array reference; the test expects the buffer to come back
  # as a string whose first character has the listed IBM273 code.
  #
  # The outcome is tracked in $result_rev, a flag of its own, so that a
  # failure of the earlier "into internal" test cannot leak into this one.
  my $result_rev = 1;
  for my $ucs (sort { $a <=> $b } keys %$ucs2local) {
      my $code = $ucs2local->{$ucs};
      my $outbuf = [ $ucs ];
      my $result = $cd_rev->recode ($outbuf);
      unless ($result && $code == ord $outbuf) {
          # Stop at the first mismatch; one failure fails the whole test.
          $result_rev = 0;
          last;
      }
  }
  ok $result_rev;
  # Check handling of unknown characters.
  #
  # The first and last elements of $test_string1 are overwritten with the
  # values 0xad0be and 0xbeefbabe, neither of which appears in the mapping
  # table.  The test expects each of them to be converted to the byte 0x6f,
  # the IBM273 code that the table after __DATA__ lists for U+003F.
  my $test_string1 = [ unpack 'c*', ' Supergirl ' ];
  $test_string1->[0] = 0xad0be;
  $test_string1->[-1] = 0xbeefbabe;
  my $test_string2 = [ unpack 'c*', 'Supergirl' ];
  my $unknown = "\x6f"; # Unknown character!

  # Build the converter first and use it afterwards.  Chaining everything
  # behind a single assignment with && would store the value of the whole
  # chain in $cd_rev and run the conversions on the previous object,
  # because = binds less tightly than &&.
  $cd_rev = Locale::Recode->new (from => 'INTERNAL',
                                 to => 'IBM273',
                                 );
  my $recoded = $cd_rev
      && $cd_rev->recode ($test_string1)
      && $cd_rev->recode ($test_string2);

  # The reference string is the converted plain text wrapped in the
  # replacement byte on both sides.
  $test_string2 = $unknown . $test_string2 . $unknown if $recoded;

  ok ($recoded && $test_string1 eq $test_string2);
  # Encode a single code point as a UTF-8 byte sequence.
  #
  # Argument: a non-negative integer code point.
  # Returns:  a string of one to six bytes.  Values up to 0x1fffff use the
  #           usual one- to four-byte forms; larger values use the historic
  #           five- and six-byte forms with lead bytes 0xf8 and 0xfc.
  #           Bits above bit 30 do not fit into the six-byte form and are
  #           dropped.
  sub int2utf8
  {
      my $ucs4 = shift;

      if ($ucs4 <= 0x7f) {
          # 7 bits: a single byte, identical to the code point.
          return chr $ucs4;
      } elsif ($ucs4 <= 0x7ff) {
          # 11 bits: 110xxxxx 10xxxxxx
          return pack ("C2",
                       (0xc0 | (($ucs4 >> 6) & 0x1f)),
                       (0x80 | ($ucs4 & 0x3f)));
      } elsif ($ucs4 <= 0xffff) {
          # 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
          return pack ("C3",
                       (0xe0 | (($ucs4 >> 12) & 0xf)),
                       (0x80 | (($ucs4 >> 6) & 0x3f)),
                       (0x80 | ($ucs4 & 0x3f)));
      } elsif ($ucs4 <= 0x1fffff) {
          # 21 bits: 11110xxx followed by three continuation bytes.
          return pack ("C4",
                       (0xf0 | (($ucs4 >> 18) & 0x7)),
                       (0x80 | (($ucs4 >> 12) & 0x3f)),
                       (0x80 | (($ucs4 >> 6) & 0x3f)),
                       (0x80 | ($ucs4 & 0x3f)));
      } elsif ($ucs4 <= 0x3ffffff) {
          # 26 bits: 111110xx followed by four continuation bytes.
          # The lead marker is 0xf8; 0xf0 would announce a four-byte form.
          return pack ("C5",
                       (0xf8 | (($ucs4 >> 24) & 0x3)),
                       (0x80 | (($ucs4 >> 18) & 0x3f)),
                       (0x80 | (($ucs4 >> 12) & 0x3f)),
                       (0x80 | (($ucs4 >> 6) & 0x3f)),
                       (0x80 | ($ucs4 & 0x3f)));
      } else {
          # 31 bits: 1111110x followed by five continuation bytes.
          # The lead byte carries one payload bit and every continuation
          # byte, including the first one, carries six.
          return pack ("C6",
                       (0xfc | (($ucs4 >> 30) & 0x1)),
                       (0x80 | (($ucs4 >> 24) & 0x3f)),
                       (0x80 | (($ucs4 >> 18) & 0x3f)),
                       (0x80 | (($ucs4 >> 12) & 0x3f)),
                       (0x80 | (($ucs4 >> 6) & 0x3f)),
                       (0x80 | ($ucs4 & 0x3f)));
      }
  }
  110. #Local Variables:
  111. #mode: perl
  112. #perl-indent-level: 4
  113. #perl-continued-statement-offset: 4
  114. #perl-continued-brace-offset: 0
  115. #perl-brace-offset: -4
  116. #perl-brace-imaginary-offset: 0
  117. #perl-label-offset: -4
  118. #tab-width: 4
  119. #End:
  120. __DATA__
  121. 0x00 0x0000
  122. 0x01 0x0001
  123. 0x02 0x0002
  124. 0x03 0x0003
  125. 0x04 0x009c
  126. 0x05 0x0009
  127. 0x06 0x0086
  128. 0x07 0x007f
  129. 0x08 0x0097
  130. 0x09 0x008d
  131. 0x0a 0x008e
  132. 0x0b 0x000b
  133. 0x0c 0x000c
  134. 0x0d 0x000d
  135. 0x0e 0x000e
  136. 0x0f 0x000f
  137. 0x10 0x0010
  138. 0x11 0x0011
  139. 0x12 0x0012
  140. 0x13 0x0013
  141. 0x14 0x009d
  142. 0x15 0x0085
  143. 0x16 0x0008
  144. 0x17 0x0087
  145. 0x18 0x0018
  146. 0x19 0x0019
  147. 0x1a 0x0092
  148. 0x1b 0x008f
  149. 0x1c 0x001c
  150. 0x1d 0x001d
  151. 0x1e 0x001e
  152. 0x1f 0x001f
  153. 0x20 0x0080
  154. 0x21 0x0081
  155. 0x22 0x0082
  156. 0x23 0x0083
  157. 0x24 0x0084
  158. 0x25 0x000a
  159. 0x26 0x0017
  160. 0x27 0x001b
  161. 0x28 0x0088
  162. 0x29 0x0089
  163. 0x2a 0x008a
  164. 0x2b 0x008b
  165. 0x2c 0x008c
  166. 0x2d 0x0005
  167. 0x2e 0x0006
  168. 0x2f 0x0007
  169. 0x30 0x0090
  170. 0x31 0x0091
  171. 0x32 0x0016
  172. 0x33 0x0093
  173. 0x34 0x0094
  174. 0x35 0x0095
  175. 0x36 0x0096
  176. 0x37 0x0004
  177. 0x38 0x0098
  178. 0x39 0x0099
  179. 0x3a 0x009a
  180. 0x3b 0x009b
  181. 0x3c 0x0014
  182. 0x3d 0x0015
  183. 0x3e 0x009e
  184. 0x3f 0x001a
  185. 0x40 0x0020
  186. 0x41 0x00a0
  187. 0x42 0x00e2
  188. 0x43 0x007b
  189. 0x44 0x00e0
  190. 0x45 0x00e1
  191. 0x46 0x00e3
  192. 0x47 0x00e5
  193. 0x48 0x00e7
  194. 0x49 0x00f1
  195. 0x4a 0x00c4
  196. 0x4b 0x002e
  197. 0x4c 0x003c
  198. 0x4d 0x0028
  199. 0x4e 0x002b
  200. 0x4f 0x0021
  201. 0x50 0x0026
  202. 0x51 0x00e9
  203. 0x52 0x00ea
  204. 0x53 0x00eb
  205. 0x54 0x00e8
  206. 0x55 0x00ed
  207. 0x56 0x00ee
  208. 0x57 0x00ef
  209. 0x58 0x00ec
  210. 0x59 0x007e
  211. 0x5a 0x00dc
  212. 0x5b 0x0024
  213. 0x5c 0x002a
  214. 0x5d 0x0029
  215. 0x5e 0x003b
  216. 0x5f 0x005e
  217. 0x60 0x002d
  218. 0x61 0x002f
  219. 0x62 0x00c2
  220. 0x63 0x005b
  221. 0x64 0x00c0
  222. 0x65 0x00c1
  223. 0x66 0x00c3
  224. 0x67 0x00c5
  225. 0x68 0x00c7
  226. 0x69 0x00d1
  227. 0x6a 0x00f6
  228. 0x6b 0x002c
  229. 0x6c 0x0025
  230. 0x6d 0x005f
  231. 0x6e 0x003e
  232. 0x6f 0x003f
  233. 0x70 0x00f8
  234. 0x71 0x00c9
  235. 0x72 0x00ca
  236. 0x73 0x00cb
  237. 0x74 0x00c8
  238. 0x75 0x00cd
  239. 0x76 0x00ce
  240. 0x77 0x00cf
  241. 0x78 0x00cc
  242. 0x79 0x0060
  243. 0x7a 0x003a
  244. 0x7b 0x0023
  245. 0x7c 0x00a7
  246. 0x7d 0x0027
  247. 0x7e 0x003d
  248. 0x7f 0x0022
  249. 0x80 0x00d8
  250. 0x81 0x0061
  251. 0x82 0x0062
  252. 0x83 0x0063
  253. 0x84 0x0064
  254. 0x85 0x0065
  255. 0x86 0x0066
  256. 0x87 0x0067
  257. 0x88 0x0068
  258. 0x89 0x0069
  259. 0x8a 0x00ab
  260. 0x8b 0x00bb
  261. 0x8c 0x00f0
  262. 0x8d 0x00fd
  263. 0x8e 0x00fe
  264. 0x8f 0x00b1
  265. 0x90 0x00b0
  266. 0x91 0x006a
  267. 0x92 0x006b
  268. 0x93 0x006c
  269. 0x94 0x006d
  270. 0x95 0x006e
  271. 0x96 0x006f
  272. 0x97 0x0070
  273. 0x98 0x0071
  274. 0x99 0x0072
  275. 0x9a 0x00aa
  276. 0x9b 0x00ba
  277. 0x9c 0x00e6
  278. 0x9d 0x00b8
  279. 0x9e 0x00c6
  280. 0x9f 0x00a4
  281. 0xa0 0x00b5
  282. 0xa1 0x00df
  283. 0xa2 0x0073
  284. 0xa3 0x0074
  285. 0xa4 0x0075
  286. 0xa5 0x0076
  287. 0xa6 0x0077
  288. 0xa7 0x0078
  289. 0xa8 0x0079
  290. 0xa9 0x007a
  291. 0xaa 0x00a1
  292. 0xab 0x00bf
  293. 0xac 0x00d0
  294. 0xad 0x00dd
  295. 0xae 0x00de
  296. 0xaf 0x00ae
  297. 0xb0 0x00a2
  298. 0xb1 0x00a3
  299. 0xb2 0x00a5
  300. 0xb3 0x00b7
  301. 0xb4 0x00a9
  302. 0xb5 0x0040
  303. 0xb6 0x00b6
  304. 0xb7 0x00bc
  305. 0xb8 0x00bd
  306. 0xb9 0x00be
  307. 0xba 0x00ac
  308. 0xbb 0x007c
  309. 0xbc 0x203e
  310. 0xbd 0x00a8
  311. 0xbe 0x00b4
  312. 0xbf 0x00d7
  313. 0xc0 0x00e4
  314. 0xc1 0x0041
  315. 0xc2 0x0042
  316. 0xc3 0x0043
  317. 0xc4 0x0044
  318. 0xc5 0x0045
  319. 0xc6 0x0046
  320. 0xc7 0x0047
  321. 0xc8 0x0048
  322. 0xc9 0x0049
  323. 0xca 0x00ad
  324. 0xcb 0x00f4
  325. 0xcc 0x00a6
  326. 0xcd 0x00f2
  327. 0xce 0x00f3
  328. 0xcf 0x00f5
  329. 0xd0 0x00fc
  330. 0xd1 0x004a
  331. 0xd2 0x004b
  332. 0xd3 0x004c
  333. 0xd4 0x004d
  334. 0xd5 0x004e
  335. 0xd6 0x004f
  336. 0xd7 0x0050
  337. 0xd8 0x0051
  338. 0xd9 0x0052
  339. 0xda 0x00b9
  340. 0xdb 0x00fb
  341. 0xdc 0x007d
  342. 0xdd 0x00f9
  343. 0xde 0x00fa
  344. 0xdf 0x00ff
  345. 0xe0 0x00d6
  346. 0xe1 0x00f7
  347. 0xe2 0x0053
  348. 0xe3 0x0054
  349. 0xe4 0x0055
  350. 0xe5 0x0056
  351. 0xe6 0x0057
  352. 0xe7 0x0058
  353. 0xe8 0x0059
  354. 0xe9 0x005a
  355. 0xea 0x00b2
  356. 0xeb 0x00d4
  357. 0xec 0x005c
  358. 0xed 0x00d2
  359. 0xee 0x00d3
  360. 0xef 0x00d5
  361. 0xf0 0x0030
  362. 0xf1 0x0031
  363. 0xf2 0x0032
  364. 0xf3 0x0033
  365. 0xf4 0x0034
  366. 0xf5 0x0035
  367. 0xf6 0x0036
  368. 0xf7 0x0037
  369. 0xf8 0x0038
  370. 0xf9 0x0039
  371. 0xfa 0x00b3
  372. 0xfb 0x00db
  373. 0xfc 0x005d
  374. 0xfd 0x00d9
  375. 0xfe 0x00da
  376. 0xff 0x009f