02inis-cyrillic.t 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. #! /usr/local/bin/perl -w
  2. # vim: tabstop=4
  3. # vim: syntax=perl
  4. use strict;
  5. use Test;
  6. BEGIN {
  7. plan tests => 7;
  8. }
  9. use Locale::Recode;
  10. sub int2utf8;
  11. my $local2ucs = {};
  12. my $ucs2local = {};
  13. while (<DATA>) {
  14. my ($code, $ucs, undef) = map { oct $_ } split /\s+/, $_;
  15. $local2ucs->{$code} = $ucs;
  16. $ucs2local->{$ucs} = $code unless $ucs == 0xfffd;
  17. }
  18. my $cd_int = Locale::Recode->new (from => 'INIS-CYRILLIC',
  19. to => 'INTERNAL');
  20. ok !$cd_int->getError;
  21. my $cd_utf8 = Locale::Recode->new (from => 'INIS-CYRILLIC',
  22. to => 'UTF-8');
  23. ok !$cd_utf8->getError;
  24. my $cd_rev = Locale::Recode->new (from => 'INTERNAL',
  25. to => 'INIS-CYRILLIC');
  26. ok !$cd_rev->getError;
  27. # Convert into internal representation.
  28. my $result_int = 1;
  29. while (my ($code, $ucs) = each %$local2ucs) {
  30. my $outbuf = chr $code;
  31. my $result = $cd_int->recode ($outbuf);
  32. unless ($result && $outbuf->[0] == $ucs) {
  33. $result_int = 0;
  34. last;
  35. }
  36. }
  37. ok $result_int;
  38. # Convert to UTF-8.
  39. my $result_utf8 = 1;
  40. while (my ($code, $ucs) = each %$local2ucs) {
  41. my $outbuf = chr $code;
  42. my $result = $cd_utf8->recode ($outbuf);
  43. unless ($result && $outbuf eq int2utf8 $ucs) {
  44. $result_utf8 = 0;
  45. last;
  46. }
  47. }
  48. ok $result_utf8;
  49. # Convert from internal representation.
  50. my $result_rev = 1;
  51. while (my ($ucs, $code) = each %$ucs2local) {
  52. my $outbuf = [ $ucs ];
  53. my $result = $cd_rev->recode ($outbuf);
  54. unless ($result && $code == ord $outbuf) {
  55. $result_int = 0;
  56. last;
  57. }
  58. }
  59. ok $result_int;
  60. # Check handling of unknown characters.
  61. my $test_string1 = [ unpack 'c*', ' Supergirl ' ];
  62. $test_string1->[0] = 0xad0be;
  63. $test_string1->[-1] = 0xbeefbabe;
  64. my $test_string2 = [ unpack 'c*', 'Supergirl' ];
  65. my $unknown = "\x3f"; # Unknown character!
  66. $cd_rev = Locale::Recode->new (from => 'INTERNAL',
  67. to => 'INIS-CYRILLIC',
  68. )
  69. && $cd_rev->recode ($test_string1)
  70. && $cd_rev->recode ($test_string2)
  71. && ($test_string2 = $unknown . $test_string2 . $unknown);
  72. ok $test_string1 eq $test_string2;
  73. sub int2utf8
  74. {
  75. my $ucs4 = shift;
  76. if ($ucs4 <= 0x7f) {
  77. return chr $ucs4;
  78. } elsif ($ucs4 <= 0x7ff) {
  79. return pack ("C2",
  80. (0xc0 | (($ucs4 >> 6) & 0x1f)),
  81. (0x80 | ($ucs4 & 0x3f)));
  82. } elsif ($ucs4 <= 0xffff) {
  83. return pack ("C3",
  84. (0xe0 | (($ucs4 >> 12) & 0xf)),
  85. (0x80 | (($ucs4 >> 6) & 0x3f)),
  86. (0x80 | ($ucs4 & 0x3f)));
  87. } elsif ($ucs4 <= 0x1fffff) {
  88. return pack ("C4",
  89. (0xf0 | (($ucs4 >> 18) & 0x7)),
  90. (0x80 | (($ucs4 >> 12) & 0x3f)),
  91. (0x80 | (($ucs4 >> 6) & 0x3f)),
  92. (0x80 | ($ucs4 & 0x3f)));
  93. } elsif ($ucs4 <= 0x3ffffff) {
  94. return pack ("C5",
  95. (0xf0 | (($ucs4 >> 24) & 0x3)),
  96. (0x80 | (($ucs4 >> 18) & 0x3f)),
  97. (0x80 | (($ucs4 >> 12) & 0x3f)),
  98. (0x80 | (($ucs4 >> 6) & 0x3f)),
  99. (0x80 | ($ucs4 & 0x3f)));
  100. } else {
  101. return pack ("C6",
  102. (0xf0 | (($ucs4 >> 30) & 0x3)),
  103. (0x80 | (($ucs4 >> 24) & 0x1)),
  104. (0x80 | (($ucs4 >> 18) & 0x3f)),
  105. (0x80 | (($ucs4 >> 12) & 0x3f)),
  106. (0x80 | (($ucs4 >> 6) & 0x3f)),
  107. (0x80 | ($ucs4 & 0x3f)));
  108. }
  109. }
  110. #Local Variables:
  111. #mode: perl
  112. #perl-indent-level: 4
  113. #perl-continued-statement-offset: 4
  114. #perl-continued-brace-offset: 0
  115. #perl-brace-offset: -4
  116. #perl-brace-imaginary-offset: 0
  117. #perl-label-offset: -4
  118. #tab-width: 4
  119. #End:
  120. __DATA__
  121. 0x00 0x0000
  122. 0x01 0x0001
  123. 0x02 0x0002
  124. 0x03 0x0003
  125. 0x04 0x0004
  126. 0x05 0x0005
  127. 0x06 0x0006
  128. 0x07 0x0007
  129. 0x08 0x0008
  130. 0x09 0x0009
  131. 0x0a 0x000a
  132. 0x0b 0x000b
  133. 0x0c 0x000c
  134. 0x0d 0x000d
  135. 0x0e 0x000e
  136. 0x0f 0x000f
  137. 0x10 0x0010
  138. 0x11 0x0011
  139. 0x12 0x0012
  140. 0x13 0x0013
  141. 0x14 0x0014
  142. 0x15 0x0015
  143. 0x16 0x0016
  144. 0x17 0x0017
  145. 0x18 0x0018
  146. 0x19 0x0019
  147. 0x1a 0x001a
  148. 0x1b 0x001b
  149. 0x1c 0x001c
  150. 0x1d 0x001d
  151. 0x1e 0x001e
  152. 0x1f 0x001f
  153. 0x20 0x0020
  154. 0x21 0xfffd
  155. 0x22 0xfffd
  156. 0x23 0xfffd
  157. 0x24 0xfffd
  158. 0x25 0xfffd
  159. 0x26 0xfffd
  160. 0x27 0xfffd
  161. 0x28 0xfffd
  162. 0x29 0xfffd
  163. 0x2a 0xfffd
  164. 0x2b 0xfffd
  165. 0x2c 0x221a
  166. 0x2d 0xfffd
  167. 0x2e 0x2192
  168. 0x2f 0x222b
  169. 0x30 0x03b1
  170. 0x31 0x03b2
  171. 0x32 0x03b3
  172. 0x33 0x03b4
  173. 0x34 0x03a3
  174. 0x35 0x03bc
  175. 0x36 0x03bd
  176. 0x37 0x03c9
  177. 0x38 0x03c0
  178. 0x39 0x039e
  179. 0x3a 0x0394
  180. 0x3b 0x039b
  181. 0x3c 0x03a9
  182. 0x3d 0x042a
  183. 0x3e 0x207b
  184. 0x3f 0x207a
  185. 0x40 0x044e
  186. 0x41 0x0430
  187. 0x42 0x0431
  188. 0x43 0x0446
  189. 0x44 0x0434
  190. 0x45 0x0435
  191. 0x46 0x0444
  192. 0x47 0x0433
  193. 0x48 0x0445
  194. 0x49 0x0438
  195. 0x4a 0x0439
  196. 0x4b 0x043a
  197. 0x4c 0x043b
  198. 0x4d 0x043c
  199. 0x4e 0x043d
  200. 0x4f 0x043e
  201. 0x50 0x043f
  202. 0x51 0x044f
  203. 0x52 0x0440
  204. 0x53 0x0441
  205. 0x54 0x0442
  206. 0x55 0x0443
  207. 0x56 0x0436
  208. 0x57 0x0432
  209. 0x58 0x044c
  210. 0x59 0x044b
  211. 0x5a 0x0437
  212. 0x5b 0x0448
  213. 0x5c 0x044d
  214. 0x5d 0x0449
  215. 0x5e 0x0447
  216. 0x5f 0x044a
  217. 0x60 0x042e
  218. 0x61 0x0410
  219. 0x62 0x0411
  220. 0x63 0x0426
  221. 0x64 0x0414
  222. 0x65 0x0415
  223. 0x66 0x0424
  224. 0x67 0x0413
  225. 0x68 0x0425
  226. 0x69 0x0418
  227. 0x6a 0x0419
  228. 0x6b 0x041a
  229. 0x6c 0x041b
  230. 0x6d 0x041c
  231. 0x6e 0x041d
  232. 0x6f 0x041e
  233. 0x70 0x041f
  234. 0x71 0x042f
  235. 0x72 0x0420
  236. 0x73 0x0421
  237. 0x74 0x0422
  238. 0x75 0x0423
  239. 0x76 0x0416
  240. 0x77 0x0412
  241. 0x78 0x042c
  242. 0x79 0x042b
  243. 0x7a 0x0417
  244. 0x7b 0x0428
  245. 0x7c 0x042d
  246. 0x7d 0x0429
  247. 0x7e 0x0427
  248. 0x7f 0x007f
  249. 0x80 0xfffd
  250. 0x81 0xfffd
  251. 0x82 0xfffd
  252. 0x83 0xfffd
  253. 0x84 0xfffd
  254. 0x85 0xfffd
  255. 0x86 0xfffd
  256. 0x87 0xfffd
  257. 0x88 0xfffd
  258. 0x89 0xfffd
  259. 0x8a 0xfffd
  260. 0x8b 0xfffd
  261. 0x8c 0xfffd
  262. 0x8d 0xfffd
  263. 0x8e 0xfffd
  264. 0x8f 0xfffd
  265. 0x90 0xfffd
  266. 0x91 0xfffd
  267. 0x92 0xfffd
  268. 0x93 0xfffd
  269. 0x94 0xfffd
  270. 0x95 0xfffd
  271. 0x96 0xfffd
  272. 0x97 0xfffd
  273. 0x98 0xfffd
  274. 0x99 0xfffd
  275. 0x9a 0xfffd
  276. 0x9b 0xfffd
  277. 0x9c 0xfffd
  278. 0x9d 0xfffd
  279. 0x9e 0xfffd
  280. 0x9f 0xfffd
  281. 0xa0 0xfffd
  282. 0xa1 0xfffd
  283. 0xa2 0xfffd
  284. 0xa3 0xfffd
  285. 0xa4 0xfffd
  286. 0xa5 0xfffd
  287. 0xa6 0xfffd
  288. 0xa7 0xfffd
  289. 0xa8 0xfffd
  290. 0xa9 0xfffd
  291. 0xaa 0xfffd
  292. 0xab 0xfffd
  293. 0xac 0xfffd
  294. 0xad 0xfffd
  295. 0xae 0xfffd
  296. 0xaf 0xfffd
  297. 0xb0 0xfffd
  298. 0xb1 0xfffd
  299. 0xb2 0xfffd
  300. 0xb3 0xfffd
  301. 0xb4 0xfffd
  302. 0xb5 0xfffd
  303. 0xb6 0xfffd
  304. 0xb7 0xfffd
  305. 0xb8 0xfffd
  306. 0xb9 0xfffd
  307. 0xba 0xfffd
  308. 0xbb 0xfffd
  309. 0xbc 0xfffd
  310. 0xbd 0xfffd
  311. 0xbe 0xfffd
  312. 0xbf 0xfffd
  313. 0xc0 0xfffd
  314. 0xc1 0xfffd
  315. 0xc2 0xfffd
  316. 0xc3 0xfffd
  317. 0xc4 0xfffd
  318. 0xc5 0xfffd
  319. 0xc6 0xfffd
  320. 0xc7 0xfffd
  321. 0xc8 0xfffd
  322. 0xc9 0xfffd
  323. 0xca 0xfffd
  324. 0xcb 0xfffd
  325. 0xcc 0xfffd
  326. 0xcd 0xfffd
  327. 0xce 0xfffd
  328. 0xcf 0xfffd
  329. 0xd0 0xfffd
  330. 0xd1 0xfffd
  331. 0xd2 0xfffd
  332. 0xd3 0xfffd
  333. 0xd4 0xfffd
  334. 0xd5 0xfffd
  335. 0xd6 0xfffd
  336. 0xd7 0xfffd
  337. 0xd8 0xfffd
  338. 0xd9 0xfffd
  339. 0xda 0xfffd
  340. 0xdb 0xfffd
  341. 0xdc 0xfffd
  342. 0xdd 0xfffd
  343. 0xde 0xfffd
  344. 0xdf 0xfffd
  345. 0xe0 0xfffd
  346. 0xe1 0xfffd
  347. 0xe2 0xfffd
  348. 0xe3 0xfffd
  349. 0xe4 0xfffd
  350. 0xe5 0xfffd
  351. 0xe6 0xfffd
  352. 0xe7 0xfffd
  353. 0xe8 0xfffd
  354. 0xe9 0xfffd
  355. 0xea 0xfffd
  356. 0xeb 0xfffd
  357. 0xec 0xfffd
  358. 0xed 0xfffd
  359. 0xee 0xfffd
  360. 0xef 0xfffd
  361. 0xf0 0xfffd
  362. 0xf1 0xfffd
  363. 0xf2 0xfffd
  364. 0xf3 0xfffd
  365. 0xf4 0xfffd
  366. 0xf5 0xfffd
  367. 0xf6 0xfffd
  368. 0xf7 0xfffd
  369. 0xf8 0xfffd
  370. 0xf9 0xfffd
  371. 0xfa 0xfffd
  372. 0xfb 0xfffd
  373. 0xfc 0xfffd
  374. 0xfd 0xfffd
  375. 0xfe 0xfffd
  376. 0xff 0xfffd