natInput_EUCJIS.cc 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. /* Copyright (C) 1999 Free Software Foundation
  2. This file is part of libgcj.
  3. This software is copyrighted work licensed under the terms of the
  4. Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
  5. details. */
  6. #include <config.h>
  7. #include <gcj/cni.h>
  8. #include <gnu/gcj/convert/Input_EUCJIS.h>
  // Code unit written to the output when an input byte sequence is out of
  // range or has no entry (a zero value) in the conversion table.
  9. #define ERROR_CHAR 0xFFFD
  // Conversion table defined in another translation unit.  read() indexes it
  // as [first byte - 0xA1][second byte - 0xA1] and treats a zero entry as
  // "no mapping".
  10. extern unsigned short JIS0208_to_Unicode[84][94];
  // NOTE(review): presumably the JIS X 0212 counterpart of the table above
  // (its 76 rows match the bound checked in the three-byte branch of
  // read()) -- confirm against the file that defines it.
  11. extern unsigned short JIS0212_to_Unicode[76][94];
  12. jint
  13. gnu::gcj::convert::Input_EUCJIS::read(jcharArray outbuffer, jint outpos,
  14. jint count)
  15. {
  16. jint start_outpos = outpos;
  17. for (;;)
  18. {
  19. if (outpos - start_outpos >= count)
  20. break;
  21. if (inpos >= inlength)
  22. break;
  23. int b = ((unsigned char*) elements(inbuffer))[inpos++];
  24. if (codeset == 0) // ASCII or JIS-Roman
  25. {
  26. if (b < 128)
  27. {
  28. #if 1
  29. // Technically, we should translate 0x5c to Yen symbol;
  30. // in practice, it is not clear.
  31. if (b == 0x5c)
  32. b = 0x00A5; // Yen sign.
  33. #endif
  34. elements(outbuffer)[outpos++] = (char) b;
  35. }
  36. else
  37. {
  38. if (b == 0x8E) // SS2
  39. codeset = 2;
  40. else if (b == 0x8F) // SS3
  41. codeset = 3;
  42. else
  43. {
  44. codeset = 1;
  45. first_byte = b;
  46. }
  47. }
  48. }
  49. else if (codeset == 1) // JIS X 0208:1997
  50. {
  51. first_byte -= 0x80 + 33;
  52. b -= 0x80 + 33;
  53. if ((unsigned) first_byte >= 84 || (unsigned) b >= 94)
  54. b = ERROR_CHAR;
  55. else
  56. {
  57. b = JIS0208_to_Unicode[first_byte][b];
  58. if (b == 0)
  59. b = ERROR_CHAR;
  60. }
  61. elements(outbuffer)[outpos++] = b;
  62. codeset = 0;
  63. }
  64. else if (codeset == 2) // Half-width katakana
  65. {
  66. if (b >= 0xA1 && b <= 0xDF)
  67. b += 0xFF61 - 0xA1;
  68. else
  69. b = ERROR_CHAR;
  70. elements(outbuffer)[outpos++] = b;
  71. codeset = 0;
  72. }
  73. else if (codeset == 3) // second byte of JIS X 0212-1990
  74. {
  75. first_byte = b;
  76. codeset = 4;
  77. }
  78. else // codeset == 4 // third byte of JIS X 0212-1990
  79. {
  80. first_byte -= 0x80 + 34;
  81. b -= 0x80 + 33;
  82. if ((unsigned) first_byte >= 76 || (unsigned) b >= 94)
  83. b = ERROR_CHAR;
  84. else
  85. {
  86. b = JIS0208_to_Unicode[first_byte][b];
  87. if (b == 0)
  88. b = ERROR_CHAR;
  89. }
  90. elements(outbuffer)[outpos++] = b;
  91. codeset = 0;
  92. }
  93. }
  94. return outpos - start_outpos;
  95. }