nsGBKConvUtil.cpp 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #include "nsGBKConvUtil.h"
  6. #include "gbku.h"
  7. #include "nsDebug.h"
  8. #define MAX_GBK_LENGTH 24066 /* (0xfe-0x80)*(0xfe-0x3f) */
  9. //--------------------------------------------------------------------
  10. // nsGBKConvUtil
  11. //--------------------------------------------------------------------
  12. static const char16_t gGBKToUnicodeTable[MAX_GBK_LENGTH] = {
  13. #include "cp936map.h"
  14. };
  15. static const uint16_t gUnicodeToGBKTable[0xA000-0x4e00] = {
  16. #include "cp936invmap.h"
  17. };
  18. bool nsGBKConvUtil::UnicodeToGBKChar(
  19. char16_t aChar, bool aToGL, char*
  20. aOutByte1, char* aOutByte2)
  21. {
  22. bool found=false;
  23. *aOutByte1 = *aOutByte2 = 0;
  24. if(UNICHAR_IN_RANGE(0xd800, aChar, 0xdfff))
  25. {
  26. // surrogate is not in here
  27. return false;
  28. }
  29. if(UNICHAR_IN_RANGE(0x4e00, aChar, 0x9FFF))
  30. {
  31. uint16_t item = gUnicodeToGBKTable[aChar - 0x4e00];
  32. if(item != 0)
  33. {
  34. *aOutByte1 = item >> 8;
  35. *aOutByte2 = item & 0x00FF;
  36. found = true;
  37. } else {
  38. return false;
  39. }
  40. } else if (aChar == UCS2_NO_MAPPING) {
  41. return false;
  42. } else {
  43. // ugly linear search
  44. for( int32_t i = 0; i < MAX_GBK_LENGTH; i++ )
  45. {
  46. if( aChar == gGBKToUnicodeTable[i])
  47. {
  48. *aOutByte1 = (i / 0x00BF + 0x0081) ;
  49. *aOutByte2 = (i % 0x00BF + 0x0040) ;
  50. found = true;
  51. break;
  52. }
  53. }
  54. }
  55. if(! found)
  56. return false;
  57. if(aToGL) {
  58. // to GL, we only return if it is in the range
  59. if(UINT8_IN_RANGE(0xA1, *aOutByte1, 0xFE) &&
  60. UINT8_IN_RANGE(0xA1, *aOutByte2, 0xFE))
  61. {
  62. // mask them to GL
  63. *aOutByte1 &= 0x7F;
  64. *aOutByte2 &= 0x7F;
  65. } else {
  66. // if it does not fit into 0xa1-0xfe 0xa1-0xfe range that mean
  67. // it is not a GB2312 character, we cannot map to GL
  68. *aOutByte1 = 0x00;
  69. *aOutByte2 = 0x00;
  70. return false;
  71. }
  72. }
  73. return true;
  74. }
  75. char16_t nsGBKConvUtil::GBKCharToUnicode(char aByte1, char aByte2)
  76. {
  77. NS_ASSERTION(UINT8_IN_RANGE(0x81,aByte1, 0xFE), "first byte out of range");
  78. NS_ASSERTION(UINT8_IN_RANGE(0x40,aByte2, 0xFE), "second byte out of range");
  79. uint8_t i1 = (uint8_t)aByte1;
  80. uint8_t i2 = (uint8_t)aByte2;
  81. uint16_t idx = (i1 - 0x0081) * 0x00bf + i2 - 0x0040 ;
  82. NS_ASSERTION(idx < MAX_GBK_LENGTH, "ARB");
  83. // play it safe- add if statement here ot protect ARB
  84. // probably not necessary
  85. if(idx < MAX_GBK_LENGTH)
  86. return gGBKToUnicodeTable[ idx ];
  87. else
  88. return UCS2_NO_MAPPING;
  89. }