stringEncodingConverter.c 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  #include <limits.h>
  #include <stdint.h>
  #include <stdlib.h>
  #include <string.h>
  #if defined(__APPLE__)
  #include <CoreFoundation/CoreFoundation.h>
  #elif defined(_WIN32)
  #include <windows.h>
  #else
  #include <errno.h>
  #include <iconv.h>
  #endif
  #include "stringEncodingConverter.h"
  #if !defined(__APPLE__) && !defined(_WIN32)
  /*
   * Rewrite, in place, three UTF-8 encoded characters in buf[0..len):
   *   U+301C -> U+FF5E, U+2016 -> U+2225, U+2212 -> U+FF0D.
   * Every replacement is 3 bytes for 3 bytes, so the length never changes.
   * The scan is length-based (not strstr) so text that follows an embedded
   * NUL byte is processed as well.
   * NOTE(review): presumably this aligns the iconv result with the code
   * points produced by the other platform back ends -- confirm.
   */
  static void remapCompatibilityChars(char *buf, size_t len)
  {
      static const char table[][2][4] = {
          { "\xe3\x80\x9c", "\xef\xbd\x9e" }, /* U+301C -> U+FF5E */
          { "\xe2\x80\x96", "\xe2\x88\xa5" }, /* U+2016 -> U+2225 */
          { "\xe2\x88\x92", "\xef\xbc\x8d" }, /* U+2212 -> U+FF0D */
      };
      const size_t entries = sizeof table / sizeof table[0];

      for (size_t i = 0; i + 3 <= len; i++) {
          for (size_t k = 0; k < entries; k++) {
              if (memcmp(buf + i, table[k][0], 3) == 0) {
                  memcpy(buf + i, table[k][1], 3);
                  i += 2; /* the loop increment steps over the third byte */
                  break;
              }
          }
      }
  }

  /*
   * Number of input bytes to discard for an undecodable sequence whose first
   * byte is `first`: two for a Shift_JIS double-byte lead byte (0x81-0x9F,
   * 0xE0-0xFC), otherwise one, and never more than `remaining`.
   */
  static size_t undecodableSpan(unsigned char first, size_t remaining)
  {
      int isLeadByte = (first >= 0x81 && first <= 0x9f) ||
                       (first >= 0xe0 && first <= 0xfc);
      size_t span = isLeadByte ? 2 : 1;

      return span < remaining ? span : remaining;
  }
  #endif

  /*
   * Convert `inLength` bytes of Shift_JIS (code page 932) text at `in` into a
   * newly allocated, NUL-terminated UTF-8 string.
   *
   * in        Input bytes; need not be NUL-terminated.
   * inLength  Number of bytes to read from `in`.
   *
   * Returns a heap buffer that the caller must release with free(), or NULL
   * when `in` is NULL with a non-zero length, when the input is too large for
   * the platform API, when memory cannot be allocated, or when the platform
   * converter cannot be set up or fails. Empty input yields an empty string
   * on every platform.
   *
   * On the iconv path an undecodable sequence is replaced by a single '?',
   * and conversion stops quietly at a truncated trailing character, returning
   * what was converted so far.
   */
  char *convertShiftJISToUTF8(const char *in, size_t inLength)
  {
      if (inLength == 0) {
          return calloc(1, 1);
      }
      if (in == NULL) {
          return NULL;
      }

  #if defined(__APPLE__)
      /* The byte count is passed as a CFIndex; refuse lengths that would not
       * survive the conversion (assumes CFIndex is at least as wide as long). */
      if (inLength > (size_t)LONG_MAX) {
          return NULL;
      }

      CFStringRef inStr = CFStringCreateWithBytes(
          NULL,
          (const UInt8 *)in,
          (CFIndex)inLength,
          kCFStringEncodingDOSJapanese,
          false
      );
      if (!inStr) {
          return NULL;
      }

      CFRange range = CFRangeMake(0, CFStringGetLength(inStr));
      CFIndex outLength = 0;

      /* First call only measures the UTF-8 size; the second one fills `out`. */
      CFStringGetBytes(inStr, range, kCFStringEncodingUTF8, '?', false,
                       NULL, 0, &outLength);

      char *out = calloc((size_t)outLength + 1, 1);
      if (out) {
          CFStringGetBytes(inStr, range, kCFStringEncodingUTF8, '?', false,
                           (UInt8 *)out, outLength, NULL);
      }
      CFRelease(inStr);
      return out;

  #elif defined(_WIN32)
      /* The Win32 conversion functions take int lengths. */
      if (inLength > (size_t)INT_MAX) {
          return NULL;
      }
      int inLen = (int)inLength;

      /* Pass 1: code page 932 -> UTF-16 (measure, then convert). */
      int wideLength = MultiByteToWideChar(932, 0, in, inLen, NULL, 0);
      if (wideLength <= 0) {
          return NULL;
      }
      LPWSTR wide = malloc((size_t)wideLength * sizeof *wide);
      if (!wide) {
          return NULL;
      }
      wideLength = MultiByteToWideChar(932, 0, in, inLen, wide, wideLength);

      /* Pass 2: UTF-16 -> UTF-8 (measure, then convert). */
      char *out = NULL;
      if (wideLength > 0) {
          int outLength = WideCharToMultiByte(CP_UTF8, 0, wide, wideLength,
                                              NULL, 0, NULL, NULL);
          if (outLength > 0) {
              out = calloc((size_t)outLength + 1, 1);
              if (out && WideCharToMultiByte(CP_UTF8, 0, wide, wideLength,
                                             out, outLength, NULL, NULL) == 0) {
                  free(out);
                  out = NULL;
              }
          }
      }
      free(wide);
      return out;

  #else
      /* The buffer is sized at three output bytes per input byte plus the
       * terminator; make sure that computation cannot wrap around. */
      if (inLength > (SIZE_MAX - 1) / 3) {
          return NULL;
      }
      size_t outCapacity = inLength * 3 + 1;

      char *out = malloc(outCapacity);
      if (!out) {
          return NULL;
      }

      iconv_t cd = iconv_open("UTF-8", "CP932");
      if (cd == (iconv_t)-1) {
          free(out);
          return NULL;
      }

      /* iconv() takes a non-const char **; the cast only satisfies the
       * prototype. */
      char *src = (char *)in;
      size_t inBytesLeft = inLength;
      char *dst = out;
      size_t outBytesLeft = outCapacity - 1; /* keep one byte for the NUL */

      while (inBytesLeft > 0) {
          if (iconv(cd, &src, &inBytesLeft, &dst, &outBytesLeft) != (size_t)-1) {
              break; /* everything converted */
          }
          if (errno != EILSEQ || outBytesLeft == 0) {
              break; /* truncated tail or other error: keep what we have */
          }

          /* Undecodable sequence: drop it and emit '?'. The span is clamped
           * to the bytes that remain so inBytesLeft can never wrap around. */
          size_t skip = undecodableSpan((unsigned char)*src, inBytesLeft);
          src += skip;
          inBytesLeft -= skip;
          *dst++ = '?';
          outBytesLeft--;
      }
      iconv_close(cd);

      *dst = '\0';
      remapCompatibilityChars(out, (size_t)(dst - out));
      return out;
  #endif
  }