1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889 |
- #include <string.h>
- #include <stdlib.h>
- #if defined(__APPLE__)
- #include <CoreFoundation/CoreFoundation.h>
- #elif defined(_WIN32)
- #include <windows.h>
- #else
- #include <iconv.h>
- #include <errno.h>
- #endif
- #include "stringEncodingConverter.h"
#if !defined(__APPLE__) && !defined(_WIN32)
/*
 * Overwrites every occurrence of the 3-byte sequence `from` inside
 * buf[0..len) with the 3-byte sequence `to`.
 *
 * Works on an explicit length instead of relying on a NUL terminator, so
 * bytes that follow an embedded NUL in the buffer are processed as well.
 */
static void replaceUTF8Triplet(char *buf, size_t len, const char *from, const char *to)
{
    size_t i = 0;
    while (len >= 3 && i <= len - 3) {
        if (memcmp(buf + i, from, 3) == 0) {
            memcpy(buf + i, to, 3);
            i += 3;
        } else {
            i++;
        }
    }
}
#endif

/*
 * Converts `inLength` bytes of Shift_JIS (code page 932) text at `in` to a
 * newly allocated, NUL-terminated UTF-8 string.
 *
 * The back end is selected at compile time: CoreFoundation on Apple
 * platforms, the Win32 code-page API on Windows, and iconv elsewhere.
 *
 * Returns a buffer the caller must release with free(), or NULL when the
 * conversion cannot be performed (allocation failure, converter unavailable,
 * input rejected by the platform API, or a length the API cannot represent).
 * Empty input yields an empty string.
 */
char *convertShiftJISToUTF8(const char *in, size_t inLength)
{
#if defined(__APPLE__)
    CFStringRef inStr = CFStringCreateWithBytes(
        NULL,
        (const UInt8 *)in,
        inLength,
        kCFStringEncodingDOSJapanese,
        false
    );
    if (!inStr) return NULL;

    /* First pass with a NULL buffer only measures the UTF-8 length. */
    CFIndex outLength = 0;
    CFRange range = CFRangeMake(0, CFStringGetLength(inStr));
    CFStringGetBytes(inStr, range, kCFStringEncodingUTF8, '?', false, NULL, 0, &outLength);

    /* calloc zero-fills, so the extra byte is the NUL terminator. */
    char *out = calloc((size_t)outLength + 1, 1);
    if (out) {
        CFStringGetBytes(inStr, range, kCFStringEncodingUTF8, '?', false, (UInt8 *)out, outLength, NULL);
    }
    CFRelease(inStr);
    return out;
#elif defined(_WIN32)
    /* The Win32 conversion functions take their lengths as int. */
    if (inLength > 0x7fffffff) return NULL;

    /* MultiByteToWideChar fails on a zero-length input; return an empty
       string instead so the result matches the other back ends. */
    if (inLength == 0) return calloc(1, 1);

    int wideLength = MultiByteToWideChar(932, 0, in, (int)inLength, NULL, 0);
    if (wideLength <= 0) return NULL;

    LPWSTR inWideStr = malloc((size_t)wideLength * sizeof(WCHAR));
    if (!inWideStr) return NULL;

    char *out = NULL;
    wideLength = MultiByteToWideChar(932, 0, in, (int)inLength, inWideStr, wideLength);
    if (wideLength > 0) {
        int outLength = WideCharToMultiByte(CP_UTF8, 0, inWideStr, wideLength, NULL, 0, NULL, NULL);
        if (outLength > 0) {
            /* calloc zero-fills, so the extra byte is the NUL terminator. */
            out = calloc((size_t)outLength + 1, 1);
            if (out) {
                WideCharToMultiByte(CP_UTF8, 0, inWideStr, wideLength, out, outLength, NULL, NULL);
            }
        }
    }
    free(inWideStr);
    return out;
#else
    /* Budget three output bytes per input byte plus a terminator; refuse
       lengths for which that computation would wrap around. */
    if (inLength > ((size_t)-1 - 1) / 3) return NULL;
    size_t outCapacity = inLength * 3;

    char *out = malloc(outCapacity + 1);
    if (!out) return NULL;

    iconv_t cd = iconv_open("UTF-8", "CP932");
    if (cd == (iconv_t)-1) {
        free(out);
        return NULL;
    }

    /* iconv() takes a non-const input pointer but does not write through it. */
    char *src = (char *)in;
    char *dst = out;
    size_t inBytesLeft = inLength;
    /* The terminator byte is kept out of iconv's reach. */
    size_t outBytesLeft = outCapacity;

    while (iconv(cd, &src, &inBytesLeft, &dst, &outBytesLeft) == (size_t)-1) {
        /* Only invalid sequences are recoverable; anything else (e.g. a
           truncated trailing character) ends the conversion with the
           output produced so far. */
        if (errno != EILSEQ || inBytesLeft == 0 || outBytesLeft == 0) {
            break;
        }
        /* Skip the offending sequence as one double-byte character, but
           never step past the end of the input. */
        size_t skip = inBytesLeft < 2 ? inBytesLeft : 2;
        src += skip;
        inBytesLeft -= skip;
        *dst++ = '?';
        outBytesLeft--;
    }
    iconv_close(cd);

    *dst = '\0';
    size_t outLength = (size_t)(dst - out);

    /* U+301C, U+2016, U+2212 should be converted to U+FF5E, U+2225, U+FF0D */
    replaceUTF8Triplet(out, outLength, "\xe3\x80\x9c", "\xef\xbd\x9e");
    replaceUTF8Triplet(out, outLength, "\xe2\x80\x96", "\xe2\x88\xa5");
    replaceUTF8Triplet(out, outLength, "\xe2\x88\x92", "\xef\xbc\x8d");
    return out;
#endif
}
|