AdCharFmt.h 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841
  1. //
  2. //////////////////////////////////////////////////////////////////////////////
  3. //
  4. // Copyright 2015 Autodesk, Inc. All rights reserved.
  5. //
  6. // Use of this software is subject to the terms of the Autodesk license
  7. // agreement provided at the time of installation or download, or which
  8. // otherwise accompanies this software in either electronic or hard copy form.
  9. //
  10. //////////////////////////////////////////////////////////////////////////////
  11. //
  12. // Name: AdCharFmt.h
  13. //
  14. // Description: Enums and utility functions for dealing with
  15. // various char formats (ansi, utf-8, etc)
  16. //
  17. //////////////////////////////////////////////////////////////////////////////
  18. #pragma once
  19. #include "adesk.h"
  20. #include "casuppress.h"
  21. #pragma warning(push, 4)
  // Select the assertion macro used throughout this header. Preference
  // order: ASSERT, then assert, then _ASSERTE. When none of them is
  // defined, AdCharFmt_Assert(x) expands to nothing.
  #if defined(ASSERT)
  #  define AdCharFmt_Assert ASSERT
  #elif defined(assert)
  #  define AdCharFmt_Assert assert
  #elif defined(_ASSERTE)
  #  define AdCharFmt_Assert _ASSERTE
  #else
  #  define AdCharFmt_Assert(x)
  #endif
  31. class AdCharFormatter
  32. {
  33. public:
  // Character encodings handled by the formatter. The numeric values
  // are consecutive and start at kUnknown == 0.
  enum {
      kUnknown = 0,
      kAnsi    = 1,
      // kAnsiCIF, should this be a separate type?
      kUtf8    = 2,
      kUtf16LE = 3,
      kUtf16BE = 4,
      kUtf32LE = 5,
      kUtf32BE = 6
  };
  // Miscellaneous constants:
  //   kCIFLen        - length in chars of a CIF escape  ("\U+xxxx")
  //   kMIFLen        - length in chars of a MIF escape  ("\M+nxxyy")
  //   kNoBestFitFlag - WideCharToMultiByte flag asking for exact matches
  //                    only; falls back to the literal 0x0400 when WINVER
  //                    is undefined or older than 0x0500.
  enum {
      kCIFLen = 7,
      kMIFLen = 8,
  #if defined(WINVER) && (WINVER >= 0x0500)
      kNoBestFitFlag = WC_NO_BEST_FIT_CHARS
  #else
      kNoBestFitFlag = 0x0400
  #endif
  };
  53. AdCharFormatter(unsigned nFormat, bool bUseCIF, bool bExpandLF) :
  54. mnFormat(nFormat), mbUseCIF(bUseCIF),
  55. mbExpandLF(bExpandLF)
  56. {
  57. // doesn't make sense to ask for CIF with non-ansi format
  58. AdCharFmt_Assert(this->mbUseCIF == false || this->mnFormat == kAnsi);
  59. }
  60. // by default, we assume ansi and we *do* expand lfs into cr-lf
  61. AdCharFormatter() : mnFormat(kAnsi), mbUseCIF(false),
  62. mbExpandLF(true) {}
  63. // This function assumes you are passing it the first four
  64. // bytes of the input file for examination.
  65. unsigned static lookupBOM(unsigned nBom)
  66. {
  67. if ((nBom & 0x00ffffff) == 0xbfbbef)
  68. return kUtf8;
  69. if ((nBom & 0x0000ffff) == 0xfeff)
  70. return kUtf16LE;
  71. if ((nBom & 0x0000ffff) == 0xfffe)
  72. return kUtf16BE;
  73. if (nBom == 0x0000feff)
  74. return kUtf32LE;
  75. if (nBom == 0xfffe0000)
  76. return kUtf32BE;
  77. return kUnknown;
  78. }
  79. // Writes the BOM for the given format into the dword and returns
  80. // its length.
  81. int static getBOM(unsigned &nBom, unsigned nFmt)
  82. {
  83. if (nFmt == kUtf8) {
  84. nBom = 0xbfbbef;
  85. return 3;
  86. }
  87. else if (nFmt == kUtf16LE) {
  88. nBom = 0xfeff;
  89. return 2;
  90. }
  91. else if (nFmt == kUtf16BE) {
  92. nBom = 0xfffe;
  93. return 2;
  94. }
  95. else if (nFmt == kUtf32LE) {
  96. nBom = 0x0000feff;
  97. return 4;
  98. }
  99. else if (nFmt == kUtf32BE) {
  100. nBom = 0xfffe0000;
  101. return 4;
  102. }
  103. else {
  104. AdCharFmt_Assert(nFmt == kUnknown || nFmt == kAnsi);
  105. return 0; // ansi or unknown
  106. }
  107. }
  // Writes the 7-character CIF escape "\U+xxxx" for wch into pDest,
  // where xxxx is the low 16 bits of wch as upper-case hex digits.
  // The caller must supply room for 7 elements; no null terminator
  // is appended.
  template <class ChType> static void putCIF(wchar_t wch, ChType *pDest)
  {
      static const char kHexDigits[] = "0123456789ABCDEF";
      pDest[0] = '\\';
      pDest[1] = 'U';
      pDest[2] = '+';
      // Most significant nibble first: positions 3..6.
      for (int nPos = 3, nShift = 12; nPos <= 6; nPos++, nShift -= 4)
          pDest[nPos] = (ChType)kHexDigits[(wch >> nShift) & 0xf];
  }
  122. // Non-static methods for getting and setting local state.
  123. //
  124. bool getUseCIF() const { return this->mbUseCIF; }
  125. bool getExpandLF() const { return this->mbExpandLF; }
  126. unsigned getFormat() const { return this->mnFormat; }
  127. bool setUseCIF(bool bUseCIF) {
  128. const bool bOldUseCIF = this->mbUseCIF;
  129. this->mbUseCIF = bUseCIF;
  130. return bOldUseCIF;
  131. }
  132. bool setExpandLF(bool bExpandLF) {
  133. const bool bOldExpandLF = this->mbExpandLF;
  134. this->mbExpandLF = bExpandLF;
  135. return bOldExpandLF;
  136. }
  137. unsigned setFormat(unsigned nFormat) {
  138. const unsigned nOldFormat = this->mnFormat;
  139. this->mnFormat = nFormat;
  140. return nOldFormat;
  141. }
  // Returns the number of wide characters in pStr, not counting the
  // terminating L'\0'. pStr must point to a null-terminated string.
  static unsigned wcsLength(const wchar_t * pStr)
  {
      const wchar_t *pEnd = pStr;
      while (*pEnd != L'\0')
          ++pEnd;
      return (unsigned)(pEnd - pStr);
  }
  152. // Static method
  153. // Requires dest buf is 2 chars for ansi, 4 for utf-8, 7 for ansi+CIF
  154. // Returns number of bytes put into buffer.
  155. static unsigned wcharToAnsiOrUtf8(wchar_t wch, char *pDestBuf,
  156. unsigned nDestBufSize,
  157. bool bToAnsi,
  158. bool bUseCIF, bool bExpandLF)
  159. {
  160. AdCharFmt_Assert(bToAnsi || !bUseCIF); // No CIF with utf-8
  161. AdCharFmt_Assert(nDestBufSize >= 2);
  162. if (nDestBufSize < 2)
  163. return 0;
  164. AdCharFmt_Assert(!bUseCIF || nDestBufSize >= 7); // CIF needs 7
  165. AdCharFmt_Assert(bToAnsi || nDestBufSize >= 4); // utf8 needs 4
  166. if (wch >= 0 && wch <= 0x7f) { // simple ascii
  167. if (wch == L'\n' && bExpandLF) {
  168. pDestBuf[0] = '\r'; // convert lf into cr-lf
  169. pDestBuf[1] = '\n';
  170. return 2;
  171. }
  172. pDestBuf[0] = (char)wch;
  173. AdCharFmt_Assert(pDestBuf[0] == wch);
  174. return 1;
  175. }
  176. BOOL bUsedDefaultChar = false;
  177. const unsigned nCvted = ::WideCharToMultiByte(
  178. // If ansi, then we go to current system code page
  179. bToAnsi ? CP_ACP : CP_UTF8,
  180. // If ansi and not using CIF, try to get best fit
  181. (bToAnsi && bUseCIF) ? kNoBestFitFlag : 0,
  182. &wch,
  183. 1, // a single input widechar
  184. pDestBuf,
  185. bToAnsi ? 2 : 4, // max output chars expected
  186. NULL, // no default char - win32's '?' is fine
  187. bToAnsi ? &bUsedDefaultChar : NULL);
  188. // conversion always succeeds, even on questionable widechar
  189. #ifdef _ADESK_MAC_
  190. // Well, the comment above just isn't true on the MAC, see DID 1326233.
  191. // Our extended chars in that bug get returned with nCvted == 0.
  192. // I couldn't find a proper fix down in WideCharToMultiByteImp()
  193. // so I'm handling it here for now, Mac only.
  194. if (nCvted == 0 && bToAnsi && bUseCIF) {
  195. putCIF<char>(wch, pDestBuf);
  196. return 7; // length of CIF string
  197. }
  198. #endif
  199. AdCharFmt_Assert(nCvted >= 1);
  200. AdCharFmt_Assert(nCvted <= 4);
  201. AdCharFmt_Assert(nCvted <= 2 || !bToAnsi);
  202. AdCharFmt_Assert(!bUsedDefaultChar || bToAnsi);
  203. if (bToAnsi && bUsedDefaultChar && bUseCIF) {
  204. // translation failed - we need to output CIF
  205. putCIF<char>(wch, pDestBuf);
  206. return 7; // length of CIF string
  207. }
  208. else
  209. return nCvted;
  210. }
  211. // static method to output utf-16
  212. static unsigned wcharToUtf16(wchar_t wch, char * pDestBuf,
  213. unsigned nDestBufSize,
  214. bool bLittleEndian, bool bExpandLF)
  215. {
  216. AdCharFmt_Assert(nDestBufSize >= 2);
  217. if (nDestBufSize < 2)
  218. return 0;
  219. unsigned short *pShortDest = (unsigned short *)pDestBuf;
  220. unsigned nBytes = 2;
  221. if (wch == L'\n' && bExpandLF) {
  222. AdCharFmt_Assert(nDestBufSize >= 4);
  223. // '\r' == 0x0d
  224. *pShortDest = bLittleEndian ? 0x000d : 0x0d00;
  225. nBytes += 2;
  226. pShortDest++;
  227. }
  228. if (bLittleEndian)
  229. *pShortDest = wch;
  230. else
  231. *pShortDest = ((wch >> 8) & 0xff) | ((wch << 8) & 0xff00);
  232. return nBytes;
  233. }
  234. // Non-static method. Uses local state info.
  235. // Returns number of bytes put into buffer. Returns 0 if error.
  236. unsigned wcharToBytes(wchar_t wch, char *pDestBuf,
  237. unsigned nDestBufSize) const
  238. {
  239. if (this->mnFormat == kAnsi || this->mnFormat == kUtf8)
  240. return wcharToAnsiOrUtf8(wch, pDestBuf, nDestBufSize,
  241. this->mnFormat == kAnsi,
  242. this->mbUseCIF, this->mbExpandLF);
  243. else if (this->mnFormat == kUtf16LE || this->mnFormat == kUtf16BE) {
  244. AdCharFmt_Assert(!this->mbUseCIF);
  245. return wcharToUtf16(wch, pDestBuf, nDestBufSize,
  246. this->mnFormat == kUtf16LE,
  247. this->mbExpandLF);
  248. }
  249. else {
  250. AdCharFmt_Assert(!this->mbUseCIF);
  251. AdCharFmt_Assert(nDestBufSize >= 4);
  252. AdCharFmt_Assert(this->mnFormat == kUtf32LE ||
  253. this->mnFormat == kUtf32BE);
  254. AdCharFmt_Assert(this->mnFormat != kUtf32LE); // not implemented!
  255. AdCharFmt_Assert(this->mnFormat != kUtf32BE); // not implemented!
  256. return 0;
  257. }
  258. }
  // Returns true when ch is a hexadecimal digit: 0..9, A..F or a..f.
  template <class ChType> static bool isHex(ChType ch)
  {
      if (ch >= '0' && ch <= '9')
          return true;
      if (ch >= 'A' && ch <= 'F')
          return true;
      return ch >= 'a' && ch <= 'f';
  }
  266. template <class ChType> static bool isHex(ChType ch, unsigned &uVal)
  267. {
  268. if (ch >= '0' && ch <= '9') {
  269. uVal = ch - '0';
  270. AdCharFmt_Assert(uVal <= 9);
  271. return true;
  272. }
  273. if (ch >= 'A' && ch <= 'F') {
  274. uVal = ch - 'A' + 10;
  275. AdCharFmt_Assert(uVal >= 10);
  276. AdCharFmt_Assert(uVal <= 15);
  277. return true;
  278. }
  279. if (ch >= 'a' && ch <= 'f') {
  280. uVal = ch - 'a' + 10;
  281. AdCharFmt_Assert(uVal >= 10);
  282. AdCharFmt_Assert(uVal <= 15);
  283. return true;
  284. }
  285. return false;
  286. }
  287. // Static method.
  288. // Returns true if pSrcBuf contains a CIF sequence, or false otherwise.
  289. // Assumes the string is null terminated
  290. template <class ChType> static bool isCIFString(const ChType *pSrcBuf)
  291. {
  292. AdCharFmt_Assert(pSrcBuf != NULL);
  293. // look for \U+xxxx
  294. if (pSrcBuf[0] == '\\' &&
  295. (pSrcBuf[1] == 'U' || pSrcBuf[1] == 'u') &&
  296. pSrcBuf[2] == '+' &&
  297. isHex<ChType>(pSrcBuf[3]) &&
  298. isHex<ChType>(pSrcBuf[4]) &&
  299. isHex<ChType>(pSrcBuf[5]) &&
  300. isHex<ChType>(pSrcBuf[6]))
  301. return true;
  302. return false;
  303. }
  304. // Takes a string length arg instead of assuming it's null terminated
  305. template <class ChType> static bool isCIFString(
  306. const ChType *pSrcBuf, unsigned nSrcBufSize)
  307. {
  308. AdCharFmt_Assert(pSrcBuf != NULL);
  309. AdCharFmt_Assert(nSrcBufSize > 0);
  310. if(nSrcBufSize < 7)
  311. return false;
  312. return isCIFString<ChType>(pSrcBuf);
  313. }
  314. // Static method. Parse a wide character from a CIF string.
  315. // Returns true if parsing is successful, or false otherwise.
  316. template <class ChType> static bool parseCIF(
  317. const ChType *pSrcBuf, wchar_t &wch)
  318. {
  319. AdCharFmt_Assert(pSrcBuf != NULL);
  320. if(pSrcBuf == NULL)
  321. return false;
  322. // look for "\U+"
  323. if (pSrcBuf[0] != '\\' || (pSrcBuf[1] != 'U' && pSrcBuf[1] != 'u') || pSrcBuf[2] != '+')
  324. return false;
  325. // look for 4 hex digits
  326. unsigned uVal;
  327. if (!isHex<ChType>(pSrcBuf[3], uVal))
  328. return false;
  329. wch = (wchar_t)(uVal << 12);
  330. if (!isHex<ChType>(pSrcBuf[4], uVal))
  331. return false;
  332. wch |= (wchar_t)(uVal << 8);
  333. if (!isHex<ChType>(pSrcBuf[5], uVal))
  334. return false;
  335. wch |= (wchar_t)(uVal << 4);
  336. if (!isHex<ChType>(pSrcBuf[6], uVal))
  337. return false;
  338. wch |= (wchar_t)uVal;
  339. return true;
  340. }
  341. // Static method.
  342. // Returns true if pSrcBuf contains a MIF sequence, or false otherwise.
  343. // Assumes the string is null terminated
  344. template <class ChType> static bool isMIFString(const ChType *pSrcBuf)
  345. {
  346. AdCharFmt_Assert(pSrcBuf != NULL);
  347. // look for \M+nxxyy, where n is 1..5, x and y are hex digits
  348. if (pSrcBuf[0] == '\\' &&
  349. (pSrcBuf[1] == 'M' || pSrcBuf[1] == 'm') &&
  350. pSrcBuf[2] == '+' &&
  351. pSrcBuf[3] >= '1' &&
  352. pSrcBuf[3] <= '5' &&
  353. isHex<ChType>(pSrcBuf[4]) &&
  354. isHex<ChType>(pSrcBuf[5]) &&
  355. isHex<ChType>(pSrcBuf[6]) &&
  356. isHex<ChType>(pSrcBuf[7]))
  357. return true;
  358. return false;
  359. }
  360. // Takes a string length arg instead of assuming it's null terminated
  361. template <class ChType> static bool isMIFString(const ChType *pSrcBuf,
  362. unsigned nSrcBufSize)
  363. {
  364. AdCharFmt_Assert(pSrcBuf != NULL);
  365. AdCharFmt_Assert(nSrcBufSize > 0);
  366. if(nSrcBufSize < 8)
  367. return false;
  368. return isMIFString<ChType>(pSrcBuf);
  369. }
  // Maps a MIF code page index (1..5) to its Windows code page number.
  // Returns 0 for any other index.
  static unsigned winCodePageFromMIFIndex(unsigned nIdx)
  {
      switch (nIdx) {
      case 1:  return 932;
      case 2:  return 950;
      case 3:  return 949;
      case 4:  return 1361;
      case 5:  return 936;
      default: return 0;
      }
  }
  // Maps a Windows code page number to its MIF code page index (1..5).
  // Returns 0 when the code page has no MIF index.
  static unsigned char MIFIndexFromWinCodePage(unsigned nCodePage)
  {
      switch (nCodePage) {
      case 932:  return 1;
      case 950:  return 2;
      case 949:  return 3;
      case 1361: return 4;
      case 936:  return 5;
      default:   return 0;
      }
  }
  386. // Assumes output buffer is at least 8 chars. Returns true
  387. // if widechar converted to MIF, false otherwise.
  388. template <class ChType> static bool putMIF(wchar_t wch,
  389. ChType *pMbOut, unsigned nWinCodePage)
  390. {
  391. const unsigned char nMIFIndex = MIFIndexFromWinCodePage(nWinCodePage);
  392. if (nMIFIndex == 0)
  393. return false;
  394. AdCharFmt_Assert(nMIFIndex >= 1);
  395. AdCharFmt_Assert(nMIFIndex <= 5);
  396. char mbBuf[2];
  397. const int nMbChars = isNativeToCodePage(wch, nWinCodePage, mbBuf);
  398. if (nMbChars == 0)
  399. return false;
  400. if (nMbChars == 1) {
  401. mbBuf[1] = mbBuf[0]; // put '0' followed by the char
  402. mbBuf[0] = 0;
  403. }
  404. AdCharFmt_Assert(nMbChars <= 2);
  405. pMbOut[0] = '\\';
  406. pMbOut[1] = 'M';
  407. pMbOut[2] = '+';
  408. pMbOut[3] = '0' + nMIFIndex;
  409. unsigned nNibs[4];
  410. nNibs[0] = mbBuf[0] >> 4;
  411. nNibs[1] = mbBuf[0];
  412. nNibs[2] = mbBuf[1] >> 4;
  413. nNibs[3] = mbBuf[1];
  414. for (int i = 0; i < 4; i++) {
  415. const ChType n = (ChType)(nNibs[i] & 0xf); // Lowest 4 bits
  416. pMbOut[4 + i] = (n <= 9) ? ('0' + n) : ('A' - 10 + n);
  417. }
  418. return true;
  419. }
  // Maps the code page digit of a MIF sequence ('1'..'5') to the
  // Windows code page it stands for. Returns 0 when ch is not a valid
  // MIF code page digit.
  template <class ChType> static unsigned getMIFCodePage(ChType ch)
  {
      switch (ch) {
      case '1': return 932;   // Shift-JIS (Japanese)
      case '2': return 950;   // Big-5 (Traditional Chinese)
      case '3': return 949;   // KS C-5601-1987 (Wansung)
      case '4': return 1361;  // KS C-5601-1992 (Johab)
      case '5': return 936;   // GB 2312-80 (Simplified Chinese)
      default:  return 0;
      }
  }
  431. // Static method. Parse a wide character from a MIF string.
  432. // Returns true if parsing is successful, or false otherwise.
  433. template <class ChType> static bool parseMIF(const ChType *pSrcBuf,
  434. wchar_t &wch)
  435. {
  436. // Convert the given MIF sequence \M+xyyzz to wide character.
  437. // x represents the index into the code page array, and yy
  438. // represents the lower order byte in a DBCS character, where
  439. // zz represents the higher order byte in a DBCS character.
  440. AdCharFmt_Assert(pSrcBuf != NULL);
  441. if (pSrcBuf == NULL)
  442. return false;
  443. if (pSrcBuf[0] != '\\' || (pSrcBuf[1] != 'M' && pSrcBuf[1] != 'm') || pSrcBuf[2] != '+')
  444. return false;
  445. const unsigned nCodePage = getMIFCodePage<ChType>(pSrcBuf[3]);
  446. if (nCodePage == 0)
  447. return false; // didn't get a code page
  448. char mbBuf[2];
  449. unsigned uVal;
  450. if (!isHex<ChType>(pSrcBuf[4], uVal))
  451. return false;
  452. mbBuf[0] = (char)(uVal << 4); // leading byte's high nibble
  453. if (!isHex<ChType>(pSrcBuf[5], uVal))
  454. return false;
  455. mbBuf[0] |= (char)uVal; // leading byte's low nibble
  456. if (!isHex<ChType>(pSrcBuf[6], uVal))
  457. return false;
  458. mbBuf[1] = (char)(uVal << 4); // trailing byte's high nibble
  459. if (!isHex<ChType>(pSrcBuf[7], uVal))
  460. return false;
  461. mbBuf[1] |= (char)(uVal); // trailing byte's low nibble
  462. // If leading byte is zero, then it's a single byte ansi char
  463. // That is, "\M+n00xx" is treated as a single byte char.
  464. //
  465. // Note, if we get "\M+nxx00", then it will translate xx
  466. // as a single byte char (if possible) and then will
  467. // append a null terminator.
  468. //
  469. if (mbBuf[0] == 0) {
  470. mbBuf[0] = mbBuf[1];
  471. mbBuf[1] = 0;
  472. }
  473. // Convert the MBCS characters into a single wide character.
  474. const int nCvted = MultiByteToWideChar(
  475. nCodePage,
  476. MB_ERR_INVALID_CHARS, // Fail if we don't get a match
  477. mbBuf, // Source MBCS string.
  478. mbBuf[1] == 0 ? 1 : 2, // Characters in MBCS string.
  479. &wch, // Wide character string.
  480. 1); // Characters in wide string.
  481. // Exactly one wide character should be written, if any.
  482. AdCharFmt_Assert(nCvted >= 0);
  483. AdCharFmt_Assert(nCvted <= 1);
  484. if(nCvted != 1)
  485. return false; // couldn't translate the char?
  486. return true;
  487. }
  488. // Static method. Determine if a given wide character is native
  489. // to the given code page value.
  490. //
  491. // Note: if pChNative is non-null, it should be pointing to a
  492. // 2-byte char buff, in case the native char is double-byte.
  493. //
  494. // Return value:
  495. // This method returns number of bytes in the equivalent MBCS
  496. // character. If the return value is zero, wch is not native to
  497. // code page specified by nCodePage parameter.
  498. //
  499. static int isNativeToCodePage(wchar_t wch, // Input wide char.
  500. unsigned nCodePage, // Code page, can be CP_ACP
  501. char *pChNative = NULL) // Equivalent ansi char(s).
  502. {
  503. if (wch >= 0 && wch <= 0x7f)
  504. {
  505. if (pChNative != NULL) {
  506. *pChNative = (char)wch;
  507. AdCharFmt_Assert(*pChNative == (char)wch);
  508. }
  509. return 1;
  510. }
  511. BOOL bDefaultUsed = FALSE;
  512. char chNative[2];
  513. if (pChNative == NULL)
  514. pChNative = chNative;
  515. const int nCvted = WideCharToMultiByte(nCodePage,
  516. kNoBestFitFlag, // has to be an exact match
  517. &wch, 1, // one input widechar
  518. pChNative, 2, // up to two output ansi chars
  519. NULL, &bDefaultUsed);
  520. // Nothing is converted, or default character used.
  521. if (nCvted <= 0 || bDefaultUsed != FALSE)
  522. return 0; // Not native to code page nCodePage.
  523. return nCvted;
  524. }
  525. // Static method. Converts a given wide character string into
  526. // its ANSI/CIF equivalent.
  527. // This method returns the number of characters written to the
  528. // szDst buffer. If szDst is NULL, the return value indicates
  529. // the size of destination buffer required for the conversion,
  530. // in number of characters (including NULL character).
  531. static int wcharNonNativeToCIF(
  532. const wchar_t *szSrc, // Source wide string.
  533. wchar_t *szDst, // Destination Ansi CIF buffer.
  534. size_t cchDstSize, // Destination buffer size (chars).
  535. bool b2byteCharToCIF = false) //sometimes it is required to Convert non-single byte characters to CIF, no matter it is native or not.
  536. {
  537. #if defined(_ADESK_WINDOWS_)
  538. AdCharFmt_Assert(szSrc != NULL);
  539. AdCharFmt_Assert(szDst == NULL || cchDstSize > 0);
  540. if (szSrc == NULL || (szDst != NULL && cchDstSize <= 0))
  541. return 0; // Ignorable assert.
  542. unsigned int cchWritten = 0;
  543. wchar_t *lpReadPtr = ((wchar_t *) szSrc);
  544. while(*lpReadPtr)
  545. {
  546. // Check buffer size only szDst is provided.
  547. if (szDst && (cchWritten >= cchDstSize - 1))
  548. break; // Running out of space.
  549. const wchar_t wch = *lpReadPtr;
  550. int ret = isNativeToCodePage(wch, CP_ACP);
  551. if (!(b2byteCharToCIF && ret == 2) && ret > 0)
  552. {
  553. if (szDst != NULL)
  554. *szDst++ = wch;
  555. cchWritten++; // One character written.
  556. }
  557. else
  558. {
  559. if (szDst != NULL)
  560. {
  561. AdCharFmt_Assert(cchDstSize - cchWritten >= 8);
  562. if(cchDstSize - cchWritten < 8) // No more space?
  563. break;
  564. putCIF<wchar_t>(wch, szDst);
  565. szDst = szDst + 7;
  566. }
  567. cchWritten += 7; // CIF written.
  568. }
  569. lpReadPtr++; // Next input character.
  570. }
  571. if (szDst != NULL) // NULL-terminate output string.
  572. *szDst = L'\0';
  573. // Return the number of characters written out to szDst, or the
  574. // number of characters required for output if szDst was NULL.
  575. return cchWritten + 1; // Including NULL terminating character.
  576. #else
  577. STUB_WARNING(wcharNonNativeToCIF AdCharFmt.h);
  578. return 0;
  579. #endif
  580. }
  581. // Static helper method to convert existing CIF sequences in a
  582. // wide string back to their native equivalent.
  583. //
  584. // Parameters:
  585. // - szSrc: Source wide string that might contain CIF sequence
  586. // to be converted. This parameter cannot be NULL.
  587. // - szDst: Destination wide string buffer where output is to
  588. // be written. If szDst is NULL and cchDstSize is a
  589. // negative value, resulting string will be written
  590. // to szSrc instead. If szDst is NULL and cchDstSize
  591. // is zero, required buffer size in characters is
  592. // returned. If szDst is not NULL, cchDstSize must be
  593. // greater than zero.
  594. // - cchDstSize: Size of szDst buffer in number of wide chars.
  595. // See description for 'szDst' for more details.
  596. //
  597. // Return value:
  598. // - If szDst is not NULL and cchDstSize is greater than zero,
  599. // the return value indicates the number of wide characters
  600. // written to szDst.
  601. // - If szDst is NULL and cchDstSize is negative, the return
  602. // value is the number of wide characters written to szSrc.
  603. // - If szDst is NULL and cchDstSize is zero, the return value
  604. // is buffer size required (in characters) for conversion.
  605. //
  606. static int wcharFromCIFMIF(
  607. const wchar_t *szSrc, // Source input CIF string.
  608. wchar_t *szDst, // Destination wide buffer.
  609. int cchDstSize) // Destination buffer size (chars).
  610. {
  611. AdCharFmt_Assert(szSrc != NULL);
  612. AdCharFmt_Assert(szDst == NULL || cchDstSize > 0);
  613. AdCharFmt_Assert(szDst != NULL || cchDstSize <= 0);
  614. if (szSrc == NULL)
  615. return 0; // Ignorable assert.
  616. if (szDst != NULL && cchDstSize <= 0)
  617. return 0; // Ignorable assert.
  618. if (szDst == NULL && cchDstSize > 0)
  619. return 0; // Ignorable assert.
  620. int cchWritten = 0;
  621. const wchar_t *lpReadPtr = szSrc; // Read pointer.
  622. wchar_t *lpWritePtr = szDst; // Write pointer.
  623. wchar_t *lpWriteEnd = szDst + cchDstSize;
  624. if (szDst == NULL && cchDstSize < 0) // Write to szSrc.
  625. {
  626. lpWritePtr = const_cast<wchar_t *>(szSrc);
  627. lpWriteEnd = NULL; // result will always fit in dest
  628. }
  629. for (;;) {
  630. if (lpWritePtr && lpWriteEnd && lpWritePtr >= lpWriteEnd)
  631. break; // Ran out of writing space.
  632. const wchar_t wch = *lpReadPtr;
  633. wchar_t wchOut;
  634. if (parseCIF<wchar_t>(lpReadPtr, wchOut))
  635. lpReadPtr += 7; // Skip CIF sequence.
  636. else if (parseMIF<wchar_t>(lpReadPtr, wchOut))
  637. lpReadPtr += 8; // Skip MIF sequence.
  638. else {
  639. wchOut = wch;
  640. lpReadPtr++; // Next input character.
  641. }
  642. if (lpWritePtr != NULL)
  643. *lpWritePtr++ = wchOut;
  644. cchWritten++; // One more char output
  645. if (wch == 0)
  646. break; // end of input string
  647. }
  648. return cchWritten;
  649. }
  650. // Static method. Converts a given wide character string into
  651. // its ANSI/RTF equivalent. For an example, Unicode character
  652. // 0x65e5 (26085) will be formatted to ANSI string "\u26085?".
  653. //
  654. // Parameters:
  655. // - nCodePage: Code page value that the conversion is based on.
  656. // - szSrc: Source wide string to be converted.
  657. // - cchSrcLen: Source wide string length, in the number of
  658. // wide characters (excluding the NULL character).
  659. // If szSrc is a null terminated string, this
  660. // parameter can be -1.
  661. // - szDst: Destination ANSI/RTF output buffer. If this argument
  662. // is NULL, the required buffer size (in bytes) will be
  663. // returned through pcbDstSize argument (include NULL).
  664. // - pcbDstSize: Size of szDst buffer in terms of bytes. After
  665. // conversion is done, this value will be number
  666. // of bytes written out to szDst buffer including
  667. // the null-terminating character.
  668. // If szDst is NULL, caller can determine required
  669. // output buffer size from this parameter.
  670. //
  671. // Return value:
  672. // This method returns the number of wide characters from szSrc
  673. // that have been processed (not including the NULL terminating
  674. // character) and output to szDst.
  675. //
  676. static int wcharNonNativeToRTF(
  677. unsigned nCodePage, // Conversion code page.
  678. const wchar_t *szSrc, // Source wide string.
  679. int cchSrcLen, // Source length, in wide characters.
  680. char *szDst, // RTF (ANSI) output buffer.
  681. int *pcbDstSize) // Output buffer size, in bytes.
  682. {
  683. AdCharFmt_Assert(szSrc != NULL);
  684. AdCharFmt_Assert(pcbDstSize != NULL);
  685. AdCharFmt_Assert(cchSrcLen >= -1);
  686. AdCharFmt_Assert(cchSrcLen < 0x1000000); // 16M sanity check
  687. if (szSrc == NULL || pcbDstSize == NULL)
  688. return 0; // Ignorable assert.
  689. if (cchSrcLen < -1)
  690. return 0; // Ignorable assert.
  691. AdCharFmt_Assert(szDst == NULL || *pcbDstSize > 0);
  692. if (szDst != NULL && *pcbDstSize <= 0)
  693. return 0; // Ignorable assert.
  694. int cbWritten = 0;
  695. int cchProcessed = 0;
  696. for (;;) {
  697. if (cchSrcLen >= 0) { // if caller passed in the src len
  698. AdCharFmt_Assert(cchProcessed <= cchSrcLen);
  699. if (cchProcessed >= cchSrcLen)
  700. break; // we did that many chars, now we're done
  701. }
  702. char szAnsiBuf[2];
  703. const wchar_t wch = szSrc[cchProcessed];
  704. if (wch == L'\0') {
  705. // We don't expect caller to pass us a SrcLen that
  706. // exceeds the length of the string.
  707. if (cchSrcLen < 0)
  708. break; // got the null terminator, we're done
  709. }
  710. const int cbAnsiSize = isNativeToCodePage(
  711. wch, nCodePage, szAnsiBuf);
  712. AdCharFmt_Assert(cbAnsiSize >= 0);
  713. AdCharFmt_Assert(cbAnsiSize <= 2);
  714. if (cbAnsiSize > 0) { // Native to code page value.
  715. if (szDst) {
  716. if (cbWritten + cbAnsiSize >= *pcbDstSize)
  717. break; // Not enough space for writing.
  718. szDst[cbWritten] = szAnsiBuf[0];
  719. if (cbAnsiSize > 1)
  720. szDst[cbWritten+1] = szAnsiBuf[1];
  721. }
  722. cbWritten += cbAnsiSize;
  723. }
  724. else { // Non-native character, convert it!
  725. unsigned short nValue = wch;
  726. AdCharFmt_Assert(nValue >= 128);
  727. int nDigits = 3; // For "\,u,?" characters.
  728. if (nValue < 1000) nDigits += 3;
  729. else if (nValue < 10000) nDigits += 4;
  730. else nDigits += 5;
  731. if (szDst) {
  732. if (cbWritten + nDigits >= *pcbDstSize)
  733. break; // Not enough space for writing.
  734. szDst[cbWritten + 0] = '\\';
  735. szDst[cbWritten + 1] = 'u';
  736. int i = cbWritten + nDigits - 1;
  737. szDst[i] = '?'; // trailing char
  738. do {
  739. AdCharFmt_Assert(i > cbWritten + 1);
  740. i--;
  741. szDst[i] = (char)((nValue % 10) + '0');
  742. nValue /= 10;
  743. } while (nValue != 0);
  744. AdCharFmt_Assert(i == cbWritten + 2); // after the 'u'
  745. }
  746. cbWritten += nDigits;
  747. }
  748. cchProcessed++; // One wide character processed.
  749. }
  750. // Null-terminate the destination buffer only when the source
  751. // buffer length is not specified (i.e. it is null-terminated),
  752. // otherwise, the number of bytes written into szBuf can be
  753. // determined through the argument pcbDstSize.
  754. //
  755. *pcbDstSize = cbWritten; // Not including Null-terminator.
  756. if (cchSrcLen < 0) {
  757. if (szDst) szDst[cbWritten] = 0; // Null-terminate output.
  758. *pcbDstSize = cbWritten + 1; // Bytes written.
  759. }
  760. return cchProcessed; // Wide characters processed.
  761. }
  762. private:
  // Output format; holds one of the k* format constants (kAnsi, ...).
  unsigned mnFormat;
  // When set, wcharToBytes() asks for CIF ("\U+xxxx") escapes.
  bool mbUseCIF;
  // When set, wcharToBytes() asks for LF to be expanded into CR-LF.
  bool mbExpandLF;
  766. };
  767. #pragma warning(pop)