StringUtil.cpp 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. // Copyright 2008 Dolphin Emulator Project
  2. // Licensed under GPLv2+
  3. // Refer to the license.txt file included.
  4. #include <algorithm>
  5. #include <cstdarg>
  6. #include <cstddef>
  7. #include <cstdio>
  8. #include <cstdlib>
  9. #include <cstring>
  10. #include <iomanip>
  11. #include <istream>
  12. #include <limits.h>
  13. #include <string>
  14. #include <vector>
  15. #include "Common/CommonPaths.h"
  16. #include "Common/CommonTypes.h"
  17. #include "Common/StringUtil.h"
  18. #ifdef _WIN32
  19. #include <Windows.h>
  20. #else
  21. #include <iconv.h>
  22. #include <locale.h>
  23. #include <errno.h>
  24. #endif
  25. #if !defined(_WIN32) && !defined(ANDROID)
  26. static locale_t GetCLocale()
  27. {
  28. static locale_t c_locale = newlocale(LC_ALL_MASK, "C", nullptr);
  29. return c_locale;
  30. }
  31. #endif
  32. // faster than sscanf
  33. bool AsciiToHex(const std::string& _szValue, u32& result)
  34. {
  35. // Set errno to a good state.
  36. errno = 0;
  37. char *endptr = nullptr;
  38. const u32 value = strtoul(_szValue.c_str(), &endptr, 16);
  39. if (!endptr || *endptr)
  40. return false;
  41. if (errno == ERANGE)
  42. return false;
  43. result = value;
  44. return true;
  45. }
  46. bool CharArrayFromFormatV(char* out, int outsize, const char* format, va_list args)
  47. {
  48. int writtenCount;
  49. #ifdef _WIN32
  50. // You would think *printf are simple, right? Iterate on each character,
  51. // if it's a format specifier handle it properly, etc.
  52. //
  53. // Nooooo. Not according to the C standard.
  54. //
  55. // According to the C99 standard (7.19.6.1 "The fprintf function")
  56. // The format shall be a multibyte character sequence
  57. //
  58. // Because some character encodings might have '%' signs in the middle of
  59. // a multibyte sequence (SJIS for example only specifies that the first
  60. // byte of a 2 byte sequence is "high", the second byte can be anything),
  61. // printf functions have to decode the multibyte sequences and try their
  62. // best to not screw up.
  63. //
  64. // Unfortunately, on Windows, the locale for most languages is not UTF-8
  65. // as we would need. Notably, for zh_TW, Windows chooses EUC-CN as the
  66. // locale, and completely fails when trying to decode UTF-8 as EUC-CN.
  67. //
  68. // On the other hand, the fix is simple: because we use UTF-8, no such
  69. // multibyte handling is required as we can simply assume that no '%' char
  70. // will be present in the middle of a multibyte sequence.
  71. //
  72. // This is why we lookup an ANSI (cp1252) locale here and use _vsnprintf_l.
  73. static _locale_t c_locale = nullptr;
  74. if (!c_locale)
  75. c_locale = _create_locale(LC_ALL, ".1252");
  76. writtenCount = _vsnprintf_l(out, outsize, format, c_locale, args);
  77. #else
  78. #if !defined(ANDROID)
  79. locale_t previousLocale = uselocale(GetCLocale());
  80. #endif
  81. writtenCount = vsnprintf(out, outsize, format, args);
  82. #if !defined(ANDROID)
  83. uselocale(previousLocale);
  84. #endif
  85. #endif
  86. if (writtenCount > 0 && writtenCount < outsize)
  87. {
  88. out[writtenCount] = '\0';
  89. return true;
  90. }
  91. else
  92. {
  93. out[outsize - 1] = '\0';
  94. return false;
  95. }
  96. }
  97. std::string StringFromFormat(const char* format, ...)
  98. {
  99. va_list args;
  100. va_start(args, format);
  101. std::string res = StringFromFormatV(format, args);
  102. va_end(args);
  103. return std::move(res);
  104. }
  105. std::string StringFromFormatV(const char* format, va_list args)
  106. {
  107. char *buf = nullptr;
  108. #ifdef _WIN32
  109. int required = _vscprintf(format, args);
  110. buf = new char[required + 1];
  111. CharArrayFromFormatV(buf, required + 1, format, args);
  112. std::string temp = buf;
  113. delete[] buf;
  114. #else
  115. #if !defined(ANDROID)
  116. locale_t previousLocale = uselocale(GetCLocale());
  117. #endif
  118. if (vasprintf(&buf, format, args) < 0)
  119. ERROR_LOG(COMMON, "Unable to allocate memory for string");
  120. #if !defined(ANDROID)
  121. uselocale(previousLocale);
  122. #endif
  123. std::string temp = buf;
  124. free(buf);
  125. #endif
  126. return std::move(temp);
  127. }
  128. // For Debugging. Read out an u8 array.
  129. std::string ArrayToString(const u8 *data, u32 size, int line_len, bool spaces)
  130. {
  131. std::ostringstream oss;
  132. oss << std::setfill('0') << std::hex;
  133. for (int line = 0; size; ++data, --size)
  134. {
  135. oss << std::setw(2) << (int)*data;
  136. if (line_len == ++line)
  137. {
  138. oss << '\n';
  139. line = 0;
  140. }
  141. else if (spaces)
  142. oss << ' ';
  143. }
  144. return oss.str();
  145. }
  146. // Turns " hej " into "hej". Also handles tabs.
  147. std::string StripSpaces(const std::string &str)
  148. {
  149. const size_t s = str.find_first_not_of(" \t\r\n");
  150. if (str.npos != s)
  151. return str.substr(s, str.find_last_not_of(" \t\r\n") - s + 1);
  152. else
  153. return "";
  154. }
  155. // "\"hello\"" is turned to "hello"
  156. // This one assumes that the string has already been space stripped in both
  157. // ends, as done by StripSpaces above, for example.
  158. std::string StripQuotes(const std::string& s)
  159. {
  160. if (s.size() && '\"' == s[0] && '\"' == *s.rbegin())
  161. return s.substr(1, s.size() - 2);
  162. else
  163. return s;
  164. }
  165. bool TryParse(const std::string &str, u32 *const output)
  166. {
  167. char *endptr = nullptr;
  168. // Reset errno to a value other than ERANGE
  169. errno = 0;
  170. unsigned long value = strtoul(str.c_str(), &endptr, 0);
  171. if (!endptr || *endptr)
  172. return false;
  173. if (errno == ERANGE)
  174. return false;
  175. #if ULONG_MAX > UINT_MAX
  176. if (value >= 0x100000000ull &&
  177. value <= 0xFFFFFFFF00000000ull)
  178. return false;
  179. #endif
  180. *output = static_cast<u32>(value);
  181. return true;
  182. }
  183. bool TryParse(const std::string &str, bool *const output)
  184. {
  185. if ("1" == str || !strcasecmp("true", str.c_str()))
  186. *output = true;
  187. else if ("0" == str || !strcasecmp("false", str.c_str()))
  188. *output = false;
  189. else
  190. return false;
  191. return true;
  192. }
  193. std::string StringFromInt(int value)
  194. {
  195. char temp[16];
  196. sprintf(temp, "%i", value);
  197. return temp;
  198. }
  199. std::string StringFromBool(bool value)
  200. {
  201. return value ? "True" : "False";
  202. }
  203. bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _pFilename, std::string* _pExtension)
  204. {
  205. if (full_path.empty())
  206. return false;
  207. size_t dir_end = full_path.find_last_of("/"
  208. // Windows needs the : included for something like just "C:" to be considered a directory
  209. #ifdef _WIN32
  210. ":"
  211. #endif
  212. );
  213. if (std::string::npos == dir_end)
  214. dir_end = 0;
  215. else
  216. dir_end += 1;
  217. size_t fname_end = full_path.rfind('.');
  218. if (fname_end < dir_end || std::string::npos == fname_end)
  219. fname_end = full_path.size();
  220. if (_pPath)
  221. *_pPath = full_path.substr(0, dir_end);
  222. if (_pFilename)
  223. *_pFilename = full_path.substr(dir_end, fname_end - dir_end);
  224. if (_pExtension)
  225. *_pExtension = full_path.substr(fname_end);
  226. return true;
  227. }
  228. void BuildCompleteFilename(std::string& _CompleteFilename, const std::string& _Path, const std::string& _Filename)
  229. {
  230. _CompleteFilename = _Path;
  231. // check for seperator
  232. if (DIR_SEP_CHR != *_CompleteFilename.rbegin())
  233. _CompleteFilename += DIR_SEP_CHR;
  234. // add the filename
  235. _CompleteFilename += _Filename;
  236. }
  237. void SplitString(const std::string& str, const char delim, std::vector<std::string>& output)
  238. {
  239. std::istringstream iss(str);
  240. output.resize(1);
  241. while (std::getline(iss, *output.rbegin(), delim))
  242. output.push_back("");
  243. output.pop_back();
  244. }
  245. std::string TabsToSpaces(int tab_size, const std::string &in)
  246. {
  247. const std::string spaces(tab_size, ' ');
  248. std::string out(in);
  249. size_t i = 0;
  250. while (out.npos != (i = out.find('\t')))
  251. out.replace(i, 1, spaces);
  252. return out;
  253. }
  254. std::string ReplaceAll(std::string result, const std::string& src, const std::string& dest)
  255. {
  256. size_t pos = 0;
  257. if (src == dest)
  258. return result;
  259. while ((pos = result.find(src, pos)) != std::string::npos)
  260. {
  261. result.replace(pos, src.size(), dest);
  262. pos += dest.length();
  263. }
  264. return result;
  265. }
  266. #ifdef _WIN32
  267. std::string UTF16ToUTF8(const std::wstring& input)
  268. {
  269. auto const size = WideCharToMultiByte(CP_UTF8, 0, input.data(), (int)input.size(), nullptr, 0, nullptr, nullptr);
  270. std::string output;
  271. output.resize(size);
  272. if (size == 0 || size != WideCharToMultiByte(CP_UTF8, 0, input.data(), (int)input.size(), &output[0], (int)output.size(), nullptr, nullptr))
  273. {
  274. output.clear();
  275. }
  276. return output;
  277. }
  278. std::wstring CPToUTF16(u32 code_page, const std::string& input)
  279. {
  280. auto const size = MultiByteToWideChar(code_page, 0, input.data(), (int)input.size(), nullptr, 0);
  281. std::wstring output;
  282. output.resize(size);
  283. if (size == 0 || size != MultiByteToWideChar(code_page, 0, input.data(), (int)input.size(), &output[0], (int)output.size()))
  284. {
  285. output.clear();
  286. }
  287. return output;
  288. }
  289. std::wstring UTF8ToUTF16(const std::string& input)
  290. {
  291. return CPToUTF16(CP_UTF8, input);
  292. }
  293. std::string SHIFTJISToUTF8(const std::string& input)
  294. {
  295. return UTF16ToUTF8(CPToUTF16(932, input));
  296. }
  297. std::string CP1252ToUTF8(const std::string& input)
  298. {
  299. return UTF16ToUTF8(CPToUTF16(1252, input));
  300. }
  301. #else
  302. template <typename T>
  303. std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input)
  304. {
  305. std::string result;
  306. iconv_t const conv_desc = iconv_open("UTF-8", fromcode);
  307. if ((iconv_t)-1 == conv_desc)
  308. {
  309. ERROR_LOG(COMMON, "Iconv initialization failure [%s]: %s", fromcode, strerror(errno));
  310. }
  311. else
  312. {
  313. size_t const in_bytes = sizeof(T) * input.size();
  314. size_t const out_buffer_size = 4 * in_bytes;
  315. std::string out_buffer;
  316. out_buffer.resize(out_buffer_size);
  317. auto src_buffer = &input[0];
  318. size_t src_bytes = in_bytes;
  319. auto dst_buffer = &out_buffer[0];
  320. size_t dst_bytes = out_buffer.size();
  321. while (src_bytes != 0)
  322. {
  323. size_t const iconv_result = iconv(conv_desc, (char**)(&src_buffer), &src_bytes,
  324. &dst_buffer, &dst_bytes);
  325. if ((size_t)-1 == iconv_result)
  326. {
  327. if (EILSEQ == errno || EINVAL == errno)
  328. {
  329. // Try to skip the bad character
  330. if (src_bytes != 0)
  331. {
  332. --src_bytes;
  333. ++src_buffer;
  334. }
  335. }
  336. else
  337. {
  338. ERROR_LOG(COMMON, "iconv failure [%s]: %s", fromcode, strerror(errno));
  339. break;
  340. }
  341. }
  342. }
  343. out_buffer.resize(out_buffer_size - dst_bytes);
  344. out_buffer.swap(result);
  345. iconv_close(conv_desc);
  346. }
  347. return result;
  348. }
  349. std::string CP1252ToUTF8(const std::string& input)
  350. {
  351. //return CodeToUTF8("CP1252//TRANSLIT", input);
  352. //return CodeToUTF8("CP1252//IGNORE", input);
  353. return CodeToUTF8("CP1252", input);
  354. }
  355. std::string SHIFTJISToUTF8(const std::string& input)
  356. {
  357. //return CodeToUTF8("CP932", input);
  358. return CodeToUTF8("SJIS", input);
  359. }
  360. std::string UTF16ToUTF8(const std::wstring& input)
  361. {
  362. std::string result = CodeToUTF8("UTF-16LE", input);
  363. // TODO: why is this needed?
  364. result.erase(std::remove(result.begin(), result.end(), 0x00), result.end());
  365. return result;
  366. }
  367. #endif