types.cc 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. // Copyright (C) 2003 Mooffie <mooffie@typo.co.il>
  2. //
  3. // This program is free software; you can redistribute it and/or modify
  4. // it under the terms of the GNU General Public License as published by
  5. // the Free Software Foundation; either version 2 of the License, or
  6. // (at your option) any later version.
  7. //
  8. // This program is distributed in the hope that it will be useful,
  9. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. // GNU General Public License for more details.
  12. //
  13. // You should have received a copy of the GNU General Public License
  14. // along with this program; if not, write to the Free Software
  15. // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
  16. #include <config.h>
  17. #ifdef HAVE_VASPRINTF
  18. # ifndef _GNU_SOURCE
  19. # define _GNU_SOURCE
  20. # endif
  21. #endif
  22. #include <string.h> // strlen
  23. #include <stdio.h> // vsnprintf, vasprintf
  24. #include <algorithm> // find
  25. #include "types.h"
  26. #include "converters.h" // guess_encoding
  27. #include "utf8.h"
  28. void unistring::init_from_utf8(const char *s, int len)
  29. {
  30. if (!s) {
  31. clear();
  32. } else {
  33. resize(len);
  34. int count = utf8_to_unicode(begin(), s, len);
  35. resize(count);
  36. }
  37. }
  38. void unistring::init_from_utf8(const char *s)
  39. {
  40. if (!s)
  41. clear();
  42. else
  43. init_from_utf8(s, strlen(s));
  44. }
  45. void unistring::init_from_latin1(const char *s)
  46. {
  47. clear();
  48. if (s)
  49. while (*s)
  50. push_back((unsigned char)*s++);
  51. }
  52. // init_from_filename() - filenames are supposed to be encoded in
  53. // UTF-8 nowadays, but this is not guaranteed. This method first
  54. // checks if it looks like UTF-8; if not, it assumes it's a
  55. // latin1 (ISO-8859-1) encoding.
  56. void unistring::init_from_filename(const char *filename)
  57. {
  58. const char *guess = guess_encoding(filename, strlen(filename));
  59. if (guess && STREQ(guess, "UTF-8"))
  60. init_from_utf8(filename);
  61. else
  62. init_from_latin1(filename);
  63. }
  64. int unistring::index(unichar ch) const
  65. {
  66. int idx = std::find(begin(), end(), ch) - begin();
  67. if (idx == len())
  68. idx = -1;
  69. return idx;
  70. }
  71. bool unistring::has_char(unichar ch) const
  72. {
  73. return index(ch) != -1;
  74. }
  75. int unistring::index(const unistring &sub, int from) const
  76. {
  77. if (from >= len())
  78. return -1;
  79. const unichar *pos = std::search(begin() + from, end(),
  80. sub.begin(), sub.end());
  81. if (pos != end())
  82. return pos - begin();
  83. else
  84. return -1;
  85. }
  86. // locale-independent toupper()
  87. unistring unistring::toupper_ascii() const
  88. {
  89. unistring ret = *this;
  90. for (size_type i = 0; i < size(); i++) {
  91. if (ret[i] >= 'a' && ret[i] <= 'z')
  92. ret[i] += 'A' - 'a';
  93. }
  94. return ret;
  95. }
  96. void u8string::init_from_unichars(const unichar *src, int len)
  97. {
  98. char *buf = new char[len * 6 + 1]; // max utf-8 sequence is 6 bytes.
  99. buf[ unicode_to_utf8(buf, src, len) ] = 0;
  100. *this = buf;
  101. delete buf;
  102. }
  103. void u8string::init_from_unichars(const unistring &str)
  104. {
  105. init_from_unichars(str.begin(), str.size());
  106. }
  107. int u8string::index(const char *s, int from) const
  108. {
  109. if (from >= len())
  110. return -1;
  111. const char *pos = std::search(&*(begin() + from), &*end(),
  112. s, s + strlen(s));
  113. if (pos != &*end())
  114. return pos - &*begin();
  115. else
  116. return -1;
  117. }
  118. inline bool is_ascii_ws(char ch)
  119. {
  120. return ch == ' ' || ch == '\t' || ch == '\n';
  121. }
  122. void u8string::inplace_trim()
  123. {
  124. while (size() && is_ascii_ws((*this)[0]))
  125. erase(begin(), begin()+1);
  126. while (size() && is_ascii_ws((*this)[this->size()-1]))
  127. erase(end()-1, end());
  128. }
  129. u8string u8string::trim() const
  130. {
  131. u8string ret = *this;
  132. ret.inplace_trim();
  133. return ret;
  134. }
  135. // locale-independent toupper()
  136. u8string u8string::toupper_ascii() const
  137. {
  138. u8string ret = *this;
  139. for (size_type i = 0; i < size(); i++) {
  140. if (ret[i] >= 'a' && ret[i] <= 'z')
  141. ret[i] += 'A' - 'a';
  142. }
  143. return ret;
  144. }
  145. u8string u8string::erase_char(char xch) const
  146. {
  147. u8string ret;
  148. for (size_type i = 0; i < size(); i++) {
  149. if ((*this)[i] != xch)
  150. ret += (*this)[i];
  151. }
  152. return ret;
  153. }
  154. void u8string::cformat(const char *fmt, ...)
  155. {
  156. va_list ap;
  157. va_start(ap, fmt);
  158. vcformat(fmt, ap);
  159. va_end(ap);
  160. }
  161. void u8string::vcformat(const char *fmt, va_list ap)
  162. {
  163. #ifdef HAVE_VASPRINTF
  164. char *buf;
  165. int result = vasprintf(&buf, fmt, ap);
  166. if (result != -1 && buf) {
  167. *this = buf;
  168. free(buf);
  169. } else {
  170. clear();
  171. }
  172. #else
  173. # define MAX_MSG_LEN 4096
  174. char buf[MAX_MSG_LEN+1];
  175. buf[MAX_MSG_LEN] = 0;
  176. # ifdef HAVE_VSNPRINTF
  177. vsnprintf(buf, MAX_MSG_LEN, fmt, ap);
  178. # else
  179. vsprintf(buf, fmt, ap);
  180. # endif
  181. *this = buf;
  182. # undef MAX_MSG_LEN
  183. #endif
  184. }