123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210 |
- // Copyright (C) 2003 Mooffie <mooffie@typo.co.il>
- //
- // This program is free software; you can redistribute it and/or modify
- // it under the terms of the GNU General Public License as published by
- // the Free Software Foundation; either version 2 of the License, or
- // (at your option) any later version.
- //
- // This program is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU General Public License for more details.
- //
- // You should have received a copy of the GNU General Public License
- // along with this program; if not, write to the Free Software
- // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
- #include <config.h>
- #ifdef HAVE_VASPRINTF
- # ifndef _GNU_SOURCE
- # define _GNU_SOURCE
- # endif
- #endif
- #include <string.h> // strlen
- #include <stdio.h> // vsnprintf, vasprintf
- #include <algorithm> // find
- #include "types.h"
- #include "converters.h" // guess_encoding
- #include "utf8.h"
- void unistring::init_from_utf8(const char *s, int len)
- {
- if (!s) {
- clear();
- } else {
- resize(len);
- int count = utf8_to_unicode(begin(), s, len);
- resize(count);
- }
- }
- void unistring::init_from_utf8(const char *s)
- {
- if (!s)
- clear();
- else
- init_from_utf8(s, strlen(s));
- }
- void unistring::init_from_latin1(const char *s)
- {
- clear();
- if (s)
- while (*s)
- push_back((unsigned char)*s++);
- }
- // init_from_filename() - filenames are supposed to be encoded in
- // UTF-8 nowadays, but this is not guaranteed. This method first
- // checks if it looks like UTF-8; if not, it assumes it's a
- // latin1 (ISO-8859-1) encoding.
- void unistring::init_from_filename(const char *filename)
- {
- const char *guess = guess_encoding(filename, strlen(filename));
- if (guess && STREQ(guess, "UTF-8"))
- init_from_utf8(filename);
- else
- init_from_latin1(filename);
- }
- int unistring::index(unichar ch) const
- {
- int idx = std::find(begin(), end(), ch) - begin();
- if (idx == len())
- idx = -1;
- return idx;
- }
- bool unistring::has_char(unichar ch) const
- {
- return index(ch) != -1;
- }
- int unistring::index(const unistring &sub, int from) const
- {
- if (from >= len())
- return -1;
- const unichar *pos = std::search(begin() + from, end(),
- sub.begin(), sub.end());
- if (pos != end())
- return pos - begin();
- else
- return -1;
- }
- // locale-independent toupper()
- unistring unistring::toupper_ascii() const
- {
- unistring ret = *this;
- for (size_type i = 0; i < size(); i++) {
- if (ret[i] >= 'a' && ret[i] <= 'z')
- ret[i] += 'A' - 'a';
- }
- return ret;
- }
- void u8string::init_from_unichars(const unichar *src, int len)
- {
- char *buf = new char[len * 6 + 1]; // max utf-8 sequence is 6 bytes.
- buf[ unicode_to_utf8(buf, src, len) ] = 0;
- *this = buf;
- delete buf;
- }
- void u8string::init_from_unichars(const unistring &str)
- {
- init_from_unichars(str.begin(), str.size());
- }
- int u8string::index(const char *s, int from) const
- {
- if (from >= len())
- return -1;
- const char *pos = std::search(&*(begin() + from), &*end(),
- s, s + strlen(s));
- if (pos != &*end())
- return pos - &*begin();
- else
- return -1;
- }
// Tell whether `ch` is one of the ASCII whitespace characters this file
// trims: space, horizontal tab or newline. Note that carriage return
// and the other isspace() characters are NOT included.
inline bool is_ascii_ws(char ch)
{
    switch (ch) {
    case ' ':
    case '\t':
    case '\n':
        return true;
    default:
        return false;
    }
}
- void u8string::inplace_trim()
- {
- while (size() && is_ascii_ws((*this)[0]))
- erase(begin(), begin()+1);
- while (size() && is_ascii_ws((*this)[this->size()-1]))
- erase(end()-1, end());
- }
- u8string u8string::trim() const
- {
- u8string ret = *this;
- ret.inplace_trim();
- return ret;
- }
- // locale-independent toupper()
- u8string u8string::toupper_ascii() const
- {
- u8string ret = *this;
- for (size_type i = 0; i < size(); i++) {
- if (ret[i] >= 'a' && ret[i] <= 'z')
- ret[i] += 'A' - 'a';
- }
- return ret;
- }
- u8string u8string::erase_char(char xch) const
- {
- u8string ret;
- for (size_type i = 0; i < size(); i++) {
- if ((*this)[i] != xch)
- ret += (*this)[i];
- }
- return ret;
- }
- void u8string::cformat(const char *fmt, ...)
- {
- va_list ap;
- va_start(ap, fmt);
- vcformat(fmt, ap);
- va_end(ap);
- }
- void u8string::vcformat(const char *fmt, va_list ap)
- {
- #ifdef HAVE_VASPRINTF
- char *buf;
- int result = vasprintf(&buf, fmt, ap);
- if (result != -1 && buf) {
- *this = buf;
- free(buf);
- } else {
- clear();
- }
- #else
- # define MAX_MSG_LEN 4096
- char buf[MAX_MSG_LEN+1];
- buf[MAX_MSG_LEN] = 0;
- # ifdef HAVE_VSNPRINTF
- vsnprintf(buf, MAX_MSG_LEN, fmt, ap);
- # else
- vsprintf(buf, fmt, ap);
- # endif
- *this = buf;
- # undef MAX_MSG_LEN
- #endif
- }
|