123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263 |
- /* Declarations for the string type.
- This file is part of khipu.
- khipu is free software: you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Lesser General Public License for more details.
- You should have received a copy of the GNU Lesser General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>. */
- #ifndef __KP_STR__
- #define __KP_STR__ 1
- #include "bvector.hpp"
- KP_DECLS_BEGIN
- /* Because strings are immutable and very prevalent as keys in hash-tables
- * and the like, it makes sense to actually store the hash code in the
- * structure (lazily initialized, of course). */
- struct string : public bvector
- {
- static const int code = typecode::STR;
- uint32_t len;
- uint32_t hval;
- static result<object> make (interpreter *interp, const void *s);
- static result<object> make (interpreter *interp, const void *s, uint32_t len);
- static string* alloc_raw (uint32_t nb);
- inline void local_init (const void *ptr);
- inline void local_init (const void *ptr, uint32_t nbytes);
- };
// Largest value a character may take, expressed as a UTF-32 codepoint.
constexpr uint32_t MAX_CHAR = 0x10FFFFu;
- #ifdef KP_ARCH_WIDE
- inline constexpr object charobj (uint32_t ch)
- {
- return (ptrtype ((object)(uint32_t)ch, typecode::CHAR));
- }
- inline constexpr bool char_p (object obj)
- {
- return (itype (obj) == typecode::CHAR);
- }
- inline constexpr uint32_t as_char (object obj)
- {
- return ((uint32_t)(obj & 0xffffffffu));
- }
- #else
- inline constexpr object charobj (uint32_t ch)
- {
- return ((ch << 3) | 1);
- }
- inline constexpr uint32_t as_char (object obj)
- {
- return ((uint32_t)(obj >> 3));
- }
- inline constexpr bool char_p (object obj)
- {
- return ((obj & 3) == 1 && as_char (obj) <= MAX_CHAR);
- }
- #endif
- inline string* as_str (object obj)
- {
- return ((string *)unmask (obj));
- }
- #ifdef KP_ARCH_WIDE
- inline constexpr bool str_p (object obj)
- {
- return (itype (obj) == typecode::STR);
- }
- #else
- inline bool str_p (object obj)
- {
- return (varobj_p (obj) && as_varobj(obj)->vo_type == typecode::STR);
- }
- #endif
- inline uint32_t
- len_s (object obj)
- {
- return (as_str(obj)->len);
- }
/* Copy a short run of N bytes (1 <= N <= 4) from SRCP into DSTP.
 * Any other value of N copies nothing. The return value points just past
 * the last byte written, i.e. DSTP + N, or DSTP itself when nothing was
 * copied. */
inline char* fscpy (void *dstp, const void *srcp, int n)
{
  char *out = static_cast<char *> (dstp);
  const char *in = static_cast<const char *> (srcp);

  // Only lengths in [1, 4] are handled; everything else is a no-op.
  if (n < 1 || n > 4)
    return (out);

  for (int i = 0; i < n; ++i)
    out[i] = in[i];

  return (out + n);
}
- inline const char*
- str_cdata (object str)
- {
- return ((const char *)as_str(str)->data);
- }
- // Allocate a string with room for NBYTES bytes.
- KP_EXPORT result<object> alloc_str (interpreter *interp, uint32_t nbytes);
- // Index a string.
- KP_EXPORT result<object> get_s (interpreter *interp,
- object str, object idx, object dfl);
- // Get the subsequence of a string.
- KP_EXPORT result<object> subseq_s (interpreter *interp,
- object str, object i1, object i2);
- // Find a string or character.
- KP_EXPORT result<object> find_s (interpreter *interp, object obj, object key,
- object start, object end, object test);
- // Write a string to a stream.
- KP_EXPORT result<int64_t> write_s (interpreter *interp,
- stream *strm, object obj, io_info& info);
- // Write a character to a stream.
- KP_EXPORT result<int64_t> write_c (interpreter *interp,
- stream *strm, object obj, io_info& info);
- // Serialize a string in a stream.
- KP_EXPORT result<int64_t> pack_s (interpreter *interp,
- stream *strm, object obj, pack_info& info);
- // Serialize a character in a stream.
- KP_EXPORT result<int64_t> pack_c (interpreter *interp,
- stream *strm, object obj, pack_info& info);
- // Deserialize a string from a stream.
- KP_EXPORT result<object> unpack_s (interpreter *interp,
- stream *strm, pack_info& info, bool save);
- // Deserialize a character from a stream.
- KP_EXPORT result<object> unpack_c (interpreter *interp,
- stream *strm, pack_info& info, bool save);
- // Concatenate strings STR1 and STR2.
- KP_EXPORT result<object> add_ss (interpreter *interp,
- object str1, object str2);
- // Concatenate ARGC strings in ARGV.
- KP_EXPORT result<object> concat_s (interpreter *interp, object *argv, int argc);
- // Add a character to a string.
- KP_EXPORT result<object> add_sc (interpreter *interp, object str, object chr);
- // Add a string to a character.
- KP_EXPORT result<object> add_cs (interpreter *interp, object chr, object str);
- // Add 2 characters together.
- KP_EXPORT result<object> add_cc (interpreter *interp, object ch1, object ch2);
- // Multiply a character with an integer.
- KP_EXPORT result<object> mul_ic (interpreter *interp, object ival, object ch);
- // Multiply a string with an integer.
- KP_EXPORT result<object> mul_is (interpreter *interp, object ival, object str);
- // Compute the hashcode of a string.
- KP_EXPORT uint32_t hash_s (interpreter *interp, object obj);
- // Reverse a string.
- KP_EXPORT result<object> reverse_s (interpreter *interp, object obj);
- // Iterator interface for strings.
- KP_EXPORT result<object> iter_s (interpreter *interp,
- object obj, object token, bool adv);
- // Get the last character of a string.
- KP_EXPORT result<object> last_s (interpreter *interp, object obj);
- // Skip table for UTF-8 code points.
- extern const uint8_t UTF8_SKIP[256];
- // Return the byte offset for the IDX'th codepoint in string S.
- KP_EXPORT uint32_t stridx (const string *s, uint32_t idx);
- // Return the string index for offset OFF in string S.
- KP_EXPORT uint32_t stroff (const string *s, uint32_t off);
- // Get the lower bound of bytes that may be advanced for [S .. S + N)
- KP_EXPORT uint32_t utf8min (const char *s, uint32_t n);
- // Compute the codepoint length in *LENP and return the byte length of S.
- KP_EXPORT uint32_t ustrlen (const void *s, uint32_t *lenp);
- // Return the codepoint length of [S .. S + BYTES).
- KP_EXPORT uint32_t ustrnlen (const void *s, uint32_t bytes);
- // Convert the UTF-8 encoded bytes in [S .. S + LEN) to a UTF-32 codepoint.
- KP_EXPORT uint32_t u8tou32 (const unsigned char *s, uint32_t len);
- // Convert the UTF-32 codepoint CH to a UTF-8 sequence in BUFP.
- KP_EXPORT uint32_t u32tou8 (unsigned char *bufp, uint32_t ch);
- // Create a stream from string STR.
- KP_EXPORT result<stream*> strstream (interpreter *interp,
- object str, int mode);
- // Get the string from stream STRM.
- KP_EXPORT result<object> sstream_get (interpreter *interp, stream *strm);
- // Format the arguments in ARGV[1 .. ARGC) according to ARGV[0].
- KP_EXPORT result<object> p_fmt_str (interpreter *interp,
- object *argv, int argc);
- // Return the readable representation of character CH.
- KP_EXPORT const char* chobj_repr (uint32_t ch);
- // Local initialization methods.
- void string::local_init (const void *ptr)
- {
- this->data = (unsigned char *)ptr;
- this->nbytes = ustrlen (ptr, &this->len);
- this->hval = 0;
- }
- void string::local_init (const void *ptr, uint32_t nbytes)
- {
- this->data = (unsigned char *)ptr;
- this->len = ustrnlen (ptr, this->nbytes = nbytes);
- this->hval = 0;
- }
- KP_DECLS_END
- #endif
|