str.hpp 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. /* Declarations for the string type.
  2. This file is part of khipu.
  3. khipu is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU Lesser General Public License as published by
  5. the Free Software Foundation; either version 3 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <https://www.gnu.org/licenses/>. */
  13. #ifndef __KP_STR__
  14. #define __KP_STR__ 1
  15. #include "bvector.hpp"
  16. KP_DECLS_BEGIN
  17. /* Because strings are immutable and very prevalent as keys in hash-tables
  18. * and the like, it makes sense to actually store the hash code in the
  19. * structure (lazily initialized, of course). */
  20. struct string : public bvector
  21. {
  22. static const int code = typecode::STR;
  23. uint32_t len;
  24. uint32_t hval;
  25. static result<object> make (interpreter *interp, const void *s);
  26. static result<object> make (interpreter *interp, const void *s, uint32_t len);
  27. static string* alloc_raw (uint32_t nb);
  28. inline void local_init (const void *ptr);
  29. inline void local_init (const void *ptr, uint32_t nbytes);
  30. };
  31. // Max value for a character (as a UTF-32 codepoint).
  32. const uint32_t MAX_CHAR = 0x10ffff;
  33. #ifdef KP_ARCH_WIDE
  34. inline constexpr object charobj (uint32_t ch)
  35. {
  36. return (ptrtype ((object)(uint32_t)ch, typecode::CHAR));
  37. }
  38. inline constexpr bool char_p (object obj)
  39. {
  40. return (itype (obj) == typecode::CHAR);
  41. }
  42. inline constexpr uint32_t as_char (object obj)
  43. {
  44. return ((uint32_t)(obj & 0xffffffffu));
  45. }
  46. #else
  47. inline constexpr object charobj (uint32_t ch)
  48. {
  49. return ((ch << 3) | 1);
  50. }
  51. inline constexpr uint32_t as_char (object obj)
  52. {
  53. return ((uint32_t)(obj >> 3));
  54. }
  55. inline constexpr bool char_p (object obj)
  56. {
  57. return ((obj & 3) == 1 && as_char (obj) <= MAX_CHAR);
  58. }
  59. #endif
  60. inline string* as_str (object obj)
  61. {
  62. return ((string *)unmask (obj));
  63. }
  64. #ifdef KP_ARCH_WIDE
  65. inline constexpr bool str_p (object obj)
  66. {
  67. return (itype (obj) == typecode::STR);
  68. }
  69. #else
  70. inline bool str_p (object obj)
  71. {
  72. return (varobj_p (obj) && as_varobj(obj)->vo_type == typecode::STR);
  73. }
  74. #endif
  75. inline uint32_t
  76. len_s (object obj)
  77. {
  78. return (as_str(obj)->len);
  79. }
  80. inline char* fscpy (void *dstp, const void *srcp, int n)
  81. {
  82. char *retp = (char *)dstp;
  83. const char *sp = (const char *)srcp;
  84. switch (n)
  85. {
  86. case 4: *retp++ = *sp++;
  87. case 3: *retp++ = *sp++;
  88. case 2: *retp++ = *sp++;
  89. case 1: *retp++ = *sp++;
  90. }
  91. return (retp);
  92. }
  93. inline const char*
  94. str_cdata (object str)
  95. {
  96. return ((const char *)as_str(str)->data);
  97. }
  98. // Allocate a string with room for NBYTES bytes.
  99. KP_EXPORT result<object> alloc_str (interpreter *interp, uint32_t nbytes);
  100. // Index a string.
  101. KP_EXPORT result<object> get_s (interpreter *interp,
  102. object str, object idx, object dfl);
  103. // Get the subsequence of a string.
  104. KP_EXPORT result<object> subseq_s (interpreter *interp,
  105. object str, object i1, object i2);
  106. // Find a string or character.
  107. KP_EXPORT result<object> find_s (interpreter *interp, object obj, object key,
  108. object start, object end, object test);
  109. // Write a string to a stream.
  110. KP_EXPORT result<int64_t> write_s (interpreter *interp,
  111. stream *strm, object obj, io_info& info);
  112. // Write a character to a stream.
  113. KP_EXPORT result<int64_t> write_c (interpreter *interp,
  114. stream *strm, object obj, io_info& info);
  115. // Serialize a string in a stream.
  116. KP_EXPORT result<int64_t> pack_s (interpreter *interp,
  117. stream *strm, object obj, pack_info& info);
  118. // Serialize a character in a stream.
  119. KP_EXPORT result<int64_t> pack_c (interpreter *interp,
  120. stream *strm, object obj, pack_info& info);
  121. // Deserialize a string from a stream.
  122. KP_EXPORT result<object> unpack_s (interpreter *interp,
  123. stream *strm, pack_info& info, bool save);
  124. // Deserialize a character from a stream.
  125. KP_EXPORT result<object> unpack_c (interpreter *interp,
  126. stream *strm, pack_info& info, bool save);
  127. // Concatenate strings STR1 and STR2.
  128. KP_EXPORT result<object> add_ss (interpreter *interp,
  129. object str1, object str2);
  130. // Concatenate ARGC strings in ARGV.
  131. KP_EXPORT result<object> concat_s (interpreter *interp, object *argv, int argc);
  132. // Add a character to a string.
  133. KP_EXPORT result<object> add_sc (interpreter *interp, object str, object chr);
  134. // Add a string to a character.
  135. KP_EXPORT result<object> add_cs (interpreter *interp, object chr, object str);
  136. // Add 2 characters together.
  137. KP_EXPORT result<object> add_cc (interpreter *interp, object ch1, object ch2);
  138. // Multiply a character with an integer.
  139. KP_EXPORT result<object> mul_ic (interpreter *interp, object ival, object ch);
  140. // Multiply a string with an integer.
  141. KP_EXPORT result<object> mul_is (interpreter *interp, object ival, object str);
  142. // Compute the hashcode of a string.
  143. KP_EXPORT uint32_t hash_s (interpreter *interp, object obj);
  144. // Reverse a string.
  145. KP_EXPORT result<object> reverse_s (interpreter *interp, object obj);
  146. // Iterator interface for strings.
  147. KP_EXPORT result<object> iter_s (interpreter *interp,
  148. object obj, object token, bool adv);
  149. // Get the last character of a string.
  150. KP_EXPORT result<object> last_s (interpreter *interp, object obj);
  151. // Skip table for UTF-8 code points.
  152. extern const uint8_t UTF8_SKIP[256];
  153. // Return the byte offset for the IDX'th codepoint in string S.
  154. KP_EXPORT uint32_t stridx (const string *s, uint32_t idx);
  155. // Return the string index for offset OFF in string S.
  156. KP_EXPORT uint32_t stroff (const string *s, uint32_t off);
  157. // Get the lower bound of bytes that may be advanced for [S .. S + N)
  158. KP_EXPORT uint32_t utf8min (const char *s, uint32_t n);
  159. // Compute the codepoint length in *LENP and return the byte length of S.
  160. KP_EXPORT uint32_t ustrlen (const void *s, uint32_t *lenp);
  161. // Return the codepoint length of [S .. S + BYTES).
  162. KP_EXPORT uint32_t ustrnlen (const void *s, uint32_t bytes);
  163. // Convert the UTF-8 encoded bytes in [S .. S + LEN) to a UTF-32 codepoint.
  164. KP_EXPORT uint32_t u8tou32 (const unsigned char *s, uint32_t len);
  165. // Convert the UTF-32 codepoint CH to a UTF-8 sequence in BUFP.
  166. KP_EXPORT uint32_t u32tou8 (unsigned char *bufp, uint32_t ch);
  167. // Create a stream from string STR.
  168. KP_EXPORT result<stream*> strstream (interpreter *interp,
  169. object str, int mode);
  170. // Get the string from stream STRM.
  171. KP_EXPORT result<object> sstream_get (interpreter *interp, stream *strm);
  172. // Format the arguments in ARGV[1 .. ARGC) according to ARGV[0].
  173. KP_EXPORT result<object> p_fmt_str (interpreter *interp,
  174. object *argv, int argc);
  175. // Return the readable representation of character CH.
  176. KP_EXPORT const char* chobj_repr (uint32_t ch);
  177. // Local initialization methods.
  178. void string::local_init (const void *ptr)
  179. {
  180. this->data = (unsigned char *)ptr;
  181. this->nbytes = ustrlen (ptr, &this->len);
  182. this->hval = 0;
  183. }
  184. void string::local_init (const void *ptr, uint32_t nbytes)
  185. {
  186. this->data = (unsigned char *)ptr;
  187. this->len = ustrnlen (ptr, this->nbytes = nbytes);
  188. this->hval = 0;
  189. }
  190. KP_DECLS_END
  191. #endif