bvector.cpp 31 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189
  1. /* Definitions for the byte vector type.
  2. This file is part of khipu.
  3. khipu is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU Lesser General Public License as published by
  5. the Free Software Foundation; either version 3 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <https://www.gnu.org/licenses/>. */
  13. #include <climits>
  14. #include <cstdio>
  15. #include "khipu.hpp"
  16. KP_DECLS_BEGIN
  17. /* Byte vectors and strings are almost the same in our implementation (Since
  18. * strings are encoded in UTF-8). In order to reduce code bloat, we provide a
  19. * "generic" interface that receives one of these instances that encapsulate
  20. * the differences between the two. */
  21. struct bvargs
  22. {
  23. result<int> (*getidx_fct) (interpreter *, const bvector *,
  24. int, int *, uint32_t);
  25. result<bvector*> (*test) (interpreter *, object);
  26. int hsize;
  27. int rtype;
  28. int flags;
  29. bvector *empty;
  30. result<bvector*> alloc (interpreter *interp, uint32_t nbytes) const
  31. {
  32. if (!nbytes)
  33. {
  34. interp->alval = this->empty->as_obj ();
  35. return (this->empty);
  36. }
  37. auto eg = KP_TRY (evh_guard::make (interp));
  38. bvector *ret = (bvector *)alloch (nbytes + this->hsize + 1, this->rtype);
  39. ret->vo_full |= this->flags;
  40. ret->data = (unsigned char *)ret + this->hsize;
  41. ret->nbytes = nbytes;
  42. ret->data[nbytes] = '\0';
  43. interp->alval = ret->as_obj ();
  44. gc_register (interp, ret, nbytes + this->hsize + 1);
  45. return (ret);
  46. }
  47. result<int> getidx (interpreter *interp, const bvector *bvp,
  48. int pos, int *outp, uint32_t off = 0) const
  49. {
  50. return (this->getidx_fct (interp, bvp, pos, outp, off));
  51. }
  52. };
  53. static bvector empty_bvector;
  54. static string empty_string;
  55. // Upcast a byte vector and get the UTF-8 length.
  56. #define KPSLEN(ptr) ((string *)(ptr))->len
  57. /* Strings need their length set after the operation is done. We rely on
  58. * strings being subtypes of byte vectors (See above) for this.
  59. * Assumes 'args' is bound. */
  60. #define SETLEN(ptr, val) \
  61. if (args.rtype == typecode::STR) \
  62. KPSLEN(ptr) = (val), ((string *)(ptr))->hval = 0u; \
  63. else \
  64. (void)0
  65. bvector* bvector::alloc_raw (uint32_t cap)
  66. {
  67. bvector *retp = (bvector *)alloch (cap + sizeof (*retp), typecode::BVECTOR);
  68. retp->data = (unsigned char *)&retp[1];
  69. retp->nbytes = 0;
  70. return (retp);
  71. }
  72. string* string::alloc_raw (uint32_t cap)
  73. {
  74. string *retp = (string *)alloch (cap + sizeof (*retp), typecode::STR);
  75. retp->data = (unsigned char *)&retp[1];
  76. retp->nbytes = retp->len = 0;
  77. retp->hval = 0u;
  78. return (retp);
  79. }
  80. static result<bvector*>
  81. bytes_subseq (interpreter *interp, const bvector *src,
  82. object ix1, object ix2, const bvargs& args)
  83. {
  84. int i1, i2, o1, o2;
  85. if (kp_unlikely (!as<int> (ix1, i1) || !as<int> (ix2, i2)))
  86. return (interp->raise ("type-error", "index is not an integer"));
  87. i1 = KP_TRY (args.getidx (interp, src, i1, &o1, 1));
  88. i2 = KP_TRY (args.getidx (interp, src, i2, &o2, 1));
  89. if (i1 > i2)
  90. return (interp->raise ("index-error", "indices out of bounds"));
  91. bvector *ret = KP_TRY (args.alloc (interp, i2 - i1));
  92. memcpy (ret->data, src->data + i1, ret->nbytes);
  93. SETLEN (ret, o2 - o1);
  94. return (ret);
  95. }
  96. static result<bvector*>
  97. bytes_rpl (interpreter *interp, const bvector *src,
  98. object ix1, object ix2, object rpl, const bvargs& args)
  99. {
  100. int i1, i2, o1, o2;
  101. if (kp_unlikely (!as<int> (ix1, i1) || !as<int> (ix2, i2)))
  102. return (interp->raise ("type-error", "indexes must be integers"));
  103. i1 = KP_TRY (args.getidx (interp, src, i1, &o1));
  104. i2 = KP_TRY (args.getidx (interp, src, i2, &o2));
  105. if (kp_unlikely (i1 > i2))
  106. return (interp->raise ("index-error", "invalid indices"));
  107. bvector *rp = KP_TRY (args.test (interp, rpl));
  108. bvector *ret = KP_TRY (args.alloc (interp, src->nbytes +
  109. rp->nbytes - i2 + i1));
  110. // ret := src[0:i1] + rpl + src[:i2]
  111. memcpy (ret->data, src->data, i1);
  112. memcpy (ret->data + i1, rp->data, rp->nbytes);
  113. memcpy (ret->data + i1 + rp->nbytes,
  114. src->data + i2, src->nbytes - i2);
  115. SETLEN (ret, KPSLEN (src) + KPSLEN (rp) - o2 + o1);
  116. return (ret);
  117. }
  118. static result<bvector*>
  119. bytes_erase (interpreter *interp, const bvector *src,
  120. object ix1, object ix2, const bvargs& args)
  121. {
  122. int i1, i2, o1, o2;
  123. if (kp_unlikely (!as<int> (ix1, i1) || !as<int> (ix2, i2)))
  124. return (interp->raise ("type-error", "index is not an integer"));
  125. i1 = KP_TRY (args.getidx (interp, src, i1, &o1));
  126. i2 = KP_TRY (args.getidx (interp, src, i2, &o2));
  127. if (kp_unlikely (i1 > i2))
  128. return (interp->raise ("arg-error", "invalid indices"));
  129. bvector *ret = KP_TRY (args.alloc (interp, src->nbytes - i2 + i1));
  130. // ret := src[0:i1] + src[:i2]
  131. memcpy (ret->data, src->data, i1);
  132. memcpy (ret->data + i1, src->data + i2, src->nbytes - i2);
  133. SETLEN (ret, KPSLEN (src) - o2 + o1);
  134. return (ret);
  135. }
  136. static result<bvector*>
  137. bytes_insert (interpreter *interp, const bvector *src,
  138. object ix, object ins, const bvargs& args)
  139. {
  140. int oidx, idx;
  141. if (kp_unlikely (!as<int> (ix, idx)))
  142. return (interp->raise ("type-error", "index is not an integer"));
  143. idx = KP_TRY (args.getidx (interp, src, idx, &oidx));
  144. bvector *insp = KP_TRY (args.test (interp, ins));
  145. bvector *ret = KP_TRY (args.alloc (interp, src->nbytes + insp->nbytes));
  146. // ret := src[0:idx] + insp + src[idx:]
  147. memcpy (ret->data, src->data, idx);
  148. memcpy (ret->data + idx, insp->data, insp->nbytes);
  149. memcpy (ret->data + idx + insp->nbytes,
  150. src->data + idx, src->nbytes - idx);
  151. SETLEN (ret, KPSLEN (src) + KPSLEN (insp));
  152. return (ret);
  153. }
  154. static result<bvector*>
  155. bytes_add (interpreter *interp, const bvector *v1,
  156. const bvector *v2, const bvargs& args)
  157. {
  158. if (args.rtype != typecode::STR)
  159. ;
  160. else if (!v1->nbytes)
  161. return ((bvector *)v2);
  162. else if (!v2->nbytes)
  163. return ((bvector *)v1);
  164. bvector *ret = KP_TRY (args.alloc (interp, v1->nbytes + v2->nbytes));
  165. memcpy (ret->data, v1->data, v1->nbytes);
  166. memcpy (ret->data + v1->nbytes, v2->data, v2->nbytes);
  167. SETLEN (ret, KPSLEN (v1) + KPSLEN (v2));
  168. return (ret);
  169. }
  170. static result<bvector*>
  171. bytes_concat (interpreter *interp, object *argv,
  172. int argc, const bvargs& args)
  173. {
  174. if (argc == 1)
  175. return (as_bvector (*argv));
  176. uint32_t nbytes = 0, len = 0;
  177. for (int i = 0; i < argc; ++i)
  178. {
  179. const bvector *tmp = KP_TRY (args.test (interp, argv[i]));
  180. nbytes += tmp->nbytes;
  181. if (tmp->vo_type == typecode::STR)
  182. len += KPSLEN (tmp);
  183. }
  184. if (args.rtype == typecode::STR &&
  185. nbytes == as_bvector(*argv)->nbytes)
  186. return ((bvector *)as_bvector (*argv));
  187. bvector *ret = KP_TRY (args.alloc (interp, nbytes));
  188. unsigned char *dstp = ret->data;
  189. for (int i = 0; i < argc; ++i)
  190. {
  191. const bvector *tmp = as_bvector (argv[i]);
  192. memcpy (dstp, tmp->data, tmp->nbytes);
  193. dstp += tmp->nbytes;
  194. }
  195. SETLEN (ret, len);
  196. return (ret);
  197. }
  198. static result<bvector*>
  199. bytes_mul (interpreter *interp, const bvector *src,
  200. int n, const bvargs& args)
  201. {
  202. if (n <= 0)
  203. return (args.empty);
  204. else if (n == 1 && args.rtype == typecode::STR)
  205. return ((bvector *)src);
  206. bvector *ret = KP_TRY (args.alloc (interp, src->nbytes * n));
  207. if (src->nbytes == 1)
  208. memset (ret->data, *src->data, n);
  209. else
  210. {
  211. unsigned char *ptr = ret->data;
  212. for (int i = 0; i < n; ++i)
  213. ptr = (unsigned char *)memcpy (ptr, src->data, src->nbytes) +
  214. src->nbytes;
  215. }
  216. SETLEN (ret, KPSLEN (src) * n);
  217. return (ret);
  218. }
  219. static const unsigned char*
  220. bfind_2 (const unsigned char *in, uint32_t n, const unsigned char *b)
  221. {
  222. uint16_t in_h = (uint16_t)in[0] << 8 | in[1];
  223. uint16_t b_h = (uint16_t)b[0] << 8 | b[1];
  224. for (in += 2, n -= 2; n != 0; --n, in_h = (in_h << 8) | *in++)
  225. if (in_h == b_h)
  226. return (in - 2);
  227. return (in_h == b_h ? in - 2 : nullptr);
  228. }
  229. static const unsigned char*
  230. bfind_3 (const unsigned char *in, uint32_t n, const unsigned char *b)
  231. {
  232. uint32_t in_h = (uint32_t)in[0] << 24 | (uint32_t)in[1] << 16 |
  233. (uint32_t)in[2] << 8;
  234. uint32_t b_h = (uint32_t)b[0] << 24 | (uint32_t)b[1] << 16 |
  235. (uint32_t)b[0] << 8;
  236. for (in += 3, n -= 3; n != 0; --n, in_h = (in_h | *in++) << 8)
  237. if (in_h == b_h)
  238. return (in - 3);
  239. return (in_h == b_h ? in - 3 : nullptr);
  240. }
  241. static const unsigned char*
  242. bfind_4 (const unsigned char *in, uint32_t n, const unsigned char *b)
  243. {
  244. uint32_t in_h = (uint32_t)in[0] << 24 | (uint32_t)in[1] << 16 |
  245. (uint32_t)in[2] << 8 | in[3];
  246. uint32_t b_h = (uint32_t)b[0] << 24 | (uint32_t)b[1] << 16 |
  247. (uint32_t)b[2] << 8 | b[3];
  248. for (in += 4, n -= 4; n != 0; --n, in_h = (in_h << 8) | *in++)
  249. if (in_h == b_h)
  250. return (in - 4);
  251. return (in_h == b_h ? in - 4 : nullptr);
  252. }
  253. struct byteset
  254. {
  255. uintptr_t s[32 / sizeof (uintptr_t)];
  256. byteset ()
  257. {
  258. for (int i = 0; i < (int)KP_NELEM (this->s); ++i)
  259. this->s[i] = 0;
  260. }
  261. void set (uint32_t byte)
  262. {
  263. const size_t SHIFT = 8 * sizeof (*this->s);
  264. this->s[byte / SHIFT] |= (uintptr_t)1 << (byte % SHIFT);
  265. }
  266. bool tst (uint32_t byte)
  267. {
  268. const size_t SHIFT = 8 * sizeof (*this->s);
  269. return ((this->s[byte / SHIFT] & ((uintptr_t)1 << (byte % SHIFT))) != 0);
  270. }
  271. };
  272. static const unsigned char*
  273. bfind_impl (const unsigned char *hp, const unsigned char *zp,
  274. const unsigned char *np, uint32_t nbytes)
  275. {
  276. byteset bset;
  277. uint32_t shift[256];
  278. for (uint32_t i = 0; i < nbytes; ++i)
  279. {
  280. bset.set (np[i]);
  281. shift[np[i]] = i + 1;
  282. }
  283. // Start by computing the maximum length suffix.
  284. uint32_t ix = ~(uint32_t)0, jx = 0, kx = 1, px = 1;
  285. while (jx + kx < nbytes)
  286. {
  287. if (np[ix + kx] == np[jx + kx])
  288. {
  289. if (kx == px)
  290. {
  291. jx += px;
  292. kx = 1;
  293. }
  294. else
  295. ++kx;
  296. }
  297. else if (np[ix + kx] > np[jx + kx])
  298. {
  299. jx += kx;
  300. kx = 1;
  301. px = jx - ix;
  302. }
  303. else
  304. {
  305. ix = jx++;
  306. kx = px = 1;
  307. }
  308. }
  309. uint32_t ms = ix, pz = px;
  310. // Move on to the opposite way.
  311. for (ix = ~(uint32_t)0, jx = 0, jx = px = 1; jx + kx < nbytes; )
  312. {
  313. if (np[ix + kx] == np[jx + kx])
  314. {
  315. if (kx == px)
  316. {
  317. jx += px;
  318. kx = 1;
  319. }
  320. else
  321. ++kx;
  322. }
  323. else if (np[ix + kx] < np[jx + kx])
  324. {
  325. jx += kx;
  326. kx = 1;
  327. px = jx - ix;
  328. }
  329. else
  330. {
  331. ix = jx++;
  332. kx = px = 1;
  333. }
  334. }
  335. if (ix + 1 > ms + 1)
  336. ms = ix;
  337. else
  338. px = pz;
  339. // Test for a repeating needle.
  340. uint32_t mz = nbytes - px;
  341. if (memcmp (np, np + px, ms + 1) != 0)
  342. {
  343. mz = 0;
  344. px = max (ms, nbytes - ms - 1) + 1;
  345. }
  346. for (uint32_t mx = 0 ; ; )
  347. {
  348. if (zp - hp < (ptrdiff_t)nbytes)
  349. return (nullptr);
  350. else if (bset.tst (hp[nbytes - 1]))
  351. {
  352. kx = nbytes - shift[hp[nbytes - 1]];
  353. if (kx != 0)
  354. {
  355. if (kx < mx)
  356. kx = mx;
  357. hp += kx;
  358. mx = 0;
  359. continue;
  360. }
  361. }
  362. else
  363. {
  364. hp += nbytes;
  365. mx = 0;
  366. continue;
  367. }
  368. // Test for the right half.
  369. for (kx = max (ms + 1, mx); kx < nbytes && np[kx] == hp[kx]; ++kx) ;
  370. if (kx < nbytes)
  371. {
  372. hp += kx - ms;
  373. mx = 0;
  374. continue;
  375. }
  376. // Test for the left half.
  377. for (kx = ms + 1; kx > mx && np[kx - 1] == hp[kx - 1]; --kx) ;
  378. if (kx <= mx)
  379. return (hp);
  380. hp += px;
  381. mx = mz;
  382. }
  383. }
  384. static const unsigned char*
  385. bytes_find (const unsigned char *hp, uint32_t hlen,
  386. const unsigned char *np, uint32_t nlen)
  387. {
  388. if (!nlen)
  389. return (hp);
  390. else if (hlen < nlen)
  391. return (nullptr);
  392. auto hx = (const unsigned char *)memchr (hp, *np, hlen);
  393. if (!hx || nlen == 1)
  394. return (hx);
  395. hlen -= hp - hx;
  396. if (hlen < nlen)
  397. return (nullptr);
  398. else if (nlen == 2)
  399. return (bfind_2 (hx, hlen, np));
  400. else if (nlen == 3)
  401. return (bfind_3 (hx, hlen, np));
  402. else if (nlen == 4)
  403. return (bfind_4 (hx, hlen, np));
  404. else
  405. return (bfind_impl (hx, hx + hlen, np, nlen));
  406. }
  407. // Byte vector implementation.
  408. static result<int> getidx_b (interpreter *interp, const bvector *bp,
  409. int idx, int *, uint32_t off)
  410. {
  411. if ((idx < 0 && (idx += bp->nbytes) < 0) ||
  412. (uint32_t)idx >= bp->nbytes + off)
  413. return (interp->raise_oob (idx, bp->nbytes));
  414. return (idx);
  415. }
  416. static result<bvector*> test_b (interpreter *interp, object obj)
  417. {
  418. bvector *ret = as<bvector> (obj);
  419. if (!ret)
  420. return (interp->raise ("type-error", "value is not a bvector"));
  421. return (ret);
  422. }
  423. static const bvargs BV_ARGS =
  424. {
  425. getidx_b,
  426. test_b,
  427. sizeof (bvector),
  428. typecode::BVECTOR,
  429. 0,
  430. &empty_bvector
  431. };
  432. result<object> alloc_bvector (interpreter *interp, uint32_t nbytes)
  433. {
  434. KP_VTRY (BV_ARGS.alloc (interp, nbytes));
  435. return (interp->alval);
  436. }
  437. result<object> get_b (interpreter *interp, object bvec,
  438. object ix, object dfl)
  439. {
  440. int idx;
  441. if (kp_unlikely (dfl != UNBOUND))
  442. return (interp->raise_nargs (2, 2, 3));
  443. else if (kp_unlikely (!as<int> (ix, idx)))
  444. return (interp->raise ("type-error", "index is not an integer"));
  445. const bvector *bp = as_bvector (bvec);
  446. idx = KP_TRY (getidx_b (interp, bp, idx, 0, 0));
  447. kp_return (fixint (bp->data[idx]));
  448. }
  449. result<object> subseq_b (interpreter *interp, object src,
  450. object ix1, object ix2)
  451. {
  452. if (ix2 == UNBOUND)
  453. ix2 = fixint (as_bvector(src)->nbytes);
  454. auto ret = KP_TRY (bytes_subseq (interp, as_bvector (src),
  455. ix1, ix2, BV_ARGS));
  456. kp_return (ret->as_obj ());
  457. }
  458. result<object> rpl_b (interpreter *interp, object src,
  459. object ix1, object ix2, object rpl)
  460. {
  461. auto ret = KP_TRY (bytes_rpl (interp, as_bvector (src),
  462. ix1, ix2, rpl, BV_ARGS));
  463. kp_return (ret->as_obj ());
  464. }
  465. result<object> erase_b (interpreter *interp, object src,
  466. object ix1, object ix2)
  467. {
  468. auto ret = KP_TRY (bytes_erase (interp, as_bvector (src),
  469. ix1, ix2, BV_ARGS));
  470. kp_return (ret->as_obj ());
  471. }
  472. result<object> insert_b (interpreter *interp, object src,
  473. object idx, object ins)
  474. {
  475. auto ret = KP_TRY (bytes_insert (interp, as_bvector (src),
  476. idx, ins, BV_ARGS));
  477. kp_return (ret->as_obj ());
  478. }
  479. result<object> nput_b (interpreter *interp,
  480. object bvec, object ix, object byte)
  481. {
  482. bvector *bp = as_bvector (bvec);
  483. int idx, val;
  484. if (kp_unlikely (!as<int> (ix, idx)))
  485. return (interp->raise ("type-error", "index must be an integer"));
  486. else if (kp_unlikely (!as<int> (byte, val) || (unsigned int)val > 0xff))
  487. return (interp->raise ("arg-error",
  488. "value must be an integer in range [0, 255]"));
  489. else if (kp_unlikely (bp->flagged_p (FLAGS_CONST)))
  490. return (interp->raise_const ());
  491. idx = KP_TRY (getidx_b (interp, bp, idx, 0, 0));
  492. bp->data[idx] = (unsigned char)val;
  493. kp_return (byte);
  494. }
  495. result<object> add_bb (interpreter *interp, object v1, object v2)
  496. {
  497. auto ret = KP_TRY (bytes_add (interp, as_bvector (v1),
  498. as_bvector (v2), BV_ARGS));
  499. kp_return (ret->as_obj ());
  500. }
  501. result<object> concat_b (interpreter *interp, object *argv, int argc)
  502. {
  503. auto ret = KP_TRY (bytes_concat (interp, argv, argc, BV_ARGS));
  504. kp_return (ret->as_obj ());
  505. }
  506. result<object> mul_ib (interpreter *interp, object ix, object bv)
  507. {
  508. auto ret = KP_TRY (bytes_mul (interp, as_bvector (bv),
  509. as_int (ix), BV_ARGS));
  510. kp_return (ret->as_obj ());
  511. }
  512. int cmp_bb (interpreter *interp, object b1, object b2)
  513. {
  514. const bvector *v1 = as_bvector (b1), *v2 = as_bvector (b2);
  515. int len = min (v1->nbytes, v2->nbytes);
  516. int ret = memcmp (v1->data, v2->data, len);
  517. return (ret == 0 ? v1->nbytes - v2->nbytes : ret);
  518. }
  519. bool eq_bb (interpreter *interp, object b1, object b2)
  520. {
  521. const bvector *v1 = as_bvector (b1), *v2 = as_bvector (b2);
  522. return (v1->nbytes == v2->nbytes &&
  523. memcmp (v1->data, v2->data, v2->nbytes) == 0);
  524. }
  525. uint32_t hashbuf (const void *bp, uint32_t len)
  526. {
  527. uint32_t ret = len;
  528. for (uint32_t ix = 0; ix < len; ++ix)
  529. {
  530. ret = (ret << 9) | (ret >> 23);
  531. ret += ((const unsigned char *)bp)[ix];
  532. }
  533. return (ret == 0 ? ~(uint32_t)0 : ret);
  534. }
  535. result<object> copy_b (interpreter *interp, object obj, bool)
  536. {
  537. const bvector *bp = as_bvector (obj);
  538. object ret = KP_TRY (alloc_bvector (interp, bp->nbytes));
  539. memcpy (as_bvector(ret)->data, bp->data, bp->nbytes);
  540. kp_return (ret);
  541. }
  542. result<object> iter_b (interpreter *interp, object obj,
  543. object token, bool adv)
  544. {
  545. if (token == UNBOUND)
  546. kp_return (as_bvector(obj)->nbytes == 0 ? NIL : fixint (0));
  547. else if (!adv)
  548. return (get_b (interp, obj, token, UNBOUND));
  549. else if (!fixint_p (token))
  550. return (interp->raise ("type-error", "token must be an int"));
  551. int ix = as_int (token) + 1;
  552. kp_return ((uint32_t)ix >= as_bvector(obj)->nbytes ? NIL : fixint (ix));
  553. }
  554. uint32_t hash_b (interpreter *, object bv)
  555. {
  556. const bvector *bp = as_bvector (bv);
  557. return (hashbuf (bp->data, bp->nbytes));
  558. }
  559. result<object> nreverse_b (interpreter *interp, object obj)
  560. {
  561. bvector *bp = as_bvector (obj);
  562. if (kp_unlikely (bp->flagged_p (FLAGS_CONST)))
  563. return (interp->raise_const ());
  564. else if (bp->nbytes == 0)
  565. kp_return (obj);
  566. for (uint32_t i = 0, j = bp->nbytes - 1; i < j; ++i, --j)
  567. swap (bp->data[i], bp->data[j]);
  568. kp_return (obj);
  569. }
  570. result<object> reverse_b (interpreter *interp, object obj)
  571. {
  572. const bvector *bp = as_bvector (obj);
  573. if (bp->nbytes == 0)
  574. kp_return (obj);
  575. object ret = KP_TRY (alloc_bvector (interp, bp->nbytes));
  576. for (uint32_t i = 0, j = bp->nbytes - 1; i < bp->nbytes; ++i, --j)
  577. as_bvector(ret)->data[i] = bp->data[j];
  578. kp_return (ret);
  579. }
  580. result<object> find_b (interpreter *interp, object obj,
  581. object key, object start, object end, object test)
  582. {
  583. const bvector *src = as_bvector (obj);
  584. int istart = 0, iend = src->nbytes;
  585. if (start != UNBOUND)
  586. istart = KP_TRY (getidx_b (interp, src, as_int (start), nullptr, 0));
  587. if (end != UNBOUND)
  588. iend = KP_TRY (getidx_b (interp, src, as_int (end), nullptr, 0));
  589. if (istart > iend)
  590. kp_return (NIL);
  591. else if ((istart | iend) < 0 || (uint32_t)iend > src->nbytes)
  592. return (interp->raise ("index-error", "indices out of bounds"));
  593. auto data = src->data + istart;
  594. uint32_t nbytes = (uint32_t)iend - (data - src->data);
  595. if (!fixint_p (key))
  596. {
  597. if (test != UNBOUND)
  598. return (interp->raise ("arg-error", "test function is unsupported"));
  599. const bvector *kb = KP_TRY (test_b (interp, key));
  600. auto pos = bytes_find (data, nbytes, kb->data, kb->nbytes);
  601. kp_return (pos ? fixint (pos - src->data) : NIL);
  602. }
  603. else if (test != UNBOUND)
  604. {
  605. KP_VTRY (interp->growstk (2));
  606. for (uint32_t i = 0; i < nbytes; ++i)
  607. {
  608. *interp->stkend++ = test;
  609. *interp->stkend++ = fixint (data[i]);
  610. *interp->stkend++ = key;
  611. KP_VTRY (call_n (interp, 2));
  612. if (interp->retval != NIL)
  613. kp_return (fixint ((data + i) - src->data));
  614. }
  615. kp_return (NIL);
  616. }
  617. else
  618. {
  619. int iv = as_int (key);
  620. if ((unsigned int)iv > 0xff)
  621. kp_return (NIL);
  622. auto pos = (const unsigned char *)memchr (data, iv, nbytes);
  623. kp_return (pos ? fixint (pos - src->data) : NIL);
  624. }
  625. }
  626. result<int64_t> write_b (interpreter *interp, stream *strm,
  627. object obj, io_info& info)
  628. {
  629. const bvector *bp = as_bvector (obj);
  630. if (info.flags & io_info::FLG_RAW)
  631. return (strm->write (interp, bp->data, bp->nbytes));
  632. int64_t ret = 0;
  633. ret += KP_TRY (strm->write (interp, "#\"", 2));
  634. for (uint32_t i = 0; i < bp->nbytes; ++i)
  635. {
  636. int ch = bp->data[i];
  637. if (ch >= 32 && ch <= 126) // isprint (ch)
  638. { ret += KP_TRY (strm->putb (interp, ch)); }
  639. else
  640. {
  641. int p1 = ch / 16;
  642. int p2 = ch % 16;
  643. char s[] = { '\\', 'x',
  644. (char)(p1 < 10 ? '0' + p1 : 'a' + p1 - 10),
  645. (char)(p2 < 10 ? '0' + p2 : 'a' + p2 - 10) };
  646. ret += KP_TRY (strm->write (interp, s, sizeof (s)));
  647. }
  648. }
  649. ret += KP_TRY (strm->putb (interp, '"'));
  650. return (ret);
  651. }
  652. result<int64_t> pack_b (interpreter *interp, stream *strm,
  653. object obj, pack_info&)
  654. {
  655. const bvector *bvp = as_bvector (obj);
  656. int64_t ret = 0;
  657. if (kp_likely (bvp->nbytes <= 0xff))
  658. { ret += KP_TRY (strm->putb (interp, (unsigned char)bvp->nbytes)); }
  659. else
  660. {
  661. unsigned char buf[sizeof (bvp->nbytes) + 1] = { 0 };
  662. memcpy (&buf[1], &bvp->nbytes, sizeof (bvp->nbytes));
  663. ret += KP_TRY (strm->write (interp, buf, sizeof (buf)));
  664. }
  665. ret += KP_TRY (strm->write (interp, bvp->data, bvp->nbytes));
  666. return (ret);
  667. }
  668. result<int64_t> pack_s (interpreter *interp, stream *strm,
  669. object obj, pack_info&)
  670. {
  671. const string *sp = as_str (obj);
  672. int64_t ret = 0;
  673. if (kp_likely (sp->nbytes <= 0xff))
  674. {
  675. unsigned char buf[] = { (unsigned char)sp->nbytes,
  676. (unsigned char)sp->len };
  677. ret += KP_TRY (strm->write (interp, buf, 2));
  678. }
  679. else
  680. {
  681. unsigned char buf[sizeof (sp->nbytes) + sizeof (sp->len) + 1] = { 0 };
  682. memcpy (&buf[1], &sp->nbytes, sizeof (sp->nbytes));
  683. memcpy (&buf[1 + sizeof (sp->nbytes)], &sp->len, sizeof (sp->len));
  684. ret += KP_TRY (strm->write (interp, buf, sizeof (buf)));
  685. }
  686. ret += KP_TRY (strm->write (interp, sp->data, sp->nbytes));
  687. return (ret);
  688. }
  689. result<object> unpack_b (interpreter *interp, stream *strm,
  690. pack_info& info, bool save)
  691. {
  692. uint32_t nb = KP_TRY (strm->getb (interp));
  693. if ((int)nb < 0)
  694. return (info.error ("invalid bvector length"));
  695. else if (nb == 0)
  696. {
  697. bool rv = KP_TRY (strm->sread (interp, &nb));
  698. if (!rv)
  699. return (info.error ("invalid bvector length"));
  700. }
  701. KP_VTRY (alloc_bvector (interp, nb + 1));
  702. bvector *bvp = as_bvector (interp->alval);
  703. bvp->nbytes = KP_TRY (strm->read (interp, bvp->data, nb));
  704. if (bvp->nbytes != nb)
  705. return (info.error ("invalid bvector bytes read"));
  706. else if (save)
  707. KP_VTRY (info.add_mapping (interp, *info.offset, bvp->as_obj ()));
  708. bvp->data[bvp->nbytes] = 0;
  709. kp_return (bvp->as_obj ());
  710. }
  711. result<object> unpack_s (interpreter *interp, stream *strm,
  712. pack_info& info, bool save)
  713. {
  714. uint32_t len, nb = KP_TRY (strm->getb (interp));
  715. if ((int)nb < 0)
  716. return (info.error ("invalid string length"));
  717. else if (nb == 0)
  718. {
  719. bool rv = KP_TRY (strm->sread (interp, &nb));
  720. rv = rv && KP_TRY (strm->sread (interp, &len));
  721. if (!rv)
  722. return (info.error ("invalid string length"));
  723. }
  724. else
  725. {
  726. len = KP_TRY (strm->getb (interp));
  727. if ((int)len < 0)
  728. return (info.error ("invalid string length"));
  729. }
  730. KP_VTRY (alloc_str (interp, nb));
  731. string *sp = as_str (interp->alval);
  732. sp->nbytes = KP_TRY (strm->read (interp, sp->data, nb));
  733. if (sp->nbytes != nb)
  734. return (info.error ("invalid string characters read"));
  735. else if (save)
  736. KP_VTRY (info.add_mapping (interp, *info.offset, sp->as_obj ()));
  737. sp->len = len;
  738. sp->data[sp->nbytes] = '\0';
  739. kp_return (sp->as_obj ());
  740. }
  741. // String implementation.
  742. static result<int>
  743. getidx_s (interpreter *interp, const bvector *bp,
  744. int idx, int *p, uint32_t off)
  745. {
  746. const string *sp = (const string *)bp;
  747. if ((idx < 0 && (idx += sp->len) < 0) || (uint32_t)idx >= sp->len + off)
  748. return (interp->raise_oob (idx, sp->len));
  749. return (stridx (sp, *p = idx));
  750. }
  751. static result<bvector*>
  752. test_s (interpreter *interp, object obj)
  753. {
  754. string *ret = as<string> (obj);
  755. if (!ret)
  756. return (interp->raise ("type-error", "argument is not a string"));
  757. return (ret);
  758. }
  759. static const bvargs STR_ARGS =
  760. {
  761. getidx_s,
  762. test_s,
  763. sizeof (string),
  764. typecode::STR,
  765. FLAGS_CONST,
  766. &empty_string
  767. };
  768. result<object> alloc_str (interpreter *interp, uint32_t nbytes)
  769. {
  770. string *ret = (string *)KP_TRY (STR_ARGS.alloc (interp, nbytes));
  771. if (nbytes)
  772. ret->len = ret->hval = 0;
  773. return (interp->alval);
  774. }
  775. result<object> subseq_s (interpreter *interp, object src,
  776. object ix1, object ix2)
  777. {
  778. if (ix2 == UNBOUND)
  779. ix2 = fixint (len_s (src));
  780. auto ret = KP_TRY (bytes_subseq (interp, as_bvector (src),
  781. ix1, ix2, STR_ARGS));
  782. kp_return (ret->as_obj ());
  783. }
  784. result<object> add_ss (interpreter *interp, object s1, object s2)
  785. {
  786. auto ret = KP_TRY (bytes_add (interp, as_bvector (s1),
  787. as_bvector (s2), STR_ARGS));
  788. kp_return (ret->as_obj ());
  789. }
  790. result<object> concat_s (interpreter *interp, object *argv, int argc)
  791. {
  792. auto ret = KP_TRY (bytes_concat (interp, argv, argc, STR_ARGS));
  793. kp_return (ret->as_obj ());
  794. }
  795. static inline result<object>
  796. xadd_sc_cs (interpreter *interp, object s,
  797. object c, bool str_first_p)
  798. {
  799. unsigned char buf[6];
  800. uint32_t len = u32tou8 (buf, as_char (c));
  801. const string *src = as_str (s);
  802. string *ret = (string *)KP_TRY (STR_ARGS.alloc (interp, src->nbytes + len));
  803. if (str_first_p)
  804. fscpy ((char *)memcpy (ret->data, src->data, src->nbytes) +
  805. src->nbytes, buf, len);
  806. else
  807. memcpy (fscpy (ret->data, buf, len), src->data, src->nbytes);
  808. ret->len = src->len + 1;
  809. kp_return (interp->alval);
  810. }
  811. result<object> add_sc (interpreter *interp, object s, object c)
  812. {
  813. return (xadd_sc_cs (interp, s, c, true));
  814. }
  815. result<object> add_cs (interpreter *interp, object c, object s)
  816. {
  817. return (xadd_sc_cs (interp, s, c, false));
  818. }
  819. result<object> add_cc (interpreter *interp, object c1, object c2)
  820. {
  821. unsigned char b1[6], b2[6];
  822. uint32_t n1 = u32tou8 (b1, as_char (c1)), n2 = u32tou8 (b2, as_char (c2));
  823. string *ret = (string *)KP_TRY (STR_ARGS.alloc (interp, n1 + n2));
  824. fscpy (fscpy (ret->data, b1, n1), b2, n2);
  825. ret->len = 2;
  826. kp_return (interp->alval);
  827. }
  828. result<object> mul_ic (interpreter *interp, object ix, object ch)
  829. {
  830. unsigned char buf[6];
  831. bvector bv;
  832. bv.nbytes = u32tou8 (bv.data = buf, as_char (ch));
  833. auto ret = KP_TRY (bytes_mul (interp, &bv, as_int (ix), STR_ARGS));
  834. kp_return (ret->as_obj ());
  835. }
  836. result<object> mul_is (interpreter *interp, object ix, object s)
  837. {
  838. auto ret = KP_TRY (bytes_mul (interp, as_bvector (s), as_int (ix), STR_ARGS));
  839. kp_return (ret->as_obj ());
  840. }
  841. result<object> get_s (interpreter *interp, object s, object ix, object dfl)
  842. {
  843. int idx;
  844. if (kp_unlikely (dfl != UNBOUND))
  845. return (interp->raise_nargs (2, 2, 3));
  846. else if (kp_unlikely (!as<int> (ix, idx)))
  847. return (interp->raise ("type-error", "index is not an integer"));
  848. const string *sp = as_str (s);
  849. if (idx < 0)
  850. idx = sp->len + idx;
  851. if (kp_unlikely (idx < 0 || (uint32_t)idx >= sp->len))
  852. return (interp->raise_oob (idx, sp->len));
  853. const unsigned char *ptr = sp->data + stridx (sp, idx);
  854. kp_return (charobj (u8tou32 (ptr, UTF8_SKIP[*ptr])));
  855. }
  856. uint32_t hash_s (interpreter *, object s)
  857. {
  858. string *sp = as_str (s);
  859. if (sp->hval == 0)
  860. sp->hval = hashbuf (sp->data, sp->nbytes);
  861. return (sp->hval);
  862. }
  863. result<object> last_b (interpreter *interp, object obj)
  864. {
  865. const bvector *bvp = as_bvector (obj);
  866. if (!bvp->nbytes)
  867. return (interp->raise_oob (0, 0));
  868. kp_return (fixint (bvp->data[bvp->nbytes - 1]));
  869. }
  870. // Stream interface.
  871. struct bvstream_info
  872. {
  873. unsigned char *datap;
  874. uint32_t curpos;
  875. uint32_t nmax;
  876. uint32_t nbytes;
  877. };
  878. static result<int64_t>
  879. bv_read (interpreter *, stream& strm, void *dstp, uint64_t bytes)
  880. {
  881. auto dp = (bvstream_info *)strm.cookie;
  882. uint32_t rb = (uint32_t)(dp->nbytes - dp->curpos);
  883. rb = min ((uint64_t)rb, bytes);
  884. memcpy (dstp, dp->datap + dp->curpos, rb);
  885. dp->curpos += rb;
  886. return ((int64_t)rb);
  887. }
  888. static result<int64_t>
  889. bv_write (interpreter *interp, stream& strm, const void *src, uint64_t bytes)
  890. {
  891. auto dp = (bvstream_info *)strm.cookie;
  892. if (dp->curpos + bytes >= dp->nmax)
  893. {
  894. uint32_t nsz = upsize (dp->curpos + bytes + 1);
  895. dp->datap = (unsigned char *)xrealloc (dp->datap, dp->nmax = nsz);
  896. }
  897. memcpy (dp->datap + dp->curpos, src, bytes);
  898. if ((dp->curpos += bytes) > dp->nbytes)
  899. dp->nbytes = dp->curpos;
  900. return ((int64_t)bytes);
  901. }
  902. static result<bool>
  903. bv_seek (interpreter *interp, stream& strm, spos& pos, int whence)
  904. {
  905. auto dp = (bvstream_info *)strm.cookie;
  906. int64_t roff = pos.offset +
  907. (whence == SEEK_SET ? 0 : whence == SEEK_CUR ?
  908. dp->curpos : dp->nbytes);
  909. if (roff < 0)
  910. return (false);
  911. else if (roff > dp->nbytes)
  912. {
  913. if (!(strm.io_flags & STRM_WRITE) || roff > UINT32_MAX)
  914. return (false);
  915. else if (roff > dp->nmax)
  916. dp->datap = (unsigned char *)xrealloc (dp->datap,
  917. dp->nmax = upsize (roff + 1));
  918. memset (dp->datap + dp->nbytes, 0, roff - dp->nbytes);
  919. }
  920. if ((dp->curpos = (uint32_t)roff) > dp->nbytes)
  921. dp->nbytes = dp->curpos;
  922. pos.offset = roff;
  923. return (true);
  924. }
  925. static bool
  926. bv_close (interpreter *, stream& strm)
  927. {
  928. auto dp = (bvstream_info *)strm.cookie;
  929. xfree (dp->datap);
  930. xfree (dp);
  931. return (true);
  932. }
  933. static const stream::xops bv_ops =
  934. {
  935. bv_read,
  936. bv_write,
  937. bv_seek,
  938. bv_close
  939. };
  940. result<stream*> bvstream (interpreter *interp, object bv, int mode)
  941. {
  942. if (!(mode & STRM_RDWR))
  943. return (nullptr);
  944. auto dp = (bvstream_info *)xmalloc (sizeof (bvstream_info));
  945. dp->nbytes = upsize (as_bvector(bv)->nbytes + 1);
  946. dp->curpos = 0;
  947. dp->datap = (unsigned char *)xmalloc (dp->nbytes);
  948. memcpy (dp->datap, &as_bvector(bv)->data, as_bvector(bv)->nbytes);
  949. auto strm = stream::make (interp, mode, STRM_BUFSIZ, &bv_ops, dp);
  950. if (strm.error_p ())
  951. {
  952. xfree (dp->datap);
  953. xfree (dp);
  954. return (exception ());
  955. }
  956. return (deref (strm));
  957. }
  958. result<object> bvstream_get (interpreter *interp, stream *strm)
  959. {
  960. if (strm->io_flags & STRM_CLOSED)
  961. return (interp->raise ("arg-error", "stream has been closed"));
  962. bool rv = KP_TRY (strm->flush (interp));
  963. if (!rv)
  964. return (interp->raise ("io-error", "failed to flush stream"));
  965. auto dp = (bvstream_info *)strm->cookie;
  966. object ret = KP_TRY (alloc_bvector (interp, dp->nbytes));
  967. memcpy (as_bvector(ret)->data, dp->datap, dp->nbytes);
  968. return (interp->retval);
  969. }
  970. unsigned char* bvstream_data (stream *strm, uint32_t& size)
  971. {
  972. auto dp = (bvstream_info *)strm->cookie;
  973. size = dp->nbytes;
  974. return (dp->datap);
  975. }
  976. static int
  977. do_init_bvector (interpreter *)
  978. {
  979. static const unsigned char empty_data[] = { 0 };
  980. empty_bvector.vo_full = FLAGS_CONST;
  981. empty_bvector.vo_type = typecode::BVECTOR;
  982. empty_string.vo_full = FLAGS_CONST;
  983. empty_string.vo_type = typecode::STR;
  984. empty_string.hval = 1;
  985. empty_bvector.data = empty_string.data = (unsigned char *)empty_data;
  986. return (init_op::result_ok);
  987. }
  988. init_op init_bvector (do_init_bvector, "bvector");
  989. KP_DECLS_END