1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189 |
- /* Definitions for the byte vector type.
- This file is part of khipu.
- khipu is free software: you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Lesser General Public License for more details.
- You should have received a copy of the GNU Lesser General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>. */
- #include <climits>
- #include <cstdio>
- #include "khipu.hpp"
- KP_DECLS_BEGIN
- /* Byte vectors and strings are almost the same in our implementation (Since
- * strings are encoded in UTF-8). In order to reduce code bloat, we provide a
- * "generic" interface that receives one of these instances that encapsulate
- * the differences between the two. */
- struct bvargs
- {
- result<int> (*getidx_fct) (interpreter *, const bvector *,
- int, int *, uint32_t);
- result<bvector*> (*test) (interpreter *, object);
- int hsize;
- int rtype;
- int flags;
- bvector *empty;
- result<bvector*> alloc (interpreter *interp, uint32_t nbytes) const
- {
- if (!nbytes)
- {
- interp->alval = this->empty->as_obj ();
- return (this->empty);
- }
- auto eg = KP_TRY (evh_guard::make (interp));
- bvector *ret = (bvector *)alloch (nbytes + this->hsize + 1, this->rtype);
- ret->vo_full |= this->flags;
- ret->data = (unsigned char *)ret + this->hsize;
- ret->nbytes = nbytes;
- ret->data[nbytes] = '\0';
- interp->alval = ret->as_obj ();
- gc_register (interp, ret, nbytes + this->hsize + 1);
- return (ret);
- }
- result<int> getidx (interpreter *interp, const bvector *bvp,
- int pos, int *outp, uint32_t off = 0) const
- {
- return (this->getidx_fct (interp, bvp, pos, outp, off));
- }
- };
- static bvector empty_bvector;
- static string empty_string;
- // Upcast a byte vector and get the UTF-8 length.
- #define KPSLEN(ptr) ((string *)(ptr))->len
- /* Strings need their length set after the operation is done. We rely on
- * strings being subtypes of byte vectors (See above) for this.
- * Assumes 'args' is bound. */
- #define SETLEN(ptr, val) \
- if (args.rtype == typecode::STR) \
- KPSLEN(ptr) = (val), ((string *)(ptr))->hval = 0u; \
- else \
- (void)0
- bvector* bvector::alloc_raw (uint32_t cap)
- {
- bvector *retp = (bvector *)alloch (cap + sizeof (*retp), typecode::BVECTOR);
- retp->data = (unsigned char *)&retp[1];
- retp->nbytes = 0;
- return (retp);
- }
- string* string::alloc_raw (uint32_t cap)
- {
- string *retp = (string *)alloch (cap + sizeof (*retp), typecode::STR);
- retp->data = (unsigned char *)&retp[1];
- retp->nbytes = retp->len = 0;
- retp->hval = 0u;
- return (retp);
- }
- static result<bvector*>
- bytes_subseq (interpreter *interp, const bvector *src,
- object ix1, object ix2, const bvargs& args)
- {
- int i1, i2, o1, o2;
- if (kp_unlikely (!as<int> (ix1, i1) || !as<int> (ix2, i2)))
- return (interp->raise ("type-error", "index is not an integer"));
- i1 = KP_TRY (args.getidx (interp, src, i1, &o1, 1));
- i2 = KP_TRY (args.getidx (interp, src, i2, &o2, 1));
- if (i1 > i2)
- return (interp->raise ("index-error", "indices out of bounds"));
- bvector *ret = KP_TRY (args.alloc (interp, i2 - i1));
- memcpy (ret->data, src->data + i1, ret->nbytes);
- SETLEN (ret, o2 - o1);
- return (ret);
- }
- static result<bvector*>
- bytes_rpl (interpreter *interp, const bvector *src,
- object ix1, object ix2, object rpl, const bvargs& args)
- {
- int i1, i2, o1, o2;
- if (kp_unlikely (!as<int> (ix1, i1) || !as<int> (ix2, i2)))
- return (interp->raise ("type-error", "indexes must be integers"));
- i1 = KP_TRY (args.getidx (interp, src, i1, &o1));
- i2 = KP_TRY (args.getidx (interp, src, i2, &o2));
- if (kp_unlikely (i1 > i2))
- return (interp->raise ("index-error", "invalid indices"));
- bvector *rp = KP_TRY (args.test (interp, rpl));
- bvector *ret = KP_TRY (args.alloc (interp, src->nbytes +
- rp->nbytes - i2 + i1));
- // ret := src[0:i1] + rpl + src[:i2]
- memcpy (ret->data, src->data, i1);
- memcpy (ret->data + i1, rp->data, rp->nbytes);
- memcpy (ret->data + i1 + rp->nbytes,
- src->data + i2, src->nbytes - i2);
- SETLEN (ret, KPSLEN (src) + KPSLEN (rp) - o2 + o1);
- return (ret);
- }
- static result<bvector*>
- bytes_erase (interpreter *interp, const bvector *src,
- object ix1, object ix2, const bvargs& args)
- {
- int i1, i2, o1, o2;
- if (kp_unlikely (!as<int> (ix1, i1) || !as<int> (ix2, i2)))
- return (interp->raise ("type-error", "index is not an integer"));
- i1 = KP_TRY (args.getidx (interp, src, i1, &o1));
- i2 = KP_TRY (args.getidx (interp, src, i2, &o2));
- if (kp_unlikely (i1 > i2))
- return (interp->raise ("arg-error", "invalid indices"));
- bvector *ret = KP_TRY (args.alloc (interp, src->nbytes - i2 + i1));
- // ret := src[0:i1] + src[:i2]
- memcpy (ret->data, src->data, i1);
- memcpy (ret->data + i1, src->data + i2, src->nbytes - i2);
- SETLEN (ret, KPSLEN (src) - o2 + o1);
- return (ret);
- }
- static result<bvector*>
- bytes_insert (interpreter *interp, const bvector *src,
- object ix, object ins, const bvargs& args)
- {
- int oidx, idx;
- if (kp_unlikely (!as<int> (ix, idx)))
- return (interp->raise ("type-error", "index is not an integer"));
- idx = KP_TRY (args.getidx (interp, src, idx, &oidx));
- bvector *insp = KP_TRY (args.test (interp, ins));
- bvector *ret = KP_TRY (args.alloc (interp, src->nbytes + insp->nbytes));
- // ret := src[0:idx] + insp + src[idx:]
- memcpy (ret->data, src->data, idx);
- memcpy (ret->data + idx, insp->data, insp->nbytes);
- memcpy (ret->data + idx + insp->nbytes,
- src->data + idx, src->nbytes - idx);
- SETLEN (ret, KPSLEN (src) + KPSLEN (insp));
- return (ret);
- }
- static result<bvector*>
- bytes_add (interpreter *interp, const bvector *v1,
- const bvector *v2, const bvargs& args)
- {
- if (args.rtype != typecode::STR)
- ;
- else if (!v1->nbytes)
- return ((bvector *)v2);
- else if (!v2->nbytes)
- return ((bvector *)v1);
- bvector *ret = KP_TRY (args.alloc (interp, v1->nbytes + v2->nbytes));
- memcpy (ret->data, v1->data, v1->nbytes);
- memcpy (ret->data + v1->nbytes, v2->data, v2->nbytes);
- SETLEN (ret, KPSLEN (v1) + KPSLEN (v2));
- return (ret);
- }
- static result<bvector*>
- bytes_concat (interpreter *interp, object *argv,
- int argc, const bvargs& args)
- {
- if (argc == 1)
- return (as_bvector (*argv));
- uint32_t nbytes = 0, len = 0;
- for (int i = 0; i < argc; ++i)
- {
- const bvector *tmp = KP_TRY (args.test (interp, argv[i]));
- nbytes += tmp->nbytes;
- if (tmp->vo_type == typecode::STR)
- len += KPSLEN (tmp);
- }
- if (args.rtype == typecode::STR &&
- nbytes == as_bvector(*argv)->nbytes)
- return ((bvector *)as_bvector (*argv));
- bvector *ret = KP_TRY (args.alloc (interp, nbytes));
- unsigned char *dstp = ret->data;
- for (int i = 0; i < argc; ++i)
- {
- const bvector *tmp = as_bvector (argv[i]);
- memcpy (dstp, tmp->data, tmp->nbytes);
- dstp += tmp->nbytes;
- }
- SETLEN (ret, len);
- return (ret);
- }
- static result<bvector*>
- bytes_mul (interpreter *interp, const bvector *src,
- int n, const bvargs& args)
- {
- if (n <= 0)
- return (args.empty);
- else if (n == 1 && args.rtype == typecode::STR)
- return ((bvector *)src);
- bvector *ret = KP_TRY (args.alloc (interp, src->nbytes * n));
- if (src->nbytes == 1)
- memset (ret->data, *src->data, n);
- else
- {
- unsigned char *ptr = ret->data;
- for (int i = 0; i < n; ++i)
- ptr = (unsigned char *)memcpy (ptr, src->data, src->nbytes) +
- src->nbytes;
- }
- SETLEN (ret, KPSLEN (src) * n);
- return (ret);
- }
- static const unsigned char*
- bfind_2 (const unsigned char *in, uint32_t n, const unsigned char *b)
- {
- uint16_t in_h = (uint16_t)in[0] << 8 | in[1];
- uint16_t b_h = (uint16_t)b[0] << 8 | b[1];
- for (in += 2, n -= 2; n != 0; --n, in_h = (in_h << 8) | *in++)
- if (in_h == b_h)
- return (in - 2);
- return (in_h == b_h ? in - 2 : nullptr);
- }
- static const unsigned char*
- bfind_3 (const unsigned char *in, uint32_t n, const unsigned char *b)
- {
- uint32_t in_h = (uint32_t)in[0] << 24 | (uint32_t)in[1] << 16 |
- (uint32_t)in[2] << 8;
- uint32_t b_h = (uint32_t)b[0] << 24 | (uint32_t)b[1] << 16 |
- (uint32_t)b[0] << 8;
- for (in += 3, n -= 3; n != 0; --n, in_h = (in_h | *in++) << 8)
- if (in_h == b_h)
- return (in - 3);
- return (in_h == b_h ? in - 3 : nullptr);
- }
- static const unsigned char*
- bfind_4 (const unsigned char *in, uint32_t n, const unsigned char *b)
- {
- uint32_t in_h = (uint32_t)in[0] << 24 | (uint32_t)in[1] << 16 |
- (uint32_t)in[2] << 8 | in[3];
- uint32_t b_h = (uint32_t)b[0] << 24 | (uint32_t)b[1] << 16 |
- (uint32_t)b[2] << 8 | b[3];
- for (in += 4, n -= 4; n != 0; --n, in_h = (in_h << 8) | *in++)
- if (in_h == b_h)
- return (in - 4);
- return (in_h == b_h ? in - 4 : nullptr);
- }
- struct byteset
- {
- uintptr_t s[32 / sizeof (uintptr_t)];
- byteset ()
- {
- for (int i = 0; i < (int)KP_NELEM (this->s); ++i)
- this->s[i] = 0;
- }
- void set (uint32_t byte)
- {
- const size_t SHIFT = 8 * sizeof (*this->s);
- this->s[byte / SHIFT] |= (uintptr_t)1 << (byte % SHIFT);
- }
- bool tst (uint32_t byte)
- {
- const size_t SHIFT = 8 * sizeof (*this->s);
- return ((this->s[byte / SHIFT] & ((uintptr_t)1 << (byte % SHIFT))) != 0);
- }
- };
- static const unsigned char*
- bfind_impl (const unsigned char *hp, const unsigned char *zp,
- const unsigned char *np, uint32_t nbytes)
- {
- byteset bset;
- uint32_t shift[256];
- for (uint32_t i = 0; i < nbytes; ++i)
- {
- bset.set (np[i]);
- shift[np[i]] = i + 1;
- }
- // Start by computing the maximum length suffix.
- uint32_t ix = ~(uint32_t)0, jx = 0, kx = 1, px = 1;
- while (jx + kx < nbytes)
- {
- if (np[ix + kx] == np[jx + kx])
- {
- if (kx == px)
- {
- jx += px;
- kx = 1;
- }
- else
- ++kx;
- }
- else if (np[ix + kx] > np[jx + kx])
- {
- jx += kx;
- kx = 1;
- px = jx - ix;
- }
- else
- {
- ix = jx++;
- kx = px = 1;
- }
- }
- uint32_t ms = ix, pz = px;
- // Move on to the opposite way.
- for (ix = ~(uint32_t)0, jx = 0, jx = px = 1; jx + kx < nbytes; )
- {
- if (np[ix + kx] == np[jx + kx])
- {
- if (kx == px)
- {
- jx += px;
- kx = 1;
- }
- else
- ++kx;
- }
- else if (np[ix + kx] < np[jx + kx])
- {
- jx += kx;
- kx = 1;
- px = jx - ix;
- }
- else
- {
- ix = jx++;
- kx = px = 1;
- }
- }
- if (ix + 1 > ms + 1)
- ms = ix;
- else
- px = pz;
- // Test for a repeating needle.
- uint32_t mz = nbytes - px;
- if (memcmp (np, np + px, ms + 1) != 0)
- {
- mz = 0;
- px = max (ms, nbytes - ms - 1) + 1;
- }
- for (uint32_t mx = 0 ; ; )
- {
- if (zp - hp < (ptrdiff_t)nbytes)
- return (nullptr);
- else if (bset.tst (hp[nbytes - 1]))
- {
- kx = nbytes - shift[hp[nbytes - 1]];
- if (kx != 0)
- {
- if (kx < mx)
- kx = mx;
- hp += kx;
- mx = 0;
- continue;
- }
- }
- else
- {
- hp += nbytes;
- mx = 0;
- continue;
- }
- // Test for the right half.
- for (kx = max (ms + 1, mx); kx < nbytes && np[kx] == hp[kx]; ++kx) ;
- if (kx < nbytes)
- {
- hp += kx - ms;
- mx = 0;
- continue;
- }
- // Test for the left half.
- for (kx = ms + 1; kx > mx && np[kx - 1] == hp[kx - 1]; --kx) ;
- if (kx <= mx)
- return (hp);
- hp += px;
- mx = mz;
- }
- }
- static const unsigned char*
- bytes_find (const unsigned char *hp, uint32_t hlen,
- const unsigned char *np, uint32_t nlen)
- {
- if (!nlen)
- return (hp);
- else if (hlen < nlen)
- return (nullptr);
- auto hx = (const unsigned char *)memchr (hp, *np, hlen);
- if (!hx || nlen == 1)
- return (hx);
- hlen -= hp - hx;
- if (hlen < nlen)
- return (nullptr);
- else if (nlen == 2)
- return (bfind_2 (hx, hlen, np));
- else if (nlen == 3)
- return (bfind_3 (hx, hlen, np));
- else if (nlen == 4)
- return (bfind_4 (hx, hlen, np));
- else
- return (bfind_impl (hx, hx + hlen, np, nlen));
- }
- // Byte vector implementation.
- static result<int> getidx_b (interpreter *interp, const bvector *bp,
- int idx, int *, uint32_t off)
- {
- if ((idx < 0 && (idx += bp->nbytes) < 0) ||
- (uint32_t)idx >= bp->nbytes + off)
- return (interp->raise_oob (idx, bp->nbytes));
- return (idx);
- }
- static result<bvector*> test_b (interpreter *interp, object obj)
- {
- bvector *ret = as<bvector> (obj);
- if (!ret)
- return (interp->raise ("type-error", "value is not a bvector"));
- return (ret);
- }
- static const bvargs BV_ARGS =
- {
- getidx_b,
- test_b,
- sizeof (bvector),
- typecode::BVECTOR,
- 0,
- &empty_bvector
- };
- result<object> alloc_bvector (interpreter *interp, uint32_t nbytes)
- {
- KP_VTRY (BV_ARGS.alloc (interp, nbytes));
- return (interp->alval);
- }
- result<object> get_b (interpreter *interp, object bvec,
- object ix, object dfl)
- {
- int idx;
- if (kp_unlikely (dfl != UNBOUND))
- return (interp->raise_nargs (2, 2, 3));
- else if (kp_unlikely (!as<int> (ix, idx)))
- return (interp->raise ("type-error", "index is not an integer"));
- const bvector *bp = as_bvector (bvec);
- idx = KP_TRY (getidx_b (interp, bp, idx, 0, 0));
- kp_return (fixint (bp->data[idx]));
- }
- result<object> subseq_b (interpreter *interp, object src,
- object ix1, object ix2)
- {
- if (ix2 == UNBOUND)
- ix2 = fixint (as_bvector(src)->nbytes);
- auto ret = KP_TRY (bytes_subseq (interp, as_bvector (src),
- ix1, ix2, BV_ARGS));
- kp_return (ret->as_obj ());
- }
- result<object> rpl_b (interpreter *interp, object src,
- object ix1, object ix2, object rpl)
- {
- auto ret = KP_TRY (bytes_rpl (interp, as_bvector (src),
- ix1, ix2, rpl, BV_ARGS));
- kp_return (ret->as_obj ());
- }
- result<object> erase_b (interpreter *interp, object src,
- object ix1, object ix2)
- {
- auto ret = KP_TRY (bytes_erase (interp, as_bvector (src),
- ix1, ix2, BV_ARGS));
- kp_return (ret->as_obj ());
- }
- result<object> insert_b (interpreter *interp, object src,
- object idx, object ins)
- {
- auto ret = KP_TRY (bytes_insert (interp, as_bvector (src),
- idx, ins, BV_ARGS));
- kp_return (ret->as_obj ());
- }
- result<object> nput_b (interpreter *interp,
- object bvec, object ix, object byte)
- {
- bvector *bp = as_bvector (bvec);
- int idx, val;
- if (kp_unlikely (!as<int> (ix, idx)))
- return (interp->raise ("type-error", "index must be an integer"));
- else if (kp_unlikely (!as<int> (byte, val) || (unsigned int)val > 0xff))
- return (interp->raise ("arg-error",
- "value must be an integer in range [0, 255]"));
- else if (kp_unlikely (bp->flagged_p (FLAGS_CONST)))
- return (interp->raise_const ());
- idx = KP_TRY (getidx_b (interp, bp, idx, 0, 0));
- bp->data[idx] = (unsigned char)val;
- kp_return (byte);
- }
- result<object> add_bb (interpreter *interp, object v1, object v2)
- {
- auto ret = KP_TRY (bytes_add (interp, as_bvector (v1),
- as_bvector (v2), BV_ARGS));
- kp_return (ret->as_obj ());
- }
- result<object> concat_b (interpreter *interp, object *argv, int argc)
- {
- auto ret = KP_TRY (bytes_concat (interp, argv, argc, BV_ARGS));
- kp_return (ret->as_obj ());
- }
- result<object> mul_ib (interpreter *interp, object ix, object bv)
- {
- auto ret = KP_TRY (bytes_mul (interp, as_bvector (bv),
- as_int (ix), BV_ARGS));
- kp_return (ret->as_obj ());
- }
- int cmp_bb (interpreter *interp, object b1, object b2)
- {
- const bvector *v1 = as_bvector (b1), *v2 = as_bvector (b2);
- int len = min (v1->nbytes, v2->nbytes);
- int ret = memcmp (v1->data, v2->data, len);
- return (ret == 0 ? v1->nbytes - v2->nbytes : ret);
- }
- bool eq_bb (interpreter *interp, object b1, object b2)
- {
- const bvector *v1 = as_bvector (b1), *v2 = as_bvector (b2);
- return (v1->nbytes == v2->nbytes &&
- memcmp (v1->data, v2->data, v2->nbytes) == 0);
- }
- uint32_t hashbuf (const void *bp, uint32_t len)
- {
- uint32_t ret = len;
- for (uint32_t ix = 0; ix < len; ++ix)
- {
- ret = (ret << 9) | (ret >> 23);
- ret += ((const unsigned char *)bp)[ix];
- }
- return (ret == 0 ? ~(uint32_t)0 : ret);
- }
- result<object> copy_b (interpreter *interp, object obj, bool)
- {
- const bvector *bp = as_bvector (obj);
- object ret = KP_TRY (alloc_bvector (interp, bp->nbytes));
- memcpy (as_bvector(ret)->data, bp->data, bp->nbytes);
- kp_return (ret);
- }
- result<object> iter_b (interpreter *interp, object obj,
- object token, bool adv)
- {
- if (token == UNBOUND)
- kp_return (as_bvector(obj)->nbytes == 0 ? NIL : fixint (0));
- else if (!adv)
- return (get_b (interp, obj, token, UNBOUND));
- else if (!fixint_p (token))
- return (interp->raise ("type-error", "token must be an int"));
- int ix = as_int (token) + 1;
- kp_return ((uint32_t)ix >= as_bvector(obj)->nbytes ? NIL : fixint (ix));
- }
- uint32_t hash_b (interpreter *, object bv)
- {
- const bvector *bp = as_bvector (bv);
- return (hashbuf (bp->data, bp->nbytes));
- }
- result<object> nreverse_b (interpreter *interp, object obj)
- {
- bvector *bp = as_bvector (obj);
- if (kp_unlikely (bp->flagged_p (FLAGS_CONST)))
- return (interp->raise_const ());
- else if (bp->nbytes == 0)
- kp_return (obj);
- for (uint32_t i = 0, j = bp->nbytes - 1; i < j; ++i, --j)
- swap (bp->data[i], bp->data[j]);
- kp_return (obj);
- }
- result<object> reverse_b (interpreter *interp, object obj)
- {
- const bvector *bp = as_bvector (obj);
- if (bp->nbytes == 0)
- kp_return (obj);
- object ret = KP_TRY (alloc_bvector (interp, bp->nbytes));
- for (uint32_t i = 0, j = bp->nbytes - 1; i < bp->nbytes; ++i, --j)
- as_bvector(ret)->data[i] = bp->data[j];
- kp_return (ret);
- }
- result<object> find_b (interpreter *interp, object obj,
- object key, object start, object end, object test)
- {
- const bvector *src = as_bvector (obj);
- int istart = 0, iend = src->nbytes;
-
- if (start != UNBOUND)
- istart = KP_TRY (getidx_b (interp, src, as_int (start), nullptr, 0));
- if (end != UNBOUND)
- iend = KP_TRY (getidx_b (interp, src, as_int (end), nullptr, 0));
- if (istart > iend)
- kp_return (NIL);
- else if ((istart | iend) < 0 || (uint32_t)iend > src->nbytes)
- return (interp->raise ("index-error", "indices out of bounds"));
- auto data = src->data + istart;
- uint32_t nbytes = (uint32_t)iend - (data - src->data);
- if (!fixint_p (key))
- {
- if (test != UNBOUND)
- return (interp->raise ("arg-error", "test function is unsupported"));
- const bvector *kb = KP_TRY (test_b (interp, key));
- auto pos = bytes_find (data, nbytes, kb->data, kb->nbytes);
- kp_return (pos ? fixint (pos - src->data) : NIL);
- }
- else if (test != UNBOUND)
- {
- KP_VTRY (interp->growstk (2));
- for (uint32_t i = 0; i < nbytes; ++i)
- {
- *interp->stkend++ = test;
- *interp->stkend++ = fixint (data[i]);
- *interp->stkend++ = key;
- KP_VTRY (call_n (interp, 2));
- if (interp->retval != NIL)
- kp_return (fixint ((data + i) - src->data));
- }
- kp_return (NIL);
- }
- else
- {
- int iv = as_int (key);
- if ((unsigned int)iv > 0xff)
- kp_return (NIL);
- auto pos = (const unsigned char *)memchr (data, iv, nbytes);
- kp_return (pos ? fixint (pos - src->data) : NIL);
- }
- }
- result<int64_t> write_b (interpreter *interp, stream *strm,
- object obj, io_info& info)
- {
- const bvector *bp = as_bvector (obj);
- if (info.flags & io_info::FLG_RAW)
- return (strm->write (interp, bp->data, bp->nbytes));
- int64_t ret = 0;
- ret += KP_TRY (strm->write (interp, "#\"", 2));
- for (uint32_t i = 0; i < bp->nbytes; ++i)
- {
- int ch = bp->data[i];
- if (ch >= 32 && ch <= 126) // isprint (ch)
- { ret += KP_TRY (strm->putb (interp, ch)); }
- else
- {
- int p1 = ch / 16;
- int p2 = ch % 16;
- char s[] = { '\\', 'x',
- (char)(p1 < 10 ? '0' + p1 : 'a' + p1 - 10),
- (char)(p2 < 10 ? '0' + p2 : 'a' + p2 - 10) };
- ret += KP_TRY (strm->write (interp, s, sizeof (s)));
- }
- }
- ret += KP_TRY (strm->putb (interp, '"'));
- return (ret);
- }
- result<int64_t> pack_b (interpreter *interp, stream *strm,
- object obj, pack_info&)
- {
- const bvector *bvp = as_bvector (obj);
- int64_t ret = 0;
- if (kp_likely (bvp->nbytes <= 0xff))
- { ret += KP_TRY (strm->putb (interp, (unsigned char)bvp->nbytes)); }
- else
- {
- unsigned char buf[sizeof (bvp->nbytes) + 1] = { 0 };
- memcpy (&buf[1], &bvp->nbytes, sizeof (bvp->nbytes));
- ret += KP_TRY (strm->write (interp, buf, sizeof (buf)));
- }
- ret += KP_TRY (strm->write (interp, bvp->data, bvp->nbytes));
- return (ret);
- }
- result<int64_t> pack_s (interpreter *interp, stream *strm,
- object obj, pack_info&)
- {
- const string *sp = as_str (obj);
- int64_t ret = 0;
- if (kp_likely (sp->nbytes <= 0xff))
- {
- unsigned char buf[] = { (unsigned char)sp->nbytes,
- (unsigned char)sp->len };
- ret += KP_TRY (strm->write (interp, buf, 2));
- }
- else
- {
- unsigned char buf[sizeof (sp->nbytes) + sizeof (sp->len) + 1] = { 0 };
- memcpy (&buf[1], &sp->nbytes, sizeof (sp->nbytes));
- memcpy (&buf[1 + sizeof (sp->nbytes)], &sp->len, sizeof (sp->len));
- ret += KP_TRY (strm->write (interp, buf, sizeof (buf)));
- }
- ret += KP_TRY (strm->write (interp, sp->data, sp->nbytes));
- return (ret);
- }
- result<object> unpack_b (interpreter *interp, stream *strm,
- pack_info& info, bool save)
- {
- uint32_t nb = KP_TRY (strm->getb (interp));
- if ((int)nb < 0)
- return (info.error ("invalid bvector length"));
- else if (nb == 0)
- {
- bool rv = KP_TRY (strm->sread (interp, &nb));
- if (!rv)
- return (info.error ("invalid bvector length"));
- }
- KP_VTRY (alloc_bvector (interp, nb + 1));
- bvector *bvp = as_bvector (interp->alval);
- bvp->nbytes = KP_TRY (strm->read (interp, bvp->data, nb));
- if (bvp->nbytes != nb)
- return (info.error ("invalid bvector bytes read"));
- else if (save)
- KP_VTRY (info.add_mapping (interp, *info.offset, bvp->as_obj ()));
- bvp->data[bvp->nbytes] = 0;
- kp_return (bvp->as_obj ());
- }
- result<object> unpack_s (interpreter *interp, stream *strm,
- pack_info& info, bool save)
- {
- uint32_t len, nb = KP_TRY (strm->getb (interp));
- if ((int)nb < 0)
- return (info.error ("invalid string length"));
- else if (nb == 0)
- {
- bool rv = KP_TRY (strm->sread (interp, &nb));
- rv = rv && KP_TRY (strm->sread (interp, &len));
- if (!rv)
- return (info.error ("invalid string length"));
- }
- else
- {
- len = KP_TRY (strm->getb (interp));
- if ((int)len < 0)
- return (info.error ("invalid string length"));
- }
- KP_VTRY (alloc_str (interp, nb));
- string *sp = as_str (interp->alval);
- sp->nbytes = KP_TRY (strm->read (interp, sp->data, nb));
- if (sp->nbytes != nb)
- return (info.error ("invalid string characters read"));
- else if (save)
- KP_VTRY (info.add_mapping (interp, *info.offset, sp->as_obj ()));
- sp->len = len;
- sp->data[sp->nbytes] = '\0';
- kp_return (sp->as_obj ());
- }
- // String implementation.
- static result<int>
- getidx_s (interpreter *interp, const bvector *bp,
- int idx, int *p, uint32_t off)
- {
- const string *sp = (const string *)bp;
- if ((idx < 0 && (idx += sp->len) < 0) || (uint32_t)idx >= sp->len + off)
- return (interp->raise_oob (idx, sp->len));
- return (stridx (sp, *p = idx));
- }
- static result<bvector*>
- test_s (interpreter *interp, object obj)
- {
- string *ret = as<string> (obj);
- if (!ret)
- return (interp->raise ("type-error", "argument is not a string"));
- return (ret);
- }
- static const bvargs STR_ARGS =
- {
- getidx_s,
- test_s,
- sizeof (string),
- typecode::STR,
- FLAGS_CONST,
- &empty_string
- };
- result<object> alloc_str (interpreter *interp, uint32_t nbytes)
- {
- string *ret = (string *)KP_TRY (STR_ARGS.alloc (interp, nbytes));
- if (nbytes)
- ret->len = ret->hval = 0;
- return (interp->alval);
- }
- result<object> subseq_s (interpreter *interp, object src,
- object ix1, object ix2)
- {
- if (ix2 == UNBOUND)
- ix2 = fixint (len_s (src));
- auto ret = KP_TRY (bytes_subseq (interp, as_bvector (src),
- ix1, ix2, STR_ARGS));
- kp_return (ret->as_obj ());
- }
- result<object> add_ss (interpreter *interp, object s1, object s2)
- {
- auto ret = KP_TRY (bytes_add (interp, as_bvector (s1),
- as_bvector (s2), STR_ARGS));
- kp_return (ret->as_obj ());
- }
- result<object> concat_s (interpreter *interp, object *argv, int argc)
- {
- auto ret = KP_TRY (bytes_concat (interp, argv, argc, STR_ARGS));
- kp_return (ret->as_obj ());
- }
- static inline result<object>
- xadd_sc_cs (interpreter *interp, object s,
- object c, bool str_first_p)
- {
- unsigned char buf[6];
- uint32_t len = u32tou8 (buf, as_char (c));
- const string *src = as_str (s);
- string *ret = (string *)KP_TRY (STR_ARGS.alloc (interp, src->nbytes + len));
- if (str_first_p)
- fscpy ((char *)memcpy (ret->data, src->data, src->nbytes) +
- src->nbytes, buf, len);
- else
- memcpy (fscpy (ret->data, buf, len), src->data, src->nbytes);
- ret->len = src->len + 1;
- kp_return (interp->alval);
- }
- result<object> add_sc (interpreter *interp, object s, object c)
- {
- return (xadd_sc_cs (interp, s, c, true));
- }
- result<object> add_cs (interpreter *interp, object c, object s)
- {
- return (xadd_sc_cs (interp, s, c, false));
- }
- result<object> add_cc (interpreter *interp, object c1, object c2)
- {
- unsigned char b1[6], b2[6];
- uint32_t n1 = u32tou8 (b1, as_char (c1)), n2 = u32tou8 (b2, as_char (c2));
- string *ret = (string *)KP_TRY (STR_ARGS.alloc (interp, n1 + n2));
- fscpy (fscpy (ret->data, b1, n1), b2, n2);
- ret->len = 2;
- kp_return (interp->alval);
- }
- result<object> mul_ic (interpreter *interp, object ix, object ch)
- {
- unsigned char buf[6];
- bvector bv;
- bv.nbytes = u32tou8 (bv.data = buf, as_char (ch));
- auto ret = KP_TRY (bytes_mul (interp, &bv, as_int (ix), STR_ARGS));
- kp_return (ret->as_obj ());
- }
- result<object> mul_is (interpreter *interp, object ix, object s)
- {
- auto ret = KP_TRY (bytes_mul (interp, as_bvector (s), as_int (ix), STR_ARGS));
- kp_return (ret->as_obj ());
- }
- result<object> get_s (interpreter *interp, object s, object ix, object dfl)
- {
- int idx;
- if (kp_unlikely (dfl != UNBOUND))
- return (interp->raise_nargs (2, 2, 3));
- else if (kp_unlikely (!as<int> (ix, idx)))
- return (interp->raise ("type-error", "index is not an integer"));
-
- const string *sp = as_str (s);
- if (idx < 0)
- idx = sp->len + idx;
-
- if (kp_unlikely (idx < 0 || (uint32_t)idx >= sp->len))
- return (interp->raise_oob (idx, sp->len));
- const unsigned char *ptr = sp->data + stridx (sp, idx);
- kp_return (charobj (u8tou32 (ptr, UTF8_SKIP[*ptr])));
- }
- uint32_t hash_s (interpreter *, object s)
- {
- string *sp = as_str (s);
- if (sp->hval == 0)
- sp->hval = hashbuf (sp->data, sp->nbytes);
- return (sp->hval);
- }
- result<object> last_b (interpreter *interp, object obj)
- {
- const bvector *bvp = as_bvector (obj);
- if (!bvp->nbytes)
- return (interp->raise_oob (0, 0));
- kp_return (fixint (bvp->data[bvp->nbytes - 1]));
- }
- // Stream interface.
- struct bvstream_info
- {
- unsigned char *datap;
- uint32_t curpos;
- uint32_t nmax;
- uint32_t nbytes;
- };
- static result<int64_t>
- bv_read (interpreter *, stream& strm, void *dstp, uint64_t bytes)
- {
- auto dp = (bvstream_info *)strm.cookie;
- uint32_t rb = (uint32_t)(dp->nbytes - dp->curpos);
- rb = min ((uint64_t)rb, bytes);
- memcpy (dstp, dp->datap + dp->curpos, rb);
- dp->curpos += rb;
- return ((int64_t)rb);
- }
- static result<int64_t>
- bv_write (interpreter *interp, stream& strm, const void *src, uint64_t bytes)
- {
- auto dp = (bvstream_info *)strm.cookie;
- if (dp->curpos + bytes >= dp->nmax)
- {
- uint32_t nsz = upsize (dp->curpos + bytes + 1);
- dp->datap = (unsigned char *)xrealloc (dp->datap, dp->nmax = nsz);
- }
- memcpy (dp->datap + dp->curpos, src, bytes);
- if ((dp->curpos += bytes) > dp->nbytes)
- dp->nbytes = dp->curpos;
- return ((int64_t)bytes);
- }
- static result<bool>
- bv_seek (interpreter *interp, stream& strm, spos& pos, int whence)
- {
- auto dp = (bvstream_info *)strm.cookie;
- int64_t roff = pos.offset +
- (whence == SEEK_SET ? 0 : whence == SEEK_CUR ?
- dp->curpos : dp->nbytes);
- if (roff < 0)
- return (false);
- else if (roff > dp->nbytes)
- {
- if (!(strm.io_flags & STRM_WRITE) || roff > UINT32_MAX)
- return (false);
- else if (roff > dp->nmax)
- dp->datap = (unsigned char *)xrealloc (dp->datap,
- dp->nmax = upsize (roff + 1));
- memset (dp->datap + dp->nbytes, 0, roff - dp->nbytes);
- }
- if ((dp->curpos = (uint32_t)roff) > dp->nbytes)
- dp->nbytes = dp->curpos;
- pos.offset = roff;
- return (true);
- }
- static bool
- bv_close (interpreter *, stream& strm)
- {
- auto dp = (bvstream_info *)strm.cookie;
- xfree (dp->datap);
- xfree (dp);
- return (true);
- }
- static const stream::xops bv_ops =
- {
- bv_read,
- bv_write,
- bv_seek,
- bv_close
- };
- result<stream*> bvstream (interpreter *interp, object bv, int mode)
- {
- if (!(mode & STRM_RDWR))
- return (nullptr);
- auto dp = (bvstream_info *)xmalloc (sizeof (bvstream_info));
- dp->nbytes = upsize (as_bvector(bv)->nbytes + 1);
- dp->curpos = 0;
- dp->datap = (unsigned char *)xmalloc (dp->nbytes);
- memcpy (dp->datap, &as_bvector(bv)->data, as_bvector(bv)->nbytes);
- auto strm = stream::make (interp, mode, STRM_BUFSIZ, &bv_ops, dp);
- if (strm.error_p ())
- {
- xfree (dp->datap);
- xfree (dp);
- return (exception ());
- }
- return (deref (strm));
- }
- result<object> bvstream_get (interpreter *interp, stream *strm)
- {
- if (strm->io_flags & STRM_CLOSED)
- return (interp->raise ("arg-error", "stream has been closed"));
- bool rv = KP_TRY (strm->flush (interp));
- if (!rv)
- return (interp->raise ("io-error", "failed to flush stream"));
- auto dp = (bvstream_info *)strm->cookie;
- object ret = KP_TRY (alloc_bvector (interp, dp->nbytes));
- memcpy (as_bvector(ret)->data, dp->datap, dp->nbytes);
- return (interp->retval);
- }
- unsigned char* bvstream_data (stream *strm, uint32_t& size)
- {
- auto dp = (bvstream_info *)strm->cookie;
- size = dp->nbytes;
- return (dp->datap);
- }
- static int
- do_init_bvector (interpreter *)
- {
- static const unsigned char empty_data[] = { 0 };
- empty_bvector.vo_full = FLAGS_CONST;
- empty_bvector.vo_type = typecode::BVECTOR;
- empty_string.vo_full = FLAGS_CONST;
- empty_string.vo_type = typecode::STR;
- empty_string.hval = 1;
- empty_bvector.data = empty_string.data = (unsigned char *)empty_data;
- return (init_op::result_ok);
- }
- init_op init_bvector (do_init_bvector, "bvector");
- KP_DECLS_END
|