123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277 |
- (*
- * _ _ ____ _
- * _| || |_/ ___| ___ _ __ _ __ ___ | |
- * |_ .. _\___ \ / _ \ '_ \| '_ \ / _ \| |
- * |_ _|___) | __/ |_) | |_) | (_) |_|
- * |_||_| |____/ \___| .__/| .__/ \___/(_)
- * |_| |_|
- *
- * Personal Social Web.
- *
- * Copyright (C) The #Seppo contributors. All rights reserved.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *)
- (** A Map.Make[1] inspired cdb.
- **
- ** http://cr.yp.to/cdb.html
- **
- ** [1] https://v2.ocaml.org/api/Map.Make.html
- **)
- type cdb = Cdb of string
- module P = struct
- let take2uint ic buf : (Optint.t * Optint.t) =
- let uint pos =
- Bytes.get_int32_le buf pos
- |> Optint.of_unsigned_int32
- in
- really_input ic buf 0 8;
- (uint 0, uint 4)
- let take2uint_seek ic buf pos =
- let pos = pos |> Optint.to_int in
- seek_in ic pos;
- take2uint ic buf
- let slurp ic n =
- let buf = n |> Bytes.create in
- n |> really_input ic buf 0;
- buf
- let put2uint oc buf (a, b) =
- let uint pos v = v |> Optint.to_unsigned_int32
- |> Bytes.set_int32_le buf pos in
- uint 0 a;
- uint 4 b;
- output_bytes oc buf
- module PosSetItem = struct
- type t = Optint.t * Optint.t
- let compare (h0, p0) (h1, p1) =
- match Optint.compare h0 h1 with
- | 0 -> Optint.compare p0 p1
- | r -> r
- end
- module PosSet = Set.Make (PosSetItem)
- end
- let _32_0xFFffFFff = 0xFFffFFffL |> Optint.of_int64
- let _32_5381 = 5381 |> Optint.of_int
- (* http://cr.yp.to/cdb/cdb.txt *)
- let hash32_gen len get : Optint.t =
- let ( +. ) = Optint.add
- and ( << ) = Optint.shift_left
- and ( ^ ) = Optint.logxor
- and ( land ) = Optint.logand in
- let rec fkt (idx : int) (h : Optint.t) =
- if idx = len
- then h
- else
- let c = idx |> get |> Char.code |> Optint.of_int in
- (((h << 5) +. h) ^ c) land _32_0xFFffFFff
- |> fkt (idx + 1)
- in
- fkt 0 _32_5381
- let hash32_byt dat : Optint.t =
- hash32_gen (Bytes.length dat) (Bytes.get dat)
- let hash32_str dat : Optint.t =
- hash32_gen (String.length dat) (String.get dat)
- let add_many
- (keep : bytes * bytes -> bool)
- (fkt_add_n : (bytes * bytes -> unit) -> unit)
- (Cdb fn) =
- (* Logr.debug (fun m -> m "%s.%s %s" "Mapcdb" "add_many" fn); *)
- (* Logr.debug (fun m -> m "Mapcdb.add_seq ... %s" fn); *)
- let fn' = fn ^ "~" in
- (try
- let oc = open_out_gen
- [ Open_binary; Open_creat; Open_excl; Open_wronly ]
- 0o644 fn' in
- let cdc = Ds_cdb.cdb_creator_of_out_channel oc
- and buf = Bytes.create 8 in
- let _ =
- let ic = open_in_gen [ Open_binary; Open_rdonly ] 0 fn in
- try
- let hoff, _hsiz = P.take2uint ic buf in
- let hoff = hoff |> Optint.to_int in
- let rec next_rec recpos =
- match hoff - recpos with
- | 0 -> hoff
- | _ ->
- let klen, dlen = P.take2uint ic buf in
- let klen, dlen = klen |> Optint.to_int, dlen |> Optint.to_int in
- let key = P.slurp ic klen
- and dat = P.slurp ic dlen in
- if keep (key,dat)
- then Ds_cdb.add cdc key dat;
- next_rec (recpos + 8 + klen + dlen)
- in
- seek_in ic 2048;
- let ret = next_rec 2048 in
- close_in ic;
- ret
- with
- End_of_file -> 2048
- in
- let adder (k,v) = Ds_cdb.add cdc k v in
- fkt_add_n adder;
- Ds_cdb.close_cdb_out cdc;
- Unix.rename fn' fn;
- with | e ->
- Logr.err (fun m -> m "%s %s.%s %s" E.e1020 "Mapcdb" "add_many" fn);
- Unix.unlink fn';
- raise e
- );
- Cdb fn
- let add_seq keep seq cdb =
- let fkt_add_n add1 = Seq.iter add1 seq in
- add_many keep fkt_add_n cdb
- let add k v cdb =
- let all _ = true
- and fkt_add_n add1 = add1 (k,v) in
- add_many all fkt_add_n cdb
- let add_ k v cdb =
- add_seq
- (fun _ -> true)
- (Seq.return (k, v))
- cdb
- let add_string k v cdb =
- add
- (k |> Bytes.unsafe_of_string)
- (v |> Bytes.unsafe_of_string)
- cdb
- let update k v cdb =
- add_seq
- (fun (k',_) -> not (Bytes.equal k k'))
- (Seq.return (k, v))
- cdb
- let update_string k v cdb =
- update
- (k |> Bytes.unsafe_of_string)
- (v |> Bytes.unsafe_of_string)
- cdb
- let remove k cdb =
- add_seq
- (fun (k',_) -> not (Bytes.equal k k'))
- Seq.empty
- cdb
- let remove_string k cdb =
- remove
- (k |> Bytes.unsafe_of_string)
- cdb
- (* http://cr.yp.to/cdb/cdb.txt *)
- let my_find_opt key (Cdb fn) =
- let ic = open_in_gen [ Open_binary; Open_rdonly ] 0 fn in
- let hash = hash32_byt key in
- let buf = Bytes.create 8 in
- (* Each hash table slot states a hash value and a byte position. If the byte position is 0, the slot is empty. Otherwise, the slot points to a record whose key has that hash value. *)
- let rec_pos p0 =
- let hash',ptr = p0 |> P.take2uint_seek ic buf in
- if ptr |> Optint.equal Optint.zero
- then None
- else if hash != hash'
- then None
- else Some ptr
- in
- let ret =
- let _0x100 = 8 |> Optint.shift_left Optint.one
- and _0x8 = 3 |> Optint.shift_left Optint.one
- and _0x1 = Optint.one
- and _0x0 = Optint.zero
- and ( *. ) = Optint.mul
- and ( +. ) = Optint.add
- and ( /. ) = Optint.div
- and ( mod ) = Optint.rem in
- let hpos,hslots =
- try P.take2uint_seek ic buf (hash mod _0x100 *. _0x8)
- with End_of_file -> (_0x0, _0x0)
- in
- if hslots |> Optint.equal Optint.zero
- then None
- else
- (* Probe that slot, the next higher slot, and so on, until you find the record or run into an empty slot. *)
- let rec probe_slot slot =
- match slot < hslots with
- | false -> None
- | true -> (
- match rec_pos (hpos +. (slot *. _0x8)) with
- | Some posr -> (
- (* Records are stored sequentially, without special alignment. A record states a key length, a data length, the key, and the data. *)
- let klen, dlen = P.take2uint_seek ic buf posr in
- let dlen, klen = dlen |> Optint.to_int, klen |> Optint.to_int in
- (* TODO reduce the 2 allocations to 1 and reuse the key buffer to
- return the data *)
- match Bytes.equal key (P.slurp ic klen) with
- | true -> Some (P.slurp ic dlen)
- | false -> probe_slot (slot +. _0x1))
- | None -> probe_slot (slot +. _0x1))
- in
- probe_slot (hash /. _0x100 mod hslots)
- in
- close_in ic;
- ret
- let ds_find_opt key fn =
- try
- let cdb = Ds_cdb.open_cdb_in fn in
- let ret = Ds_cdb.find_first cdb key in
- Ds_cdb.close_cdb_in cdb;
- ret
- with
- | End_of_file
- | _ -> None
- (* | Sys_error e ->
- Logr.err (fun m -> m "%s.%s %s %s" E.e0001 "Mapcdb" "ds_find_opt" (key |> Bytes.to_string) e);
- None
- *)
- let find_opt key (Cdb fn) =
- ds_find_opt key fn
- let find_string_opt (key : string) (fn : cdb) : string option =
- (* Logr.debug (fun m ->
- let Cdb fn' = fn in
- m "%s.%s %s %s" "Mapcdb" "find_string_opt" key fn'); *)
- match find_opt (key |> Bytes.unsafe_of_string) fn with
- | Some v -> Some (v |> Bytes.unsafe_to_string)
- | None -> None
- let fold_left f init (Cdb fn) =
- let init = ref init in
- let ifu kv =
- init := f !init kv;
- true
- in
- (try Ds_cdb.iter ifu fn
- with End_of_file -> ());
- !init
|