mapcdb.ml 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. (*
  2. * _ _ ____ _
  3. * _| || |_/ ___| ___ _ __ _ __ ___ | |
  4. * |_ .. _\___ \ / _ \ '_ \| '_ \ / _ \| |
  5. * |_ _|___) | __/ |_) | |_) | (_) |_|
  6. * |_||_| |____/ \___| .__/| .__/ \___/(_)
  7. * |_| |_|
  8. *
  9. * Personal Social Web.
  10. *
  11. * Copyright (C) The #Seppo contributors. All rights reserved.
  12. *
  13. * This program is free software: you can redistribute it and/or modify
  14. * it under the terms of the GNU General Public License as published by
  15. * the Free Software Foundation, either version 3 of the License, or
  16. * (at your option) any later version.
  17. *
  18. * This program is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  21. * GNU General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU General Public License
  24. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  25. *)
  26. (** A Map.Make[1] inspired cdb.
  27. **
  28. ** http://cr.yp.to/cdb.html
  29. **
  30. ** [1] https://v2.ocaml.org/api/Map.Make.html
  31. **)
  32. type cdb = Cdb of string
  33. module P = struct
  34. let take2uint ic buf : (Optint.t * Optint.t) =
  35. let uint pos =
  36. Bytes.get_int32_le buf pos
  37. |> Optint.of_unsigned_int32
  38. in
  39. really_input ic buf 0 8;
  40. (uint 0, uint 4)
  41. let take2uint_seek ic buf pos =
  42. let pos = pos |> Optint.to_int in
  43. seek_in ic pos;
  44. take2uint ic buf
  45. let slurp ic n =
  46. let buf = n |> Bytes.create in
  47. n |> really_input ic buf 0;
  48. buf
  49. let put2uint oc buf (a, b) =
  50. let uint pos v = v |> Optint.to_unsigned_int32
  51. |> Bytes.set_int32_le buf pos in
  52. uint 0 a;
  53. uint 4 b;
  54. output_bytes oc buf
  55. module PosSetItem = struct
  56. type t = Optint.t * Optint.t
  57. let compare (h0, p0) (h1, p1) =
  58. match Optint.compare h0 h1 with
  59. | 0 -> Optint.compare p0 p1
  60. | r -> r
  61. end
  62. module PosSet = Set.Make (PosSetItem)
  63. end
  64. let _32_0xFFffFFff = 0xFFffFFffL |> Optint.of_int64
  65. let _32_5381 = 5381 |> Optint.of_int
  66. (* http://cr.yp.to/cdb/cdb.txt *)
  67. let hash32_gen len get : Optint.t =
  68. let ( +. ) = Optint.add
  69. and ( << ) = Optint.shift_left
  70. and ( ^ ) = Optint.logxor
  71. and ( land ) = Optint.logand in
  72. let rec fkt (idx : int) (h : Optint.t) =
  73. if idx = len
  74. then h
  75. else
  76. let c = idx |> get |> Char.code |> Optint.of_int in
  77. (((h << 5) +. h) ^ c) land _32_0xFFffFFff
  78. |> fkt (idx + 1)
  79. in
  80. fkt 0 _32_5381
  81. let hash32_byt dat : Optint.t =
  82. hash32_gen (Bytes.length dat) (Bytes.get dat)
  83. let hash32_str dat : Optint.t =
  84. hash32_gen (String.length dat) (String.get dat)
  85. let add_many
  86. (keep : bytes * bytes -> bool)
  87. (fkt_add_n : (bytes * bytes -> unit) -> unit)
  88. (Cdb fn) =
  89. (* Logr.debug (fun m -> m "%s.%s %s" "Mapcdb" "add_many" fn); *)
  90. (* Logr.debug (fun m -> m "Mapcdb.add_seq ... %s" fn); *)
  91. let fn' = fn ^ "~" in
  92. (try
  93. let oc = open_out_gen
  94. [ Open_binary; Open_creat; Open_excl; Open_wronly ]
  95. 0o644 fn' in
  96. let cdc = Ds_cdb.cdb_creator_of_out_channel oc
  97. and buf = Bytes.create 8 in
  98. let _ =
  99. let ic = open_in_gen [ Open_binary; Open_rdonly ] 0 fn in
  100. try
  101. let hoff, _hsiz = P.take2uint ic buf in
  102. let hoff = hoff |> Optint.to_int in
  103. let rec next_rec recpos =
  104. match hoff - recpos with
  105. | 0 -> hoff
  106. | _ ->
  107. let klen, dlen = P.take2uint ic buf in
  108. let klen, dlen = klen |> Optint.to_int, dlen |> Optint.to_int in
  109. let key = P.slurp ic klen
  110. and dat = P.slurp ic dlen in
  111. if keep (key,dat)
  112. then Ds_cdb.add cdc key dat;
  113. next_rec (recpos + 8 + klen + dlen)
  114. in
  115. seek_in ic 2048;
  116. let ret = next_rec 2048 in
  117. close_in ic;
  118. ret
  119. with
  120. End_of_file -> 2048
  121. in
  122. let adder (k,v) = Ds_cdb.add cdc k v in
  123. fkt_add_n adder;
  124. Ds_cdb.close_cdb_out cdc;
  125. Unix.rename fn' fn;
  126. with | e ->
  127. Logr.err (fun m -> m "%s %s.%s %s" E.e1020 "Mapcdb" "add_many" fn);
  128. Unix.unlink fn';
  129. raise e
  130. );
  131. Cdb fn
  132. let add_seq keep seq cdb =
  133. let fkt_add_n add1 = Seq.iter add1 seq in
  134. add_many keep fkt_add_n cdb
  135. let add k v cdb =
  136. let all _ = true
  137. and fkt_add_n add1 = add1 (k,v) in
  138. add_many all fkt_add_n cdb
  139. let add_ k v cdb =
  140. add_seq
  141. (fun _ -> true)
  142. (Seq.return (k, v))
  143. cdb
  144. let add_string k v cdb =
  145. add
  146. (k |> Bytes.unsafe_of_string)
  147. (v |> Bytes.unsafe_of_string)
  148. cdb
  149. let update k v cdb =
  150. add_seq
  151. (fun (k',_) -> not (Bytes.equal k k'))
  152. (Seq.return (k, v))
  153. cdb
  154. let update_string k v cdb =
  155. update
  156. (k |> Bytes.unsafe_of_string)
  157. (v |> Bytes.unsafe_of_string)
  158. cdb
  159. let remove k cdb =
  160. add_seq
  161. (fun (k',_) -> not (Bytes.equal k k'))
  162. Seq.empty
  163. cdb
  164. let remove_string k cdb =
  165. remove
  166. (k |> Bytes.unsafe_of_string)
  167. cdb
  168. (* http://cr.yp.to/cdb/cdb.txt *)
  169. let my_find_opt key (Cdb fn) =
  170. let ic = open_in_gen [ Open_binary; Open_rdonly ] 0 fn in
  171. let hash = hash32_byt key in
  172. let buf = Bytes.create 8 in
  173. (* Each hash table slot states a hash value and a byte position. If the byte position is 0, the slot is empty. Otherwise, the slot points to a record whose key has that hash value. *)
  174. let rec_pos p0 =
  175. let hash',ptr = p0 |> P.take2uint_seek ic buf in
  176. if ptr |> Optint.equal Optint.zero
  177. then None
  178. else if hash != hash'
  179. then None
  180. else Some ptr
  181. in
  182. let ret =
  183. let _0x100 = 8 |> Optint.shift_left Optint.one
  184. and _0x8 = 3 |> Optint.shift_left Optint.one
  185. and _0x1 = Optint.one
  186. and _0x0 = Optint.zero
  187. and ( *. ) = Optint.mul
  188. and ( +. ) = Optint.add
  189. and ( /. ) = Optint.div
  190. and ( mod ) = Optint.rem in
  191. let hpos,hslots =
  192. try P.take2uint_seek ic buf (hash mod _0x100 *. _0x8)
  193. with End_of_file -> (_0x0, _0x0)
  194. in
  195. if hslots |> Optint.equal Optint.zero
  196. then None
  197. else
  198. (* Probe that slot, the next higher slot, and so on, until you find the record or run into an empty slot. *)
  199. let rec probe_slot slot =
  200. match slot < hslots with
  201. | false -> None
  202. | true -> (
  203. match rec_pos (hpos +. (slot *. _0x8)) with
  204. | Some posr -> (
  205. (* Records are stored sequentially, without special alignment. A record states a key length, a data length, the key, and the data. *)
  206. let klen, dlen = P.take2uint_seek ic buf posr in
  207. let dlen, klen = dlen |> Optint.to_int, klen |> Optint.to_int in
  208. (* TODO reduce the 2 allocations to 1 and reuse the key buffer to
  209. return the data *)
  210. match Bytes.equal key (P.slurp ic klen) with
  211. | true -> Some (P.slurp ic dlen)
  212. | false -> probe_slot (slot +. _0x1))
  213. | None -> probe_slot (slot +. _0x1))
  214. in
  215. probe_slot (hash /. _0x100 mod hslots)
  216. in
  217. close_in ic;
  218. ret
  219. let ds_find_opt key fn =
  220. try
  221. let cdb = Ds_cdb.open_cdb_in fn in
  222. let ret = Ds_cdb.find_first cdb key in
  223. Ds_cdb.close_cdb_in cdb;
  224. ret
  225. with
  226. | End_of_file
  227. | _ -> None
  228. (* | Sys_error e ->
  229. Logr.err (fun m -> m "%s.%s %s %s" E.e0001 "Mapcdb" "ds_find_opt" (key |> Bytes.to_string) e);
  230. None
  231. *)
  232. let find_opt key (Cdb fn) =
  233. ds_find_opt key fn
  234. let find_string_opt (key : string) (fn : cdb) : string option =
  235. (* Logr.debug (fun m ->
  236. let Cdb fn' = fn in
  237. m "%s.%s %s %s" "Mapcdb" "find_string_opt" key fn'); *)
  238. match find_opt (key |> Bytes.unsafe_of_string) fn with
  239. | Some v -> Some (v |> Bytes.unsafe_to_string)
  240. | None -> None
  241. let fold_left f init (Cdb fn) =
  242. let init = ref init in
  243. let ifu kv =
  244. init := f !init kv;
  245. true
  246. in
  247. (try Ds_cdb.iter ifu fn
  248. with End_of_file -> ());
  249. !init