t_mcdb.ml 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. (*
  2. * _ _ ____ _
  3. * _| || |_/ ___| ___ _ __ _ __ ___ | |
  4. * |_ .. _\___ \ / _ \ '_ \| '_ \ / _ \| |
  5. * |_ _|___) | __/ |_) | |_) | (_) |_|
  6. * |_||_| |____/ \___| .__/| .__/ \___/(_)
  7. * |_| |_|
  8. *
  9. * Personal Social Web.
  10. *
  11. * Copyright (C) The #Seppo contributors. All rights reserved.
  12. *
  13. * This program is free software: you can redistribute it and/or modify
  14. * it under the terms of the GNU General Public License as published by
  15. * the Free Software Foundation, either version 3 of the License, or
  16. * (at your option) any later version.
  17. *
  18. * This program is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  21. * GNU General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU General Public License
  24. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  25. *)
  26. open Alcotest
  27. open Mcdb__
  (** Switches the process working directory to [../../../test/], taken
      relative to whatever directory the test binary was started in.
      Presumably this lets the relative database paths used by the other
      test cases resolve - confirm against the build layout.
      Raises [Unix.Unix_error] when the directory cannot be entered. *)
  let set_up () =
    "../../../test/" |> Unix.chdir
  (** Pins known hash values: [Ds_cdb.hash] and [Mcdb.hash32_byt] must
      agree on the one-byte key s, and [Mcdb.hash32_byt] must yield the
      recorded values for the key a and for one long URL key. *)
  let tc_hash () =
    (* Hash a string through the bytes-based 32 bit entry point. *)
    let hash32 s = Mcdb.hash32_byt (Bytes.of_string s) in
    check int32 __LOC__ 177622l (Ds_cdb.hash (Bytes.of_string "s"));
    check int __LOC__ 177622 (Optint.to_int (hash32 "s"));
    check int __LOC__ 0x2b5c4 (Optint.to_int (hash32 "a"));
    let long_url =
      "http://www.traunsteiner-tagblatt.de/region+lokal/landkreis-traunstein/traunstein/pressemitteilungen-der-stadt-traunstein_artikel,-Traunstein-20-%E2%80%93-Neue-Medien-im-Mittelpunkt-_arid,198374.html"
    in
    (* Compared as int64 because the expected value exceeds 0x7fffffff. *)
    check int64 __LOC__ 0xc7410a37L (Optint.to_int64 (hash32 long_url))
  (** Folds [Mcdb.H.foldr] from the seed [Mcdb.H._32_5381] over a string
      (via [Astring.String.fold_left]) and over bytes (via
      [Bytes.fold_left]) and compares the results with recorded values. *)
  let tc_hash_foldr () =
    let over_string s =
      Astring.String.fold_left Mcdb.H.foldr Mcdb.H._32_5381 s
    and over_bytes b =
      Bytes.fold_left Mcdb.H.foldr Mcdb.H._32_5381 b
    in
    check int __LOC__ 177622 (Optint.to_int (over_string "s"));
    check int __LOC__ 177622 (Optint.to_int (over_bytes (String.to_bytes "s")));
    check string __LOC__ "1316135747"
      (Optint.to_string
         (over_string "https://dev.seppo.social/2023-11-13/activitypub/profile.jlda"))
  (** Looks up one present and one absent key in [mini.cdb] through
      [Mcdb.find_string_opt]: the key s must map to the recorded value and
      the key zzz must not be found. *)
  let tc_find_string_opt () =
    (* Logr.info (fun m -> m "mapcdb_test.test_find_string_opt"); *)
    let db = Mcdb.Cdb "mini.cdb" in
    let present = Mcdb.find_string_opt "s" db in
    check string __LOC__ "ß" (Option.get present);
    let absent = Mcdb.find_string_opt "zzz" db in
    assert (Option.is_none absent)
    (*
       find_string_opt
       "http://www.traunsteiner-tagblatt.de/region+lokal/landkreis-traunstein/traunstein/pressemitteilungen-der-stadt-traunstein_artikel,-Traunstein-20-%E2%80%93-Neue-Medien-im-Mittelpunkt-_arid,198374.html"
       (Cdb "big.cdb")
       |> Option.get |> String.length
       |> check int __LOC__ 1000;
    *)
  (** Permission bits passed to [Unix.mkdir] for the scratch directory. *)
  let pDir = 0o755

  (** [mk_db fn] prepares a clean slate for a database at path [fn]:
      - creates the directory [tmp] (an already existing one is fine),
      - removes [fn] and the companion file [fn ^ "~"] when they exist,
      - returns [Mcdb.Cdb fn]; the file itself is not created here.

      Each file is unlinked on its own, so a missing [fn] no longer
      prevents a leftover [fn ^ "~"] from being removed.

      Raises [Unix.Unix_error] for any failure other than [EEXIST] on the
      directory or [ENOENT] on the files. *)
  let mk_db fn =
    (try Unix.mkdir "tmp" pDir
     with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
    (* Tolerate only "no such file"; every other error still propagates. *)
    let unlink_if_present path =
      try Unix.unlink path
      with Unix.Unix_error (Unix.ENOENT, _, _) -> ()
    in
    unlink_if_present fn;
    unlink_if_present (fn ^ "~");
    Mcdb.Cdb fn
  (** Exercises insert, overwrite and removal of the key a on a fresh
      database at [tmp/add.cdb], reading the value back after every step.
      The leading underscore marks it as unused; its entry in the runner
      list is commented out. *)
  let _tc_update () =
    (* Logr.info (fun m -> m "mapcdb_test.test_update"); *)
    let db = mk_db "tmp/add.cdb" in
    let lookup () = Mcdb.find_string_opt "a" db in
    ignore (Mcdb.update_string "a" "1" db);
    check string __LOC__ "1" (Option.get (lookup ()));
    ignore (Mcdb.update_string "a" "2" db);
    check string __LOC__ "2" (Option.get (lookup ()));
    ignore (Mcdb.remove_string "a" db);
    assert (Option.is_none (lookup ()))
  (** Collects every key/value pair of [data/mini.cdb] with
      [Mcdb.fold_left], converting both to strings, and expects exactly
      three pairs. *)
  let tc_fold () =
    (* Logr.info (fun m -> m "%s.%s" "mapcdb_test" "fold_left"); *)
    let collect acc (key, value) =
      (* Logr.debug (fun m -> m " %s->%s" k v); *)
      (Bytes.to_string key, Bytes.to_string value) :: acc
    in
    let pairs = Mcdb.fold_left collect [] (Mcdb.Cdb "data/mini.cdb") in
    check int __LOC__ 3 (List.length pairs)
  (** Counts the entries of [data/2024-04-30-131146-subscribed.cdb] with
      [Mcdb.fold_left] and expects 52 of them. *)
  let tc_fold_1 () =
    let db = Mcdb.Cdb "data/2024-04-30-131146-subscribed.cdb" in
    let count = Mcdb.fold_left (fun seen _ -> 1 + seen) 0 db in
    check int __LOC__ 52 count
  (** Checks the 32 bit string hash of a fixed URL in three renderings:
      decimal text, encoded width in bytes, and unpadded URL-safe base64
      of the [Optint.encode] output. Also pins the hexadecimal rendering
      of [Optint.max_int] and [Mcdb.H._32_0xFFffFFff]. *)
  let tc_hash32_str_base64 () =
    (* Logr.info (fun m -> m "%s.%s" "Mcdb_test" "test_hash32_str_base64"); *)
    let hex v = Printf.sprintf "0x%Lx" (Optint.to_int64 v) in
    check string __LOC__ "0x7fffffff" (hex Optint.max_int);
    check string __LOC__ "0xffffffff" (hex Mcdb.H._32_0xFFffFFff);
    let url = "https://dev.seppo.social/2023-11-13/activitypub/profile.jlda" in
    (* Round-trip through Uri before hashing, as the original test does. *)
    let digest = Mcdb.hash32_str (Uri.to_string (Uri.of_string url)) in
    check string __LOC__ "1316135747" (Optint.to_string digest);
    check int __LOC__ 4 Optint.encoded_size;
    let raw = Bytes.make Optint.encoded_size '\000' in
    Optint.encode raw ~off:0 digest;
    check string __LOC__ "TnKjQw"
      (Base64.encode_string ~pad:false ~alphabet:Base64.uri_safe_alphabet
         (Bytes.to_string raw))
  (** Builds a 63 bit variant of the cdb string hash locally (seed 5381,
      step [((h << 5) + h) xor c], masked with [Optint.Int63.max_int]) and
      pins its output for a few URLs as decimal text and as unpadded
      URL-safe base64 of the [Optint.Int63.encode] bytes. *)
  let tc_hash63_str_base64 () =
    (* Logr.info (fun m -> m "%s.%s" "Mcdb_test" "test_hash63_str_base64"); *)
    let module I = Optint.Int63 in
    check int __LOC__ 8 I.encoded_size;
    let mask = I.max_int
    and seed = I.of_int 5381 in
    (* http://cr.yp.to/cdb/cdb.txt *)
    (* One hash step: fold the next character into the running value. *)
    let mix acc ch =
      let c = I.of_int (Char.code ch) in
      I.logand (I.logxor (I.add (I.shift_left acc 5) acc) c) mask
    in
    let hash63_str dat : I.t = String.fold_left mix seed dat in
    (* Serialise a hash into a fresh buffer and base64 it. *)
    let to_base64 h =
      let raw = Bytes.make I.encoded_size '\000' in
      I.encode raw ~off:0 h;
      Base64.encode_string ~pad:false ~alphabet:Base64.uri_safe_alphabet
        (Bytes.to_string raw)
    in
    let uhash u = to_base64 (hash63_str (Uri.to_string u)) in
    check string __LOC__ "0x3fffffffffffffff"
      (Printf.sprintf "0x%Lx" (I.to_int64 mask));
    let profile = "https://dev.seppo.social/2023-11-13/activitypub/profile.jlda" in
    let h = hash63_str (Uri.to_string (Uri.of_string profile)) in
    check string __LOC__ "4387560302522311491" (I.to_string h);
    check string __LOC__ "POO-2U5yo0M" (to_base64 h);
    check string __LOC__ "POO-2U5yo0M" (uhash (Uri.of_string profile));
    check string __LOC__ "BlamH6XVcgE"
      (uhash (Uri.of_string "https://digitalcourage.social/users/mro"));
    check string __LOC__ "BlamH6XVch4"
      (uhash (Uri.of_string "https://digitalcourage.social/users/mrp"));
    check string __LOC__ "LesfhY0ub58"
      (uhash (Uri.of_string "ittps://digitalcourage.social/users/mrp"))
  (** Benchmark helper: on a fresh database at [tmp/add_1.cdb], calls
      [Mcdb.update_string] once per counter value from 1 up to but not
      including [n], printing a dot before each call. Key and value are
      the same string, made of the counter and [Sys.time ()]. *)
  let _bench_update_1 n =
    (* Logr.info (fun m -> m "mapcdb_test.bench_update_1"); *)
    let db = mk_db "tmp/add_1.cdb" in
    let rec go i =
      if i < n then begin
        print_string ".";
        let key = Printf.sprintf "%d %f" i (Sys.time ()) in
        ignore (Mcdb.update_string key key db);
        go (succ i)
      end
    in
    go 1
  (** Benchmark helper: builds [n] entries whose key and value are the
      same bytes (the decimal index) and hands them as a sequence to
      [Mcdb.add_seq] on a fresh database at [tmp/add_2.cdb], with a
      predicate that accepts everything. *)
  let _bench_update_2 n =
    (* Logr.info (fun m -> m "mapcdb_test.bench_update_2"); *)
    let db = mk_db "tmp/add_2.cdb" in
    let entry i =
      let k : bytes = Bytes.of_string (string_of_int i) in
      (k, k)
    in
    (* The list is materialised first, then exposed as a sequence. *)
    let entries = List.init n entry |> List.to_seq in
    ignore (Mcdb.add_seq (fun _ -> true) entries db)
  (** Benchmark helper: passes [Mcdb.add_many] a producer that calls the
      supplied [add] callback for the indices 0 up to but not including
      [n], on a fresh database at [tmp/add_3.cdb]. Keys are the decimal
      index, values the current [Sys.time ()]; the predicate accepts
      everything. *)
  let _bench_update_3 n =
    (* Logr.info (fun m -> m "mapcdb_test.bench_update_3"); *)
    let db = mk_db "tmp/add_3.cdb" in
    let produce add =
      let rec go i =
        if i < n then begin
          let k = Bytes.of_string (string_of_int i) in
          let v = Bytes.of_string (Printf.sprintf "%f" (Sys.time ())) in
          ignore (add (k, v));
          go (succ i)
        end
      in
      go 0
    in
    ignore (Mcdb.add_many (fun _ -> true) produce db)
  (* Entry point: registers all active cases in one group named after this
     source file and runs them under the suite name Mcdb. set_up is listed
     first as an ordinary case; the cases after it open databases through
     relative paths. *)
  let () =
    let cases = [
      test_case "set_up" `Quick set_up;
      test_case "tc_hash" `Quick tc_hash;
      test_case "tc_hash_fold" `Quick tc_hash_foldr;
      test_case "tc_find_string_opt" `Quick tc_find_string_opt;
      (* "tc_update" , `Quick, tc_update ; *)
      test_case "tc_fold" `Quick tc_fold;
      test_case "tc_fold_1" `Quick tc_fold_1;
      test_case "tc_hash32_str_base64" `Quick tc_hash32_str_base64;
      test_case "tc_hash63_str_base64" `Quick tc_hash63_str_base64;
      (* "bench_update_1" , `Quick, bench_update_1 1000; *)
      (* "bench_update_2" , `Quick, bench_update_2 100_000; *)
      (* "bench_update_3" , `Quick, bench_update_3 100_000; *)
    ] in
    run "Mcdb" [ (__FILE__, cases) ]