t_mapcdb.ml 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. (*
  2. * _ _ ____ _
  3. * _| || |_/ ___| ___ _ __ _ __ ___ | |
  4. * |_ .. _\___ \ / _ \ '_ \| '_ \ / _ \| |
  5. * |_ _|___) | __/ |_) | |_) | (_) |_|
  6. * |_||_| |____/ \___| .__/| .__/ \___/(_)
  7. * |_| |_|
  8. *
  9. * Personal Social Web.
  10. *
  11. * Copyright (C) The #Seppo contributors. All rights reserved.
  12. *
  13. * This program is free software: you can redistribute it and/or modify
  14. * it under the terms of the GNU General Public License as published by
  15. * the Free Software Foundation, either version 3 of the License, or
  16. * (at your option) any later version.
  17. *
  18. * This program is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  21. * GNU General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU General Public License
  24. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  25. *)
  26. open Seppo_lib
  27. let o i = i |> Optint.of_int
  28. let oo i = i |> Optint.of_int64
  29. let test_hash () =
  30. Logr.info (fun m -> m "mapcdb_test.test_hash");
  31. let open Mapcdb in
  32. "s" |> Bytes.of_string |> Ds_cdb.hash
  33. |> Assrt.equals_int32 __LOC__ 177622l;
  34. "s" |> Bytes.of_string |> hash32_byt |> Assrt.equals_optint __LOC__ (o 177622);
  35. Assrt.equals_optint __LOC__ (o 0x2b5c4) ("a" |> Bytes.of_string |> hash32_byt);
  36. let k0 =
  37. "http://www.traunsteiner-tagblatt.de/region+lokal/landkreis-traunstein/traunstein/pressemitteilungen-der-stadt-traunstein_artikel,-Traunstein-20-%E2%80%93-Neue-Medien-im-Mittelpunkt-_arid,198374.html"
  38. in
  39. Assrt.equals_optint __LOC__ (oo 0xc7410a37L) (k0 |> Bytes.of_string |> hash32_byt);
  40. assert true
  41. let test_find_string_opt () =
  42. Logr.info (fun m -> m "mapcdb_test.test_find_string_opt");
  43. let open Mapcdb in
  44. find_string_opt "s" (Cdb "mini.cdb")
  45. |> Option.get
  46. |> Assrt.equals_string __LOC__ "ß";
  47. assert (None = find_string_opt "zzz" (Cdb "mini.cdb"));
  48. (*
  49. find_string_opt
  50. "http://www.traunsteiner-tagblatt.de/region+lokal/landkreis-traunstein/traunstein/pressemitteilungen-der-stadt-traunstein_artikel,-Traunstein-20-%E2%80%93-Neue-Medien-im-Mittelpunkt-_arid,198374.html"
  51. (Cdb "big.cdb")
  52. |> Option.get |> String.length
  53. |> Assrt.equals_int __LOC__ 1000;
  54. *)
  55. assert true
  56. let mk_db fn =
  57. (try
  58. Unix.mkdir "tmp" File.pDir;
  59. with Unix.Unix_error (Unix.EEXIST, _, _) -> ());
  60. (try
  61. Unix.unlink fn;
  62. Unix.unlink (fn ^ "~")
  63. with Unix.Unix_error (Unix.ENOENT, _, _) -> ());
  64. File.touch fn;
  65. Mapcdb.Cdb fn
  66. let test_update () =
  67. Logr.info (fun m -> m "mapcdb_test.test_update");
  68. let fn = mk_db "tmp/add.cdb" in
  69. let _ = Mapcdb.update_string "a" "1" fn in
  70. Mapcdb.find_string_opt "a" fn |> Option.get |> Assrt.equals_string __LOC__ "1";
  71. let _ = Mapcdb.update_string "a" "2" fn in
  72. Mapcdb.find_string_opt "a" fn |> Option.get |> Assrt.equals_string __LOC__ "2";
  73. let _ = Mapcdb.remove_string "a" fn in
  74. assert (Mapcdb.find_string_opt "a" fn |> Option.is_none);
  75. assert true
  76. let test_fold () =
  77. Logr.info (fun m -> m "%s.%s" "mapcdb_test" "fold_left");
  78. let cdb = Mapcdb.Cdb "data/mini.cdb" in
  79. Mapcdb.fold_left (fun init (k,v) ->
  80. let k = k |> Bytes.to_string
  81. and v = v |> Bytes.to_string in
  82. (* Logr.debug (fun m -> m " %s->%s" k v); *)
  83. (k,v) :: init) [] cdb
  84. |> List.length |> Assrt.equals_int __LOC__ 3
  85. let test_fold_1 () =
  86. Mapcdb.Cdb "data/2024-04-30-131146-subscribed.cdb"
  87. |> Mapcdb.fold_left (fun c _ -> 1 + c) 0
  88. |> Assrt.equals_int __LOC__ 52
  89. let test_hash32_str_base64 () =
  90. Logr.info (fun m -> m "%s.%s" "Mapcdb_test" "test_hash32_str_base64");
  91. Optint.max_int
  92. |> Optint.to_int64
  93. |> Printf.sprintf "0x%Lx"
  94. |> Assrt.equals_string __LOC__ "0x7fffffff";
  95. Mapcdb._32_0xFFffFFff
  96. |> Optint.to_int64
  97. |> Printf.sprintf "0x%Lx"
  98. |> Assrt.equals_string __LOC__ "0xffffffff";
  99. let h = "https://dev.seppo.social/2023-11-13/activitypub/profile.jlda"
  100. |> Uri.of_string |> Uri.to_string
  101. |> Mapcdb.hash32_str in
  102. h |> Optint.to_string
  103. |> Assrt.equals_string __LOC__ "1316135747";
  104. Optint.encoded_size |> Assrt.equals_int __LOC__ 4;
  105. let b = Bytes.make Optint.encoded_size (Char.chr 0) in
  106. h |> Optint.encode b ~off:0;
  107. b |> Bytes.to_string
  108. |> Base64.encode_string ~pad:false ~alphabet:Base64.uri_safe_alphabet
  109. |> Assrt.equals_string __LOC__ "TnKjQw";
  110. assert true
  111. let test_hash63_str_base64 () =
  112. Logr.info (fun m -> m "%s.%s" "Mapcdb_test" "test_hash63_str_base64");
  113. Optint.Int63.encoded_size |> Assrt.equals_int __LOC__ 8;
  114. let _mask_63 = Optint.Int63.max_int
  115. and _5381_63 = 5381 |> Optint.Int63.of_int in
  116. (* http://cr.yp.to/cdb/cdb.txt *)
  117. let hash63_gen len f_get : Optint.Int63.t =
  118. let ( +. ) = Optint.Int63.add
  119. and ( << ) = Optint.Int63.shift_left
  120. and ( ^ ) = Optint.Int63.logxor
  121. and ( land ) = Optint.Int63.logand in
  122. let rec fkt (idx : int) (h : Optint.Int63.t) =
  123. if idx = len
  124. then h
  125. else
  126. let c = idx |> f_get |> Char.code |> Optint.Int63.of_int in
  127. (((h << 5) +. h) ^ c) land _mask_63
  128. |> fkt (succ idx)
  129. in
  130. fkt 0 _5381_63
  131. in
  132. let hash63_str dat : Optint.Int63.t =
  133. hash63_gen (String.length dat) (String.get dat)
  134. in
  135. let uhash ?(off = 0) ?(buf = Bytes.make (Optint.Int63.encoded_size) (Char.chr 0)) u =
  136. u
  137. |> Uri.to_string
  138. |> hash63_str
  139. |> Optint.Int63.encode buf ~off;
  140. buf
  141. |> Bytes.to_string
  142. |> Base64.encode_string ~pad:false ~alphabet:Base64.uri_safe_alphabet
  143. in
  144. _mask_63
  145. |> Optint.Int63.to_int64
  146. |> Printf.sprintf "0x%Lx"
  147. |> Assrt.equals_string __LOC__ "0x3fffffffffffffff";
  148. let h = "https://dev.seppo.social/2023-11-13/activitypub/profile.jlda"
  149. |> Uri.of_string |> Uri.to_string
  150. |> hash63_str in
  151. h |> Optint.Int63.to_string
  152. |> Assrt.equals_string __LOC__ "4387560302522311491";
  153. let b = Bytes.make Optint.Int63.encoded_size (Char.chr 0) in
  154. h |> Optint.Int63.encode b ~off:0;
  155. b |> Bytes.to_string
  156. |> Base64.encode_string ~pad:false ~alphabet:Base64.uri_safe_alphabet
  157. |> Assrt.equals_string __LOC__ "POO-2U5yo0M";
  158. "https://dev.seppo.social/2023-11-13/activitypub/profile.jlda" |> Uri.of_string
  159. |> uhash |> Assrt.equals_string __LOC__ "POO-2U5yo0M";
  160. "https://digitalcourage.social/users/mro" |> Uri.of_string
  161. |> uhash |> Assrt.equals_string __LOC__ "BlamH6XVcgE";
  162. "https://digitalcourage.social/users/mrp" |> Uri.of_string
  163. |> uhash |> Assrt.equals_string __LOC__ "BlamH6XVch4";
  164. "ittps://digitalcourage.social/users/mrp" |> Uri.of_string
  165. |> uhash |> Assrt.equals_string __LOC__ "LesfhY0ub58";
  166. assert true
  167. let bench_update_1 n =
  168. Logr.info (fun m -> m "mapcdb_test.bench_update_1");
  169. let fn = mk_db "tmp/add_1.cdb" in
  170. let i = ref 1 in
  171. while !i < n do
  172. Printf.printf ".";
  173. let k = Printf.sprintf "%d %f" !i (Sys.time()) in
  174. let _ = Mapcdb.update_string k k fn in
  175. incr i
  176. done;
  177. assert true
  178. let bench_update_2 n =
  179. Logr.info (fun m -> m "mapcdb_test.bench_update_2");
  180. let fn = mk_db "tmp/add_2.cdb" in
  181. let l = List.init n
  182. (fun i ->
  183. let k :bytes = Printf.sprintf "%d" i |> Bytes.of_string in
  184. (k,k)) in
  185. let s = List.to_seq l in
  186. let all _ = true in
  187. let _ = Mapcdb.add_seq all s fn in
  188. assert true
  189. let bench_update_3 n =
  190. Logr.info (fun m -> m "mapcdb_test.bench_update_3");
  191. let fn = mk_db "tmp/add_3.cdb"
  192. and all _ = true
  193. and fkt add =
  194. let i = ref 0 in
  195. while !i < n do
  196. let k = Printf.sprintf "%d" !i |> Bytes.of_string in
  197. let v = Printf.sprintf "%f" (Sys.time()) |> Bytes.of_string in
  198. let _ = add (k,v) in
  199. incr i
  200. done;
  201. in
  202. let _ = Mapcdb.add_many all fkt fn in
  203. assert true
  204. let () =
  205. Unix.chdir "../../../test/";
  206. test_hash ();
  207. test_find_string_opt ();
  208. test_update ();
  209. test_fold ();
  210. test_fold_1 ();
  211. test_hash32_str_base64 ();
  212. test_hash63_str_base64 ();
  213. bench_update_1 1000;
  214. bench_update_2 100_000;
  215. bench_update_3 100_000;
  216. assert true