(* t_search.ml (1.4 KB) *)
  1. (* *)
  2. open Seppo_lib
  (** Checks that [Ubase.from_utf8] turns both spellings of the same
      Vietnamese name into the unaccented ["Vu Ngoc Phan"].

      The two inputs are written as escaped UTF-8 bytes:
      - [precomposed] uses single code points (U+0169, U+1ECD);
      - [decomposed] uses a base letter followed by a combining mark
        (U+0303, U+0323).

      The first two assertions compare those bytes with literals typed
      directly into the source. The literals look the same on screen, so the
      first must stay precomposed and the second decomposed; presumably
      [Assrt.equals_string] compares bytes, in which case an editor that
      normalises the file would break them. *)
  let test_ubase () =
    let precomposed = "V\197\169 Ng\225\187\141c Phan" in
    let decomposed = "Vu\204\131 Ngo\204\163c Phan" in
    (* Source literals versus the escaped byte sequences. *)
    precomposed |> Assrt.equals_string __LOC__ "Vũ Ngọc Phan";
    decomposed |> Assrt.equals_string __LOC__ "Vũ Ngọc Phan";
    (* Both spellings must come out as the same plain string. *)
    precomposed |> Ubase.from_utf8 |> Assrt.equals_string __LOC__ "Vu Ngoc Phan";
    decomposed |> Ubase.from_utf8 |> Assrt.equals_string __LOC__ "Vu Ngoc Phan"
  (** Sanity checks on the [Str] primitives: a case-insensitive wildcard
      pattern matches ["haystack"], and a case-insensitive literal needle
      ["ySt"] is found at offset 2 in ["haystack"] but not in
      ["hay_stack"]. *)
  let test_regexp () =
    let wildcard = Str.regexp_case_fold ".*yst.*" in
    assert (Str.string_match wildcard "haystack" 0);
    let needle = Str.regexp_string_case_fold "ySt" in
    (* Offset of the first hit of [needle] in [subject], or -1 if there is
       none. *)
    let offset_or_minus_one subject =
      match Str.search_forward needle subject 0 with
      | offset -> offset
      | exception Not_found -> -1
    in
    (* Unguarded on purpose: a miss here surfaces as [Not_found]. *)
    assert (Str.search_forward needle "haystack" 0 = 2);
    assert (offset_or_minus_one "haystack" = 2);
    assert (offset_or_minus_one "hay_stack" = -1);
    assert (offset_or_minus_one "haystack" >= 0)
    (* Disabled assertions kept from the original; [c0] is not bound in the
       visible code:
       Assrt.equals_int __LOC__ 5 (List.length c0.url_cleaner);
       Assrt.equals_int __LOC__ 2 (List.length c0.posse)
    *)
  (** Checks that [Search.string_rank] gives ["haystack"] a rank of 1 for the
      case-insensitive pattern [".*yst.*"]. *)
  let test_needle () =
    let pattern = Str.regexp_case_fold ".*yst.*" in
    "haystack" |> Search.string_rank pattern |> Assrt.equals_int __LOC__ 1
  (** Prepares three accented needles with [Search.needles_prepare] and
      checks that [Search.entry_rank] gives the entry [("my fÓo", "", "")] a
      rank of 2. *)
  let test_needles () =
    let needles = Search.needles_prepare [ "föo"; "bär"; "báz" ] in
    let rank = Search.entry_rank needles ("my fÓo", "", "") in
    Assrt.equals_int __LOC__ 2 rank
  (** Checks that [Ubase.from_utf8] strips the accent from ["ö"] and leaves
      the emoji in the same string unchanged. *)
  let test_emoji () =
    let plain = Ubase.from_utf8 "my 😷 ö" in
    Assrt.equals_string __LOC__ "my 😷 o" plain
  (** Test entry point: changes into ["../../../test/"] (relative to the
      current working directory) and then runs every test in declaration
      order. The first failing test aborts the run with its exception. *)
  let () =
    Unix.chdir "../../../test/";
    List.iter
      (fun run_test -> run_test ())
      [ test_ubase; test_regexp; test_needle; test_needles; test_emoji ]