fts5tokenizer3.test 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. # 2024 Aug 10
  2. #
  3. # The author disclaims copyright to this source code. In place of
  4. # a legal notice, here is a blessing:
  5. #
  6. # May you do good and not evil.
  7. # May you find forgiveness for yourself and forgive others.
  8. # May you share freely, never taking more than you give.
  9. #
  10. #***********************************************************************
  11. #
  12. # Tests focusing on the built-in fts5 tokenizers.
  13. #
  # Pull in the shared fts5 test helpers that sit next to this script, then
  # set the prefix used when naming the test cases in this file.
  source [file join [file dirname [info script]] fts5_common.tcl]
  set testprefix fts5tokenizer3

  # Without fts5 compiled in (SQLITE_ENABLE_FTS5 undefined) there is nothing
  # to test: close out the test run and stop evaluating this file.
  ifcapable !fts5 {
    finish_test
    return
  }
  # Registered below as the second script argument of
  # sqlite3_fts5_create_tokenizer for "split_on_dot". Ignores its arguments
  # and returns the name of the proc that does the tokenizing.
  proc get_sod {args} {
    return split_on_dot
  }
  # Registered below as the second script argument of
  # sqlite3_fts5_create_tokenizer for "lowercase". Ignores its arguments
  # and returns the name of the proc that does the tokenizing.
  proc get_lowercase {args} {
    return lowercase
  }
  # Tokenizer callback: reports the whole of $txt as a single token, folded
  # to lower case, spanning offsets 0 through the length of $txt.
  #
  #   flags - accepted but not used by this proc
  #   txt   - the text to tokenize
  #
  # Always returns 0.
  proc lowercase {flags txt} {
    set folded [string tolower $txt]
    sqlite3_fts5_token $folded 0 [string length $txt]
    return 0
  }
  # Tokenizer callback: splits $txt on "." and reports each piece as a token
  # together with its start and end offsets within $txt.
  #
  #   flags - accepted but not used by this proc
  #   txt   - the text to tokenize
  #
  # Returns an empty string.
  #
  # NOTE(review): offsets are counted in characters via [string length];
  # that equals byte offsets only for single-byte (ASCII) text - confirm
  # if non-ASCII input is ever tested through this tokenizer.
  proc split_on_dot {flags txt} {
    set iOff 0
    foreach t [split $txt "."] {
      # Length of this piece only. Measuring the whole of $txt here would
      # push every end offset, and every later start offset, past the
      # real position of the token.
      set n [string length $t]
      sqlite3_fts5_token $t $iOff [expr {$iOff + $n}]

      # Step over the piece and the "." separator that followed it.
      incr iOff [expr {$n + 1}]
    }
    return ""
  }
  # Register the "lowercase" tokenizer and then "split_on_dot" with
  # "lowercase" as its -parent, once for every combination of the two being
  # created with or without the -v2 switch:
  #
  #   tn=1: lowercase plain, split_on_dot plain
  #   tn=2: lowercase -v2,   split_on_dot plain
  #   tn=3: lowercase plain, split_on_dot -v2
  #   tn=4: lowercase -v2,   split_on_dot -v2
  #
  # Each combination starts from a fresh database and must index
  # 'ABC.Def.ghi' as the three terms abc, def and ghi.
  set tn 0
  foreach sodSwitch {{} -v2} {
    foreach lcSwitch {{} -v2} {
      incr tn
      reset_db

      # An empty switch expands to no argument at all.
      sqlite3_fts5_create_tokenizer {*}$lcSwitch db lowercase get_lowercase
      sqlite3_fts5_create_tokenizer {*}$sodSwitch -parent lowercase db split_on_dot get_sod

      do_execsql_test 1.$tn.0 {
        CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=split_on_dot);
        CREATE VIRTUAL TABLE t1vocab USING fts5vocab(t1, instance);
        INSERT INTO t1 VALUES('ABC.Def.ghi');
      }

      do_execsql_test 1.$tn.1 {
        SELECT term FROM t1vocab ORDER BY 1
      } {abc def ghi}
    }
  }
  66. finish_test