fts5tokenizer2.test 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. # 2023 Nov 03
  2. #
  3. # The author disclaims copyright to this source code. In place of
  4. # a legal notice, here is a blessing:
  5. #
  6. # May you do good and not evil.
  7. # May you find forgiveness for yourself and forgive others.
  8. # May you share freely, never taking more than you give.
  9. #
  10. #***********************************************************************
  11. #
  12. # Tests focusing on the built-in fts5 tokenizers.
  13. #
  # Load fts5_common.tcl from the directory that contains this script.
  source [file join [file dirname [info script]] fts5_common.tcl]
  set testprefix fts5tokenizer2

  # If SQLITE_ENABLE_FTS5 is not defined, omit this file.
  ifcapable !fts5 { finish_test ; return }

  # Register a tokenizer called "tst" on [db], passing get_tst_tokenizer as
  # the associated script. Table t1 below selects it with tokenize=tst.
  sqlite3_fts5_create_tokenizer db tst get_tst_tokenizer
  # Script handed to sqlite3_fts5_create_tokenizer for the "tst" tokenizer.
  # Any arguments are accepted and ignored; the result is always the name of
  # the proc that does the tokenizing.
  proc get_tst_tokenizer {args} {
    return tst_tokenizer
  }
  # Tokenizer callback for the "tst" tokenizer.
  #
  # Splits $txt into maximal runs of characters that agree on
  # [string is upper]: each switch between an upper-case run and a
  # not-upper-case run starts a new token, and no character is dropped.
  # Every run is reported through [sqlite3_fts5_token] together with its
  # start and end offsets. For example "AAdontBBmess" is reported as
  # AA (0-2), dont (2-6), BB (6-8) and mess (8-12).
  #
  #   flags - accepted but not consulted
  #   txt   - text to tokenize
  #
  # Offsets are counted with [string length], i.e. in characters.
  # NOTE(review): presumably only ASCII text is tokenized here, where
  # character and byte offsets coincide -- confirm before feeding it
  # multi-byte input.
  proc tst_tokenizer {flags txt} {
    set token ""
    set lTok [list]
    foreach c [split $txt {}] {
      # A change of case class closes the run collected so far.
      if {$token ne "" && [string is upper $token] != [string is upper $c]} {
        lappend lTok $token
        set token ""
      }
      append token $c
    }
    if {$token ne ""} { lappend lTok $token }

    # Report the runs in order; each token starts where the previous ended.
    set iOff 0
    foreach t $lTok {
      set iEnd [expr {$iOff + [string length $t]}]
      sqlite3_fts5_token $t $iOff $iEnd
      set iOff $iEnd
    }
  }
  # A table with a single column tokenized by "tst", holding one row.
  do_execsql_test 1.0 {
    CREATE VIRTUAL TABLE t1 USING fts5(t, tokenize=tst);
  }
  do_execsql_test 1.1 {
    INSERT INTO t1 VALUES('AAdontBBmess');
  }

  # snippet() is expected to wrap only the matched token.
  do_execsql_test 1.2 {
    SELECT snippet(t1, 0, '>', '<', '...', 4) FROM t1('BB');
  } {AAdont>BB<mess}

  # highlight() for each individual token, followed by a query naming two
  # adjacent tokens (1.7), which is expected to come back as a single span.
  foreach {tn match expect} {
    1.3 {BB}      {AAdont>BB<mess}
    1.4 {AA}      {>AA<dontBBmess}
    1.5 {dont}    {AA>dont<BBmess}
    1.6 {mess}    {AAdontBB>mess<}
    1.7 {BB mess} {AAdont>BBmess<}
  } {
    do_execsql_test $tn "
      SELECT highlight(t1, 0, '>', '<') FROM t1('$match');
    " [list $expect]
  }
  # 2024-08-06 https://sqlite.org/forum/forumpost/171bcc2bcd
  # Error handling of tokenize= arguments: creating a table with each of the
  # tokenizer specifications below is expected to fail with
  # "error in tokenizer constructor".
  #
  foreach {tn spec} {
    1 {ascii none}
    2 {unicode61 none}
    3 {porter none}
    4 {trigram none}
    5 {ascii none 0}
    6 {unicode61 none 0}
    7 {porter none 0}
    8 {trigram none 0}
  } {
    # Make sure no t2 exists before the attempt.
    db eval { DROP TABLE IF EXISTS t2; }
    do_catchsql_test 2.$tn "
      DROP TABLE IF EXISTS t2;
      CREATE VIRTUAL TABLE t2 USING fts5(a,b,c,tokenize='$spec');
    " {1 {error in tokenizer constructor}}
  }

  finish_test