fts5origintext5.test 6.7 KB


  1. # 2023 Dec 04
  2. #
  3. # The author disclaims copyright to this source code. In place of
  4. # a legal notice, here is a blessing:
  5. #
  6. # May you do good and not evil.
  7. # May you find forgiveness for yourself and forgive others.
  8. # May you share freely, never taking more than you give.
  9. #
  10. #***********************************************************************
  11. #
  12. # Tests for tables that use both tokendata=1 and contentless_delete=1.
  13. #
  # Load the shared FTS5 test infrastructure that lives next to this script.
  source [file join [file dirname [info script]] fts5_common.tcl]

  # Prefix for every test name in this script. It matches the script's own
  # name (fts5origintext5.test) so that reported test names identify it.
  set testprefix fts5origintext5

  # If SQLITE_ENABLE_FTS5 is not defined, omit this file.
  ifcapable !fts5 {
    finish_test
    return
  }
  # Return a pseudo-random integer in the range 0 to n-1 inclusive, drawn
  # from Tcl's rand() generator.
  #
  proc random {n} {
    return [expr {abs(int(rand() * $n))}]
  }
  # Return one element of the list passed as the only argument, chosen at
  # random using [random].
  #
  proc select_one {list} {
    set idx [random [llength $list]]
    return [lindex $list $idx]
  }
  # Given a term that consists entirely of alphabet characters, return every
  # upper/lower case variant of the term. The characters stay in their
  # original order; for each position the upper-case form is generated before
  # the lower-case form. e.g.
  #
  #   "abc" -> {ABC ABc AbC Abc aBC aBc abC abc}
  #
  # Optional argument lRet is the list of prefixes to which the variants are
  # appended. It defaults to a list holding a single empty string.
  #
  proc casify {term {lRet {{}}}} {
    foreach ch [split $term ""] {
      set upper [string toupper $ch]
      set lower [string tolower $ch]

      # Extend every prefix built so far with both cases of this character.
      set lNext [list]
      foreach prefix $lRet {
        lappend lNext $prefix$upper $prefix$lower
      }
      set lRet $lNext
    }
    return $lRet
  }
  # Return the list of base terms. [term] expands each of these into all of
  # its upper/lower case variants.
  #
  proc vocab {} {
    return {abc def ghi jkl mno pqr stu vwx yza}
  }
  # Return a random term: one of the upper/lower case variants of one of the
  # [vocab] entries. The full set of variants is built on first use and cached
  # in global variable ::expanded_vocab.
  #
  proc term {} {
    if {![info exists ::expanded_vocab]} {
      foreach base [vocab] {
        lappend ::expanded_vocab {*}[casify $base]
      }
    }
    return [select_one $::expanded_vocab]
  }
  # Return a document: a list of between 7 and 9 random terms, each obtained
  # from [term].
  #
  proc document {} {
    # [random 3] is 0, 1 or 2, so nTerm is 7, 8 or 9. The expression is braced
    # so that it is substituted only once.
    set nTerm [expr {[random 3] + 7}]
    set doc [list]
    for {set ii 0} {$ii < $nTerm} {incr ii} {
      lappend doc [term]
    }
    return $doc
  }
  # Make [document] callable from SQL as document().
  db func document document

  #-------------------------------------------------------------------------
  # Seed Tcl's rand() with a fixed value.
  expr {srand(6)}

  # NDOC is the number of rows initially inserted into each table under test.
  # NLOOP is the number of delete/re-insert rounds run against each table.
  set NDOC 200
  set NLOOP 50

  # Presumably registers the "origintext" tokenizer named in the tokenize=
  # options of the tables created below - confirm against the test harness.
  sqlite3_fts5_register_origintext db
  # FTS5 auxiliary function implementation. Argument cmd is the command used
  # to invoke the extension API. For each i from 0 up to one less than the
  # value returned by [$cmd xInstCount], fetch [$cmd xInstToken i 0] and
  # replace any embedded nul (0x00) characters in it with ".". Return the
  # list of resulting strings.
  #
  proc tokens {cmd} {
    set lToken [list]
    for {set i 0} {$i < [$cmd xInstCount]} {incr i} {
      set raw [$cmd xInstToken $i 0]
      lappend lToken [string map [list "\0" "."] $raw]
    }
    return $lToken
  }
  # Register [tokens] as FTS5 auxiliary function tokens().
  sqlite3_fts5_create_function db tokens tokens

  # FTS5 auxiliary function implementation that returns whatever
  # [$cmd xRowid] returns. It is configured as the rank function of the
  # tables created below.
  #
  proc rankfunc {cmd} {
    return [$cmd xRowid]
  }

  # Register [rankfunc] as FTS5 auxiliary function rankfunc().
  sqlite3_fts5_create_function db rankfunc rankfunc
  # Control implementation used to compute the expected output of tokens().
  #
  # The first argument is a term. Each remaining argument is a document (a
  # list of tokens). Every token that matches the term, ignoring case, adds
  # one element to the returned list, in document order:
  #
  #   * the token itself, if it is identical to the lower-case term, or
  #   * "<lower-case token>.<token>" otherwise.
  #
  proc ctrl_tokens {term args} {
    set want [string tolower $term]
    set lOut [list]
    foreach doc $args {
      foreach tok $doc {
        set folded [string tolower $tok]
        if {$folded != $want} continue
        if {$tok == $want} {
          lappend lOut $tok
        } else {
          lappend lOut $folded.$tok
        }
      }
    }
    return $lOut
  }
  # Make [ctrl_tokens] callable from SQL as ctrl_tokens().
  db func ctrl_tokens ctrl_tokens

  # For each term in [vocab], plus "nnn", query table ft for the term and
  # check that the rowids and tokens() values returned match the result
  # computed from table ctrl using LIKE and ctrl_tokens(). Each query is run
  # twice: once with no ORDER BY and once with "ORDER BY rank".
  #
  # Argument tn is the prefix for the names of the generated tests. The
  # current term is kept in global variable ::v so that the SQL can refer
  # to it.
  #
  proc do_all_vocab_test {tn} {
    foreach ::v [list {*}[vocab] nnn] {
      set expected [execsql {
        SELECT id, ctrl_tokens($::v, x) FROM ctrl WHERE x LIKE '%' || $::v || '%'
      }]

      do_execsql_test $tn.$::v.1 {
        SELECT rowid, tokens(ft) FROM ft($::v)
      } $expected

      do_execsql_test $tn.$::v.2 {
        SELECT rowid, tokens(ft) FROM ft($::v) ORDER BY rank
      } $expected
    }
  }
  # Test 1.0: create the single-column fts5 table under test (ft) and an
  # ordinary table (ctrl) that holds a copy of every document inserted into
  # it. ft is configured with a 64 byte page size and rankfunc() as its rank
  # function.
  do_execsql_test 1.0 {
    CREATE VIRTUAL TABLE ft USING fts5(
      x, tokenize="origintext unicode61", content=, contentless_delete=1,
      tokendata=1
    );
    CREATE TABLE ctrl(id INTEGER PRIMARY KEY, x TEXT);
    INSERT INTO ft(ft, rank) VALUES('pgsz', 64);
    INSERT INTO ft(ft, rank) VALUES('rank', 'rankfunc()');
  }

  # Test 1.1: insert NDOC random documents, using rowids 0 to NDOC-1, into
  # both ft and ctrl.
  do_test 1.1 {
    for {set ii 0} {$ii < $NDOC} {incr ii} {
      set doc [document]
      execsql {
        INSERT INTO ft(rowid, x) VALUES($ii, $doc);
        INSERT INTO ctrl(id, x) VALUES($ii, $doc);
      }
    }
  } {}
  # Debugging aids, normally disabled:
  #
  #execsql_pp { SELECT * FROM ctrl }
  #execsql_pp { SELECT * FROM ft }
  #fts5_aux_test_functions db
  #execsql_pp { SELECT rowid, tokens(ft), fts5_test_poslist(ft) FROM ft('ghi'); }

  # Test 1.2: check the freshly populated table.
  do_all_vocab_test 1.2

  # Tests 1.3.*: NLOOP times, select a random subset of the rows, replace
  # each selected row in both ft and ctrl with a new random document, then
  # check the table again.
  for {set ii 0} {$ii < $NLOOP} {incr ii} {
    set lRowid [execsql { SELECT id FROM ctrl WHERE random() % 2 }]
    foreach r $lRowid {
      execsql {
        DELETE FROM ft WHERE rowid = $r;
        DELETE FROM ctrl WHERE rowid = $r;
      }
      set doc [document]
      execsql {
        INSERT INTO ft(rowid, x) VALUES($r, $doc);
        INSERT INTO ctrl(id, x) VALUES($r, $doc);
      }
    }
    do_all_vocab_test 1.3.$ii
  }
  #-------------------------------------------------------------------------
  # Test 2.0: as test 1.0, but the fts5 table (ft2) and its control table
  # (ctrl2) each have two text columns, x and y.
  #
  do_execsql_test 2.0 {
    CREATE VIRTUAL TABLE ft2 USING fts5(
      x, y, tokenize="origintext unicode61", content=, contentless_delete=1,
      tokendata=1
    );
    CREATE TABLE ctrl2(id INTEGER PRIMARY KEY, x TEXT, y TEXT);
    INSERT INTO ft2(ft2, rank) VALUES('pgsz', 64);
    INSERT INTO ft2(ft2, rank) VALUES('rank', 'rankfunc()');
  }
  # Test 2.1: insert NDOC rows, using rowids 0 to NDOC-1, into both ft2 and
  # ctrl2. Each row holds two independently generated random documents: doc1
  # in column x and doc2 in column y.
  do_test 2.1 {
    for {set ii 0} {$ii < $NDOC} {incr ii} {
      set doc1 [document]
      set doc2 [document]
      execsql {
        INSERT INTO ft2(rowid, x, y) VALUES($ii, $doc1, $doc2);
        INSERT INTO ctrl2(id, x, y) VALUES($ii, $doc1, $doc2);
      }
    }
  } {}
  # For each term in [vocab], query the two-column table ft2 for the term and
  # check that the rowids and tokens() values returned match the result
  # computed from table ctrl2 using LIKE and ctrl_tokens() over columns x
  # and y. Each query is run twice: once with no ORDER BY and once with
  # "ORDER BY rank".
  #
  # Argument tn is the prefix for the names of the generated tests. The
  # current term is kept in global variable ::v so that the SQL can refer
  # to it.
  #
  proc do_all_vocab_test2 {tn} {
    foreach ::v [vocab] {
      set expected [execsql {
        SELECT id, ctrl_tokens($::v, x, y) FROM ctrl2
        WHERE x LIKE '%' || $::v || '%' OR y LIKE '%' || $::v || '%';
      }]

      do_execsql_test $tn.$::v.1 {
        SELECT rowid, tokens(ft2) FROM ft2($::v)
      } $expected

      do_execsql_test $tn.$::v.2 {
        SELECT rowid, tokens(ft2) FROM ft2($::v) ORDER BY rank
      } $expected
    }
  }
  # Test 2.2: check the freshly populated two-column table.
  do_all_vocab_test2 2.2

  # Tests 2.3.*: NLOOP times, select a random subset of the rows, replace
  # each selected row in both ft2 and ctrl2 with a new pair of random
  # documents, then check ft2 against ctrl2 again.
  #
  # ft2 and ctrl2 must receive exactly the same values (doc1 in x, doc2 in
  # y). Otherwise the control table no longer describes the contents of the
  # fts5 table and the comparison made by do_all_vocab_test2 is meaningless.
  for {set ii 0} {$ii < $NLOOP} {incr ii} {
    set lRowid [execsql { SELECT id FROM ctrl2 WHERE random() % 2 }]
    foreach r $lRowid {
      execsql { DELETE FROM ft2 WHERE rowid = $r }
      execsql { DELETE FROM ctrl2 WHERE rowid = $r }
      set doc1 [document]
      set doc2 [document]
      execsql { INSERT INTO ft2(rowid, x, y) VALUES($r, $doc1, $doc2) }
      execsql { INSERT INTO ctrl2(id, x, y) VALUES($r, $doc1, $doc2) }
    }
    do_all_vocab_test2 2.3.$ii
  }
  #-------------------------------------------------------------------------
  # Switch to a vocabulary of three 5-letter terms. The cached expansion of
  # the previous vocabulary is discarded first, so that the next call to
  # [term] rebuilds ::expanded_vocab from the new list.
  #
  unset -nocomplain ::expanded_vocab
  proc vocab {} {
    return {abcde fghij klmno}
  }
  # For each term in [vocab], plus "nnn", query table ft3 for the term and
  # check that the rowids and tokens() values returned match the result
  # computed from table ctrl3 using LIKE and ctrl_tokens(). Each query is run
  # twice: once with no ORDER BY and once with "ORDER BY rank".
  #
  # Argument tn is the prefix for the names of the generated tests. The
  # current term is kept in global variable ::v so that the SQL can refer
  # to it.
  #
  proc do_all_vocab_test3 {tn} {
    foreach ::v [list {*}[vocab] nnn] {
      set expected [execsql {
        SELECT rowid, ctrl_tokens($::v, w) FROM ctrl3 WHERE w LIKE '%' || $::v || '%'
      }]

      do_execsql_test $tn.$::v.1 {
        SELECT rowid, tokens(ft3) FROM ft3($::v)
      } $expected

      do_execsql_test $tn.$::v.2 {
        SELECT rowid, tokens(ft3) FROM ft3($::v) ORDER BY rank
      } $expected
    }
  }
  # Test 3.0: create fts5 table ft3, with rankfunc() as its rank function,
  # and control table ctrl3. Unlike the earlier tables, no 'pgsz' value is
  # configured for ft3.
  do_execsql_test 3.0 {
    CREATE VIRTUAL TABLE ft3 USING fts5(
      w, tokenize="origintext unicode61", content=, contentless_delete=1,
      tokendata=1
    );
    INSERT INTO ft3(ft3, rank) VALUES('rank', 'rankfunc()');
    CREATE TABLE ctrl3(w);
  }

  # Test 3.1: the recursive CTE yields two rows (i=1 and i=2), so two random
  # documents are inserted into ctrl3. They are then copied, with their
  # rowids, into ft3.
  do_execsql_test 3.1 {
    WITH s(i) AS (
      SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<2
    )
    INSERT INTO ctrl3 SELECT document() FROM s;
    INSERT INTO ft3(rowid, w) SELECT rowid, w FROM ctrl3;
  }

  # Test 3.2: check ft3 against ctrl3.
  do_all_vocab_test3 3.2

  finish_test