fts5prefix.test 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. # 2015 Jan 13
  2. #
  3. # The author disclaims copyright to this source code. In place of
  4. # a legal notice, here is a blessing:
  5. #
  6. # May you do good and not evil.
  7. # May you find forgiveness for yourself and forgive others.
  8. # May you share freely, never taking more than you give.
  9. #
  10. #***********************************************************************
  11. #
  12. # This file contains tests focused on prefix indexes.
  13. #
  14. source [file join [file dirname [info script]] fts5_common.tcl]
  15. set testprefix fts5prefix
  16. # If SQLITE_ENABLE_FTS5 is not defined, omit this file.
  17. ifcapable !fts5 {
  18. finish_test
  19. return
  20. }
  21. do_execsql_test 1.0 {
  22. CREATE VIRTUAL TABLE xx USING fts5(x, prefix=1);
  23. INSERT INTO xx VALUES('one two three');
  24. INSERT INTO xx VALUES('four five six');
  25. INSERT INTO xx VALUES('seven eight nine ten');
  26. }
  27. do_execsql_test 1.1 {
  28. SELECT rowid FROM xx WHERE xx MATCH 't*'
  29. } {1 3}
  30. #-------------------------------------------------------------------------
  31. # Check that prefix indexes really do index n-character prefixes, not
  32. # n-byte prefixes. Use the ascii tokenizer so as not to be confused by
  33. # diacritic removal.
  34. #
  35. do_execsql_test 2.0 {
  36. CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = ascii, prefix = 2)
  37. }
  38. do_test 2.1 {
  39. foreach {rowid string} {
  40. 1 "\xCA\xCB\xCC\xCD"
  41. 2 "\u1234\u5678\u4321\u8765"
  42. } {
  43. execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $string) }
  44. }
  45. } {}
  46. do_execsql_test 2.2 {
  47. INSERT INTO t1(t1) VALUES('integrity-check');
  48. }
  49. foreach {tn q res} {
  50. 1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1
  51. 2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2
  52. } {
  53. do_execsql_test 2.3.$tn $q $res
  54. }
  55. #-------------------------------------------------------------------------
  56. # Check that prefix queries with:
  57. #
  58. # * a column filter, and
  59. # * no prefix index.
  60. #
  61. # work Ok.
  62. #
  63. do_execsql_test 3.0 {
  64. CREATE VIRTUAL TABLE t3 USING fts5(a, b, c);
  65. INSERT INTO t3(t3, rank) VALUES('pgsz', 32);
  66. BEGIN;
  67. INSERT INTO t3 VALUES('acb ccc bba', 'cca bba bca', 'bbc ccc bca'); -- 1
  68. INSERT INTO t3 VALUES('cbb cac cab', 'abb aac bba', 'aab ccc cac'); -- 2
  69. INSERT INTO t3 VALUES('aac bcb aac', 'acb bcb caa', 'aca bab bca'); -- 3
  70. INSERT INTO t3 VALUES('aab ccb ccc', 'aca cba cca', 'aca aac cbb'); -- 4
  71. INSERT INTO t3 VALUES('bac aab bab', 'ccb bac cba', 'acb aba abb'); -- 5
  72. INSERT INTO t3 VALUES('bab abc ccb', 'acb cba abb', 'cbb aaa cab'); -- 6
  73. INSERT INTO t3 VALUES('cbb bbc baa', 'aab aca baa', 'bcc cca aca'); -- 7
  74. INSERT INTO t3 VALUES('abc bba abb', 'cac abc cba', 'acc aac cac'); -- 8
  75. INSERT INTO t3 VALUES('bbc bbc cab', 'bcb ccb cba', 'bcc cac acb'); -- 9
  76. COMMIT;
  77. }
  78. foreach {tn match res} {
  79. 1 "a : c*" {1 2 4 6 7 9}
  80. 2 "b : c*" {1 3 4 5 6 8 9}
  81. 3 "c : c*" {1 2 4 6 7 8 9}
  82. 4 "a : b*" {1 3 5 6 7 8 9}
  83. 5 "b : b*" {1 2 3 5 7 9}
  84. 6 "c : b*" {1 3 7 9}
  85. 7 "a : a*" {1 3 4 5 6 8}
  86. 8 "b : a*" {2 3 4 6 7 8}
  87. 9 "c : a*" {2 3 4 5 6 7 8 9}
  88. } {
  89. do_execsql_test 3.1.$tn {
  90. SELECT rowid FROM t3($match)
  91. } $res
  92. }
  93. do_test 3.2 {
  94. expr srand(0)
  95. execsql { DELETE FROM t3 }
  96. for {set i 0} {$i < 1000} {incr i} {
  97. set a [fts5_rnddoc 3]
  98. set b [fts5_rnddoc 8]
  99. set c [fts5_rnddoc 20]
  100. execsql { INSERT INTO t3 VALUES($a, $b, $c) }
  101. }
  102. execsql { INSERT INTO t3(t3) VALUES('integrity-check') }
  103. } {}
  104. proc gmatch {col pattern} {
  105. expr {[lsearch -glob $col $pattern]>=0}
  106. }
  107. db func gmatch gmatch
  108. proc ghl {col pattern} {
  109. foreach t $col {
  110. if {[string match $pattern $t]} {
  111. lappend res "*$t*"
  112. } else {
  113. lappend res $t
  114. }
  115. }
  116. set res
  117. }
  118. db func ghl ghl
  119. set COLS(a) 0
  120. set COLS(b) 1
  121. set COLS(c) 2
  122. for {set x 0} {$x<2} {incr x} {
  123. foreach {tn pattern} {
  124. 1 {xa*}
  125. 2 {xb*}
  126. 3 {xc*}
  127. 4 {xd*}
  128. 5 {xe*}
  129. 6 {xf*}
  130. 7 {xg*}
  131. 8 {xh*}
  132. 9 {xi*}
  133. 10 {xj*}
  134. } {
  135. foreach col {a b c} {
  136. # Check that the list of returned rowids is correct.
  137. #
  138. set res [db eval "SELECT rowid FROM t3 WHERE gmatch($col, '$pattern')"]
  139. set query "$col : $pattern"
  140. do_execsql_test 3.3.$x.$tn.$col.rowid {
  141. SELECT rowid FROM t3($query);
  142. } $res
  143. # Check that the highlight() function works.
  144. #
  145. set res [db eval \
  146. "SELECT ghl($col, '$pattern') FROM t3 WHERE gmatch($col, '$pattern')"
  147. ]
  148. set idx $COLS($col)
  149. do_execsql_test 3.3.$x.$tn.$col.highlight {
  150. SELECT highlight(t3, $idx, '*', '*') FROM t3($query);
  151. } $res
  152. }
  153. foreach colset {{a b} {b c} {c a} {a c} {b a}} {
  154. # Check that the list of returned rowids is correct.
  155. #
  156. foreach {col1 col2} $colset {}
  157. set expr "gmatch($col1, '$pattern') OR gmatch($col2, '$pattern')"
  158. set res [db eval "SELECT rowid FROM t3 WHERE $expr"]
  159. set query "{$colset} : $pattern"
  160. do_execsql_test 3.3.$x.$tn.{$colset}.rowid {
  161. SELECT rowid FROM t3($query);
  162. } $res
  163. set resq "SELECT ghl($col1, '$pattern'), ghl($col2, '$pattern')"
  164. append resq " FROM t3 WHERE $expr"
  165. set res [db eval $resq]
  166. set idx1 $COLS($col1)
  167. set idx2 $COLS($col2)
  168. do_execsql_test 3.3.$x.$tn.{$colset}.highlight {
  169. SELECT highlight(t3, $idx1, '*', '*'), highlight(t3, $idx2, '*', '*')
  170. FROM t3($query)
  171. } $res
  172. }
  173. }
  174. execsql { INSERT INTO t3(t3) VALUES('optimize') }
  175. execsql { INSERT INTO t3(t3) VALUES('integrity-check') }
  176. }
  177. #-------------------------------------------------------------------------
  178. #
  179. reset_db
  180. do_execsql_test 4.0 {
  181. CREATE VIRTUAL TABLE t2 USING fts5(c1, c2);
  182. INSERT INTO t2 VALUES('xa xb', 'xb xa');
  183. INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 2
  184. INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 4
  185. INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 8
  186. INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 16
  187. INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 32
  188. INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 64
  189. INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 128
  190. INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 256
  191. INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 512
  192. INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 1024
  193. INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 2048
  194. INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 4096
  195. SELECT count(*) FROM t2('x*');
  196. } {4096}
  197. do_execsql_test 4.1 {
  198. UPDATE t2 SET c2 = 'ya yb';
  199. SELECT count(*) FROM t2('c1:x*');
  200. SELECT count(*) FROM t2('c2:x*');
  201. } {4096 0}
  202. do_execsql_test 4.2 {
  203. UPDATE t2 SET c2 = 'xa';
  204. SELECT count(*) FROM t2('c1:x*');
  205. SELECT count(*) FROM t2('c2:x*');
  206. } {4096 4096}
  207. #-------------------------------------------------------------------------
  208. #
  209. reset_db
  210. proc rnddoc {n} {
  211. set map [list a b c d]
  212. set doc [list]
  213. for {set i 0} {$i < $n} {incr i} {
  214. lappend doc "x[lindex $map [expr int(rand()*4)]]"
  215. }
  216. set doc
  217. }
  218. set cols [list]
  219. for {set i 1} {$i<250} {incr i} {
  220. lappend cols "c$i"
  221. lappend vals "'[rnddoc 10]'"
  222. }
  223. do_test 5.0 {
  224. execsql "CREATE VIRTUAL TABLE t4 USING fts5([join $cols ,])"
  225. execsql {INSERT INTO t4(t4, rank) VALUES('pgsz', 32)}
  226. execsql "INSERT INTO t4 VALUES([join $vals ,])"
  227. execsql "INSERT INTO t4 VALUES([join $vals ,])"
  228. execsql "INSERT INTO t4 VALUES([join $vals ,])"
  229. execsql "INSERT INTO t4 VALUES([join $vals ,])"
  230. } {}
  231. proc gmatch {col pattern} {
  232. expr {[lsearch -glob $col $pattern]>=0}
  233. }
  234. db func gmatch gmatch
  235. foreach {tn col pattern} {
  236. 1 c100 {xa*}
  237. 2 c200 {xb*}
  238. } {
  239. set res [db eval "SELECT rowid FROM t4 WHERE gmatch($col, \$pattern)"]
  240. set query "$col : $pattern"
  241. do_execsql_test 5.$tn { SELECT rowid FROM t4($query) } $res
  242. }
  243. reset_db
  244. db func fts5_rnddoc fts5_rnddoc
  245. do_test 6.0 {
  246. execsql {
  247. CREATE VIRTUAL TABLE t5 USING fts5(x, y);
  248. INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
  249. INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
  250. INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
  251. INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
  252. }
  253. } {}
  254. proc gmatch {col pattern} {
  255. expr {[lsearch -glob $col $pattern]>=0}
  256. }
  257. db func gmatch gmatch
  258. foreach {tn col pattern} {
  259. 1 y {xa*}
  260. 2 y {xb*}
  261. 3 y {xc*}
  262. 4 x {xa*}
  263. 5 x {xb*}
  264. 6 x {xc*}
  265. } {
  266. set res [db eval "SELECT rowid FROM t5 WHERE gmatch($col, \$pattern)"]
  267. set query "$col : $pattern"
  268. do_execsql_test 6.$tn { SELECT rowid FROM t5($query) } $res
  269. }
  270. #-------------------------------------------------------------------------
  271. # Check that the various ways of creating prefix indexes produce the
  272. # same database on disk.
  273. #
  274. save_prng_state
  275. foreach {tn create} {
  276. 1 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1,2,3") }
  277. 2 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1 2 3") }
  278. 3 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix=1, prefix=2, prefix=3) }
  279. 4 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1 2", prefix=3) }
  280. } {
  281. execsql { DROP TABLE IF EXISTS tt }
  282. restore_prng_state
  283. execsql $create
  284. execsql {
  285. INSERT INTO tt VALUES('cc b ggg ccc aa eee hh', 'aa g b hh a e');
  286. INSERT INTO tt VALUES('cc bb cc gg j g cc', 'ii jjj ggg jjj cc cc');
  287. INSERT INTO tt VALUES('h eee cc h iii', 'aaa iii dd iii dd');
  288. INSERT INTO tt VALUES('jjj hh eee c e b gg', 'j bbb jj ddd jj');
  289. INSERT INTO tt VALUES('ii hhh aaa ff c hhh iii', 'j cc hh bb e');
  290. INSERT INTO tt VALUES('e fff hhh i aaa', 'g b aa gg c aa dd');
  291. INSERT INTO tt VALUES('i aaa ccc gg hhh aa h', 'j bbb bbb d ff');
  292. INSERT INTO tt VALUES('g f gg ff ff jjj d', 'jjj d j fff fff ee j');
  293. INSERT INTO tt VALUES('a cc e ccc jjj c', 'ccc iii d bb a eee g');
  294. INSERT INTO tt VALUES('jj hh hh bb bbb gg', 'j c jjj bb iii f');
  295. INSERT INTO tt VALUES('a ggg g cc ccc aa', 'jjj j j aaa c');
  296. INSERT INTO tt VALUES('ddd j dd b i', 'aaa bbb iii ggg ff ccc ddd');
  297. INSERT INTO tt VALUES('jj ii hh c ii h gg', 'hhh bbb ddd bbb hh g ggg');
  298. INSERT INTO tt VALUES('aa hhh ccc h ggg ccc', 'iii d jj a ff ii');
  299. }
  300. #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM tt_data} {puts $r}
  301. if {$tn==1} {
  302. set ::checksum [execsql {SELECT md5sum(id, block) FROM tt_data}]
  303. } else {
  304. do_execsql_test 7.$tn {
  305. SELECT md5sum(id, block) FROM tt_data
  306. } [list $::checksum]
  307. }
  308. }
  309. finish_test