fts5synonym.test 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on custom tokenizers that support synonyms.
#
  14. source [file join [file dirname [info script]] fts5_common.tcl]
  15. set testprefix fts5synonym
  16. # If SQLITE_ENABLE_FTS5 is not defined, omit this file.
  17. ifcapable !fts5 {
  18. finish_test
  19. return
  20. }
  21. proc tcl_create {args} { return "tcl_tokenize" }
  22. foreach_detail_mode $testprefix {
  23. #-------------------------------------------------------------------------
  24. # Warm body test for the code in fts5_tcl.c.
  25. #
  26. fts5_tclnum_register db
  27. do_execsql_test 1.0 {
  28. CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = "tclnum document", detail=%DETAIL%);
  29. INSERT INTO ft VALUES('abc def ghi');
  30. INSERT INTO ft VALUES('jkl mno pqr');
  31. SELECT rowid, x FROM ft WHERE ft MATCH 'def';
  32. SELECT x, rowid FROM ft WHERE ft MATCH 'pqr';
  33. } {1 {abc def ghi} {jkl mno pqr} 2}
  34. #-------------------------------------------------------------------------
  35. # Test a tokenizer that supports synonyms by adding extra entries to the
  36. # FTS index.
  37. #
  38. reset_db
  39. fts5_tclnum_register db
  40. do_execsql_test 2.0 {
  41. CREATE VIRTUAL TABLE ft USING fts5(
  42. x, tokenize = "tclnum document", detail=%DETAIL%
  43. );
  44. INSERT INTO ft VALUES('one two three');
  45. INSERT INTO ft VALUES('four five six');
  46. INSERT INTO ft VALUES('eight nine ten');
  47. } {}
  48. foreach {tn expr res} {
  49. 1 "3" 1
  50. 2 "eight OR 8 OR 5" {2 3}
  51. 3 "10" {}
  52. 4 "1*" {1}
  53. 5 "1 + 2" {1}
  54. } {
  55. if {![fts5_expr_ok $expr ft]} continue
  56. do_execsql_test 2.1.$tn {
  57. SELECT rowid FROM ft WHERE ft MATCH $expr
  58. } $res
  59. }
  60. #-------------------------------------------------------------------------
  61. # Test some broken tokenizers:
  62. #
  63. # 3.1.*: A tokenizer that declares the very first token to be colocated.
  64. #
  65. # 3.2.*: A tokenizer that reports two identical tokens at the same position.
  66. # This is allowed.
  67. #
  68. reset_db
  69. sqlite3_fts5_create_tokenizer db tcl tcl_create
  70. proc tcl_tokenize {tflags text} {
  71. set bColo 1
  72. foreach {w iStart iEnd} [fts5_tokenize_split $text] {
  73. if {$bColo} {
  74. sqlite3_fts5_token -colo $w $iStart $iEnd
  75. set bColo 0
  76. } {
  77. sqlite3_fts5_token $w $iStart $iEnd
  78. }
  79. }
  80. }
  81. do_execsql_test 3.1.0 {
  82. CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  83. INSERT INTO ft VALUES('one two three');
  84. CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
  85. SELECT * FROM vv;
  86. } {
  87. one 1 1 three 1 1 two 1 1
  88. }
  89. do_execsql_test 3.1.1 {
  90. INSERT INTO ft(ft) VALUES('integrity-check');
  91. } {}
  92. proc tcl_tokenize {tflags text} {
  93. foreach {w iStart iEnd} [fts5_tokenize_split $text] {
  94. sqlite3_fts5_token $w $iStart $iEnd
  95. }
  96. }
  97. do_execsql_test 3.1.2 {
  98. SELECT rowid FROM ft WHERE ft MATCH 'one two three'
  99. } {1}
  100. reset_db
  101. sqlite3_fts5_create_tokenizer db tcl tcl_create
  102. proc tcl_tokenize {tflags text} {
  103. foreach {w iStart iEnd} [fts5_tokenize_split $text] {
  104. sqlite3_fts5_token $w $iStart $iEnd
  105. sqlite3_fts5_token -colo $w $iStart $iEnd
  106. }
  107. }
  108. do_execsql_test 3.2.0 {
  109. CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  110. INSERT INTO ft VALUES('one one two three');
  111. CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
  112. SELECT * FROM vv;
  113. } {
  114. one 1 4 three 1 2 two 1 2
  115. }
  116. do_execsql_test 3.2.1 {
  117. SELECT rowid FROM ft WHERE ft MATCH 'one';
  118. } {1}
  119. do_execsql_test 3.2.2 {
  120. SELECT rowid FROM ft WHERE ft MATCH 'one two three';
  121. } {1}
  122. do_execsql_test 3.2.3 {
  123. SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three';
  124. } {1}
  125. do_execsql_test 3.2.4 {
  126. SELECT rowid FROM ft WHERE ft MATCH 'one two two three';
  127. } {1}
  128. do_execsql_test 3.2.5 {
  129. SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
  130. } {}
  131. #-------------------------------------------------------------------------
  132. # Check that expressions with synonyms can be parsed and executed.
  133. #
  134. reset_db
  135. fts5_tclnum_register db
  136. foreach {tn expr res} {
  137. 1 {abc} {"abc"}
  138. 2 {one} {"one"|"i"|"1"}
  139. 3 {3} {"3"|"iii"|"three"}
  140. 4 {3*} {"3" *}
  141. } {
  142. do_execsql_test 4.1.$tn {
  143. SELECT fts5_expr($expr, 'tokenize=tclnum')
  144. } [list $res]
  145. }
  146. do_execsql_test 4.2.1 {
  147. CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tclnum, detail=%DETAIL%);
  148. INSERT INTO xx VALUES('one two');
  149. INSERT INTO xx VALUES('three four');
  150. }
  151. do_execsql_test 4.2.2 {
  152. SELECT rowid FROM xx WHERE xx MATCH '2'
  153. } {1}
  154. do_execsql_test 4.2.3 {
  155. SELECT rowid FROM xx WHERE xx MATCH '3'
  156. } {2}
  157. do_test 5.0 {
  158. execsql {
  159. CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tclnum, detail=%DETAIL%)
  160. }
  161. foreach {rowid a b} {
  162. 1 {four v 4 i three} {1 3 five five 4 one}
  163. 2 {5 1 3 4 i} {2 2 v two 4}
  164. 3 {5 i 5 2 four 4 1} {iii ii five two 1}
  165. 4 {ii four 4 one 5 three five} {one 5 1 iii 4 3}
  166. 5 {three i v i four 4 1} {ii five five five iii}
  167. 6 {4 2 ii two 2 iii} {three 1 four 4 iv 1 iv}
  168. 7 {ii ii two three 2 5} {iii i ii iii iii one one}
  169. 8 {2 ii i two 3 three 2} {two iv v iii 3 five}
  170. 9 {i 2 iv 3 five four v} {iii 4 three i three ii 1}
  171. } {
  172. execsql { INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b) }
  173. }
  174. } {}
  175. foreach {tn q res} {
  176. 1 {one} {
  177. 1 {four v 4 [i] three} {[1] 3 five five 4 [one]}
  178. 2 {5 [1] 3 4 [i]} {2 2 v two 4}
  179. 3 {5 [i] 5 2 four 4 [1]} {iii ii five two [1]}
  180. 4 {ii four 4 [one] 5 three five} {[one] 5 [1] iii 4 3}
  181. 5 {three [i] v [i] four 4 [1]} {ii five five five iii}
  182. 6 {4 2 ii two 2 iii} {three [1] four 4 iv [1] iv}
  183. 7 {ii ii two three 2 5} {iii [i] ii iii iii [one] [one]}
  184. 8 {2 ii [i] two 3 three 2} {two iv v iii 3 five}
  185. 9 {[i] 2 iv 3 five four v} {iii 4 three [i] three ii [1]}
  186. }
  187. 2 {five four} {
  188. 1 {[four] [v] [4] i three} {1 3 [five] [five] [4] one}
  189. 2 {[5] 1 3 [4] i} {2 2 [v] two [4]}
  190. 3 {[5] i [5] 2 [four] [4] 1} {iii ii [five] two 1}
  191. 4 {ii [four] [4] one [5] three [five]} {one [5] 1 iii [4] 3}
  192. 5 {three i [v] i [four] [4] 1} {ii [five] [five] [five] iii}
  193. 8 {2 ii i two 3 three 2} {two [iv] [v] iii 3 [five]}
  194. 9 {i 2 [iv] 3 [five] [four] [v]} {iii [4] three i three ii 1}
  195. }
  196. 3 {one OR two OR iii OR 4 OR v} {
  197. 1 {[four] [v] [4] [i] [three]} {[1] [3] [five] [five] [4] [one]}
  198. 2 {[5] [1] [3] [4] [i]} {[2] [2] [v] [two] [4]}
  199. 3 {[5] [i] [5] [2] [four] [4] [1]} {[iii] [ii] [five] [two] [1]}
  200. 4 {[ii] [four] [4] [one] [5] [three] [five]} {[one] [5] [1] [iii] [4] [3]}
  201. 5 {[three] [i] [v] [i] [four] [4] [1]} {[ii] [five] [five] [five] [iii]}
  202. 6 {[4] [2] [ii] [two] [2] [iii]} {[three] [1] [four] [4] [iv] [1] [iv]}
  203. 7 {[ii] [ii] [two] [three] [2] [5]} {[iii] [i] [ii] [iii] [iii] [one] [one]}
  204. 8 {[2] [ii] [i] [two] [3] [three] [2]} {[two] [iv] [v] [iii] [3] [five]}
  205. 9 {[i] [2] [iv] [3] [five] [four] [v]} {[iii] [4] [three] [i] [three] [ii] [1]}
  206. }
  207. 4 {5 + 1} {
  208. 2 {[5 1] 3 4 i} {2 2 v two 4}
  209. 3 {[5 i] 5 2 four 4 1} {iii ii five two 1}
  210. 4 {ii four 4 one 5 three five} {one [5 1] iii 4 3}
  211. 5 {three i [v i] four 4 1} {ii five five five iii}
  212. }
  213. 5 {one + two + three} {
  214. 7 {ii ii two three 2 5} {iii [i ii iii] iii one one}
  215. 8 {2 ii [i two 3] three 2} {two iv v iii 3 five}
  216. }
  217. 6 {"v v"} {
  218. 1 {four v 4 i three} {1 3 [five five] 4 one}
  219. 5 {three i v i four 4 1} {ii [five five five] iii}
  220. }
  221. } {
  222. if {![fts5_expr_ok $q t1]} continue
  223. do_execsql_test 5.1.$tn {
  224. SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']')
  225. FROM t1 WHERE t1 MATCH $q
  226. } $res
  227. }
  228. # Test that the xQueryPhrase() API works with synonyms.
  229. #
  230. proc mit {blob} {
  231. set scan(littleEndian) i*
  232. set scan(bigEndian) I*
  233. binary scan $blob $scan($::tcl_platform(byteOrder)) r
  234. return $r
  235. }
  236. db func mit mit
  237. sqlite3_fts5_register_matchinfo db
  238. foreach {tn q res} {
  239. 1 {one} {
  240. 1 {1 11 7 2 12 6} 2 {2 11 7 0 12 6}
  241. 3 {2 11 7 1 12 6} 4 {1 11 7 2 12 6}
  242. 5 {3 11 7 0 12 6} 6 {0 11 7 2 12 6}
  243. 7 {0 11 7 3 12 6} 8 {1 11 7 0 12 6}
  244. 9 {1 11 7 2 12 6}
  245. }
  246. } {
  247. do_execsql_test 5.2.$tn {
  248. SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q
  249. } $res
  250. }
  251. #-------------------------------------------------------------------------
  252. # Test terms with more than 4 synonyms.
  253. #
  254. reset_db
  255. sqlite3_fts5_create_tokenizer db tcl tcl_create
  256. proc tcl_tokenize {tflags text} {
  257. foreach {w iStart iEnd} [fts5_tokenize_split $text] {
  258. sqlite3_fts5_token $w $iStart $iEnd
  259. if {$tflags=="query" && [string length $w]==1} {
  260. for {set i 2} {$i<=10} {incr i} {
  261. sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
  262. }
  263. }
  264. }
  265. }
  266. do_execsql_test 6.0.1 {
  267. CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl, detail=%DETAIL%);
  268. INSERT INTO t1 VALUES('yy xx qq');
  269. INSERT INTO t1 VALUES('yy xx xx');
  270. }
  271. if {[fts5_expr_ok "NEAR(y q)" t1]} {
  272. do_execsql_test 6.0.2 {
  273. SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)';
  274. } {{yy xx qq}}
  275. }
  276. do_test 6.0.3 {
  277. execsql {
  278. CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl, detail=%DETAIL%)
  279. }
  280. foreach {rowid a b} {
  281. 1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq aaaa}
  282. 2 {ww oooooo bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
  283. 3 {zzzz llll gggggg cccc uu} {hhhhhh aaaa ppppp rr ee jjjj}
  284. 4 {r f i rrrrrr ww hhh} {aa yyy t x aaaaa ii}
  285. 5 {fffff mm vvvv ooo ffffff kkkk tttt} {cccccc bb e zzz d n}
  286. 6 {iii dddd hh qqqq ddd ooo} {ttt d c b aaaaaa qqqq}
  287. 7 {jjjj rrrr v zzzzz u tt t} {ppppp pp dddd mm hhh uuu}
  288. 8 {gggg rrrrrr kkkk vvvv gggg jjjjjj b} {dddddd jj r w cccc wwwwww ss}
  289. 9 {kkkkk qqq oooo e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
  290. } {
  291. execsql { INSERT INTO t2(rowid, a, b) VALUES($rowid, $a, $b) }
  292. }
  293. } {}
  294. foreach {tn q res} {
  295. 1 {a} {
  296. 1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq [aaaa]}
  297. 3 {zzzz llll gggggg cccc uu} {hhhhhh [aaaa] ppppp rr ee jjjj}
  298. 4 {r f i rrrrrr ww hhh} {[aa] yyy t x [aaaaa] ii}
  299. 6 {iii dddd hh qqqq ddd ooo} {ttt d c b [aaaaaa] qqqq}
  300. }
  301. 2 {a AND q} {
  302. 1 {yyyy vvvvv [qq] oo yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
  303. 6 {iii dddd hh [qqqq] ddd ooo} {ttt d c b [aaaaaa] [qqqq]}
  304. }
  305. 3 {o OR (q AND a)} {
  306. 1 {yyyy vvvvv [qq] [oo] yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
  307. 2 {ww [oooooo] bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
  308. 5 {fffff mm vvvv [ooo] ffffff kkkk tttt} {cccccc bb e zzz d n}
  309. 6 {iii dddd hh [qqqq] ddd [ooo]} {ttt d c b [aaaaaa] [qqqq]}
  310. 9 {kkkkk qqq [oooo] e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
  311. }
  312. 4 {NEAR(q y, 20)} {
  313. 1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee} {ffff uu r qq aaaa}
  314. 2 {ww oooooo bbbbb ssssss mm} {ffffff [yy] iiii rr s ccc [qqqqq]}
  315. }
  316. } {
  317. if {![fts5_expr_ok $q t2]} continue
  318. do_execsql_test 6.1.$tn.asc {
  319. SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
  320. FROM t2 WHERE t2 MATCH $q
  321. } $res
  322. set res2 [list]
  323. foreach {rowid a b} $res {
  324. set res2 [concat [list $rowid $a $b] $res2]
  325. }
  326. do_execsql_test 6.1.$tn.desc {
  327. SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
  328. FROM t2 WHERE t2 MATCH $q ORDER BY rowid DESC
  329. } $res2
  330. }
  331. do_execsql_test 6.2.1 {
  332. INSERT INTO t2(rowid, a, b) VALUES(13,
  333. 'x xx xxx xxxx xxxxx xxxxxx xxxxxxx', 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy'
  334. );
  335. SELECT rowid, highlight(t2, 0, '<', '>'), highlight(t2, 1, '(', ')')
  336. FROM t2 WHERE t2 MATCH 'x OR y'
  337. } {
  338. 1 {<yyyy> vvvvv qq oo <yyyyyy> vvvv eee} {ffff uu r qq aaaa}
  339. 2 {ww oooooo bbbbb ssssss mm} {ffffff (yy) iiii rr s ccc qqqqq}
  340. 4 {r f i rrrrrr ww hhh} {aa (yyy) t (x) aaaaa ii}
  341. 13 {<x> <xx> <xxx> <xxxx> <xxxxx> <xxxxxx> <xxxxxxx>}
  342. {(y) (yy) (yyy) (yyyy) (yyyyy) (yyyyyy) (yyyyyyy)}
  343. }
  344. #-------------------------------------------------------------------------
  345. # Test that the xColumnSize() API is not confused by colocated tokens.
  346. #
  347. reset_db
  348. sqlite3_fts5_create_tokenizer db tcl tcl_create
  349. fts5_aux_test_functions db
  350. proc tcl_tokenize {tflags text} {
  351. foreach {w iStart iEnd} [fts5_tokenize_split $text] {
  352. sqlite3_fts5_token $w $iStart $iEnd
  353. if {[string length $w]==1} {
  354. for {set i 2} {$i<=10} {incr i} {
  355. sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
  356. }
  357. }
  358. }
  359. }
  360. do_execsql_test 7.0.1 {
  361. CREATE VIRTUAL TABLE t1 USING fts5(a, b, columnsize=1, tokenize=tcl, detail=%DETAIL%);
  362. INSERT INTO t1 VALUES('0 2 3', '4 5 6 7');
  363. INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
  364. SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0';
  365. } {{3 4} {2 10}}
  366. do_execsql_test 7.0.2 {
  367. INSERT INTO t1(t1) VALUES('integrity-check');
  368. }
  369. do_execsql_test 7.1.1 {
  370. CREATE VIRTUAL TABLE t2 USING fts5(a, b, columnsize=0, tokenize=tcl, detail=%DETAIL%);
  371. INSERT INTO t2 VALUES('0 2 3', '4 5 6 7');
  372. INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
  373. SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0';
  374. } {{3 4} {2 10}}
  375. do_execsql_test 7.1.2 {
  376. INSERT INTO t2(t2) VALUES('integrity-check');
  377. }
  378. } ;# foreach_detail_mode
  379. finish_test