fts5origintext.test 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. # 2014 Jan 08
  2. #
  3. # The author disclaims copyright to this source code. In place of
  4. # a legal notice, here is a blessing:
  5. #
  6. # May you do good and not evil.
  7. # May you find forgiveness for yourself and forgive others.
  8. # May you share freely, never taking more than you give.
  9. #
  10. #***********************************************************************
  11. #
  12. # Tests focused on the "origintext" tokenizer, the tokendata=1 option, and the xQueryToken()/xInstToken() extension APIs.
  13. #
  # Load fts5_common.tcl from the directory that contains this script, and
  # set the prefix used for the names of the tests in this file.
  source [file join [file dirname [info script]] fts5_common.tcl]
  set testprefix fts5origintext

  # Nothing below can run without FTS5 (SQLITE_ENABLE_FTS5), so finish
  # immediately when the capability is missing.
  ifcapable !fts5 {
    finish_test
    return
  }
  21. foreach_detail_mode $testprefix {
  22. foreach {tn insttoken} {
  23. 1 0
  24. 2 1
  25. } {
  # Section 1: smoke test. Start from a fresh database and register the
  # "origintext" tokenizer on connection db.
  reset_db
  sqlite3_fts5_register_origintext db

  # Create the FTS5 table (origintext wrapped around unicode61), set its
  # 'insttoken' option to this iteration's value (0 or 1), and create an
  # instance-level fts5vocab table over it so stored terms can be inspected.
  do_execsql_test $tn.1.0 {
    CREATE VIRTUAL TABLE ft USING fts5(
      x, tokenize="origintext unicode61", detail=%DETAIL%
    );
    INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken);
    CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance);
  }

  do_execsql_test $tn.1.1 {
    INSERT INTO ft VALUES('Hello world');
  }

  # Run the FTS5 integrity-check after the insert.
  do_execsql_test $tn.1.2 {
    INSERT INTO ft(ft) VALUES('integrity-check');
  }
  # Return $x with every NUL character replaced by ".", so that terms with
  # an embedded NUL can be written in expected test results.
  proc b {x} {
    set nul_to_dot [list "\0" "."]
    return [string map $nul_to_dot $x]
  }
  # Make proc b callable from SQL as b().
  db func b b

  # The vocab table lists 'Hello' as "hello<NUL>Hello" (shown here as
  # hello.Hello), while the already lower-case 'world' is listed unchanged.
  do_execsql_test $tn.1.3 {
    select b(term) from vocab;
  } {
    hello.Hello
    world
  }

  # Querying with the same spelling that was inserted finds the row.
  do_execsql_test $tn.1.4 {
    SELECT rowid FROM ft('Hello');
  } {1}

  #-------------------------------------------------------------------------
  # Fresh database for the next group of tests.
  reset_db
  # Return a pseudo-random integer between 0 and n-1 (for positive n).
  #
  proc random {n} {
    set scaled [expr {rand() * $n}]
    return [expr {abs(int($scaled))}]
  }
  # Return one element of $list, picked at the index that
  # [random [llength $list]] yields.
  proc select_one {list} {
    set idx [random [llength $list]]
    return [lindex $list $idx]
  }
  # Build a random term: one ASCII letter (lower or upper case) chosen with
  # select_one, followed by the integer returned by [random 100].
  proc term {} {
    set letters {
      a b c d e f g h i j k l m n o p q r s t u v w x y z
      A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
    }
    set prefix [select_one $letters]
    set suffix [random 100]
    return "$prefix$suffix"
  }
  # Return a random document: a Tcl list of [random 5]+5 terms (so between
  # 5 and 9 of them), each produced by a separate call to [term].
  proc document {} {
    # Brace the expression so it is substituted only once and can be
    # byte-compiled.
    set nTerm [expr {[random 5] + 5}]
    set doc [list]
    for {set ii 0} {$ii < $nTerm} {incr ii} {
      lappend doc [term]
    }
    return $doc
  }
  # Section 2: bulk load. Make proc document callable from SQL as
  # document() and register the origintext tokenizer on the new database.
  db func document document
  sqlite3_fts5_register_origintext db

  # Same table layout as section 1, plus the 'pgsz' option set to 128
  # (presumably so that the index spans many small pages).
  do_execsql_test $tn.2.0 {
    CREATE VIRTUAL TABLE ft USING fts5(
      x, tokenize="origintext unicode61", detail=%DETAIL%
    );
    INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken);
    INSERT INTO ft(ft, rank) VALUES('pgsz', 128);
    CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance);
  }

  # Insert 500 randomly generated documents.
  do_test $tn.2.1 {
    for {set ii 0} {$ii < 500} {incr ii} {
      execsql { INSERT INTO ft VALUES( document() ) }
    }
  } {}

  # Integrity-check the index after the load ...
  do_execsql_test $tn.2.2 {
    INSERT INTO ft(ft) VALUES('integrity-check');
  }

  # ... then issue a 'merge' command with parameter 16 and check again ...
  do_execsql_test $tn.2.3 {
    INSERT INTO ft(ft, rank) VALUES('merge', 16);
  }

  do_execsql_test $tn.2.4 {
    INSERT INTO ft(ft) VALUES('integrity-check');
  }

  # ... and finally issue the 'optimize' command.
  do_execsql_test $tn.2.5 {
    INSERT INTO ft(ft) VALUES('optimize');
  }
  #-------------------------------------------------------------------------
  # Section 3: how case variants match, without and with tokendata=1.
  reset_db
  sqlite3_fts5_register_origintext db

  # Table without tokendata: three rows that differ only in letter case.
  do_execsql_test $tn.3.0 {
    CREATE VIRTUAL TABLE ft USING fts5(
      x, tokenize="origintext unicode61", detail=%DETAIL%
    );
    INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken);
    CREATE VIRTUAL TABLE vocab USING fts5vocab(ft, instance);
    INSERT INTO ft(rowid, x) VALUES(1, 'hello');
    INSERT INTO ft(rowid, x) VALUES(2, 'Hello');
    INSERT INTO ft(rowid, x) VALUES(3, 'HELLO');
  }

  # Disabled debugging aid: dump the vocab terms with NUL shown as ".".
  #proc b {x} { string map [list "\0" "."] $x }
  #db func b b
  #execsql_pp { SELECT b(term) FROM vocab }

  # Each spelling matches only the row that was inserted with that spelling.
  do_execsql_test $tn.3.1.1 { SELECT rowid FROM ft('hello') } 1
  do_execsql_test $tn.3.1.2 { SELECT rowid FROM ft('Hello') } 2
  do_execsql_test $tn.3.1.3 { SELECT rowid FROM ft('HELLO') } 3

  # Same rows in a tokendata=1 table, plus one longer term for the prefix
  # query below.
  do_execsql_test $tn.3.2 {
    CREATE VIRTUAL TABLE ft2 USING fts5(x,
      tokenize="origintext unicode61",
      tokendata=1,
      detail=%DETAIL%
    );
    INSERT INTO ft2(ft2, rank) VALUES('insttoken', $insttoken);
    CREATE VIRTUAL TABLE vocab2 USING fts5vocab(ft2, instance);
    INSERT INTO ft2(rowid, x) VALUES(1, 'hello');
    INSERT INTO ft2(rowid, x) VALUES(2, 'Hello');
    INSERT INTO ft2(rowid, x) VALUES(3, 'HELLO');
    INSERT INTO ft2(rowid, x) VALUES(10, 'helloooo');
  }

  # Disabled debugging aid, as above.
  #proc b {x} { string map [list "\0" "."] $x }
  #db func b b
  #execsql_pp { SELECT b(term) FROM vocab }

  # With tokendata=1 every spelling matches all three case variants, and the
  # prefix query additionally matches 'helloooo'.
  do_execsql_test $tn.3.3.1 { SELECT rowid FROM ft2('hello') } {1 2 3}
  do_execsql_test $tn.3.3.2 { SELECT rowid FROM ft2('Hello') } {1 2 3}
  do_execsql_test $tn.3.3.3 { SELECT rowid FROM ft2('HELLO') } {1 2 3}
  do_execsql_test $tn.3.3.4 { SELECT rowid FROM ft2('hello*') } {1 2 3 10}

  # Descending order, once on rowid directly and once on +rowid (the unary
  # "+" presumably forces a separate sort step - confirm).
  do_execsql_test $tn.3.3.5.1 { SELECT rowid FROM ft2('HELLO') ORDER BY rowid DESC} {
    3 2 1
  }
  do_execsql_test $tn.3.3.5.2 { SELECT rowid FROM ft2('HELLO') ORDER BY +rowid DESC} {
    3 2 1
  }

  #-------------------------------------------------------------------------
  # Fresh database, with the tokenizer registered, for the xQueryToken tests.
  #
  reset_db
  sqlite3_fts5_register_origintext db
  # Call the xQueryToken method of $cmd for token $iToken of phrase $iPhrase
  # and return the text it yields, with NUL characters rendered as ".".
  proc querytoken {cmd iPhrase iToken} {
    return [string map [list "\0" "."] [$cmd xQueryToken $iPhrase $iToken]]
  }
  # Section 4: the xQueryToken API, reached through the SQL function
  # querytoken().
  sqlite3_fts5_create_function db querytoken querytoken

  do_execsql_test $tn.4.0 {
    CREATE VIRTUAL TABLE ft USING fts5(
      x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL%
    );
    INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken);
    INSERT INTO ft VALUES('one two three four');
  }

  # A query token whose spelling is not all lower case is reported as
  # "<folded>.<as typed>" ...
  do_execsql_test $tn.4.1 {
    SELECT rowid, querytoken(ft, 0, 0) FROM ft('TwO')
  } {1 two.TwO}

  # ... whereas an all lower-case token is reported as just the token.
  do_execsql_test $tn.4.2 {
    SELECT rowid, querytoken(ft, 0, 0) FROM ft('one TWO ThreE')
  } {1 one}

  # Phrase 1 of the three-phrase query is 'TWO'.
  do_execsql_test $tn.4.3 {
    SELECT rowid, querytoken(ft, 1, 0) FROM ft('one TWO ThreE')
  } {1 two.TWO}

  if {"%DETAIL%" eq "full"} {
    # Phrase queries are only supported for detail=full.
    #
    # Token 2 of the single quoted phrase is 'ThreE'.
    do_execsql_test $tn.4.4 {
      SELECT rowid, querytoken(ft, 0, 2) FROM ft('"one TWO ThreE"')
    } {1 three.ThreE}

    # Token index 3, phrase index 1 and phrase index -1 are all out of range
    # for this one-phrase, three-token query.
    do_catchsql_test $tn.4.5 {
      SELECT rowid, querytoken(ft, 0, 3) FROM ft('"one TWO ThreE"')
    } {1 SQLITE_RANGE}

    do_catchsql_test $tn.4.6 {
      SELECT rowid, querytoken(ft, 1, 0) FROM ft('"one TWO ThreE"')
    } {1 SQLITE_RANGE}

    do_catchsql_test $tn.4.7 {
      SELECT rowid, querytoken(ft, -1, 0) FROM ft('"one TWO ThreE"')
    } {1 SQLITE_RANGE}
  }

  #-------------------------------------------------------------------------
  # Fresh database, with the tokenizer registered, for the xInstToken tests.
  #
  reset_db
  sqlite3_fts5_register_origintext db
  # Call the xInstToken method of $cmd for token $iToken of phrase instance
  # $iIdx and return the text it yields, with NUL characters rendered as ".".
  proc insttoken {cmd iIdx iToken} {
    return [string map [list "\0" "."] [$cmd xInstToken $iIdx $iToken]]
  }
  # Section 5: the xInstToken API on a tokendata=1 table, reached through
  # the SQL function insttoken(). fts5_aux_test_functions supplies the
  # fts5_test_* functions used further down.
  sqlite3_fts5_create_function db insttoken insttoken
  fts5_aux_test_functions db

  # A single row holding six case variants of the same word.
  do_execsql_test $tn.5.0 {
    CREATE VIRTUAL TABLE ft USING fts5(
      x, tokenize='origintext unicode61', tokendata=1, detail=%DETAIL%
    );
    INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken);
    INSERT INTO ft VALUES('one ONE One oNe oNE one');
  }

  # Each of the six instances reports the spelling found in the document.
  do_execsql_test $tn.5.1 {
    SELECT insttoken(ft, 0, 0),
           insttoken(ft, 1, 0),
           insttoken(ft, 2, 0),
           insttoken(ft, 3, 0),
           insttoken(ft, 4, 0),
           insttoken(ft, 5, 0)
    FROM ft('one');
  } {
    one one.ONE one.One one.oNe one.oNE one
  }

  # The same holds for a prefix query ...
  do_execsql_test $tn.5.2 {
    SELECT insttoken(ft, 0, 0),
           insttoken(ft, 1, 0),
           insttoken(ft, 2, 0),
           insttoken(ft, 3, 0),
           insttoken(ft, 4, 0),
           insttoken(ft, 5, 0)
    FROM ft('on*');
  } {
    one one.ONE one.One one.oNe one.oNE one
  }

  # ... and for the same prefix query passed through fts5_insttoken().
  do_execsql_test $tn.5.3 {
    SELECT insttoken(ft, 0, 0),
           insttoken(ft, 1, 0),
           insttoken(ft, 2, 0),
           insttoken(ft, 3, 0),
           insttoken(ft, 4, 0),
           insttoken(ft, 5, 0)
    FROM ft(fts5_insttoken('on*'));
  } {
    one one.ONE one.One one.oNe one.oNE one
  }

  # Asking for a single instance, other than the first, also works.
  do_execsql_test $tn.5.4 {
    SELECT insttoken(ft, 1, 0) FROM ft('one');
  } {
    one.ONE
  }

  # fts5_test_poslist() lists all six instances of the query.
  do_execsql_test $tn.5.5 {
    SELECT fts5_test_poslist(ft) FROM ft('one');
  } {
    {0.0.0 0.0.1 0.0.2 0.0.3 0.0.4 0.0.5}
  }
  #-------------------------------------------------------------------------
  # Section 6: test the xInstToken() API with:
  #
  #   * a non tokendata=1 table.
  #   * prefix queries.
  #
  reset_db
  sqlite3_fts5_register_origintext db

  # Two-column table with tokendata explicitly switched off.
  do_execsql_test $tn.6.0 {
    CREATE VIRTUAL TABLE ft USING fts5(
      x, y, tokenize='origintext unicode61', detail=%DETAIL%, tokendata=0
    );
    INSERT INTO ft(ft, rank) VALUES('insttoken', $insttoken);
    INSERT INTO ft VALUES('One Two', 'Three two');
    INSERT INTO ft VALUES('three Three', 'one One');
  }
  # Return a list holding, for every phrase instance that $cmd reports via
  # xInstCount, the text of token 0 of that instance as returned by
  # xInstToken, with NUL characters rendered as ".".
  proc tokens {cmd} {
    # Ask for the instance count once, not once per loop iteration.
    set nInst [$cmd xInstCount]
    set ret [list]
    for {set iInst 0} {$iInst < $nInst} {incr iInst} {
      set txt [$cmd xInstToken $iInst 0]
      lappend ret [string map [list "\0" "."] $txt]
    }
    return $ret
  }
  # Make proc tokens callable from SQL as the function tokens().
  sqlite3_fts5_create_function db tokens tokens

  # An exact query for 'One' reports the instance as one.One in both rows.
  do_execsql_test $tn.6.1 {
    SELECT rowid, tokens(ft) FROM ft('One');
  } {1 one.One 2 one.One}

  # The prefix query additionally picks up the lower-case 'one' in row 2.
  do_execsql_test $tn.6.2 {
    SELECT rowid, tokens(ft) FROM ft('on*');
  } {1 one.One 2 {one one.One}}

  do_execsql_test $tn.6.3 {
    SELECT rowid, tokens(ft) FROM ft('Three*');
  } {1 three.Three 2 three.Three}

  # Out-of-range instance indexes (-1 and 1) must fail with SQLITE_RANGE.
  fts5_aux_test_functions db
  do_catchsql_test $tn.6.4 {
    SELECT fts5_test_insttoken(ft, -1, 0) FROM ft('one');
  } {1 SQLITE_RANGE}

  do_catchsql_test $tn.6.5 {
    SELECT fts5_test_insttoken(ft, 1, 0) FROM ft('one');
  } {1 SQLITE_RANGE}

  # tokendata=2 and tokendata=11 are both rejected as malformed.
  do_catchsql_test $tn.6.6 {
    CREATE VIRTUAL TABLE ft2 USING fts5(x, tokendata=2);
  } {1 {malformed tokendata=... directive}}

  do_catchsql_test $tn.6.7 {
    CREATE VIRTUAL TABLE ft2 USING fts5(x, content='', tokendata=11);
  } {1 {malformed tokendata=... directive}}
  300. }
  301. }
  302. finish_test