123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367 |
- # 2020 September 30
- #
- # The author disclaims copyright to this source code. In place of
- # a legal notice, here is a blessing:
- #
- # May you do good and not evil.
- # May you find forgiveness for yourself and forgive others.
- # May you share freely, never taking more than you give.
- #
- #*************************************************************************
- #
- # Tests for the fts5 "trigram" tokenizer.
- #
- # Load the shared fts5 test helpers from the directory holding this script.
- source [file join [file dirname [info script]] fts5_common.tcl]
- # End the script immediately when the build does not have the fts5 capability.
- ifcapable !fts5 { finish_test ; return }
- # Name prefix for this file's tests (presumably prepended to each test id by
- # the test harness -- confirm in the tester library).
- set ::testprefix fts5trigram
- # Section 1: trigram tokenizer with default options. Two rows are indexed,
- # one ASCII and one Thai, then queried through MATCH, fts5_expr() and LIKE.
- do_execsql_test 1.0 {
- CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize=trigram);
- INSERT INTO t1 VALUES('abcdefghijklm');
- INSERT INTO t1 VALUES('กรุงเทพมหานคร');
- }
- # 1.1.*: each table row is test-number, query text, expected highlight()
- # output. Cases 7-9 use mixed-case query text and still expect the
- # lower-case row to be highlighted, so matching is case-insensitive here.
- foreach {tn s res} {
- 1 abc "(abc)defghijklm"
- 2 defgh "abc(defgh)ijklm"
- 3 abcdefghijklm "(abcdefghijklm)"
- 4 กรุ "(กรุ)งเทพมหานคร"
- 5 งเทพมห "กรุ(งเทพมห)านคร"
- 6 กรุงเทพมหานคร "(กรุงเทพมหานคร)"
- 7 Abc "(abc)defghijklm"
- 8 deFgh "abc(defgh)ijklm"
- 9 aBcdefGhijKlm "(abcdefghijklm)"
- } {
- do_execsql_test 1.1.$tn {
- SELECT highlight(t1, 0, '(', ')') FROM t1($s)
- } $res
- }
- # 1.2.0: fts5_expr() reports the query 'ABCD' as the lower-case trigrams
- # "abc" and "bcd" joined by +.
- do_execsql_test 1.2.0 {
- SELECT fts5_expr('ABCD', 'tokenize=trigram')
- } {{"abc" + "bcd"}}
- # 1.2.1: LIKE with an ESCAPE clause and an unbound parameter. No expected
- # result argument is given; presumably the harness then expects an empty
- # result -- confirm.
- do_execsql_test 1.2.1 {
- SELECT * FROM t1 WHERE y LIKE ? ESCAPE 'a'
- }
- # 1.3.*: each table row is test-number, LIKE pattern, expected rowids.
- # Case 2 has no leading wildcard and the text does not start with "cdef",
- # so nothing matches. Case 10 puts double quotes inside the pattern and
- # expects no rows.
- foreach {tn like res} {
- 1 {%cDef%} 1
- 2 {cDef%} {}
- 3 {%f%} 1
- 4 {%f_h%} 1
- 5 {%f_g%} {}
- 6 {abc%klm} 1
- 7 {ABCDEFG%} 1
- 8 {%รุงเ%} 2
- 9 {%งเ%} 2
- 10 {%"งเ"%} {}
- } {
- do_execsql_test 1.3.$tn {
- SELECT rowid FROM t1 WHERE y LIKE $like
- } $res
- }
- #-------------------------------------------------------------------------
- reset_db
- # Section 2: the same two rows as section 1, indexed with the tokenizer
- # option "case_sensitive 1".
- do_execsql_test 2.0 {
- CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize="trigram case_sensitive 1");
- INSERT INTO t1 VALUES('abcdefghijklm');
- INSERT INTO t1 VALUES('กรุงเทพมหานคร');
- }
- # 2.0.1: "case_sensitive" given without a value makes table creation fail.
- do_catchsql_test 2.0.1 {
- CREATE VIRTUAL TABLE t2 USING fts5(z, tokenize='trigram case_sensitive');
- } {1 {error in tokenizer constructor}}
- # 2.1.*: each table row is test-number, query text, expected highlight()
- # output. Unlike section 1, the mixed-case queries in cases 7-9 expect no
- # rows at all.
- foreach {tn s res} {
- 1 abc "(abc)defghijklm"
- 2 defgh "abc(defgh)ijklm"
- 3 abcdefghijklm "(abcdefghijklm)"
- 4 กรุ "(กรุ)งเทพมหานคร"
- 5 งเทพมห "กรุ(งเทพมห)านคร"
- 6 กรุงเทพมหานคร "(กรุงเทพมหานคร)"
- 7 Abc ""
- 8 deFgh ""
- 9 aBcdefGhijKlm ""
- } {
- do_execsql_test 2.1.$tn {
- SELECT highlight(t1, 0, '(', ')') FROM t1($s)
- } $res
- }
- # 2.2.*: LIKE patterns against the case-sensitive table. The expected
- # rowids are the same as in section 1: cases 1 and 7 still match although
- # the pattern's letter case differs from the stored text.
- foreach {tn like res} {
- 1 {%cDef%} 1
- 2 {cDef%} {}
- 3 {%f%} 1
- 4 {%f_h%} 1
- 5 {%f_g%} {}
- 6 {abc%klm} 1
- 7 {ABCDEFG%} 1
- 8 {%รุงเ%} 2
- } {
- do_execsql_test 2.2.$tn {
- SELECT rowid FROM t1 WHERE y LIKE $like
- } $res
- }
- # 2.3.*: GLOB patterns using the * and ? wildcards. Cases 9-12 add
- # bracketed character classes, including classes that contain a closing
- # bracket and negated classes.
- foreach {tn like res} {
- 1 {*cdef*} 1
- 2 {cdef*} {}
- 3 {*f*} 1
- 4 {*f?h*} 1
- 5 {*f?g*} {}
- 6 {abc*klm} 1
- 7 {abcdefg*} 1
- 8 {*รุงเ*} 2
- 9 {abc[d]efg*} 1
- 10 {abc[]d]efg*} 1
- 11 {abc[^]d]efg*} {}
- 12 {abc[^]XYZ]efg*} 1
- } {
- do_execsql_test 2.3.$tn {
- SELECT rowid FROM t1 WHERE y GLOB $like
- } $res
- }
- # 2.3.null.1: a NULL LIKE pattern. No expected result argument is given;
- # presumably the harness then expects no rows -- confirm.
- do_execsql_test 2.3.null.1 {
- SELECT rowid FROM t1 WHERE y LIKE NULL
- }
- #-------------------------------------------------------------------------
- reset_db
- # Section 3: validation of the case_sensitive argument.
- # 3.1 and 3.2: the values 2 and 11 make the tokenizer constructor fail.
- do_catchsql_test 3.1 {
- CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 2");
- } {1 {error in tokenizer constructor}}
- do_catchsql_test 3.2 {
- CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 11");
- } {1 {error in tokenizer constructor}}
- # 3.3: here the double quotes wrap the whole tokenize=... option rather
- # than only its value, and table creation is expected to succeed.
- do_catchsql_test 3.3 {
- CREATE VIRTUAL TABLE ttt USING fts5(c, "tokenize=trigram case_sensitive 1");
- } {0 {}}
- #-------------------------------------------------------------------------
- reset_db
- # Section 4: store a three-byte blob (00 0b 01) in a trigram-tokenized
- # column, then issue the 'integrity-check' command against the table.
- # Every step expects an empty result, so the section passes as long as
- # nothing raises an error.
- do_execsql_test 4.0 {
- CREATE VIRTUAL TABLE t0 USING fts5(b, tokenize = "trigram");
- }
- do_execsql_test 4.1 {
- INSERT INTO t0 VALUES (x'000b01');
- }
- # Writing 'integrity-check' to the column named after the table is the
- # fts5 special-command form.
- do_execsql_test 4.2 {
- INSERT INTO t0(t0) VALUES('integrity-check');
- }
- #-------------------------------------------------------------------------
- reset_db
- # Section 5: repeat the LIKE checks for each case_sensitive setting (0 and
- # 1) inside foreach_detail_mode. The %DETAIL% placeholder is presumably
- # replaced with each detail mode name by foreach_detail_mode, which is
- # defined outside this file -- confirm.
- foreach_detail_mode $::testprefix {
- foreach {ci} {0 1} {
- reset_db
- # The SQL is double-quoted so that Tcl substitutes the value of ci into
- # both the test id and the tokenizer option.
- do_execsql_test 5.cs=$ci.0.1 "
- CREATE VIRTUAL TABLE t1 USING fts5(
- y, tokenize=\"trigram case_sensitive $ci\", detail=%DETAIL%
- );
- "
- do_execsql_test 5.cs=$ci.0.2 {
- INSERT INTO t1 VALUES('abcdefghijklm');
- INSERT INTO t1 VALUES('กรุงเทพมหานคร');
- }
- # Each table row is test-number, LIKE pattern, expected rowids. The same
- # expectations are used for both values of ci.
- foreach {tn like res} {
- 1 {%cDef%} 1
- 2 {cDef%} {}
- 3 {%f%} 1
- 4 {%f_h%} 1
- 5 {%f_g%} {}
- 6 {abc%klm} 1
- 7 {ABCDEFG%} 1
- 8 {%รุงเ%} 2
- } {
- do_execsql_test 5.cs=$ci.1.$tn {
- SELECT rowid FROM t1 WHERE y LIKE $like
- } $res
- }
- }
- }
- # Section 6: expected query plans for LIKE, GLOB and < against a
- # case-insensitive table (ci0) and a case-sensitive table (ci1).
- do_execsql_test 6.0 {
- CREATE VIRTUAL TABLE ci0 USING fts5(x, tokenize="trigram");
- CREATE VIRTUAL TABLE ci1 USING fts5(x, tokenize="trigram case_sensitive 1");
- }
- # LIKE and GLOB both work with case-insensitive tokenizers. Only GLOB works
- # with case-sensitive.
- # In the expected plans, the text after "INDEX 0:" is L0 for a LIKE term
- # and G0 for a GLOB term; it is empty when the plan is a plain scan
- # (presumably meaning no constraint was handed to fts5 -- confirm).
- do_eqp_test 6.1 {
- SELECT * FROM ci0 WHERE x LIKE ?
- } {VIRTUAL TABLE INDEX 0:L0}
- do_eqp_test 6.2 {
- SELECT * FROM ci0 WHERE x GLOB ?
- } {VIRTUAL TABLE INDEX 0:G0}
- # 6.3: LIKE on the case-sensitive table falls back to a plain scan.
- do_eqp_test 6.3 {
- SELECT * FROM ci1 WHERE x LIKE ?
- } {{SCAN ci1 VIRTUAL TABLE INDEX 0:}}
- do_eqp_test 6.4 {
- SELECT * FROM ci1 WHERE x GLOB ?
- } {VIRTUAL TABLE INDEX 0:G0}
- # 6.5 and 6.6: a < comparison is a plain scan on both tables.
- do_eqp_test 6.5 {
- SELECT * FROM ci1 WHERE x < ?
- } {{SCAN ci1 VIRTUAL TABLE INDEX 0:}}
- do_eqp_test 6.6 {
- SELECT * FROM ci0 WHERE x < ?
- } {{SCAN ci0 VIRTUAL TABLE INDEX 0:}}
- reset_db
- # Section 7: GLOB with a two-character Cyrillic pattern against file names
- # in several scripts. Row 50 holds U+1F3B5, a character outside the Basic
- # Multilingual Plane that occupies four bytes in UTF-8, so the table mixes
- # one-, two- and four-byte characters.
- do_execsql_test 7.0 {
- CREATE VIRTUAL TABLE f USING FTS5(filename, tokenize="trigram");
- INSERT INTO f (rowid, filename) VALUES
- (10, "giraffe.png"),
- (20, "жираф.png"),
- (30, "cat.png"),
- (40, "кот.png"),
- (50, "misic-🎵-.mp3");
- }
- # 7.1 and 7.2 differ only in the unary + before the column name, which
- # presumably keeps the GLOB term from being handed to the fts5 index.
- # Both forms must return the same single row.
- do_execsql_test 7.1 {
- SELECT rowid FROM f WHERE +filename GLOB '*ир*';
- } {20}
- do_execsql_test 7.2 {
- SELECT rowid FROM f WHERE filename GLOB '*ир*';
- } {20}
- #-------------------------------------------------------------------------
- reset_db
- # Section 8: highlight() output when a query has several terms.
- do_execsql_test 8.0 {
- CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize=trigram);
- INSERT INTO t1 VALUES('abcdefghijklm');
- }
- # 8.1.*: each table row is test-number, MATCH expression, expected
- # highlight() output. Where the matched ranges overlap or touch, one pair
- # of markers encloses the combined range (cases 2, 3, 6 and 7); separate
- # ranges get separate markers (cases 1 and 5).
- foreach {tn match res} {
- 1 "abc ghi" "(abc)def(ghi)jklm"
- 2 "def ghi" "abc(defghi)jklm"
- 3 "efg ghi" "abcd(efghi)jklm"
- 4 "efghi" "abcd(efghi)jklm"
- 5 "abcd jklm" "(abcd)efghi(jklm)"
- 6 "ijkl jklm" "abcdefgh(ijklm)"
- 7 "ijk ijkl hijk" "abcdefg(hijkl)m"
- } {
- do_execsql_test 8.1.$tn {
- SELECT highlight(t1, 0, '(', ')') FROM t1($match)
- } $res
- }
- # 8.2 and 8.3: the same merging with an explicit AND query. In the row
- # 'abcde' the two trigrams share the letter c, so a single bracket pair
- # is expected around the whole value.
- do_execsql_test 8.2 {
- CREATE VIRTUAL TABLE ft2 USING fts5(a, tokenize="trigram");
- INSERT INTO ft2 VALUES('abc x cde');
- INSERT INTO ft2 VALUES('abc cde');
- INSERT INTO ft2 VALUES('abcde');
- }
- do_execsql_test 8.3 {
- SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'abc AND cde';
- } {
- {[abc] x [cde]}
- {[abc] [cde]}
- {[abcde]}
- }
- #-------------------------------------------------------------------------
- reset_db
- # Section 9: LIKE against a12, the last of thirteen columns. Only a12 is
- # given a value in the two inserted rows.
- do_execsql_test 9.0 {
- CREATE VIRTUAL TABLE t1 USING fts5(
- a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12,
- tokenize=trigram
- );
- INSERT INTO t1(rowid, a12) VALUES(111, 'thats a tricky case though');
- INSERT INTO t1(rowid, a12) VALUES(222, 'the query planner cannot do');
- }
- do_execsql_test 9.1 {
- SELECT rowid FROM t1 WHERE a12 LIKE '%tricky%'
- } {111}
- # 9.2: two LIKE terms on the same column combined with AND.
- do_execsql_test 9.2 {
- SELECT rowid FROM t1 WHERE a12 LIKE '%tricky%' AND a12 LIKE '%case%'
- } {111}
- # 9.3: a NULL pattern returns no rows.
- do_execsql_test 9.3 {
- SELECT rowid FROM t1 WHERE a12 LIKE NULL
- } {}
- #-------------------------------------------------------------------------
- reset_db
- # Section 10: insert values containing unusual code points and byte
- # sequences that are not well-formed UTF-8. Each do_test expects only an
- # empty result, so the section passes when every insert completes without
- # raising an error.
- do_execsql_test 10.0 {
- CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=trigram);
- }
- # 10.1: strings written with U-escapes of two to five hex digits placed
- # before and after "abc". The escapes are presumably expanded when Tcl
- # parses the list elements -- confirm. Each value is passed through the
- # Tcl variable val named in the braced SQL.
- do_test 10.1 {
- foreach {val} {
- "abc \UFFjkl\UFF"
- "abc \UFFFjkl\UFFF"
- "abc \UFFFFjkl\UFFFF"
- "abc \UFFFFFjkl\UFFFFF"
- "\UFFjkl\UFF abc"
- "\UFFFjkl\UFFF abc"
- "\UFFFFjkl\UFFFF abc"
- "\UFFFFFjkl\UFFFFF abc"
- "\U10001jkl\U10001 abc"
- } {
- execsql { INSERT INTO t1 VALUES( $val ) }
- }
- } {}
- # 10.2: blob literals containing the byte sequences E1 80 00, ED A0 80 and
- # EF BF BE, first near the start of the value and then after the prefix
- # "vhi ". The execsql argument is double-quoted, so Tcl pastes each
- # literal into the SQL text instead of passing it as a variable.
- do_test 10.2 {
- foreach {val} {
- X'E18000626320646566'
- X'61EDA0806320646566'
- X'61EDA0806320646566'
- X'61EFBFBE6320646566'
- X'76686920E18000626320646566'
- X'7668692061EDA0806320646566'
- X'7668692061EDA0806320646566'
- X'7668692061EFBFBE6320646566'
- } {
- execsql " INSERT INTO t1 VALUES( $val ) "
- }
- } {}
- # 10.3: byte strings of the form 61 F7 BF .. BF 62, with three, four, five
- # and six BF bytes in a, b, c and d respectively. Each is inserted on its
- # own and again appended to 'abcd'.
- do_test 10.3 {
- set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
- set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
- set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
- set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
- execsql {
- INSERT INTO t1 VALUES($a);
- INSERT INTO t1 VALUES($b);
- INSERT INTO t1 VALUES($c);
- INSERT INTO t1 VALUES($d);
- INSERT INTO t1 VALUES('abcd' || $a);
- INSERT INTO t1 VALUES('abcd' || $b);
- INSERT INTO t1 VALUES('abcd' || $c);
- INSERT INTO t1 VALUES('abcd' || $d);
- }
- } {}
- # Section 11: insert the value of str('') into a trigram table and expect
- # no error. str() is presumably the SQL function installed by
- # sqlite3_fts5_register_str, which is defined outside this file -- confirm
- # what it returns for an empty argument.
- do_execsql_test 11.0 {
- CREATE VIRTUAL TABLE t4 USING fts5(y, tokenize=trigram);
- }
- sqlite3_fts5_register_str db
- do_execsql_test 11.1 {
- INSERT INTO t4 VALUES( str('') );
- }
- # Section 12: direct tokenizer output via sqlite3_fts5_tokenize. Each table
- # row is test-number, input text, expected output. The four-character
- # input yields two overlapping trigrams, each followed by two integers
- # (presumably start and end offsets -- confirm). Inputs shorter than three
- # characters yield nothing.
- foreach {tn input expect} {
- 0 "abcd" {abc 0 3 bcd 1 4}
- 1 "a" {}
- 2 "" {}
- } {
- do_test 12.$tn {
- sqlite3_fts5_tokenize db trigram $input
- } $expect
- }
- finish_test
|