  # fts5hash.test (4.4 KB)
  # 2015 April 21
  #
  # The author disclaims copyright to this source code.  In place of
  # a legal notice, here is a blessing:
  #
  #    May you do good and not evil.
  #    May you find forgiveness for yourself and forgive others.
  #    May you share freely, never taking more than you give.
  #
  #***********************************************************************
  #
  # The tests in this file are focused on the code in fts5_hash.c.
  #
  # Pull in fts5_common.tcl from the directory that contains this script,
  # then set the prefix used for the test names in this file.
  source [file join [file dirname [info script]] fts5_common.tcl]
  set testprefix fts5hash

  # If SQLITE_ENABLE_FTS5 is not defined, omit this file.
  ifcapable !fts5 {
    finish_test
    return
  }
  #-------------------------------------------------------------------------
  # Return a list of tokens (a vocabulary) that all share the same hash
  # key value. This can be used to test hash collisions.
  #
  # Arguments are "-option value" pairs:
  #
  #   -nslot N     First argument handed to sqlite3_fts5_token_hash
  #                (presumably the number of hash slots). Default 1024.
  #   -nword N     Number of tokens to return. Default 20.
  #   -hash H      Hash value every returned token must produce. Default 88.
  #   -prefix STR  Text prepended to each random token before it is hashed.
  #                Default "".
  #
  # An odd number of arguments raises "bad args"; an option not listed above
  # raises "bad option: NAME". Tokens are drawn at random until -nword
  # matches have been collected, so the returned list may contain
  # duplicates.
  #
  proc build_vocab1 {args} {
    # Defaults; overridden below by whatever the caller supplied.
    set O(-nslot)  1024
    set O(-nword)  20
    set O(-hash)   88
    set O(-prefix) ""

    if {[llength $args] % 2} { error "bad args" }
    foreach {k v} $args {
      if {[info exists O($k)]==0} { error "bad option: $k" }
      set O($k) $v
    }

    # Keep generating candidates, retaining only those whose hash matches.
    set L [list]
    while {[llength $L] < $O(-nword)} {
      set t "$O(-prefix)[random_token]"
      set h [sqlite3_fts5_token_hash $O(-nslot) $t]
      if {$O(-hash)==$h} { lappend L $t }
    }
    return $L
  }
  # Return a random token made up of the letters a-j.
  #
  # A random integer in the range 0..1999999 is chosen and each of its
  # decimal digits is mapped to a letter (0->a, 1->b, ... 9->j), so the
  # result is between 1 and 7 characters long.
  #
  proc random_token {} {
    set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]
    # Brace the expression so that expr parses it exactly once instead of
    # receiving arguments already substituted by the Tcl parser; this is
    # the same form random_doc uses.
    set iVal [expr {int(rand() * 2000000)}]
    return [string map $map $iVal]
  }
  # Build a document of nWord words, each one picked at random (with
  # replacement) from the list vocab. The document is returned as a Tcl
  # list; it is empty when nWord is zero or less.
  #
  proc random_doc {vocab nWord} {
    set vocabSize [llength $vocab]
    set words [list]
    for {set n 0} {$n < $nWord} {incr n} {
      lappend words [lindex $vocab [expr {int(rand() * $vocabSize)}]]
    }
    return $words
  }
  foreach_detail_mode $testprefix {

    # A vocabulary built with the build_vocab1 defaults: every token yields
    # the same hash value. random_doc is exposed to SQL as r().
    set vocab [build_vocab1]
    db func r random_doc

    # 1.0: insert 100 two-column rows drawn from that vocabulary inside one
    # transaction, running integrity-check before and after the COMMIT.
    do_execsql_test 1.0 {
      CREATE VIRTUAL TABLE eee USING fts5(e, ee, detail=%DETAIL%);
      BEGIN;
        WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
        INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii;
        INSERT INTO eee(eee) VALUES('integrity-check');
      COMMIT;
      INSERT INTO eee(eee) VALUES('integrity-check');
    }

    # A second vocabulary: tokens beginning with "xyz" whose hash value
    # equals that of the bare token "xyz", plus "xyz" itself.
    set hash [sqlite3_fts5_token_hash 1024 xyz]
    set vocab [build_vocab1 -prefix xyz -hash $hash]
    lappend vocab xyz

    # 1.1: create the fts5vocab table and open the transaction that stays
    # open until test 1.4.
    do_execsql_test 1.1 {
      CREATE VIRTUAL TABLE vocab USING fts5vocab(eee, 'row');
      BEGIN;
    }

    # 1.2: add 100 more rows, one INSERT statement per row.
    do_test 1.2 {
      for {set i 1} {$i <= 100} {incr i} {
        execsql { INSERT INTO eee VALUES( r($vocab, 5), r($vocab, 7) ) }
      }
    } {}

    # 1.3: with the transaction still open, the "doc" value reported by
    # fts5vocab for each term must equal the number of rows that a MATCH
    # query for that term returns.
    do_test 1.3 {
      db eval { SELECT term, doc FROM vocab } {
        set nMatch [db one {SELECT count(*) FROM eee WHERE eee MATCH $term}]
        if {$doc != $nMatch} {
          error "term=$term fts5vocab=$doc cnt=$nMatch"
        }
      }
      set {} {}
    } {}

    # 1.4: commit and verify the index.
    do_execsql_test 1.4 {
      COMMIT;
      INSERT INTO eee(eee) VALUES('integrity-check');
    }

    #-----------------------------------------------------------------------
    # Add a small and very large token with the same hash value to an
    # empty table. At one point this would provoke an asan error.
    #
    do_test 1.5 {
      set big [string repeat 12345 40]
      set hash [sqlite3_fts5_token_hash 1024 $big]

      # Draw random tokens until one produces the same hash as $big.
      set small [random_token]
      while {[sqlite3_fts5_token_hash 1024 $small] != $hash} {
        set small [random_token]
      }

      execsql { CREATE VIRTUAL TABLE t2 USING fts5(x, detail=%DETAIL%) }
      execsql {
        INSERT INTO t2 VALUES($small || ' ' || $big);
      }
    } {}

  } ;# foreach_detail_mode
  #-------------------------------------------------------------------------
  # Tests 2.*: with the 'hashsize' option set to 1024, count the rows in
  # t1_data while a transaction is still open and after bulk inserts.
  #
  # NOTE(review): presumably this checks that pending data is flushed out of
  # the in-memory hash table once it grows past 'hashsize' - confirm against
  # fts5_hash.c.
  #
  reset_db

  # 2.1: create the table and set its configuration options.
  do_execsql_test 2.1 {
    CREATE VIRTUAL TABLE t1 USING fts5(x);
    INSERT INTO t1(t1, rank) VALUES('hashsize', 1024);
    INSERT INTO t1(t1, rank) VALUES('automerge', 0);
    INSERT INTO t1(t1, rank) VALUES('crisismerge', 1000);
  }

  # 2.2: after a single insert inside an open transaction, t1_data holds
  # exactly two rows.
  do_execsql_test 2.2 {
    BEGIN;
      INSERT INTO t1 VALUES('abc def ghi');
      SELECT count(*) FROM t1_data;
  } {2}

  # 2.3: after 1024 further copies of the same document, still inside the
  # transaction opened by 2.2, t1_data holds more than ten rows.
  do_execsql_test 2.3 {
    WITH s(i) AS (
      SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<1024
    )
    INSERT INTO t1 SELECT 'abc def ghi' FROM s;
    SELECT (SELECT count(*) FROM t1_data) > 10;
  } {1}

  # 2.4: commit, rebuild the table with the same options, then insert 1024
  # documents whose three tokens are all distinct per row; t1_data must end
  # up with more than one hundred rows.
  do_execsql_test 2.4 {
    COMMIT;
    DROP TABLE t1;
    CREATE VIRTUAL TABLE t1 USING fts5(x);
    INSERT INTO t1(t1, rank) VALUES('hashsize', 1024);
    INSERT INTO t1(t1, rank) VALUES('automerge', 0);
    INSERT INTO t1(t1, rank) VALUES('crisismerge', 1000);
    WITH s(i) AS (
      SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<1024
    )
    INSERT INTO t1 SELECT 'abc' || i || ' def' || i || ' ghi' || i FROM s;
    SELECT (SELECT count(*) FROM t1_data) > 100;
  } {1}

  finish_test