  # fts5unicode3.test
  #
  # 2014 Dec 20
  #
  # The author disclaims copyright to this source code. In place of
  # a legal notice, here is a blessing:
  #
  # May you do good and not evil.
  # May you find forgiveness for yourself and forgive others.
  # May you share freely, never taking more than you give.
  #
  #***********************************************************************
  #
  # Tests focusing on the fts5 tokenizers
  #
  # Load fts5_common.tcl from the directory that contains this script.
  source [file join [file dirname [info script]] fts5_common.tcl]

  # If SQLITE_ENABLE_FTS5 is not defined, omit this file.
  ifcapable !fts5 {
    finish_test
    return
  }
  # fts3_unicode_path FILE
  #
  # Return the path formed by joining "..", the directory of the script
  # currently being evaluated, "..", "..", "fts3", "unicode" and FILE.
  # Note that [file join] discards the leading ".." whenever the script
  # directory is an absolute path.
  proc fts3_unicode_path {file} {
    set scriptDir [file dirname [info script]]
    return [file join .. $scriptDir .. .. fts3 unicode $file]
  }
  # The loader commands called below are not defined in this file;
  # presumably they come from parseunicode.tcl -- confirm against that script.
  source [fts3_unicode_path parseunicode.tcl]
  set testprefix fts5unicode3

  # Locations of the two Unicode data files consumed by the loaders.
  set CF [fts3_unicode_path CaseFolding.txt]
  set UD [fts3_unicode_path UnicodeData.txt]

  # NOTE(review): expected to populate the tl_lookup_table array that
  # tcl_fold reads -- confirm in parseunicode.tcl.
  tl_load_casefolding_txt $CF

  # aNotAlnum(X) exists for every value X returned by the loader.
  foreach x [an_load_unicodedata_text $UD] {
    set aNotAlnum($x) 1
  }

  # Each entry is unpacked into {code ascii f}. The stored value is the
  # signed 8-bit integer scanned from $ascii, or 0 when $ascii is empty.
  # An entry with f==0 is also recorded under the ($code,1) key.
  foreach y [rd_load_unicodedata_text $UD] {
    foreach {code ascii f} $y {}
    set int 0
    if {$ascii ne ""} {
      binary scan $ascii c int
    }
    set aDiacritic($code,$f) $int
    if {$f==0} { set aDiacritic($code,1) $int }
  }
  # tcl_fold I ?BREMOVEDIACRITIC?
  #
  # Tcl-side folding routine whose result the tests below compare with the
  # SQL function fts5_fold().
  #
  #   I                  Integer codepoint to fold.
  #   BREMOVEDIACRITIC   0 (default): apply the tl_lookup_table mapping only.
  #                      Non-zero:    additionally map the result through
  #                                   aDiacritic, using the "1" flavour of the
  #                                   key when the value is exactly 2 and the
  #                                   "0" flavour otherwise.
  #
  # Returns the folded codepoint as a number. Values with no entry in
  # either table are returned unchanged.
  proc tcl_fold {i {bRemoveDiacritic 0}} {
    global tl_lookup_table
    global aDiacritic

    # Expressions are braced so that the arguments are substituted exactly
    # once; an unbraced [expr] would re-parse (and could execute) their text.
    set f [expr {$bRemoveDiacritic==2}]

    if {[info exists tl_lookup_table($i)]} {
      set i $tl_lookup_table($i)
    }
    if {$bRemoveDiacritic && [info exists aDiacritic($i,$f)]} {
      set i $aDiacritic($i,$f)
    }

    # Hand the value back through [expr] so a numeric result is returned.
    expr {$i}
  }
  # Make the Tcl proc callable from SQL as tcl_fold().
  db function tcl_fold tcl_fold
  # tcl_isalnum I
  #
  # Return 0 if the global array aNotAlnum has an entry for I, otherwise 1.
  proc tcl_isalnum {i} {
    if {[info exists ::aNotAlnum($i)]} {
      return 0
    }
    return 1
  }
  # Make the Tcl proc callable from SQL as tcl_isalnum().
  db function tcl_isalnum tcl_isalnum
  # 1.0.*: calling the SQL functions with an unsupported number of
  # arguments must fail with SQLite's "wrong number of arguments" error.

  # fts5_isalnum() with three arguments.
  do_catchsql_test 1.0.1 {
    SELECT fts5_isalnum(1, 2, 3);
  } {1 {wrong number of arguments to function fts5_isalnum}}

  # fts5_fold() with no arguments.
  do_catchsql_test 1.0.2 {
    SELECT fts5_fold();
  } {1 {wrong number of arguments to function fts5_fold}}

  # fts5_fold() with three arguments.
  do_catchsql_test 1.0.3 {
    SELECT fts5_fold(1,2,3);
  } {1 {wrong number of arguments to function fts5_fold}}
  # 1.1 - 1.3: for every integer i from -1 to 100000 (generated by the
  # recursive CTE), the SQL function must agree with its Tcl counterpart.
  # Each query reports the number of mismatches and the smallest
  # mismatching i, so a passing run yields 0 and NULL.

  # fts5_fold(i) versus tcl_fold(i).
  do_execsql_test 1.1 {
    WITH ii(i) AS (
      SELECT -1
      UNION ALL
      SELECT i+1 FROM ii WHERE i<100000
    )
    SELECT count(*), min(i) FROM ii WHERE fts5_fold(i)!=CAST(tcl_fold(i) AS int);
  } {0 {}}

  # Same comparison with the second argument set to 1.
  do_execsql_test 1.2.1 {
    WITH ii(i) AS (
      SELECT -1
      UNION ALL
      SELECT i+1 FROM ii WHERE i<100000
    )
    SELECT count(*), min(i) FROM ii
    WHERE fts5_fold(i,1)!=CAST(tcl_fold(i,1) AS int);
  } {0 {}}

  # Same comparison with the second argument set to 2.
  do_execsql_test 1.2.2 {
    WITH ii(i) AS (
      SELECT -1
      UNION ALL
      SELECT i+1 FROM ii WHERE i<100000
    )
    SELECT count(*), min(i) FROM ii
    WHERE fts5_fold(i,2)!=CAST(tcl_fold(i,2) AS int);
  } {0 {}}

  # fts5_isalnum(i) versus tcl_isalnum(i).
  do_execsql_test 1.3 {
    WITH ii(i) AS (
      SELECT -1
      UNION ALL
      SELECT i+1 FROM ii WHERE i<100000
    )
    SELECT count(*), min(i) FROM ii
    WHERE fts5_isalnum(i)!=CAST(tcl_isalnum(i) AS int);
  } {0 {}}
  # 1.4: create an fts5 table whose unicode61 tokenizer is given the
  # characters with codepoints 700..899 as its "separators" option. The
  # CREATE statement is expected to succeed and return nothing.
  do_test 1.4 {
    set chars ""
    for {set cp 700} {$cp < 900} {incr cp} {
      append chars [format %c $cp]
    }
    set sql "CREATE VIRTUAL TABLE f3 USING fts5(a, tokenize=\"unicode61 separators '$chars'\");"
    execsql $sql
  } {}

  # 1.5: as 1.4, but the same character range is passed as "tokenchars".
  do_test 1.5 {
    set chars ""
    for {set cp 700} {$cp < 900} {incr cp} {
      append chars [format %c $cp]
    }
    set sql "CREATE VIRTUAL TABLE f5 USING fts5(a, tokenize=\"unicode61 tokenchars '$chars'\");"
    execsql $sql
  } {}

  finish_test