# loadfts5.tcl (3.8 KB)
  # loadfile F
  #
  # Return the entire contents of file F, read through a channel opened with
  # the default [open] settings.
  #
  # Raises the usual Tcl error if F cannot be opened or read. The channel is
  # closed on both the success and the failure path, so a failing [read]
  # does not leak an open channel.
  proc loadfile {f} {
    set fd [open $f]
    if {[catch {read $fd} data opts]} {
      # Release the channel first, then re-raise the original error with its
      # original error code and stack information intact.
      catch {close $fd}
      return -options $opts $data
    }
    close $fd
    return $data
  }
  # Running count of rows inserted so far, and the number of inserted rows
  # represented by each progress dot written to stdout.
  set ::nRow 0
  set ::nRowPerDot 1000

  # load_hierachy DIR
  #
  # Walk directory DIR recursively. Each entry that is not a directory is
  # inserted into table t1 as a (path, loadfile(path)) row through the [db]
  # command. Reads ::O(limit) and ::O(trans); updates ::nRow.
  #
  #   * Stops as soon as ::O(limit) is non-zero and ::nRow has reached it.
  #   * When ::O(trans) is non-zero, every ::O(trans) rows the open
  #     transaction is committed, an 'integrity-check' command is issued to
  #     t1 and a new transaction is begun.
  #   * Prints a "." every ::nRowPerDot rows, with a newline after every
  #     65 dots.
  proc load_hierachy {dir} {
    set entries [glob -nocomplain -dir $dir *]
    foreach f $entries {
      if {$::O(limit) && $::nRow>=$::O(limit)} { break }

      # Directories are descended into; only non-directories become rows.
      if {[file isdir $f]} {
        load_hierachy $f
        continue
      }

      db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
      set n [incr ::nRow]

      # Periodic commit + integrity check, if requested.
      set perTrans $::O(trans)
      if {$perTrans && ($n % $perTrans)==0} {
        db eval { COMMIT }
        db eval { INSERT INTO t1(t1) VALUES('integrity-check') }
        db eval { BEGIN }
      }

      # Progress output: nothing to do unless this row completes a dot.
      if {($n % $::nRowPerDot)!=0} { continue }
      puts -nonewline .
      if {($n % (65*$::nRowPerDot))==0} { puts "" }
      flush stdout
    }
  }
  # usage
  #
  # Write the command-line synopsis and the list of supported switches to
  # stderr, one line at a time, then terminate the process with exit
  # status 1. Does not return.
  proc usage {} {
    set help [list \
      "Usage: $::argv0 ?SWITCHES? DATABASE PATH" \
      "" \
      "Switches are:" \
      " -fts4 (use fts4 instead of fts5)" \
      " -fts5 (use fts5)" \
      " -porter (use porter tokenizer)" \
      " -delete (delete the database file before starting)" \
      " -limit N (load no more than N documents)" \
      " -automerge N (set the automerge parameter to N)" \
      " -crisismerge N (set the crisismerge parameter to N)" \
      " -prefix PREFIX (comma separated prefix= argument)" \
      " -trans N (commit after N inserts - 0 == never)" \
      " -hashsize N (set the fts5 hashsize parameter to N)" \
      " -detail MODE (detail mode for fts5 tables)" \
    ]
    foreach line $help {
      puts stderr $line
    }
    exit 1
  }
  # Default values for every command-line option, kept in array O.
  #
  #   vtab         virtual table module used for t1
  #   tok          extra text appended to the CREATE VIRTUAL TABLE arguments
  #                to select a tokenizer (empty: none)
  #   limit        maximum number of documents to load (0: no limit)
  #   delete       non-zero: delete the database file before starting
  #   automerge, crisismerge, hashsize
  #                a negative value means the parameter is left unset
  #   prefix       value for the prefix= table argument (empty: none)
  #   trans        commit after this many inserts (0: never)
  #   detail       detail mode passed to fts5 tables
  array set O {
    vtab        fts5
    tok         ""
    limit       0
    delete      0
    automerge   -1
    crisismerge -1
    prefix      ""
    trans       0
    hashsize    -1
    detail      full
  }
  # Parse the command line into array O.
  #
  # The last two words of argv are always DATABASE and PATH, so only the
  # first nOpt words are examined as switches. Any malformed command line
  # (too few words, unknown switch, a switch missing its value, or a
  # non-integer value for a numeric switch) ends in [usage], which prints
  # the help text and exits.
  if {[llength $argv]<2} usage
  set nOpt [expr {[llength $argv]-2}]
  for {set i 0} {$i < $nOpt} {incr i} {
    set arg [lindex $argv $i]
    switch -- $arg {
      -fts4   { set O(vtab) fts4 }
      -fts5   { set O(vtab) fts5 }
      -porter { set O(tok) ", tokenize=porter" }
      -delete { set O(delete) 1 }

      -limit -
      -trans -
      -automerge -
      -crisismerge -
      -hashsize {
        # Numeric switches: the value is the next word, which must still lie
        # within the switch region and must be an integer. Rejecting bad
        # values here avoids an obscure expression error in the middle of
        # the load, or a non-numeric string being handed to SQL.
        if { [incr i]>=$nOpt } usage
        set val [lindex $argv $i]
        if {![string is wideinteger -strict $val]} usage
        # The option key is the switch name without its leading "-".
        set O([string range $arg 1 end]) $val
      }

      -prefix -
      -detail {
        # Free-form string switches: the value is taken verbatim.
        if { [incr i]>=$nOpt } usage
        set O([string range $arg 1 end]) [lindex $argv $i]
      }

      default { usage }
    }
  }
  # Main script: open the database, create and configure table t1, then
  # load every file found under PATH inside a transaction.

  # The final two command-line words are the database file and the path
  # to load.
  set dbfile [lindex $argv end-1]
  set rootDir [lindex $argv end]
  if {$O(delete)} {
    file delete -force $dbfile
  }

  sqlite3 db $dbfile
  # Any error from load_static_extension is ignored.
  catch { load_static_extension db fts5 }
  # Expose the Tcl proc loadfile to SQL as the function loadfile().
  db func loadfile loadfile
  db eval {PRAGMA page_size=4096}
  db eval {BEGIN}

  set isFts5 [expr {$O(vtab) eq "fts5"}]

  # Build the optional trailing arguments of the CREATE VIRTUAL TABLE
  # statement: prefix= when requested, detail= for fts5 tables only.
  set pref ""
  if {$O(prefix) ne ""} {
    set pref ", prefix='$O(prefix)'"
  }
  if {$isFts5} {
    append pref ", detail=$O(detail)"
  }

  # Errors from creating the table or setting 'pgsz' are ignored.
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
    db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }

  # Errors from setting 'hashsize' are ignored as well.
  if {$O(hashsize)>=0} {
    catch {
      db eval "INSERT INTO t1(t1, rank) VALUES('hashsize', $O(hashsize));"
    }
  }

  # automerge uses a different command form for fts5 than for other modules.
  if {$O(automerge)>=0} {
    if {$isFts5} {
      db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
    } else {
      db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
    }
  }

  # crisismerge is only applied to fts5 tables; for any other module the
  # option is accepted but has no effect.
  if {$O(crisismerge)>=0 && $isFts5} {
    db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
  }

  load_hierachy $rootDir
  db eval COMMIT
  puts ""