# Load every file found beneath PATH into an FTS4 or FTS5 table named t1 in DATABASE.
# loadfile F
#
# Return the complete contents of the file named F, read using the
# channel's default encoding and translation settings.
#
# Raises the underlying Tcl error if the file cannot be opened or read.
# The channel is closed on every path, so a failed read does not leak a
# file descriptor (this proc is called once per document loaded).
proc loadfile {f} {
  set fd [open $f]
  if {[catch {read $fd} data]} {
    # Save the error state before [close] has a chance to overwrite it,
    # then re-raise the original read error to the caller.
    set savedInfo $::errorInfo
    set savedCode $::errorCode
    catch {close $fd}
    return -code error -errorinfo $savedInfo -errorcode $savedCode $data
  }
  close $fd
  return $data
}
# Progress state shared with load_hierachy:
#   ::nRowPerDot  number of inserted documents represented by one "."
#   ::nRow        number of documents inserted so far
set ::nRowPerDot 1000
set ::nRow 0

# load_hierachy DIR
#
# Recursively walk directory DIR and insert one row (path, file contents)
# into table t1 for every non-directory entry found. Uses the global
# option array O:
#
#   O(limit)  if non-zero, stop once this many documents have been inserted
#   O(trans)  if non-zero, after every O(trans) inserts: COMMIT, run the
#             'integrity-check' command against t1, then BEGIN again
#
# A "." is written to stdout every ::nRowPerDot inserts, with a line break
# after every 65 dots.
proc load_hierachy {dir} {
  global O nRow nRowPerDot
  set dotsPerLine 65

  foreach f [glob -nocomplain -dir $dir *] {
    # The limit is re-tested at every level, so hitting it inside a
    # sub-directory also stops the enclosing loops.
    if {$O(limit) && $nRow>=$O(limit)} { break }

    if {[file isdir $f]} {
      load_hierachy $f
      continue
    }

    db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
    incr nRow

    if {$O(trans) && ($nRow % $O(trans))==0} {
      db eval { COMMIT }
      db eval { INSERT INTO t1(t1) VALUES('integrity-check') }
      db eval { BEGIN }
    }

    if {($nRow % $nRowPerDot)==0} {
      puts -nonewline .
      if {($nRow % ($dotsPerLine*$nRowPerDot))==0} {
        puts ""
      }
      flush stdout
    }
  }
}
# usage
#
# Write the command-line synopsis and the list of supported switches to
# stderr, then terminate the process with exit status 1.
proc usage {} {
  set text [list \
    "Usage: $::argv0 ?SWITCHES? DATABASE PATH" \
    "" \
    "Switches are:" \
    " -fts4 (use fts4 instead of fts5)" \
    " -fts5 (use fts5)" \
    " -porter (use porter tokenizer)" \
    " -delete (delete the database file before starting)" \
    " -limit N (load no more than N documents)" \
    " -automerge N (set the automerge parameter to N)" \
    " -crisismerge N (set the crisismerge parameter to N)" \
    " -prefix PREFIX (comma separated prefix= argument)" \
    " -trans N (commit after N inserts - 0 == never)" \
    " -hashsize N (set the fts5 hashsize parameter to N)" \
    " -detail MODE (detail mode for fts5 tables)" \
  ]
  puts stderr [join $text "\n"]
  exit 1
}
# Default option values. O() is the global option array read by the rest
# of the script (and by load_hierachy through ::O).
set O(vtab)        fts5   ;# virtual table module: fts4 or fts5
set O(tok)         ""     ;# extra tokenizer clause for CREATE VIRTUAL TABLE
set O(limit)       0      ;# maximum number of documents (0 == no limit)
set O(delete)      0      ;# true to delete the database file first
set O(automerge)   -1     ;# negative == leave at the module default
set O(crisismerge) -1     ;# negative == leave at the module default
set O(prefix)      ""     ;# value for the prefix= table option
set O(trans)       0      ;# commit every N inserts (0 == never)
set O(hashsize)    -1     ;# negative == leave at the module default
set O(detail)      full   ;# fts5 detail= mode

# The last two command-line words are always DATABASE and PATH; every
# word before them must be a switch or the value of a switch.
if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]

for {set i 0} {$i < $nOpt} {incr i} {
  set arg [lindex $argv $i]
  switch -- $arg {
    -fts4   { set O(vtab) fts4 }
    -fts5   { set O(vtab) fts5 }
    -porter { set O(tok) ", tokenize=porter" }
    -delete { set O(delete) 1 }

    -limit - -trans - -automerge - -crisismerge - -hashsize {
      # Switches taking an integer value. Reject a missing or non-integer
      # value here, before the database file is touched, rather than
      # failing part-way through the load.
      if { [incr i]>=$nOpt } usage
      set val [lindex $argv $i]
      if {![string is integer -strict $val]} usage
      set O([string range $arg 1 end]) $val
    }

    -prefix - -detail {
      # Switches taking a free-form string value. The O() key is the
      # switch name without its leading "-".
      if { [incr i]>=$nOpt } usage
      set O([string range $arg 1 end]) [lindex $argv $i]
    }

    default {
      usage
    }
  }
}
# DATABASE is the second-to-last command-line word, PATH the last.
set dbfile [lindex $argv end-1]
if {$O(delete)} { file delete -force $dbfile }

sqlite3 db $dbfile
# Best effort: [load_static_extension] may not be available in this shell.
catch { load_static_extension db fts5 }
# Expose the Tcl proc loadfile as the SQL function loadfile().
db func loadfile loadfile
db eval "PRAGMA page_size=4096"

db eval BEGIN

# Build the trailing table options: optional prefix=, and detail= for fts5.
set pref ""
if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
if {$O(vtab)=="fts5"} {
  append pref ", detail=$O(detail)"
}

# Create t1 only if it is not already present, so that loading into an
# existing database still works. Errors from CREATE VIRTUAL TABLE (for
# example an unknown module or a bad detail mode) are reported here
# instead of being swallowed and resurfacing later as "no such table".
if {![db exists {SELECT 1 FROM sqlite_master WHERE type='table' AND name='t1'}]} {
  db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
  if {$O(vtab)=="fts5"} {
    # The (t1, rank) configuration interface only exists for fts5 tables.
    db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }
}

if {$O(hashsize)>=0} {
  # Deliberately best-effort, as in the original script.
  # NOTE(review): presumably guarded because fts4 tables have no
  # 'hashsize' setting - confirm.
  catch {
    db eval "INSERT INTO t1(t1, rank) VALUES('hashsize', $O(hashsize));"
  }
}

if {$O(automerge)>=0} {
  if {$O(vtab) == "fts5"} {
    db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
  } else {
    db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
  }
}

if {$O(crisismerge)>=0} {
  if {$O(vtab) == "fts5"} {
    db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
  } else {
    # Nothing is sent to the table in this case; say so rather than
    # silently dropping the switch.
    puts stderr "warning: -crisismerge is ignored for $O(vtab) tables"
  }
}

load_hierachy [lindex $argv end]
db eval COMMIT
puts ""
|