loadfts.c 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. /*
  2. ** 2014-07-28
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. *************************************************************************
  12. **
  13. ** This file implements a utility program that will load many disk
  14. ** files (all files under a given directory) into a FTS table. This is
  15. ** used for performance testing of FTS3, FTS4, and FTS5.
  16. */
  #include <stdio.h>
  #include <stdlib.h>
  #include <ctype.h>
  #include <assert.h>
  #include <string.h>
  #include <errno.h>
  #include <dirent.h>
  #include <sys/stat.h>
  #include "sqlite3.h"
  25. /*
  26. ** Implementation of the "readtext(X)" SQL function. The entire content
  27. ** of the file named X is read and returned as a TEXT value. It is assumed
  28. ** the file contains UTF-8 text. NULL is returned if the file does not
  29. ** exist or is unreadable.
  30. */
  31. static void readfileFunc(
  32. sqlite3_context *context,
  33. int argc,
  34. sqlite3_value **argv
  35. ){
  36. const char *zName;
  37. FILE *in;
  38. long nIn;
  39. void *pBuf;
  40. zName = (const char*)sqlite3_value_text(argv[0]);
  41. if( zName==0 ) return;
  42. in = fopen(zName, "rb");
  43. if( in==0 ) return;
  44. fseek(in, 0, SEEK_END);
  45. nIn = ftell(in);
  46. rewind(in);
  47. pBuf = sqlite3_malloc( nIn );
  48. if( pBuf && 1==fread(pBuf, nIn, 1, in) ){
  49. sqlite3_result_text(context, pBuf, nIn, sqlite3_free);
  50. }else{
  51. sqlite3_free(pBuf);
  52. }
  53. fclose(in);
  54. }
  /*
  ** Write the usage message for this program to stdout and then terminate
  ** the process with exit status 1. Argument zArgv0 is substituted into the
  ** "Usage:" line as the program name. This function does not return.
  */
  static void showHelp(const char *zArgv0){
    /* Everything that follows the "Usage:" line. It contains no format
    ** specifiers, so it is emitted verbatim with fputs(). */
    static const char zDetail[] =
      "\n"
      " This program opens the database named on the command line and attempts to\n"
      " create an FTS table named \"fts\" with a single column. If successful, it\n"
      " recursively traverses the directory named by the -dir option and inserts\n"
      " the contents of each file into the fts table. All files are assumed to\n"
      " contain UTF-8 text.\n"
      "\n"
      "Switches are:\n"
      " -fts [345] FTS version to use (default=5)\n"
      " -idx [01] Create a mapping from filename to rowid (default=0)\n"
      " -dir <path> Root of directory tree to load data from (default=.)\n"
      " -trans <integer> Number of inserts per transaction (default=1)\n"
    ;

    printf("\nUsage: %s SWITCHES... DB\n", zArgv0);
    fputs(zDetail, stdout);
    exit(1);
  }
  /*
  ** Report a failed library call and terminate. The message written to
  ** stderr consists of zText followed by the description of the current
  ** errno value. The process then exits with status -1, so this function
  ** does not return.
  */
  static void error_out(const char *zText){
    const char *zReason = strerror(errno);
    fprintf(stderr, "%s: %s\n", zText, zReason);
    exit(-1);
  }
  84. /*
  85. ** Exit with a message based on the first argument and the error message
  86. ** currently stored in database handle db.
  87. */
  88. static void sqlite_error_out(const char *zText, sqlite3 *db){
  89. fprintf(stderr, "%s: %s\n", zText, sqlite3_errmsg(db));
  90. exit(-1);
  91. }
  92. /*
  93. ** Context object for visit_file().
  94. */
  95. typedef struct VisitContext VisitContext;
  96. struct VisitContext {
  97. int nRowPerTrans;
  98. sqlite3 *db; /* Database handle */
  99. sqlite3_stmt *pInsert; /* INSERT INTO fts VALUES(readtext(:1)) */
  100. };
  101. /*
  102. ** Callback used with traverse(). The first argument points to an object
  103. ** of type VisitContext. This function inserts the contents of the text
  104. ** file zPath into the FTS table.
  105. */
  106. void visit_file(void *pCtx, const char *zPath){
  107. int rc;
  108. VisitContext *p = (VisitContext*)pCtx;
  109. /* printf("%s\n", zPath); */
  110. sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC);
  111. sqlite3_step(p->pInsert);
  112. rc = sqlite3_reset(p->pInsert);
  113. if( rc!=SQLITE_OK ){
  114. sqlite_error_out("insert", p->db);
  115. }else if( p->nRowPerTrans>0
  116. && (sqlite3_last_insert_rowid(p->db) % p->nRowPerTrans)==0
  117. ){
  118. sqlite3_exec(p->db, "COMMIT ; BEGIN", 0, 0, 0);
  119. }
  120. }
  121. /*
  122. ** Recursively traverse directory zDir. For each file that is not a
  123. ** directory, invoke the supplied callback with its path.
  124. */
  125. static void traverse(
  126. const char *zDir, /* Directory to traverse */
  127. void *pCtx, /* First argument passed to callback */
  128. void (*xCallback)(void*, const char *zPath)
  129. ){
  130. DIR *d;
  131. struct dirent *e;
  132. d = opendir(zDir);
  133. if( d==0 ) error_out("opendir()");
  134. for(e=readdir(d); e; e=readdir(d)){
  135. if( strcmp(e->d_name, ".")==0 || strcmp(e->d_name, "..")==0 ) continue;
  136. char *zPath = sqlite3_mprintf("%s/%s", zDir, e->d_name);
  137. if (e->d_type & DT_DIR) {
  138. traverse(zPath, pCtx, xCallback);
  139. }else{
  140. xCallback(pCtx, zPath);
  141. }
  142. sqlite3_free(zPath);
  143. }
  144. closedir(d);
  145. }
  146. int main(int argc, char **argv){
  147. int iFts = 5; /* Value of -fts option */
  148. int bMap = 0; /* True to create mapping table */
  149. const char *zDir = "."; /* Directory to scan */
  150. int i;
  151. int rc;
  152. int nRowPerTrans = 0;
  153. sqlite3 *db;
  154. char *zSql;
  155. VisitContext sCtx;
  156. int nCmd = 0;
  157. char **aCmd = 0;
  158. if( argc % 2 ) showHelp(argv[0]);
  159. for(i=1; i<(argc-1); i+=2){
  160. char *zOpt = argv[i];
  161. char *zArg = argv[i+1];
  162. if( strcmp(zOpt, "-fts")==0 ){
  163. iFts = atoi(zArg);
  164. if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]);
  165. }
  166. else if( strcmp(zOpt, "-trans")==0 ){
  167. nRowPerTrans = atoi(zArg);
  168. }
  169. else if( strcmp(zOpt, "-idx")==0 ){
  170. bMap = atoi(zArg);
  171. if( bMap!=0 && bMap!=1 ) showHelp(argv[0]);
  172. }
  173. else if( strcmp(zOpt, "-dir")==0 ){
  174. zDir = zArg;
  175. }
  176. else if( strcmp(zOpt, "-special")==0 ){
  177. nCmd++;
  178. aCmd = sqlite3_realloc(aCmd, sizeof(char*) * nCmd);
  179. aCmd[nCmd-1] = zArg;
  180. }
  181. else{
  182. showHelp(argv[0]);
  183. }
  184. }
  185. /* Open the database file */
  186. rc = sqlite3_open(argv[argc-1], &db);
  187. if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_open()", db);
  188. rc = sqlite3_create_function(db, "readtext", 1, SQLITE_UTF8, 0,
  189. readfileFunc, 0, 0);
  190. if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_create_function()", db);
  191. /* Create the FTS table */
  192. zSql = sqlite3_mprintf("CREATE VIRTUAL TABLE fts USING fts%d(content)", iFts);
  193. rc = sqlite3_exec(db, zSql, 0, 0, 0);
  194. if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db);
  195. sqlite3_free(zSql);
  196. for(i=0; i<nCmd; i++){
  197. zSql = sqlite3_mprintf("INSERT INTO fts(fts) VALUES(%Q)", aCmd[i]);
  198. rc = sqlite3_exec(db, zSql, 0, 0, 0);
  199. if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db);
  200. sqlite3_free(zSql);
  201. }
  202. /* Compile the INSERT statement to write data to the FTS table. */
  203. memset(&sCtx, 0, sizeof(VisitContext));
  204. sCtx.db = db;
  205. sCtx.nRowPerTrans = nRowPerTrans;
  206. rc = sqlite3_prepare_v2(db,
  207. "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0
  208. );
  209. if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db);
  210. /* Load all files in the directory hierarchy into the FTS table. */
  211. if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0);
  212. traverse(zDir, (void*)&sCtx, visit_file);
  213. if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0);
  214. /* Clean up and exit. */
  215. sqlite3_finalize(sCtx.pInsert);
  216. sqlite3_close(db);
  217. sqlite3_free(aCmd);
  218. return 0;
  219. }