12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975 |
- /*
- ** 2016-05-28
- **
- ** The author disclaims copyright to this source code. In place of
- ** a legal notice, here is a blessing:
- **
- ** May you do good and not evil.
- ** May you find forgiveness for yourself and forgive others.
- ** May you share freely, never taking more than you give.
- **
- ******************************************************************************
- **
- ** This file contains the implementation of an SQLite virtual table for
- ** reading CSV files.
- **
- ** Usage:
- **
- ** .load ./csv
- ** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
- ** SELECT * FROM csv;
- **
- ** The columns are named "c0", "c1", "c2", ... by default. Or the
- ** application can define its own CREATE TABLE statement using the
- ** schema= parameter, like this:
- **
- ** CREATE VIRTUAL TABLE temp.csv2 USING csv(
- ** filename = "../http.log",
- ** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
- ** );
- **
- ** Instead of specifying a file, the text of the CSV can be loaded using
- ** the data= parameter.
- **
- ** If the columns=N parameter is supplied, then the CSV file is assumed to have
- ** N columns. If both the columns= and schema= parameters are omitted, then
- ** the number and names of the columns is determined by the first line of
- ** the CSV input.
- **
- ** Some extra debugging features (used for testing virtual tables) are available
- ** if this module is compiled with -DSQLITE_TEST.
- */
- #include <sqlite3ext.h>
- SQLITE_EXTENSION_INIT1
- #include <string.h>
- #include <stdlib.h>
- #include <assert.h>
- #include <stdarg.h>
- #include <ctype.h>
- #include <stdio.h>
- #ifndef SQLITE_OMIT_VIRTUALTABLE
/*
** CSV_NOINLINE asks the compiler to keep a function out of line.  It
** expands to the GCC-style attribute when __GNUC__ is defined, to the
** MSVC declspec for _MSC_VER 1310 and later, and to nothing otherwise.
*/
#ifdef __GNUC__
#  define CSV_NOINLINE  __attribute__((noinline))
#elif defined(_MSC_VER) && (_MSC_VER >= 1310)
#  define CSV_NOINLINE  __declspec(noinline)
#else
#  define CSV_NOINLINE
#endif

/* Capacity in bytes of the CsvReader.zErr[] error message buffer */
#define CSV_MXERR 200

/* Number of bytes requested from the input file on each buffer refill */
#define CSV_INBUFSZ 1024
- /* A context object used when read a CSV file. */
- typedef struct CsvReader CsvReader;
- struct CsvReader {
- FILE *in; /* Read the CSV text from this input stream */
- char *z; /* Accumulated text for a field */
- int n; /* Number of bytes in z */
- int nAlloc; /* Space allocated for z[] */
- int nLine; /* Current line number */
- int bNotFirst; /* True if prior text has been seen */
- int cTerm; /* Character that terminated the most recent field */
- size_t iIn; /* Next unread character in the input buffer */
- size_t nIn; /* Number of characters in the input buffer */
- char *zIn; /* The input buffer */
- char zErr[CSV_MXERR]; /* Error message */
- };
- /* Initialize a CsvReader object */
- static void csv_reader_init(CsvReader *p){
- p->in = 0;
- p->z = 0;
- p->n = 0;
- p->nAlloc = 0;
- p->nLine = 0;
- p->bNotFirst = 0;
- p->nIn = 0;
- p->zIn = 0;
- p->zErr[0] = 0;
- }
- /* Close and reset a CsvReader object */
- static void csv_reader_reset(CsvReader *p){
- if( p->in ){
- fclose(p->in);
- sqlite3_free(p->zIn);
- }
- sqlite3_free(p->z);
- csv_reader_init(p);
- }
- /* Report an error on a CsvReader */
- static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
- va_list ap;
- va_start(ap, zFormat);
- sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
- va_end(ap);
- }
- /* Open the file associated with a CsvReader
- ** Return the number of errors.
- */
- static int csv_reader_open(
- CsvReader *p, /* The reader to open */
- const char *zFilename, /* Read from this filename */
- const char *zData /* ... or use this data */
- ){
- if( zFilename ){
- p->zIn = sqlite3_malloc( CSV_INBUFSZ );
- if( p->zIn==0 ){
- csv_errmsg(p, "out of memory");
- return 1;
- }
- p->in = fopen(zFilename, "rb");
- if( p->in==0 ){
- sqlite3_free(p->zIn);
- csv_reader_reset(p);
- csv_errmsg(p, "cannot open '%s' for reading", zFilename);
- return 1;
- }
- }else{
- assert( p->in==0 );
- p->zIn = (char*)zData;
- p->nIn = strlen(zData);
- }
- return 0;
- }
- /* The input buffer has overflowed. Refill the input buffer, then
- ** return the next character
- */
- static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
- size_t got;
- assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */
- assert( p->in!=0 ); /* Only called if reading froma file */
- got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
- if( got==0 ) return EOF;
- p->nIn = got;
- p->iIn = 1;
- return p->zIn[0];
- }
- /* Return the next character of input. Return EOF at end of input. */
- static int csv_getc(CsvReader *p){
- if( p->iIn >= p->nIn ){
- if( p->in!=0 ) return csv_getc_refill(p);
- return EOF;
- }
- return ((unsigned char*)p->zIn)[p->iIn++];
- }
- /* Increase the size of p->z and append character c to the end.
- ** Return 0 on success and non-zero if there is an OOM error */
- static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
- char *zNew;
- int nNew = p->nAlloc*2 + 100;
- zNew = sqlite3_realloc64(p->z, nNew);
- if( zNew ){
- p->z = zNew;
- p->nAlloc = nNew;
- p->z[p->n++] = c;
- return 0;
- }else{
- csv_errmsg(p, "out of memory");
- return 1;
- }
- }
- /* Append a single character to the CsvReader.z[] array.
- ** Return 0 on success and non-zero if there is an OOM error */
- static int csv_append(CsvReader *p, char c){
- if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
- p->z[p->n++] = c;
- return 0;
- }
- /* Read a single field of CSV text. Compatible with rfc4180 and extended
- ** with the option of having a separator other than ",".
- **
- ** + Input comes from p->in.
- ** + Store results in p->z of length p->n. Space to hold p->z comes
- ** from sqlite3_malloc64().
- ** + Keep track of the line number in p->nLine.
- ** + Store the character that terminates the field in p->cTerm. Store
- ** EOF on end-of-file.
- **
- ** Return 0 at EOF or on OOM. On EOF, the p->cTerm character will have
- ** been set to EOF.
- */
- static char *csv_read_one_field(CsvReader *p){
- int c;
- p->n = 0;
- c = csv_getc(p);
- if( c==EOF ){
- p->cTerm = EOF;
- return 0;
- }
- if( c=='"' ){
- int pc, ppc;
- int startLine = p->nLine;
- pc = ppc = 0;
- while( 1 ){
- c = csv_getc(p);
- if( c<='"' || pc=='"' ){
- if( c=='\n' ) p->nLine++;
- if( c=='"' ){
- if( pc=='"' ){
- pc = 0;
- continue;
- }
- }
- if( (c==',' && pc=='"')
- || (c=='\n' && pc=='"')
- || (c=='\n' && pc=='\r' && ppc=='"')
- || (c==EOF && pc=='"')
- ){
- do{ p->n--; }while( p->z[p->n]!='"' );
- p->cTerm = (char)c;
- break;
- }
- if( pc=='"' && c!='\r' ){
- csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
- break;
- }
- if( c==EOF ){
- csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
- startLine, '"');
- p->cTerm = (char)c;
- break;
- }
- }
- if( csv_append(p, (char)c) ) return 0;
- ppc = pc;
- pc = c;
- }
- }else{
- /* If this is the first field being parsed and it begins with the
- ** UTF-8 BOM (0xEF BB BF) then skip the BOM */
- if( (c&0xff)==0xef && p->bNotFirst==0 ){
- csv_append(p, (char)c);
- c = csv_getc(p);
- if( (c&0xff)==0xbb ){
- csv_append(p, (char)c);
- c = csv_getc(p);
- if( (c&0xff)==0xbf ){
- p->bNotFirst = 1;
- p->n = 0;
- return csv_read_one_field(p);
- }
- }
- }
- while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
- if( csv_append(p, (char)c) ) return 0;
- c = csv_getc(p);
- }
- if( c=='\n' ){
- p->nLine++;
- if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
- }
- p->cTerm = (char)c;
- }
- assert( p->z==0 || p->n<p->nAlloc );
- if( p->z ) p->z[p->n] = 0;
- p->bNotFirst = 1;
- return p->z;
- }
- /* Forward references to the various virtual table methods implemented
- ** in this file. */
- static int csvtabCreate(sqlite3*, void*, int, const char*const*,
- sqlite3_vtab**,char**);
- static int csvtabConnect(sqlite3*, void*, int, const char*const*,
- sqlite3_vtab**,char**);
- static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
- static int csvtabDisconnect(sqlite3_vtab*);
- static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
- static int csvtabClose(sqlite3_vtab_cursor*);
- static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
- int argc, sqlite3_value **argv);
- static int csvtabNext(sqlite3_vtab_cursor*);
- static int csvtabEof(sqlite3_vtab_cursor*);
- static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
- static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
- /* An instance of the CSV virtual table */
- typedef struct CsvTable {
- sqlite3_vtab base; /* Base class. Must be first */
- char *zFilename; /* Name of the CSV file */
- char *zData; /* Raw CSV data in lieu of zFilename */
- long iStart; /* Offset to start of data in zFilename */
- int nCol; /* Number of columns in the CSV file */
- unsigned int tstFlags; /* Bit values used for testing */
- } CsvTable;
- /* Allowed values for tstFlags */
- #define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/
- /* A cursor for the CSV virtual table */
- typedef struct CsvCursor {
- sqlite3_vtab_cursor base; /* Base class. Must be first */
- CsvReader rdr; /* The CsvReader object */
- char **azVal; /* Value of the current row */
- int *aLen; /* Length of each entry */
- sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */
- } CsvCursor;
- /* Transfer error message text from a reader into a CsvTable */
- static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
- sqlite3_free(pTab->base.zErrMsg);
- pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
- }
- /*
- ** This method is the destructor fo a CsvTable object.
- */
- static int csvtabDisconnect(sqlite3_vtab *pVtab){
- CsvTable *p = (CsvTable*)pVtab;
- sqlite3_free(p->zFilename);
- sqlite3_free(p->zData);
- sqlite3_free(p);
- return SQLITE_OK;
- }
/*
** Return a pointer to the first character of z that is not whitespace
** according to isspace().  If z is entirely whitespace the result points
** at its zero terminator.
*/
static const char *csv_skip_whitespace(const char *z){
  const unsigned char *zu = (const unsigned char*)z;
  for(; isspace(*zu); zu++){}
  return (const char*)zu;
}
/*
** Remove trailing whitespace from the zero-terminated string z[] by
** overwriting the first trailing whitespace character with a terminator.
**
** The character examined is z[n-1], the last character still part of
** the string.  Examining z[n] instead would test the terminator itself,
** which is never whitespace, and nothing would ever be trimmed.
*/
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}
/*
** Remove SQL-style quoting from z[] in place.
**
** Nothing happens unless z begins with ' or " and ends with that same
** character (and is at least two characters long).  Otherwise the outer
** quotes are stripped and each doubled quote character inside collapses
** to a single one.
*/
static void csv_dequote(char *z){
  char q = z[0];
  size_t nLen;
  size_t iRead;
  size_t iWrite = 0;

  if( q!='\'' && q!='"' ) return;
  nLen = strlen(z);
  if( nLen<2 || z[nLen-1]!=q ) return;
  for(iRead=1; iRead<nLen-1; iRead++){
    /* On a doubled quote, step to the second one and copy just that */
    if( z[iRead]==q && z[iRead+1]==q ) iRead++;
    z[iWrite++] = z[iRead];
  }
  z[iWrite] = 0;
}
/*
** Test whether z has the form "TAG = VALUE", with optional whitespace
** before TAG and on either side of the "=".  zTag/nTag give the tag text
** and its length in bytes.
**
** Returns a pointer to the first non-whitespace character of VALUE on a
** match, or NULL if z does not have that form.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  const char *zCur = csv_skip_whitespace(z);
  if( strncmp(zTag, zCur, nTag)==0 ){
    zCur = csv_skip_whitespace(zCur + nTag);
    if( zCur[0]=='=' ) return csv_skip_whitespace(zCur + 1);
  }
  return 0;
}
- /* Decode a parameter that requires a dequoted string.
- **
- ** Return 1 if the parameter is seen, or 0 if not. 1 is returned
- ** even if there is an error. If an error occurs, then an error message
- ** is left in p->zErr. If there are no errors, p->zErr[0]==0.
- */
- static int csv_string_parameter(
- CsvReader *p, /* Leave the error message here, if there is one */
- const char *zParam, /* Parameter we are checking for */
- const char *zArg, /* Raw text of the virtual table argment */
- char **pzVal /* Write the dequoted string value here */
- ){
- const char *zValue;
- zValue = csv_parameter(zParam,(int)strlen(zParam),zArg);
- if( zValue==0 ) return 0;
- p->zErr[0] = 0;
- if( *pzVal ){
- csv_errmsg(p, "more than one '%s' parameter", zParam);
- return 1;
- }
- *pzVal = sqlite3_mprintf("%s", zValue);
- if( *pzVal==0 ){
- csv_errmsg(p, "out of memory");
- return 1;
- }
- csv_trim_whitespace(*pzVal);
- csv_dequote(*pzVal);
- return 1;
- }
/*
** Interpret z as a boolean.  "yes", "on", "true" (compared with
** sqlite3_stricmp) and "1" yield 1.  "no", "off", "false" and "0"
** yield 0.  Anything else yields -1, meaning the value is not recognized.
*/
static int csv_boolean(const char *z){
  static const char *const azTrue[] = { "yes", "on", "true" };
  static const char *const azFalse[] = { "no", "off", "false" };
  unsigned int k;

  for(k=0; k<sizeof(azTrue)/sizeof(azTrue[0]); k++){
    if( sqlite3_stricmp(azTrue[k], z)==0 ) return 1;
  }
  if( z[0]=='1' && z[1]==0 ) return 1;

  for(k=0; k<sizeof(azFalse)/sizeof(azFalse[0]); k++){
    if( sqlite3_stricmp(azFalse[k], z)==0 ) return 0;
  }
  if( z[0]=='0' && z[1]==0 ) return 0;

  return -1;
}
/*
** Test whether z has the form "TAG = BOOLEAN" or is just "TAG".
**
** On a match, *pValue receives the boolean (a bare TAG counts as true)
** and non-zero is returned.  Zero is returned, leaving *pValue alone,
** when z does not begin with TAG, when TAG is followed by something
** other than "=", or when the text after "=" is not a recognized boolean.
*/
static int csv_boolean_parameter(
  const char *zTag,       /* Tag we are looking for */
  int nTag,               /* Size of the tag in bytes */
  const char *z,          /* Input parameter */
  int *pValue             /* Write boolean value here */
){
  const char *zCur = csv_skip_whitespace(z);
  int bVal;

  if( strncmp(zTag, zCur, nTag)!=0 ) return 0;
  zCur = csv_skip_whitespace(zCur + nTag);
  if( zCur[0]==0 ){
    bVal = 1;
  }else if( zCur[0]=='=' ){
    bVal = csv_boolean(csv_skip_whitespace(zCur + 1));
    if( bVal<0 ) return 0;
  }else{
    return 0;
  }
  *pValue = bVal;
  return 1;
}
- /*
- ** Parameters:
- ** filename=FILENAME Name of file containing CSV content
- ** data=TEXT Direct CSV content.
- ** schema=SCHEMA Alternative CSV schema.
- ** header=YES|NO First row of CSV defines the names of
- ** columns if "yes". Default "no".
- ** columns=N Assume the CSV file contains N columns.
- **
- ** Only available if compiled with SQLITE_TEST:
- **
- ** testflags=N Bitmask of test flags. Optional
- **
- ** If schema= is omitted, then the columns are named "c0", "c1", "c2",
- ** and so forth. If columns=N is omitted, then the file is opened and
- ** the number of columns in the first row is counted to determine the
- ** column count. If header=YES, then the first row is skipped.
- */
- static int csvtabConnect(
- sqlite3 *db,
- void *pAux,
- int argc, const char *const*argv,
- sqlite3_vtab **ppVtab,
- char **pzErr
- ){
- CsvTable *pNew = 0; /* The CsvTable object to construct */
- int bHeader = -1; /* header= flags. -1 means not seen yet */
- int rc = SQLITE_OK; /* Result code from this routine */
- int i, j; /* Loop counters */
- #ifdef SQLITE_TEST
- int tstFlags = 0; /* Value for testflags=N parameter */
- #endif
- int b; /* Value of a boolean parameter */
- int nCol = -99; /* Value of the columns= parameter */
- CsvReader sRdr; /* A CSV file reader used to store an error
- ** message and/or to count the number of columns */
- static const char *azParam[] = {
- "filename", "data", "schema",
- };
- char *azPValue[3]; /* Parameter values */
- # define CSV_FILENAME (azPValue[0])
- # define CSV_DATA (azPValue[1])
- # define CSV_SCHEMA (azPValue[2])
- assert( sizeof(azPValue)==sizeof(azParam) );
- memset(&sRdr, 0, sizeof(sRdr));
- memset(azPValue, 0, sizeof(azPValue));
- for(i=3; i<argc; i++){
- const char *z = argv[i];
- const char *zValue;
- for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
- if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
- }
- if( j<sizeof(azParam)/sizeof(azParam[0]) ){
- if( sRdr.zErr[0] ) goto csvtab_connect_error;
- }else
- if( csv_boolean_parameter("header",6,z,&b) ){
- if( bHeader>=0 ){
- csv_errmsg(&sRdr, "more than one 'header' parameter");
- goto csvtab_connect_error;
- }
- bHeader = b;
- }else
- #ifdef SQLITE_TEST
- if( (zValue = csv_parameter("testflags",9,z))!=0 ){
- tstFlags = (unsigned int)atoi(zValue);
- }else
- #endif
- if( (zValue = csv_parameter("columns",7,z))!=0 ){
- if( nCol>0 ){
- csv_errmsg(&sRdr, "more than one 'columns' parameter");
- goto csvtab_connect_error;
- }
- nCol = atoi(zValue);
- if( nCol<=0 ){
- csv_errmsg(&sRdr, "column= value must be positive");
- goto csvtab_connect_error;
- }
- }else
- {
- csv_errmsg(&sRdr, "bad parameter: '%s'", z);
- goto csvtab_connect_error;
- }
- }
- if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
- csv_errmsg(&sRdr, "must specify either filename= or data= but not both");
- goto csvtab_connect_error;
- }
- if( (nCol<=0 || bHeader==1)
- && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA)
- ){
- goto csvtab_connect_error;
- }
- pNew = sqlite3_malloc( sizeof(*pNew) );
- *ppVtab = (sqlite3_vtab*)pNew;
- if( pNew==0 ) goto csvtab_connect_oom;
- memset(pNew, 0, sizeof(*pNew));
- if( CSV_SCHEMA==0 ){
- sqlite3_str *pStr = sqlite3_str_new(0);
- char *zSep = "";
- int iCol = 0;
- sqlite3_str_appendf(pStr, "CREATE TABLE x(");
- if( nCol<0 && bHeader<1 ){
- nCol = 0;
- do{
- csv_read_one_field(&sRdr);
- nCol++;
- }while( sRdr.cTerm==',' );
- }
- if( nCol>0 && bHeader<1 ){
- for(iCol=0; iCol<nCol; iCol++){
- sqlite3_str_appendf(pStr, "%sc%d TEXT", zSep, iCol);
- zSep = ",";
- }
- }else{
- do{
- char *z = csv_read_one_field(&sRdr);
- if( (nCol>0 && iCol<nCol) || (nCol<0 && bHeader) ){
- sqlite3_str_appendf(pStr,"%s\"%w\" TEXT", zSep, z);
- zSep = ",";
- iCol++;
- }
- }while( sRdr.cTerm==',' );
- if( nCol<0 ){
- nCol = iCol;
- }else{
- while( iCol<nCol ){
- sqlite3_str_appendf(pStr,"%sc%d TEXT", zSep, ++iCol);
- zSep = ",";
- }
- }
- }
- pNew->nCol = nCol;
- sqlite3_str_appendf(pStr, ")");
- CSV_SCHEMA = sqlite3_str_finish(pStr);
- if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
- }else if( nCol<0 ){
- do{
- csv_read_one_field(&sRdr);
- pNew->nCol++;
- }while( sRdr.cTerm==',' );
- }else{
- pNew->nCol = nCol;
- }
- pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0;
- pNew->zData = CSV_DATA; CSV_DATA = 0;
- #ifdef SQLITE_TEST
- pNew->tstFlags = tstFlags;
- #endif
- if( bHeader!=1 ){
- pNew->iStart = 0;
- }else if( pNew->zData ){
- pNew->iStart = (int)sRdr.iIn;
- }else{
- pNew->iStart = (int)(ftell(sRdr.in) - sRdr.nIn + sRdr.iIn);
- }
- csv_reader_reset(&sRdr);
- rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
- if( rc ){
- csv_errmsg(&sRdr, "bad schema: '%s' - %s", CSV_SCHEMA, sqlite3_errmsg(db));
- goto csvtab_connect_error;
- }
- for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
- sqlite3_free(azPValue[i]);
- }
- /* Rationale for DIRECTONLY:
- ** An attacker who controls a database schema could use this vtab
- ** to exfiltrate sensitive data from other files in the filesystem.
- ** And, recommended practice is to put all CSV virtual tables in the
- ** TEMP namespace, so they should still be usable from within TEMP
- ** views, so there shouldn't be a serious loss of functionality by
- ** prohibiting the use of this vtab from persistent triggers and views.
- */
- sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
- return SQLITE_OK;
- csvtab_connect_oom:
- rc = SQLITE_NOMEM;
- csv_errmsg(&sRdr, "out of memory");
- csvtab_connect_error:
- if( pNew ) csvtabDisconnect(&pNew->base);
- for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
- sqlite3_free(azPValue[i]);
- }
- if( sRdr.zErr[0] ){
- sqlite3_free(*pzErr);
- *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
- }
- csv_reader_reset(&sRdr);
- if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
- return rc;
- }
- /*
- ** Reset the current row content held by a CsvCursor.
- */
- static void csvtabCursorRowReset(CsvCursor *pCur){
- CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
- int i;
- for(i=0; i<pTab->nCol; i++){
- sqlite3_free(pCur->azVal[i]);
- pCur->azVal[i] = 0;
- pCur->aLen[i] = 0;
- }
- }
- /*
- ** The xConnect and xCreate methods do the same thing, but they must be
- ** different so that the virtual table is not an eponymous virtual table.
- */
- static int csvtabCreate(
- sqlite3 *db,
- void *pAux,
- int argc, const char *const*argv,
- sqlite3_vtab **ppVtab,
- char **pzErr
- ){
- return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
- }
- /*
- ** Destructor for a CsvCursor.
- */
- static int csvtabClose(sqlite3_vtab_cursor *cur){
- CsvCursor *pCur = (CsvCursor*)cur;
- csvtabCursorRowReset(pCur);
- csv_reader_reset(&pCur->rdr);
- sqlite3_free(cur);
- return SQLITE_OK;
- }
- /*
- ** Constructor for a new CsvTable cursor object.
- */
- static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
- CsvTable *pTab = (CsvTable*)p;
- CsvCursor *pCur;
- size_t nByte;
- nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
- pCur = sqlite3_malloc64( nByte );
- if( pCur==0 ) return SQLITE_NOMEM;
- memset(pCur, 0, nByte);
- pCur->azVal = (char**)&pCur[1];
- pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
- *ppCursor = &pCur->base;
- if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
- csv_xfer_error(pTab, &pCur->rdr);
- return SQLITE_ERROR;
- }
- return SQLITE_OK;
- }
- /*
- ** Advance a CsvCursor to its next row of input.
- ** Set the EOF marker if we reach the end of input.
- */
- static int csvtabNext(sqlite3_vtab_cursor *cur){
- CsvCursor *pCur = (CsvCursor*)cur;
- CsvTable *pTab = (CsvTable*)cur->pVtab;
- int i = 0;
- char *z;
- do{
- z = csv_read_one_field(&pCur->rdr);
- if( z==0 ){
- break;
- }
- if( i<pTab->nCol ){
- if( pCur->aLen[i] < pCur->rdr.n+1 ){
- char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
- if( zNew==0 ){
- csv_errmsg(&pCur->rdr, "out of memory");
- csv_xfer_error(pTab, &pCur->rdr);
- break;
- }
- pCur->azVal[i] = zNew;
- pCur->aLen[i] = pCur->rdr.n+1;
- }
- memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
- i++;
- }
- }while( pCur->rdr.cTerm==',' );
- if( z==0 && i==0 ){
- pCur->iRowid = -1;
- }else{
- pCur->iRowid++;
- while( i<pTab->nCol ){
- sqlite3_free(pCur->azVal[i]);
- pCur->azVal[i] = 0;
- pCur->aLen[i] = 0;
- i++;
- }
- }
- return SQLITE_OK;
- }
- /*
- ** Return values of columns for the row at which the CsvCursor
- ** is currently pointing.
- */
- static int csvtabColumn(
- sqlite3_vtab_cursor *cur, /* The cursor */
- sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
- int i /* Which column to return */
- ){
- CsvCursor *pCur = (CsvCursor*)cur;
- CsvTable *pTab = (CsvTable*)cur->pVtab;
- if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
- sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT);
- }
- return SQLITE_OK;
- }
- /*
- ** Return the rowid for the current row.
- */
- static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
- CsvCursor *pCur = (CsvCursor*)cur;
- *pRowid = pCur->iRowid;
- return SQLITE_OK;
- }
- /*
- ** Return TRUE if the cursor has been moved off of the last
- ** row of output.
- */
- static int csvtabEof(sqlite3_vtab_cursor *cur){
- CsvCursor *pCur = (CsvCursor*)cur;
- return pCur->iRowid<0;
- }
- /*
- ** Only a full table scan is supported. So xFilter simply rewinds to
- ** the beginning.
- */
- static int csvtabFilter(
- sqlite3_vtab_cursor *pVtabCursor,
- int idxNum, const char *idxStr,
- int argc, sqlite3_value **argv
- ){
- CsvCursor *pCur = (CsvCursor*)pVtabCursor;
- CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
- pCur->iRowid = 0;
- /* Ensure the field buffer is always allocated. Otherwise, if the
- ** first field is zero bytes in size, this may be mistaken for an OOM
- ** error in csvtabNext(). */
- if( csv_append(&pCur->rdr, 0) ) return SQLITE_NOMEM;
- if( pCur->rdr.in==0 ){
- assert( pCur->rdr.zIn==pTab->zData );
- assert( pTab->iStart>=0 );
- assert( (size_t)pTab->iStart<=pCur->rdr.nIn );
- pCur->rdr.iIn = pTab->iStart;
- }else{
- fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
- pCur->rdr.iIn = 0;
- pCur->rdr.nIn = 0;
- }
- return csvtabNext(pVtabCursor);
- }
- /*
- ** Only a forward full table scan is supported. xBestIndex is mostly
- ** a no-op. If CSVTEST_FIDX is set, then the presence of equality
- ** constraints lowers the estimated cost, which is fiction, but is useful
- ** for testing certain kinds of virtual table behavior.
- */
- static int csvtabBestIndex(
- sqlite3_vtab *tab,
- sqlite3_index_info *pIdxInfo
- ){
- pIdxInfo->estimatedCost = 1000000;
- #ifdef SQLITE_TEST
- if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
- /* The usual (and sensible) case is to always do a full table scan.
- ** The code in this branch only runs when testflags=1. This code
- ** generates an artifical and unrealistic plan which is useful
- ** for testing virtual table logic but is not helpful to real applications.
- **
- ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
- ** table (even though it is not) and the cost of running the virtual table
- ** is reduced from 1 million to just 10. The constraints are *not* marked
- ** as omittable, however, so the query planner should still generate a
- ** plan that gives a correct answer, even if they plan is not optimal.
- */
- int i;
- int nConst = 0;
- for(i=0; i<pIdxInfo->nConstraint; i++){
- unsigned char op;
- if( pIdxInfo->aConstraint[i].usable==0 ) continue;
- op = pIdxInfo->aConstraint[i].op;
- if( op==SQLITE_INDEX_CONSTRAINT_EQ
- || op==SQLITE_INDEX_CONSTRAINT_LIKE
- || op==SQLITE_INDEX_CONSTRAINT_GLOB
- ){
- pIdxInfo->estimatedCost = 10;
- pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
- nConst++;
- }
- }
- }
- #endif
- return SQLITE_OK;
- }
- static sqlite3_module CsvModule = {
- 0, /* iVersion */
- csvtabCreate, /* xCreate */
- csvtabConnect, /* xConnect */
- csvtabBestIndex, /* xBestIndex */
- csvtabDisconnect, /* xDisconnect */
- csvtabDisconnect, /* xDestroy */
- csvtabOpen, /* xOpen - open a cursor */
- csvtabClose, /* xClose - close a cursor */
- csvtabFilter, /* xFilter - configure scan constraints */
- csvtabNext, /* xNext - advance a cursor */
- csvtabEof, /* xEof - check for end of scan */
- csvtabColumn, /* xColumn - read data */
- csvtabRowid, /* xRowid - read data */
- 0, /* xUpdate */
- 0, /* xBegin */
- 0, /* xSync */
- 0, /* xCommit */
- 0, /* xRollback */
- 0, /* xFindMethod */
- 0, /* xRename */
- 0, /* xSavepoint */
- 0, /* xRelease */
- 0, /* xRollbackTo */
- 0, /* xShadowName */
- 0 /* xIntegrity */
- };
- #ifdef SQLITE_TEST
- /*
- ** For virtual table testing, make a version of the CSV virtual table
- ** available that has an xUpdate function. But the xUpdate always returns
- ** SQLITE_READONLY since the CSV file is not really writable.
- */
- static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
- return SQLITE_READONLY;
- }
- static sqlite3_module CsvModuleFauxWrite = {
- 0, /* iVersion */
- csvtabCreate, /* xCreate */
- csvtabConnect, /* xConnect */
- csvtabBestIndex, /* xBestIndex */
- csvtabDisconnect, /* xDisconnect */
- csvtabDisconnect, /* xDestroy */
- csvtabOpen, /* xOpen - open a cursor */
- csvtabClose, /* xClose - close a cursor */
- csvtabFilter, /* xFilter - configure scan constraints */
- csvtabNext, /* xNext - advance a cursor */
- csvtabEof, /* xEof - check for end of scan */
- csvtabColumn, /* xColumn - read data */
- csvtabRowid, /* xRowid - read data */
- csvtabUpdate, /* xUpdate */
- 0, /* xBegin */
- 0, /* xSync */
- 0, /* xCommit */
- 0, /* xRollback */
- 0, /* xFindMethod */
- 0, /* xRename */
- 0, /* xSavepoint */
- 0, /* xRelease */
- 0, /* xRollbackTo */
- 0, /* xShadowName */
- 0 /* xIntegrity */
- };
- #endif /* SQLITE_TEST */
- #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
- #ifdef _WIN32
- __declspec(dllexport)
- #endif
- /*
- ** This routine is called when the extension is loaded. The new
- ** CSV virtual table module is registered with the calling database
- ** connection.
- */
- int sqlite3_csv_init(
- sqlite3 *db,
- char **pzErrMsg,
- const sqlite3_api_routines *pApi
- ){
- #ifndef SQLITE_OMIT_VIRTUALTABLE
- int rc;
- SQLITE_EXTENSION_INIT2(pApi);
- rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
- #ifdef SQLITE_TEST
- if( rc==SQLITE_OK ){
- rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
- }
- #endif
- return rc;
- #else
- return SQLITE_OK;
- #endif
- }
|