Main Page   Alphabetical List   Data Structures   File List   Data Fields   Globals  

csvtable.c

Go to the documentation of this file.
00001 
00019 #ifdef STANDALONE
00020 #include <sqlite3.h>
00021 #else
00022 #include <sqlite3ext.h>
00023 static SQLITE_EXTENSION_INIT1
00024 #endif
00025 
00026 #include <stdio.h>
00027 #include <stdlib.h>
00028 #include <string.h>
00029 #include <limits.h>
00030 
00031 #ifdef _WIN32
00032 #include <windows.h>
00033 #define strcasecmp  _stricmp
00034 #define strncasecmp _strnicmp
00035 #endif
00036 
00043 typedef struct csv_file {
00044     FILE *f;            
00045     char *sep;          
00046     char *quot;         
00047     int isdos;          
00048     int maxl;           
00049     char *line;         
00050     long pos0;          
00051     int maxc;           
00052     int ncols;          
00053     char **cols;        
00054 } csv_file;
00055 
00062 typedef struct csv_guess_fmt {
00063     int nlines;
00064     int hist[256];
00065 } csv_guess_fmt;
00066 
00073 typedef struct csv_vtab {
00074     sqlite3_vtab vtab;  
00075     csv_file *csv;      
00076     int convert;        
00077     char coltypes[1];   
00078 } csv_vtab;
00079 
00086 typedef struct {
00087     sqlite3_vtab_cursor cursor;         
00088     long pos;                           
00089 } csv_cursor;
00090 
00096 static void
00097 append_free(char **in)
00098 {
00099     long *p = (long *) *in;
00100 
00101     if (p) {
00102         p -= 2;
00103         sqlite3_free(p);
00104         *in = 0;
00105     }
00106 }
00107 
00117 static char *
00118 append(char **in, char const *append, char quote)
00119 {
00120     long *p = (long *) *in;
00121     long len, maxlen, actlen;
00122     int i;
00123     char *pp;
00124     int nappend = append ? strlen(append) : 0;
00125 
00126     if (p) {
00127         p -= 2;
00128         maxlen = p[0];
00129         actlen = p[1];
00130     } else {
00131         maxlen = actlen = 0;
00132     }
00133     len = nappend + actlen;
00134     if (quote) {
00135         len += 2;
00136         for (i = 0; i < nappend; i++) {
00137             if (append[i] == quote) {
00138                 len++;
00139             }
00140         }
00141     } else if (!nappend) {
00142         return *in;
00143     }
00144     if (len >= maxlen - 1) {
00145         long *q;
00146 
00147         maxlen = (len + 0x03ff) & (~0x3ff);
00148         q = (long *) sqlite3_realloc(p, maxlen + 1 + 2 * sizeof (long));
00149         if (!q) {
00150             return 0;
00151         }
00152         if (!p) {
00153             q[1] = 0;
00154         }
00155         p = q;
00156         p[0] = maxlen;
00157         *in = (char *) (p + 2);
00158     }
00159     pp = *in + actlen;
00160     if (quote) {
00161         *pp++ = quote;
00162         for (i = 0; i < nappend; i++) {
00163             *pp++ = append[i];
00164             if (append[i] == quote) {
00165                 *pp++ = quote;
00166             }
00167         }
00168         *pp++ = quote;
00169         *pp = '\0';
00170     } else {
00171         if (nappend) {
00172             memcpy(pp, append, nappend);
00173             pp += nappend;
00174             *pp = '\0';
00175         }
00176     }
00177     p[1] = pp - *in;
00178     return *in;
00179 }
00180 
00187 static char *
00188 unquote(char const *in)
00189 {
00190     char c, *ret;
00191     int i;
00192 
00193     ret = sqlite3_malloc(strlen(in) + 1);
00194     if (ret) {
00195         c = in[0];
00196         if ((c == '"') || (c == '\'')) {
00197             i = strlen(in + 1);
00198             if ((i > 0) && (in[i] == c)) {
00199                 strcpy(ret, in + 1);
00200                 ret[i - 1] = '\0';
00201                 return ret;
00202             }
00203         }
00204         strcpy(ret, in);
00205     }
00206     return ret;
00207 }
00208 
00215 static int
00216 maptype(char const *type)
00217 {
00218     int typelen = type ? strlen(type) : 0;
00219 
00220     if ((typelen >= 3) &&
00221         (strncasecmp(type, "integer", 7) == 0)) {
00222         return SQLITE_INTEGER;
00223     }
00224     if ((typelen >= 6) &&
00225         (strncasecmp(type, "double", 6) == 0)) {
00226         return SQLITE_FLOAT;
00227     }
00228     if ((typelen >= 5) &&
00229         (strncasecmp(type, "float", 5) == 0)) {
00230         return SQLITE_FLOAT;
00231     }
00232     if ((typelen >= 4) &&
00233         (strncasecmp(type, "real", 4) == 0)) {
00234         return SQLITE_FLOAT;
00235     }
00236     return SQLITE_TEXT;
00237 }
00238 
00245 static void
00246 conv_names(char **names, int ncols)
00247 {
00248     int i;
00249     char *p, *q;
00250     static const char ws[] = "\n\t\r\b\v ";
00251 
00252     if (!names || ncols <= 0) {
00253         return;
00254     }
00255     for (i = 0; i < ncols; i++) {
00256         p = names[i];
00257 
00258         while (*p) {
00259             if (strchr(ws, *p)) {
00260                 *p++ = '_';
00261                 q = p;
00262                 while (*q && strchr(ws, *q)) {
00263                     ++q;
00264                 }
00265                 if (*q && q > p) {
00266                     strcpy(p, q);
00267                 }
00268                 continue;
00269             }
00270             ++p;
00271         }
00272     }
00273 }
00274 
00285 static void
00286 result_or_bind(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
00287                char *data, int len, int type)
00288 {
00289     char *endp;
00290 
00291     if (!data) {
00292         if (ctx) {
00293             sqlite3_result_null(ctx);
00294         } else {
00295             sqlite3_bind_null(stmt, idx);
00296         }
00297         return;
00298     }
00299     if (type == SQLITE_INTEGER) {
00300         sqlite_int64 val;
00301 #if defined(_WIN32) || defined(_WIN64)
00302         char endc;
00303 
00304         if (sscanf(data, "%I64d%c", &val, &endc) == 1) {
00305             if (ctx) {
00306                 sqlite3_result_int64(ctx, val);
00307             } else {
00308                 sqlite3_bind_int64(stmt, idx, val);
00309             }
00310             return;
00311         }
00312 #else
00313         endp = 0;
00314 #ifdef __osf__
00315         val = strtol(data, &endp, 0);
00316 #else
00317         val = strtoll(data, &endp, 0);
00318 #endif
00319         if (endp && (endp != data) && !*endp) {
00320             if (ctx) {
00321                 sqlite3_result_int64(ctx, val);
00322             } else {
00323                 sqlite3_bind_int64(stmt, idx, val);
00324             }
00325             return;
00326         }
00327 #endif
00328     } else if (type == SQLITE_FLOAT) {
00329         double val;
00330 
00331         endp = 0;
00332         val = strtod(data, &endp);
00333         if (endp && (endp != data) && !*endp) {
00334             if (ctx) {
00335                 sqlite3_result_double(ctx, val);
00336             } else {
00337                 sqlite3_bind_double(stmt, idx, val);
00338             }
00339             return;
00340         }
00341     }
00342     if (ctx) {
00343         sqlite3_result_text(ctx, data, len, SQLITE_TRANSIENT);
00344     } else {
00345         sqlite3_bind_text(stmt, idx, data, len, SQLITE_TRANSIENT);
00346     }
00347 }
00348 
00359 static int
00360 process_col(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
00361             char *data, int type, int conv)
00362 {
00363     char c, *p;
00364     const char flchars[] = "Ee+-.,0123456789";
00365 
00366     if (!data) {
00367         goto putdata;
00368     }
00369 
00370     /*
00371      * Floating point number test,
00372      * converts single comma to dot.
00373      */
00374     c = data[0];
00375     if ((c != '\0') && strchr(flchars + 2, c)) {
00376         p = data + 1;
00377         while (*p && strchr(flchars, *p)) {
00378             ++p;
00379         }
00380         if (*p == '\0') {
00381             char *first = 0;
00382             int n = 0;
00383 
00384             p = data;
00385             while (p) {
00386                 p = strchr(p, ',');
00387                 if (!p) {
00388                     break;
00389                 }
00390                 if (++n == 1) {
00391                     first = p;
00392                 }
00393                 ++p;
00394             }
00395             if (first) {
00396                 *first = '.';
00397                 goto putdata;
00398             }
00399         }
00400     }
00401     if (conv) {
00402         char *utf = sqlite3_malloc(strlen(data) * 2 + 2);
00403 
00404         if (utf) {
00405             p = utf;
00406             while ((c = *data) != '\0') {
00407                 if (((conv & 10) == 10) && (c == '\\')) {
00408                     if (data[1] == 'q') {
00409                         *p++ = '\'';
00410                         data += 2;
00411                         continue;
00412                     }
00413                 }
00414                 if ((conv & 2) && (c == '\\')) {
00415                     char c2 = data[1];
00416 
00417                     switch (c2) {
00418                     case '\0':
00419                         goto convdone;
00420                     case 'n':
00421                         *p = '\n';
00422                         break;
00423                     case 't':
00424                         *p = '\t';
00425                         break;
00426                     case 'r':
00427                         *p = '\r';
00428                         break;
00429                     case 'f':
00430                         *p = '\f';
00431                         break;
00432                     case 'v':
00433                         *p = '\v';
00434                         break;
00435                     case 'b':
00436                         *p = '\b';
00437                         break;
00438                     case 'a':
00439                         *p = '\a';
00440                         break;
00441                     case '?':
00442                         *p = '\?';
00443                         break;
00444                     case '\'':
00445                         *p = '\'';
00446                         break;
00447                     case '"':
00448                         *p = '\"';
00449                         break;
00450                     case '\\':
00451                         *p = '\\';
00452                         break;
00453                     default:
00454                         *p++ = c;
00455                         *p = c2;
00456                         break;
00457                     }
00458                     p++;
00459                     data += 2;
00460                     continue;
00461                 }
00462                 if ((conv & 1) && (c & 0x80)) {
00463                     *p++ = 0xc0 | ((c >> 6) & 0x1f);
00464                     *p++ = 0x80 | (c & 0x3f);
00465                 } else {
00466                     *p++ = c;
00467                 }
00468                 data++;
00469             }
00470 convdone:
00471             *p = '\0';
00472             result_or_bind(ctx, stmt, idx, utf, p - utf, type);
00473             sqlite3_free(utf);
00474             return SQLITE_OK;
00475         } else {
00476             if (ctx) {
00477                 sqlite3_result_error(ctx, "out of memory", -1);
00478             }
00479             return SQLITE_NOMEM;
00480         }
00481     }
00482 putdata:
00483     result_or_bind(ctx, stmt, idx, data, -1, type);
00484     return SQLITE_OK;
00485 }
00486 
00495 static csv_file *
00496 csv_open(const char *filename, const char *sep, const char *quot)
00497 {
00498     FILE *f;
00499     csv_file *csv;
00500 
00501 #ifdef _WIN32
00502     f = fopen(filename, "rb");
00503 #else
00504     f = fopen(filename, "r");
00505 #endif
00506     if (!f) {
00507         return 0;
00508     }
00509     csv = sqlite3_malloc(sizeof (csv_file));
00510     if (!csv) {
00511 error0:
00512         fclose(f);
00513         return 0;
00514     }
00515     csv->f = f;
00516     if (sep && sep[0]) {
00517         csv->sep = sqlite3_malloc(strlen(sep) + 1);
00518         if (!csv->sep) {
00519 error1:
00520             sqlite3_free(csv);
00521             goto error0;
00522         }
00523         strcpy(csv->sep, sep);
00524     } else {
00525         csv->sep = 0;
00526     }
00527     if (quot && quot[0]) {
00528         csv->quot = sqlite3_malloc(strlen(quot) + 1);
00529         if (!csv->quot) {
00530             if (csv->sep) {
00531                 sqlite3_free(csv->sep);
00532             }
00533             goto error1;
00534         }
00535         strcpy(csv->quot, quot);
00536     } else {
00537         csv->quot = 0;
00538     }
00539     csv->isdos = 0;
00540     csv->maxl = 0;
00541     csv->line = 0;
00542     csv->pos0 = 0;
00543     csv->maxc = 0;
00544     csv->ncols = 0;
00545     csv->cols = 0;
00546     return csv;
00547 }
00548 
00554 static void
00555 csv_close(csv_file *csv)
00556 {
00557     if (csv) {
00558         if (csv->sep) {
00559             sqlite3_free(csv->sep);
00560         }
00561         if (csv->quot) {
00562             sqlite3_free(csv->quot);
00563         }
00564         if (csv->line) {
00565             sqlite3_free(csv->line);
00566         }
00567         if (csv->cols) {
00568             sqlite3_free(csv->cols);
00569         }
00570         if (csv->f) {
00571             fclose(csv->f);
00572         }
00573         sqlite3_free(csv);
00574     }
00575 }
00576 
00583 static int
00584 csv_eof(csv_file *csv)
00585 {
00586     if (csv && csv->f) {
00587         return feof(csv->f);
00588     }
00589     return 1;
00590 }
00591 
00599 static long
00600 csv_seek(csv_file *csv, long pos)
00601 {
00602     if (csv && csv->f) {
00603         return fseek(csv->f, pos, SEEK_SET);
00604     }
00605     return EOF;
00606 }
00607 
00613 static void
00614 csv_rewind(csv_file *csv)
00615 {
00616     if (csv && csv->f) {
00617         csv_seek(csv, csv->pos0);
00618     }
00619 }
00620 
00627 static long
00628 csv_tell(csv_file *csv)
00629 {
00630     if (csv && csv->f) {
00631         return ftell(csv->f);
00632     }
00633     return EOF;
00634 }
00635 
00643 static int
00644 csv_getline(csv_file *csv, csv_guess_fmt *guess)
00645 {
00646     int i, index = 0, inq = 0, c, col;
00647     char *p, *sep;
00648 
00649     if (!csv || !csv->f) {
00650         return EOF;
00651     }
00652     while (1) {
00653         c = fgetc(csv->f);
00654         if (c == EOF) {
00655             if (index > 0) {
00656                 break;
00657             }
00658             return EOF;
00659         }
00660         if (c == '\0') {
00661             continue;
00662         }
00663         if (c == '\r') {
00664             int c2 = fgetc(csv->f);
00665             c = '\n';
00666 
00667             if (c2 == '\n') {
00668                 csv->isdos = 1;
00669             } else if (c2 != EOF) {
00670                 ungetc(c2, csv->f);
00671             }
00672         }
00673         /* check for DOS EOF (Ctrl-Z) */
00674         if (csv->isdos && (c == '\032')) {
00675             int c2 = fgetc(csv->f);
00676 
00677             if (c2 == EOF) {
00678                 if (index > 0) {
00679                     break;
00680                 }
00681                 return EOF;
00682             }
00683             ungetc(c2, csv->f);
00684         }
00685         if (index >= csv->maxl - 1) {
00686             int n = csv->maxl * 2;
00687             char *line;
00688 
00689             if (n <= 0) {
00690                 n = 4096;
00691             }
00692             line = sqlite3_malloc(n);
00693             if (!line) {
00694                 return EOF;
00695             }
00696             if (csv->line) {
00697                 memcpy(line, csv->line, index);
00698                 sqlite3_free(csv->line);
00699             }
00700             csv->maxl = n;
00701             csv->line = line;
00702         }
00703         csv->line[index++] = c;
00704         if (csv->quot && (p = strchr(csv->quot, c))) {
00705             if (inq) {
00706                 if (*p == inq) {
00707                     inq = 0;
00708                 }
00709             } else {
00710                 inq = *p;
00711             }
00712         }
00713         if (!inq && (c == '\n')) {
00714             break;
00715         }
00716     }
00717     if (guess) {
00718         for (i = 0; i < index; i++) {
00719             guess->hist[csv->line[i] & 0xFF] += 1;
00720         }
00721         guess->nlines += 1;
00722         csv->ncols = 0;
00723         return 0;
00724     }
00725 
00726     for (i = index - 1; i >= 0; i--) {
00727         if (csv->line[i] != '\n') {
00728             break;
00729         }
00730     }
00731     index = i + 1;
00732     csv->line[index] = '\0';
00733     i = inq = col = 0;
00734     sep = csv->sep ? csv->sep : ";";
00735     if (!csv->cols) {
00736         int n = 128;
00737 
00738         csv->cols = sqlite3_malloc(sizeof (char *) * n);
00739         if (!csv->cols) {
00740             return EOF;
00741         }
00742         csv->maxc = n;
00743     }
00744     csv->cols[col++] = csv->line;
00745     while (i < index) {
00746         if (csv->quot && (p = strchr(csv->quot, csv->line[i]))) {
00747             if (inq) {
00748                 if (*p == inq) {
00749                     inq = 0;
00750                 }
00751             } else {
00752                 inq = *p;
00753             }
00754         }
00755         if (!inq && (p = strchr(sep, csv->line[i]))) {
00756             p = csv->line + i;
00757             *p = '\0';
00758             if (col >= csv->maxc) {
00759                 int n = csv->maxc * 2;
00760                 char **cols;
00761 
00762                 cols = sqlite3_realloc(csv->cols, sizeof (char *) * n);
00763                 if (!cols) {
00764                     return EOF;
00765                 }
00766                 csv->cols = cols;
00767                 csv->maxc = n;
00768             }
00769             csv->cols[col++] = p + 1;
00770         }
00771         ++i;
00772     }
00773     csv->ncols = col;
00774 
00775     /* strip off quotes */
00776     if (csv->quot) {
00777         for (i = 0; i < col; i++) {
00778             if (*csv->cols[i]) {
00779                 p = strchr(csv->quot, *csv->cols[i]);
00780                 if (p) {
00781                     char *src, *dst;
00782 
00783                     c = *p;
00784                     csv->cols[i] += 1;
00785                     sep = csv->cols[i] + strlen(csv->cols[i]) - 1;
00786                     if ((sep >= csv->cols[i]) && (*sep == c)) {
00787                         *sep = '\0';
00788                     }
00789                     /* collapse quote escape sequences */
00790                     src = csv->cols[i];
00791                     dst = 0;
00792                     while (*src) {
00793                         if ((*src == c) && (src[1] == c)) {
00794                             if (!dst) {
00795                                 dst = src;
00796                             }
00797                             src++;
00798                             while (*src) {
00799                                 *dst++ = *src++;
00800                                 if (*src == c) {
00801                                     --src;
00802                                     break;
00803                                 }
00804                             }
00805                         }
00806                         ++src;
00807                     }
00808                     if (dst) {
00809                         *dst++ = '\0';
00810                     }
00811                 }
00812             }
00813         }
00814     }
00815     return col;
00816 }
00817 
00824 static int
00825 csv_ncols(csv_file *csv)
00826 {
00827     if (csv && csv->cols) {
00828         return csv->ncols;
00829     }
00830     return 0;
00831 }
00832 
00840 static char *
00841 csv_coldata(csv_file *csv, int n)
00842 {
00843     if (csv && csv->cols && (n >= 0) && (n < csv->ncols)) {
00844         return csv->cols[n];
00845     }
00846     return 0;
00847 }
00848 
00855 static int
00856 csv_guess(csv_file *csv)
00857 {
00858     csv_guess_fmt guess;
00859     int i, n;
00860     char *p, sep[32], quot[4];
00861     const struct {
00862         int c;
00863         int min;
00864     } sep_test[] = {
00865         { ',', 2 },
00866         { ';', 2 },
00867         { '\t', 2 },
00868         { ' ', 4 },
00869         { '|', 2 }
00870     };
00871 
00872     if (!csv) {
00873         return EOF;
00874     }
00875     memset(&guess, 0, sizeof (guess));
00876     csv->pos0 = 0;
00877     csv_rewind(csv);
00878     for (i = n = 0; i < 10; i++) {
00879         n = csv_getline(csv, &guess);
00880         if (n == EOF) {
00881             break;
00882         }
00883     }
00884     csv_rewind(csv);
00885     if (n && !i) {
00886         return EOF;
00887     }
00888     p = quot;
00889     n = '"';
00890     if (guess.hist[n] > 1) {
00891         *p++ = n;
00892     }
00893     n = '\'';
00894     if (guess.hist[n] > 1) {
00895         *p++ = n;
00896     }
00897     *p = '\0';
00898     p = sep;
00899     for (i = 0; i < sizeof (sep_test) / sizeof (sep_test[0]); i++) {
00900         if (guess.hist[sep_test[i].c] > sep_test[i].min * guess.nlines) {
00901             *p++ = sep_test[i].c;
00902         }
00903     }
00904     *p = '\0';
00905     if (quot[0]) {
00906         p = sqlite3_malloc(strlen(quot) + 1);
00907         if (p) {
00908             strcpy(p, quot);
00909             if (csv->quot) {
00910                 sqlite3_free(csv->quot);
00911             }
00912             csv->quot = p;
00913         } else {
00914             return EOF;
00915         }
00916     }
00917     if (sep[0]) {
00918         p = sqlite3_malloc(strlen(sep) + 1);
00919         if (p) {
00920             strcpy(p, sep);
00921             if (csv->sep) {
00922                 sqlite3_free(csv->sep);
00923             }
00924             csv->sep = p;
00925         } else {
00926             return EOF;
00927         }
00928     }
00929     return 0;
00930 }
00931 
00965 static int
00966 csv_vtab_connect(sqlite3* db, void *aux, int argc, const char * const *argv,
00967                  sqlite3_vtab **vtabp, char **errp)
00968 {
00969     csv_file *csv;
00970     int rc = SQLITE_ERROR, i, k, ncnames, row1, *colmap = 0;
00971     char **cnames, *schema = 0, **nargv;
00972     csv_vtab *vtab = 0;
00973 
00974     if (argc < 4) {
00975         *errp = sqlite3_mprintf("input file name missing");
00976         return SQLITE_ERROR;
00977     }
00978     nargv = sqlite3_malloc(sizeof (char *) * argc);
00979     memset(nargv, 0, sizeof (char *) * argc);
00980     for (i = 3; i < argc; i++) {
00981         nargv[i] = unquote(argv[i]);
00982     }
00983     csv = csv_open(nargv[3], (argc > 6) ? nargv[6] : 0,
00984                    (argc > 7) ? nargv[7] : 0);
00985     if (!csv) {
00986         *errp = sqlite3_mprintf("unable to open input file");
00987 cleanup:
00988         append_free(&schema);
00989         for (i = 3; i < argc; i++) {
00990             if (nargv[i]) {
00991                 sqlite3_free(nargv[i]);
00992             }
00993         }
00994         if (vtab) {
00995             sqlite3_free(vtab);
00996         }
00997         if (colmap) {
00998             sqlite3_free(colmap);
00999         }
01000         return rc;
01001     }
01002     if (!csv->sep && !csv->quot) {
01003         csv_guess(csv);
01004     }
01005     csv->pos0 = 0;
01006     row1 = 0;
01007     if (argc > 4) {
01008         row1 = strtol(nargv[4], 0, 10);
01009     }
01010     if (row1) {
01011         /* use column names from 1st row */
01012         csv_getline(csv, 0);
01013         if (csv->ncols < 1) {
01014             csv_close(csv);
01015             *errp = sqlite3_mprintf("unable to get column names");
01016             goto cleanup;
01017         }
01018         csv->pos0 = csv_tell(csv);
01019         csv_rewind(csv);
01020         ncnames = csv_ncols(csv);
01021         cnames = csv->cols;
01022     } else if (argc > 8) {
01023         ncnames = argc - 8;
01024         cnames = (char **) nargv + 8;
01025     } else {
01026         /* use number of columns from 1st row */
01027         csv_getline(csv, 0);
01028         if (csv->ncols < 1) {
01029             csv_close(csv);
01030             *errp = sqlite3_mprintf("unable to get column names");
01031             goto cleanup;
01032         }
01033         csv_rewind(csv);
01034         ncnames = csv_ncols(csv);
01035         cnames = 0;
01036     }
01037     colmap = sqlite3_malloc(sizeof (int) * ncnames);
01038     if (!colmap) {
01039         csv_close(csv);
01040         *errp = sqlite3_mprintf("out of memory");
01041         goto cleanup;
01042     }
01043     memset(colmap, 0, sizeof (int) * ncnames);
01044     vtab = sqlite3_malloc(sizeof (csv_vtab) + ncnames);
01045     if (!vtab) {
01046         csv_close(csv);
01047         *errp = sqlite3_mprintf("out of memory");
01048         goto cleanup;
01049     }
01050     memset(vtab, 0, sizeof (*vtab));
01051     vtab->convert = 0;
01052     if (argc > 5) {
01053         vtab->convert = strtol(nargv[5], 0, 10);
01054         if (row1 && (vtab->convert & 4)) {
01055             conv_names(cnames, ncnames);
01056         }
01057     }
01058     vtab->csv = csv;
01059     append(&schema, "CREATE TABLE x(", 0);
01060     for (i = 0; cnames && (i < ncnames); i++) {
01061         if (!cnames[i] || (cnames[i][0] == '\0')) {
01062             continue;
01063         }
01064         k = strlen(cnames[i]);
01065         if ((k > 7) && (strncasecmp("column_", cnames[i], 7) == 0)) {
01066             char c;
01067 
01068             if (sscanf(cnames[i] + 7, "%d%c", &k, &c) == 1) {
01069                 colmap[i] = k;
01070             }
01071         }
01072     }
01073     for (i = 0; i < ncnames; i++) {
01074         vtab->coltypes[i] = SQLITE_TEXT;
01075         if (!cnames || !cnames[i] || (cnames[i][0] == '\0')) {
01076             int want = i + 1;
01077             char colname[64];
01078 
01079             while (1) {
01080                 for (k = 0; k < ncnames; k++) {
01081                     if ((k != i) && (colmap[k] == want)) {
01082                         want++;
01083                         break;
01084                     }
01085                 }
01086                 if (k >= ncnames) {
01087                     colmap[i] = want;
01088                     break;
01089                 }
01090             }
01091             sprintf(colname, "column_%d", colmap[i]);
01092             append(&schema, colname, '"');
01093         } else if (row1 > 0) {
01094             append(&schema, cnames[i], '"');
01095         } else if (row1 < 0) {
01096             append(&schema, cnames[i], '"');
01097             if (i + 8 < argc) {
01098                 char *type = nargv[i + 8];
01099 
01100                 append(&schema, " ", 0);
01101                 append(&schema, type, 0);
01102                 vtab->coltypes[i] = maptype(type);
01103             }
01104         } else {
01105             char *type = cnames[i];
01106 
01107             append(&schema, cnames[i], 0);
01108             while (*type && !strchr(" \t", *type)) {
01109                 type++;
01110             }
01111             while (*type && strchr(" \t", *type)) {
01112                 type++;
01113             }
01114             vtab->coltypes[i] = maptype(type);
01115         }
01116         if (i < ncnames - 1) {
01117             append(&schema, ",", 0);
01118         }
01119     }
01120     append(&schema, ")", 0);
01121     rc = sqlite3_declare_vtab(db, schema);
01122     if (rc != SQLITE_OK) {
01123         csv_close(csv);
01124         *errp = sqlite3_mprintf("table definition failed, error %d, "
01125                                 "schema '%s'", rc, schema);
01126         goto cleanup;
01127     }
01128     *vtabp = &vtab->vtab;
01129     *errp = 0;
01130     vtab = 0;
01131     goto cleanup;
01132 }
01133 
01145 static int
01146 csv_vtab_create(sqlite3* db, void *aux, int argc,
01147            const char *const *argv,
01148            sqlite3_vtab **vtabp, char **errp)
01149 {
01150     return csv_vtab_connect(db, aux, argc, argv, vtabp, errp);
01151 }
01152 
01159 static int
01160 csv_vtab_disconnect(sqlite3_vtab *vtab)
01161 {
01162     csv_vtab *tab = (csv_vtab *) vtab;
01163 
01164     csv_close(tab->csv);
01165     sqlite3_free(tab);
01166     return SQLITE_OK;
01167 }
01168 
01175 static int
01176 csv_vtab_destroy(sqlite3_vtab *vtab)
01177 {
01178     return csv_vtab_disconnect(vtab);
01179 }
01180 
01188 static int
01189 csv_vtab_bestindex(sqlite3_vtab *vtab, sqlite3_index_info *info)
01190 {
01191     return SQLITE_OK;
01192 }
01193 
01201 static int
01202 csv_vtab_open(sqlite3_vtab *vtab, sqlite3_vtab_cursor **cursorp)
01203 {
01204     csv_cursor *cur = sqlite3_malloc(sizeof (*cur));
01205     csv_vtab *tab = (csv_vtab *) vtab;
01206 
01207     if (!cur) {
01208         return SQLITE_ERROR;
01209     }
01210     cur->cursor.pVtab = vtab;
01211     csv_rewind(tab->csv);
01212     cur->pos = csv_tell(tab->csv);
01213     *cursorp = &cur->cursor;
01214     return SQLITE_OK;
01215 }
01216 
01223 static int
01224 csv_vtab_close(sqlite3_vtab_cursor *cursor)
01225 {
01226     sqlite3_free(cursor);
01227     return SQLITE_OK;
01228 }
01229 
01236 static int
01237 csv_vtab_next(sqlite3_vtab_cursor *cursor)
01238 {
01239     csv_cursor *cur = (csv_cursor *) cursor;
01240     csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01241 
01242     cur->pos = csv_tell(tab->csv);
01243     csv_getline(tab->csv, 0);
01244     return SQLITE_OK;
01245 }
01246 
01257 static int
01258 csv_vtab_filter(sqlite3_vtab_cursor *cursor, int idxNum,
01259                 const char *idxStr, int argc, sqlite3_value **argv)
01260 {
01261     csv_cursor *cur = (csv_cursor *) cursor;
01262     csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01263 
01264     csv_rewind(tab->csv);
01265     return csv_vtab_next(cursor);
01266 }
01267 
01274 static int
01275 csv_vtab_eof(sqlite3_vtab_cursor *cursor)
01276 {
01277     csv_cursor *cur = (csv_cursor *) cursor;
01278     csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01279 
01280     return csv_eof(tab->csv);
01281 }
01282 
01291 static int
01292 csv_vtab_column(sqlite3_vtab_cursor *cursor, sqlite3_context *ctx, int n)
01293 {
01294     csv_cursor *cur = (csv_cursor *) cursor;
01295     csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01296     char *data = csv_coldata(tab->csv, n);
01297 
01298     return process_col(ctx, 0, 0, data, tab->coltypes[n], tab->convert);
01299 }
01300 
01308 static int
01309 csv_vtab_rowid(sqlite3_vtab_cursor *cursor, sqlite_int64 *rowidp)
01310 {
01311     csv_cursor *cur = (csv_cursor *) cursor;
01312 
01313     *rowidp = cur->pos;
01314     return SQLITE_OK;
01315 }
01316 
01317 #if (SQLITE_VERSION_NUMBER > 3004000)
01318 
01325 static int
01326 csv_vtab_rename(sqlite3_vtab *vtab, const char *newname)
01327 {
01328     return SQLITE_OK;
01329 }
01330 
01331 #endif
01332 
01337 static const sqlite3_module csv_vtab_mod = {
01338     1,                   /* iVersion */
01339     csv_vtab_create,     /* xCreate */
01340     csv_vtab_connect,    /* xConnect */
01341     csv_vtab_bestindex,  /* xBestIndex */
01342     csv_vtab_disconnect, /* xDisconnect */
01343     csv_vtab_destroy,    /* xDestroy */
01344     csv_vtab_open,       /* xOpen */
01345     csv_vtab_close,      /* xClose */
01346     csv_vtab_filter,     /* xFilter */
01347     csv_vtab_next,       /* xNext */
01348     csv_vtab_eof,        /* xEof */
01349     csv_vtab_column,     /* xColumn */
01350     csv_vtab_rowid,      /* xRowid */
01351     0,                   /* xUpdate */
01352     0,                   /* xBegin */
01353     0,                   /* xSync */
01354     0,                   /* xCommit */
01355     0,                   /* xRollback */
01356     0,                   /* xFindFunction */
01357 #if (SQLITE_VERSION_NUMBER > 3004000)
01358     csv_vtab_rename,     /* xRename */
01359 #endif
01360 };
01361 
01389 static void
01390 csv_import_func(sqlite3_context *ctx, int argc, sqlite3_value **argv)
01391 {
01392     csv_file *csv;
01393     int rc, i, k, ncnames, row1, convert = 0, useargs = 0, *colmap = 0;
01394     char *tname, *fname, *sql = 0, **cnames, *coltypes = 0;
01395     sqlite3 *db = (sqlite3 *) sqlite3_user_data(ctx);
01396     sqlite3_stmt *stmt = 0;
01397 
01398     if (argc < 2) {
01399         sqlite3_result_error(ctx, "need at least 2 arguments", -1);
01400         return;
01401     }
01402     tname = (char *) sqlite3_value_text(argv[0]);
01403     if (!tname) {
01404         sqlite3_result_error(ctx, "table name is NULL", -1);
01405         return;
01406     }
01407     fname = (char *) sqlite3_value_text(argv[1]);
01408     if (!fname) {
01409         sqlite3_result_error(ctx, "file name is NULL", -1);
01410         return;
01411     }
01412     csv = csv_open(fname,
01413                    (argc > 4) ? (char *) sqlite3_value_text(argv[4]) : 0,
01414                    (argc > 5) ? (char *) sqlite3_value_text(argv[5]) : 0);
01415     if (!csv) {
01416         sqlite3_result_error(ctx, "unable to open input file", -1);
01417 cleanup:
01418         if (stmt) {
01419             sqlite3_finalize(stmt);
01420         }
01421         append_free(&sql);
01422         if (colmap) {
01423             sqlite3_free(colmap);
01424         }
01425         if (csv) {
01426             csv_close(csv);
01427         }
01428         return;
01429     }
01430     if (!csv->sep && !csv->quot) {
01431         csv_guess(csv);
01432     }
01433     csv->pos0 = 0;
01434     row1 = 0;
01435     if (argc > 2) {
01436         row1 = sqlite3_value_int(argv[2]);
01437     }
01438     if (row1) {
01439         /* use column names from 1st row */
01440         csv_getline(csv, 0);
01441         if (csv->ncols < 1) {
01442             sqlite3_result_error(ctx, "unable to get column names", -1);
01443             goto cleanup;
01444         }
01445         csv->pos0 = csv_tell(csv);
01446         csv_rewind(csv);
01447         ncnames = csv_ncols(csv);
01448         cnames = csv->cols;
01449     } else if (argc > 6) {
01450         ncnames = argc - 6;
01451         cnames = 0;
01452         useargs = 1;
01453     } else {
01454         /* use number of columns from 1st row */
01455         csv_getline(csv, 0);
01456         if (csv->ncols < 1) {
01457             sqlite3_result_error(ctx, "unable to get column names", -1);
01458             goto cleanup;
01459         }
01460         csv_rewind(csv);
01461         ncnames = csv_ncols(csv);
01462         cnames = 0;
01463     }
01464     convert = 0;
01465     if (argc > 3) {
01466         convert = sqlite3_value_int(argv[3]);
01467         if (row1 && (convert & 4)) {
01468             conv_names(cnames, ncnames);
01469         }
01470     }
01471     /* test if table exists */
01472     append(&sql, "PRAGMA table_info(", 0);
01473     append(&sql, tname, '"');
01474     append(&sql, ")", 0);
01475     if (!sql) {
01476 oom:
01477         sqlite3_result_error(ctx, "out of memory", -1);
01478         goto cleanup;
01479     }
01480     rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
01481     append_free(&sql);
01482     if (rc != SQLITE_OK) {
01483 prepfail:
01484         sqlite3_result_error(ctx, "prepare failed", -1);
01485         goto cleanup;
01486     }
01487     /* find number of colums */
01488     i = 0;
01489     rc = sqlite3_step(stmt);
01490     while (rc == SQLITE_ROW) {
01491         i++;
01492         rc = sqlite3_step(stmt);
01493     }
01494     if (rc != SQLITE_DONE) {
01495 selfail:
01496         sqlite3_result_error(ctx, "select failed", -1);
01497         goto cleanup;
01498     }
01499     if (i > 0) {
01500         /* get column types */
01501         sqlite3_reset(stmt);
01502         ncnames = i;
01503         coltypes = sqlite3_malloc(ncnames);
01504         if (!coltypes) {
01505             goto oom;
01506         }
01507         rc = sqlite3_step(stmt);
01508         i = 0;
01509         while (rc == SQLITE_ROW) {
01510             coltypes[i++] = maptype((char *) sqlite3_column_text(stmt, 2));
01511             rc = sqlite3_step(stmt);
01512         }
01513         if (rc != SQLITE_DONE) {
01514             goto selfail;
01515         }
01516     } else {
01517         /* create new table */
01518         sqlite3_finalize(stmt);
01519         stmt = 0;
01520         colmap = (int *) sqlite3_malloc(ncnames + sizeof (int) * ncnames);
01521         if (!colmap) {
01522             goto oom;
01523         }
01524         memset(colmap, 0, sizeof (int) * ncnames);
01525         coltypes = (char *) (colmap + ncnames);
01526         append(&sql, "CREATE TABLE ", 0);
01527         append(&sql, tname, '"');
01528         append(&sql, "(", 0);
01529         for (i = 0; cnames && (i < ncnames); i++) {
01530             if (!cnames[i] || (cnames[i][0] == '\0')) {
01531                 continue;
01532             }
01533             k = strlen(cnames[i]);
01534             if ((k > 7) && (strncasecmp("column_", cnames[i], 7) == 0)) {
01535                 char c;
01536 
01537                 if (sscanf(cnames[i] + 7, "%d%c", &k, &c) == 1) {
01538                     colmap[i] = k;
01539                 }
01540             }
01541         }
01542         for (i = 0; i < ncnames; i++) {
01543             int want = i + 1;
01544             char colname[64];
01545 
01546             coltypes[i] = SQLITE_TEXT;
01547             if (useargs) {
01548                 char *type = (char *) sqlite3_value_text(argv[i + 6]);
01549 
01550                 if (!type) {
01551                     goto defcol;
01552                 }
01553                 append(&sql, type, 0);
01554                 while (*type && !strchr(" \t", *type)) {
01555                     type++;
01556                 }
01557                 while (*type && strchr(" \t", *type)) {
01558                     type++;
01559                 }
01560                 coltypes[i] = maptype(type);
01561             } else if (!cnames || !cnames[i]) {
01562 defcol:
01563                 while (1) {
01564                     for (k = 0; k < ncnames; k++) {
01565                         if ((k != i) && (colmap[k] == want)) {
01566                             want++;
01567                             break;
01568                         }
01569                     }
01570                     if (k >= ncnames) {
01571                         colmap[i] = want;
01572                         break;
01573                     }
01574                 }
01575                 sprintf(colname, "column_%d", colmap[i]);
01576                 append(&sql, colname, '"');
01577             } else if (row1 > 0) {
01578                 append(&sql, cnames[i], '"');
01579             } else if (row1 < 0) {
01580                 append(&sql, cnames[i], '"');
01581                 if (i + 6 < argc) {
01582                     char *type = (char *) sqlite3_value_text(argv[i + 6]);
01583 
01584                     if (type) {
01585                         append(&sql, " ", 0);
01586                         append(&sql, type, 0);
01587                         coltypes[i] = maptype(type);
01588                     }
01589                 }
01590             }
01591             if (i < ncnames - 1) {
01592                 append(&sql, ",", 0);
01593             }
01594         }
01595         append(&sql, ")", 0);
01596         rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
01597         if (rc != SQLITE_OK) {
01598             goto prepfail;
01599         }
01600         rc = sqlite3_step(stmt);
01601         if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
01602             sqlite3_result_error(ctx, "create table failed", -1);
01603             goto cleanup;
01604         }
01605         append_free(&sql);
01606     }
01607     sqlite3_finalize(stmt);
01608     stmt = 0;
01609     /* make INSERT statement */
01610     append(&sql, "INSERT INTO ", 0);
01611     append(&sql, tname, '"');
01612     append(&sql, " VALUES(", 0);
01613     for (i = 0; i < ncnames; i++) {
01614         append(&sql, (i < ncnames - 1) ? "?," : "?)", 0);
01615     }
01616     rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
01617     if (rc != SQLITE_OK) {
01618         goto prepfail;
01619     }
01620     append_free(&sql);
01621     /* import the CSV file */
01622     row1 = 0;
01623     while (csv_getline(csv, 0) != EOF) {
01624         for (i = 0; i < ncnames; i++) {
01625             char *data = csv_coldata(csv, i);
01626 
01627             rc = process_col(0, stmt, i + 1, data, coltypes[i], convert);
01628             if (rc != SQLITE_OK) {
01629                 goto inserr;
01630             }
01631         }
01632         rc = sqlite3_step(stmt);
01633         if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
01634             if ((rc != SQLITE_MISMATCH) && (rc != SQLITE_CONSTRAINT)) {
01635 inserr:
01636                 sqlite3_result_error(ctx, "insert failed", -1);
01637                 goto cleanup;
01638             }
01639         } else {
01640             row1++;
01641         }
01642         sqlite3_reset(stmt);
01643     }
01644     sqlite3_result_int(ctx, row1);
01645     goto cleanup;
01646 }
01647 
01654 #ifndef STANDALONE
01655 static
01656 #endif
01657 int
01658 csv_vtab_init(sqlite3 *db)
01659 {
01660     sqlite3_create_function(db, "import_csv", -1, SQLITE_UTF8,
01661                             (void *) db, csv_import_func, 0, 0);
01662     return sqlite3_create_module(db, "csvtable", &csv_vtab_mod, 0);
01663 }
01664 
01665 #ifndef STANDALONE
01666 
01675 int
01676 sqlite3_extension_init(sqlite3 *db, char **errmsg,
01677                        const sqlite3_api_routines *api)
01678 {
01679     SQLITE_EXTENSION_INIT2(api);
01680     return csv_vtab_init(db);
01681 }
01682 
01683 #endif

Generated on 23 Oct 2023 by doxygen.
Contact: chw@ch-werner.de