00001
00019 #ifdef STANDALONE
00020 #include <sqlite3.h>
00021 #else
00022 #include <sqlite3ext.h>
00023 static SQLITE_EXTENSION_INIT1
00024 #endif
00025
00026 #include <stdio.h>
00027 #include <stdlib.h>
00028 #include <string.h>
00029 #include <limits.h>
00030
00031 #ifdef _WIN32
00032 #include <windows.h>
00033 #define strcasecmp _stricmp
00034 #define strncasecmp _strnicmp
00035 #endif
00036
00043 typedef struct csv_file {
00044 FILE *f;
00045 char *sep;
00046 char *quot;
00047 int isdos;
00048 int maxl;
00049 char *line;
00050 long pos0;
00051 int maxc;
00052 int ncols;
00053 char **cols;
00054 } csv_file;
00055
00062 typedef struct csv_guess_fmt {
00063 int nlines;
00064 int hist[256];
00065 } csv_guess_fmt;
00066
00073 typedef struct csv_vtab {
00074 sqlite3_vtab vtab;
00075 csv_file *csv;
00076 int convert;
00077 char coltypes[1];
00078 } csv_vtab;
00079
00086 typedef struct {
00087 sqlite3_vtab_cursor cursor;
00088 long pos;
00089 } csv_cursor;
00090
00096 static void
00097 append_free(char **in)
00098 {
00099 long *p = (long *) *in;
00100
00101 if (p) {
00102 p -= 2;
00103 sqlite3_free(p);
00104 *in = 0;
00105 }
00106 }
00107
00117 static char *
00118 append(char **in, char const *append, char quote)
00119 {
00120 long *p = (long *) *in;
00121 long len, maxlen, actlen;
00122 int i;
00123 char *pp;
00124 int nappend = append ? strlen(append) : 0;
00125
00126 if (p) {
00127 p -= 2;
00128 maxlen = p[0];
00129 actlen = p[1];
00130 } else {
00131 maxlen = actlen = 0;
00132 }
00133 len = nappend + actlen;
00134 if (quote) {
00135 len += 2;
00136 for (i = 0; i < nappend; i++) {
00137 if (append[i] == quote) {
00138 len++;
00139 }
00140 }
00141 } else if (!nappend) {
00142 return *in;
00143 }
00144 if (len >= maxlen - 1) {
00145 long *q;
00146
00147 maxlen = (len + 0x03ff) & (~0x3ff);
00148 q = (long *) sqlite3_realloc(p, maxlen + 1 + 2 * sizeof (long));
00149 if (!q) {
00150 return 0;
00151 }
00152 if (!p) {
00153 q[1] = 0;
00154 }
00155 p = q;
00156 p[0] = maxlen;
00157 *in = (char *) (p + 2);
00158 }
00159 pp = *in + actlen;
00160 if (quote) {
00161 *pp++ = quote;
00162 for (i = 0; i < nappend; i++) {
00163 *pp++ = append[i];
00164 if (append[i] == quote) {
00165 *pp++ = quote;
00166 }
00167 }
00168 *pp++ = quote;
00169 *pp = '\0';
00170 } else {
00171 if (nappend) {
00172 memcpy(pp, append, nappend);
00173 pp += nappend;
00174 *pp = '\0';
00175 }
00176 }
00177 p[1] = pp - *in;
00178 return *in;
00179 }
00180
00187 static char *
00188 unquote(char const *in)
00189 {
00190 char c, *ret;
00191 int i;
00192
00193 ret = sqlite3_malloc(strlen(in) + 1);
00194 if (ret) {
00195 c = in[0];
00196 if ((c == '"') || (c == '\'')) {
00197 i = strlen(in + 1);
00198 if ((i > 0) && (in[i] == c)) {
00199 strcpy(ret, in + 1);
00200 ret[i - 1] = '\0';
00201 return ret;
00202 }
00203 }
00204 strcpy(ret, in);
00205 }
00206 return ret;
00207 }
00208
00215 static int
00216 maptype(char const *type)
00217 {
00218 int typelen = type ? strlen(type) : 0;
00219
00220 if ((typelen >= 3) &&
00221 (strncasecmp(type, "integer", 7) == 0)) {
00222 return SQLITE_INTEGER;
00223 }
00224 if ((typelen >= 6) &&
00225 (strncasecmp(type, "double", 6) == 0)) {
00226 return SQLITE_FLOAT;
00227 }
00228 if ((typelen >= 5) &&
00229 (strncasecmp(type, "float", 5) == 0)) {
00230 return SQLITE_FLOAT;
00231 }
00232 if ((typelen >= 4) &&
00233 (strncasecmp(type, "real", 4) == 0)) {
00234 return SQLITE_FLOAT;
00235 }
00236 return SQLITE_TEXT;
00237 }
00238
00245 static void
00246 conv_names(char **names, int ncols)
00247 {
00248 int i;
00249 char *p, *q;
00250 static const char ws[] = "\n\t\r\b\v ";
00251
00252 if (!names || ncols <= 0) {
00253 return;
00254 }
00255 for (i = 0; i < ncols; i++) {
00256 p = names[i];
00257
00258 while (*p) {
00259 if (strchr(ws, *p)) {
00260 *p++ = '_';
00261 q = p;
00262 while (*q && strchr(ws, *q)) {
00263 ++q;
00264 }
00265 if (*q && q > p) {
00266 strcpy(p, q);
00267 }
00268 continue;
00269 }
00270 ++p;
00271 }
00272 }
00273 }
00274
00285 static void
00286 result_or_bind(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
00287 char *data, int len, int type)
00288 {
00289 char *endp;
00290
00291 if (!data) {
00292 if (ctx) {
00293 sqlite3_result_null(ctx);
00294 } else {
00295 sqlite3_bind_null(stmt, idx);
00296 }
00297 return;
00298 }
00299 if (type == SQLITE_INTEGER) {
00300 sqlite_int64 val;
00301 #if defined(_WIN32) || defined(_WIN64)
00302 char endc;
00303
00304 if (sscanf(data, "%I64d%c", &val, &endc) == 1) {
00305 if (ctx) {
00306 sqlite3_result_int64(ctx, val);
00307 } else {
00308 sqlite3_bind_int64(stmt, idx, val);
00309 }
00310 return;
00311 }
00312 #else
00313 endp = 0;
00314 #ifdef __osf__
00315 val = strtol(data, &endp, 0);
00316 #else
00317 val = strtoll(data, &endp, 0);
00318 #endif
00319 if (endp && (endp != data) && !*endp) {
00320 if (ctx) {
00321 sqlite3_result_int64(ctx, val);
00322 } else {
00323 sqlite3_bind_int64(stmt, idx, val);
00324 }
00325 return;
00326 }
00327 #endif
00328 } else if (type == SQLITE_FLOAT) {
00329 double val;
00330
00331 endp = 0;
00332 val = strtod(data, &endp);
00333 if (endp && (endp != data) && !*endp) {
00334 if (ctx) {
00335 sqlite3_result_double(ctx, val);
00336 } else {
00337 sqlite3_bind_double(stmt, idx, val);
00338 }
00339 return;
00340 }
00341 }
00342 if (ctx) {
00343 sqlite3_result_text(ctx, data, len, SQLITE_TRANSIENT);
00344 } else {
00345 sqlite3_bind_text(stmt, idx, data, len, SQLITE_TRANSIENT);
00346 }
00347 }
00348
00359 static int
00360 process_col(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
00361 char *data, int type, int conv)
00362 {
00363 char c, *p;
00364 const char flchars[] = "Ee+-.,0123456789";
00365
00366 if (!data) {
00367 goto putdata;
00368 }
00369
00370
00371
00372
00373
00374 c = data[0];
00375 if ((c != '\0') && strchr(flchars + 2, c)) {
00376 p = data + 1;
00377 while (*p && strchr(flchars, *p)) {
00378 ++p;
00379 }
00380 if (*p == '\0') {
00381 char *first = 0;
00382 int n = 0;
00383
00384 p = data;
00385 while (p) {
00386 p = strchr(p, ',');
00387 if (!p) {
00388 break;
00389 }
00390 if (++n == 1) {
00391 first = p;
00392 }
00393 ++p;
00394 }
00395 if (first) {
00396 *first = '.';
00397 goto putdata;
00398 }
00399 }
00400 }
00401 if (conv) {
00402 char *utf = sqlite3_malloc(strlen(data) * 2 + 2);
00403
00404 if (utf) {
00405 p = utf;
00406 while ((c = *data) != '\0') {
00407 if (((conv & 10) == 10) && (c == '\\')) {
00408 if (data[1] == 'q') {
00409 *p++ = '\'';
00410 data += 2;
00411 continue;
00412 }
00413 }
00414 if ((conv & 2) && (c == '\\')) {
00415 char c2 = data[1];
00416
00417 switch (c2) {
00418 case '\0':
00419 goto convdone;
00420 case 'n':
00421 *p = '\n';
00422 break;
00423 case 't':
00424 *p = '\t';
00425 break;
00426 case 'r':
00427 *p = '\r';
00428 break;
00429 case 'f':
00430 *p = '\f';
00431 break;
00432 case 'v':
00433 *p = '\v';
00434 break;
00435 case 'b':
00436 *p = '\b';
00437 break;
00438 case 'a':
00439 *p = '\a';
00440 break;
00441 case '?':
00442 *p = '\?';
00443 break;
00444 case '\'':
00445 *p = '\'';
00446 break;
00447 case '"':
00448 *p = '\"';
00449 break;
00450 case '\\':
00451 *p = '\\';
00452 break;
00453 default:
00454 *p++ = c;
00455 *p = c2;
00456 break;
00457 }
00458 p++;
00459 data += 2;
00460 continue;
00461 }
00462 if ((conv & 1) && (c & 0x80)) {
00463 *p++ = 0xc0 | ((c >> 6) & 0x1f);
00464 *p++ = 0x80 | (c & 0x3f);
00465 } else {
00466 *p++ = c;
00467 }
00468 data++;
00469 }
00470 convdone:
00471 *p = '\0';
00472 result_or_bind(ctx, stmt, idx, utf, p - utf, type);
00473 sqlite3_free(utf);
00474 return SQLITE_OK;
00475 } else {
00476 if (ctx) {
00477 sqlite3_result_error(ctx, "out of memory", -1);
00478 }
00479 return SQLITE_NOMEM;
00480 }
00481 }
00482 putdata:
00483 result_or_bind(ctx, stmt, idx, data, -1, type);
00484 return SQLITE_OK;
00485 }
00486
00495 static csv_file *
00496 csv_open(const char *filename, const char *sep, const char *quot)
00497 {
00498 FILE *f;
00499 csv_file *csv;
00500
00501 #ifdef _WIN32
00502 f = fopen(filename, "rb");
00503 #else
00504 f = fopen(filename, "r");
00505 #endif
00506 if (!f) {
00507 return 0;
00508 }
00509 csv = sqlite3_malloc(sizeof (csv_file));
00510 if (!csv) {
00511 error0:
00512 fclose(f);
00513 return 0;
00514 }
00515 csv->f = f;
00516 if (sep && sep[0]) {
00517 csv->sep = sqlite3_malloc(strlen(sep) + 1);
00518 if (!csv->sep) {
00519 error1:
00520 sqlite3_free(csv);
00521 goto error0;
00522 }
00523 strcpy(csv->sep, sep);
00524 } else {
00525 csv->sep = 0;
00526 }
00527 if (quot && quot[0]) {
00528 csv->quot = sqlite3_malloc(strlen(quot) + 1);
00529 if (!csv->quot) {
00530 if (csv->sep) {
00531 sqlite3_free(csv->sep);
00532 }
00533 goto error1;
00534 }
00535 strcpy(csv->quot, quot);
00536 } else {
00537 csv->quot = 0;
00538 }
00539 csv->isdos = 0;
00540 csv->maxl = 0;
00541 csv->line = 0;
00542 csv->pos0 = 0;
00543 csv->maxc = 0;
00544 csv->ncols = 0;
00545 csv->cols = 0;
00546 return csv;
00547 }
00548
00554 static void
00555 csv_close(csv_file *csv)
00556 {
00557 if (csv) {
00558 if (csv->sep) {
00559 sqlite3_free(csv->sep);
00560 }
00561 if (csv->quot) {
00562 sqlite3_free(csv->quot);
00563 }
00564 if (csv->line) {
00565 sqlite3_free(csv->line);
00566 }
00567 if (csv->cols) {
00568 sqlite3_free(csv->cols);
00569 }
00570 if (csv->f) {
00571 fclose(csv->f);
00572 }
00573 sqlite3_free(csv);
00574 }
00575 }
00576
00583 static int
00584 csv_eof(csv_file *csv)
00585 {
00586 if (csv && csv->f) {
00587 return feof(csv->f);
00588 }
00589 return 1;
00590 }
00591
00599 static long
00600 csv_seek(csv_file *csv, long pos)
00601 {
00602 if (csv && csv->f) {
00603 return fseek(csv->f, pos, SEEK_SET);
00604 }
00605 return EOF;
00606 }
00607
00613 static void
00614 csv_rewind(csv_file *csv)
00615 {
00616 if (csv && csv->f) {
00617 csv_seek(csv, csv->pos0);
00618 }
00619 }
00620
00627 static long
00628 csv_tell(csv_file *csv)
00629 {
00630 if (csv && csv->f) {
00631 return ftell(csv->f);
00632 }
00633 return EOF;
00634 }
00635
00643 static int
00644 csv_getline(csv_file *csv, csv_guess_fmt *guess)
00645 {
00646 int i, index = 0, inq = 0, c, col;
00647 char *p, *sep;
00648
00649 if (!csv || !csv->f) {
00650 return EOF;
00651 }
00652 while (1) {
00653 c = fgetc(csv->f);
00654 if (c == EOF) {
00655 if (index > 0) {
00656 break;
00657 }
00658 return EOF;
00659 }
00660 if (c == '\0') {
00661 continue;
00662 }
00663 if (c == '\r') {
00664 int c2 = fgetc(csv->f);
00665 c = '\n';
00666
00667 if (c2 == '\n') {
00668 csv->isdos = 1;
00669 } else if (c2 != EOF) {
00670 ungetc(c2, csv->f);
00671 }
00672 }
00673
00674 if (csv->isdos && (c == '\032')) {
00675 int c2 = fgetc(csv->f);
00676
00677 if (c2 == EOF) {
00678 if (index > 0) {
00679 break;
00680 }
00681 return EOF;
00682 }
00683 ungetc(c2, csv->f);
00684 }
00685 if (index >= csv->maxl - 1) {
00686 int n = csv->maxl * 2;
00687 char *line;
00688
00689 if (n <= 0) {
00690 n = 4096;
00691 }
00692 line = sqlite3_malloc(n);
00693 if (!line) {
00694 return EOF;
00695 }
00696 if (csv->line) {
00697 memcpy(line, csv->line, index);
00698 sqlite3_free(csv->line);
00699 }
00700 csv->maxl = n;
00701 csv->line = line;
00702 }
00703 csv->line[index++] = c;
00704 if (csv->quot && (p = strchr(csv->quot, c))) {
00705 if (inq) {
00706 if (*p == inq) {
00707 inq = 0;
00708 }
00709 } else {
00710 inq = *p;
00711 }
00712 }
00713 if (!inq && (c == '\n')) {
00714 break;
00715 }
00716 }
00717 if (guess) {
00718 for (i = 0; i < index; i++) {
00719 guess->hist[csv->line[i] & 0xFF] += 1;
00720 }
00721 guess->nlines += 1;
00722 csv->ncols = 0;
00723 return 0;
00724 }
00725
00726 for (i = index - 1; i >= 0; i--) {
00727 if (csv->line[i] != '\n') {
00728 break;
00729 }
00730 }
00731 index = i + 1;
00732 csv->line[index] = '\0';
00733 i = inq = col = 0;
00734 sep = csv->sep ? csv->sep : ";";
00735 if (!csv->cols) {
00736 int n = 128;
00737
00738 csv->cols = sqlite3_malloc(sizeof (char *) * n);
00739 if (!csv->cols) {
00740 return EOF;
00741 }
00742 csv->maxc = n;
00743 }
00744 csv->cols[col++] = csv->line;
00745 while (i < index) {
00746 if (csv->quot && (p = strchr(csv->quot, csv->line[i]))) {
00747 if (inq) {
00748 if (*p == inq) {
00749 inq = 0;
00750 }
00751 } else {
00752 inq = *p;
00753 }
00754 }
00755 if (!inq && (p = strchr(sep, csv->line[i]))) {
00756 p = csv->line + i;
00757 *p = '\0';
00758 if (col >= csv->maxc) {
00759 int n = csv->maxc * 2;
00760 char **cols;
00761
00762 cols = sqlite3_realloc(csv->cols, sizeof (char *) * n);
00763 if (!cols) {
00764 return EOF;
00765 }
00766 csv->cols = cols;
00767 csv->maxc = n;
00768 }
00769 csv->cols[col++] = p + 1;
00770 }
00771 ++i;
00772 }
00773 csv->ncols = col;
00774
00775
00776 if (csv->quot) {
00777 for (i = 0; i < col; i++) {
00778 if (*csv->cols[i]) {
00779 p = strchr(csv->quot, *csv->cols[i]);
00780 if (p) {
00781 char *src, *dst;
00782
00783 c = *p;
00784 csv->cols[i] += 1;
00785 sep = csv->cols[i] + strlen(csv->cols[i]) - 1;
00786 if ((sep >= csv->cols[i]) && (*sep == c)) {
00787 *sep = '\0';
00788 }
00789
00790 src = csv->cols[i];
00791 dst = 0;
00792 while (*src) {
00793 if ((*src == c) && (src[1] == c)) {
00794 if (!dst) {
00795 dst = src;
00796 }
00797 src++;
00798 while (*src) {
00799 *dst++ = *src++;
00800 if (*src == c) {
00801 --src;
00802 break;
00803 }
00804 }
00805 }
00806 ++src;
00807 }
00808 if (dst) {
00809 *dst++ = '\0';
00810 }
00811 }
00812 }
00813 }
00814 }
00815 return col;
00816 }
00817
00824 static int
00825 csv_ncols(csv_file *csv)
00826 {
00827 if (csv && csv->cols) {
00828 return csv->ncols;
00829 }
00830 return 0;
00831 }
00832
00840 static char *
00841 csv_coldata(csv_file *csv, int n)
00842 {
00843 if (csv && csv->cols && (n >= 0) && (n < csv->ncols)) {
00844 return csv->cols[n];
00845 }
00846 return 0;
00847 }
00848
00855 static int
00856 csv_guess(csv_file *csv)
00857 {
00858 csv_guess_fmt guess;
00859 int i, n;
00860 char *p, sep[32], quot[4];
00861 const struct {
00862 int c;
00863 int min;
00864 } sep_test[] = {
00865 { ',', 2 },
00866 { ';', 2 },
00867 { '\t', 2 },
00868 { ' ', 4 },
00869 { '|', 2 }
00870 };
00871
00872 if (!csv) {
00873 return EOF;
00874 }
00875 memset(&guess, 0, sizeof (guess));
00876 csv->pos0 = 0;
00877 csv_rewind(csv);
00878 for (i = n = 0; i < 10; i++) {
00879 n = csv_getline(csv, &guess);
00880 if (n == EOF) {
00881 break;
00882 }
00883 }
00884 csv_rewind(csv);
00885 if (n && !i) {
00886 return EOF;
00887 }
00888 p = quot;
00889 n = '"';
00890 if (guess.hist[n] > 1) {
00891 *p++ = n;
00892 }
00893 n = '\'';
00894 if (guess.hist[n] > 1) {
00895 *p++ = n;
00896 }
00897 *p = '\0';
00898 p = sep;
00899 for (i = 0; i < sizeof (sep_test) / sizeof (sep_test[0]); i++) {
00900 if (guess.hist[sep_test[i].c] > sep_test[i].min * guess.nlines) {
00901 *p++ = sep_test[i].c;
00902 }
00903 }
00904 *p = '\0';
00905 if (quot[0]) {
00906 p = sqlite3_malloc(strlen(quot) + 1);
00907 if (p) {
00908 strcpy(p, quot);
00909 if (csv->quot) {
00910 sqlite3_free(csv->quot);
00911 }
00912 csv->quot = p;
00913 } else {
00914 return EOF;
00915 }
00916 }
00917 if (sep[0]) {
00918 p = sqlite3_malloc(strlen(sep) + 1);
00919 if (p) {
00920 strcpy(p, sep);
00921 if (csv->sep) {
00922 sqlite3_free(csv->sep);
00923 }
00924 csv->sep = p;
00925 } else {
00926 return EOF;
00927 }
00928 }
00929 return 0;
00930 }
00931
00965 static int
00966 csv_vtab_connect(sqlite3* db, void *aux, int argc, const char * const *argv,
00967 sqlite3_vtab **vtabp, char **errp)
00968 {
00969 csv_file *csv;
00970 int rc = SQLITE_ERROR, i, k, ncnames, row1, *colmap = 0;
00971 char **cnames, *schema = 0, **nargv;
00972 csv_vtab *vtab = 0;
00973
00974 if (argc < 4) {
00975 *errp = sqlite3_mprintf("input file name missing");
00976 return SQLITE_ERROR;
00977 }
00978 nargv = sqlite3_malloc(sizeof (char *) * argc);
00979 memset(nargv, 0, sizeof (char *) * argc);
00980 for (i = 3; i < argc; i++) {
00981 nargv[i] = unquote(argv[i]);
00982 }
00983 csv = csv_open(nargv[3], (argc > 6) ? nargv[6] : 0,
00984 (argc > 7) ? nargv[7] : 0);
00985 if (!csv) {
00986 *errp = sqlite3_mprintf("unable to open input file");
00987 cleanup:
00988 append_free(&schema);
00989 for (i = 3; i < argc; i++) {
00990 if (nargv[i]) {
00991 sqlite3_free(nargv[i]);
00992 }
00993 }
00994 if (vtab) {
00995 sqlite3_free(vtab);
00996 }
00997 if (colmap) {
00998 sqlite3_free(colmap);
00999 }
01000 return rc;
01001 }
01002 if (!csv->sep && !csv->quot) {
01003 csv_guess(csv);
01004 }
01005 csv->pos0 = 0;
01006 row1 = 0;
01007 if (argc > 4) {
01008 row1 = strtol(nargv[4], 0, 10);
01009 }
01010 if (row1) {
01011
01012 csv_getline(csv, 0);
01013 if (csv->ncols < 1) {
01014 csv_close(csv);
01015 *errp = sqlite3_mprintf("unable to get column names");
01016 goto cleanup;
01017 }
01018 csv->pos0 = csv_tell(csv);
01019 csv_rewind(csv);
01020 ncnames = csv_ncols(csv);
01021 cnames = csv->cols;
01022 } else if (argc > 8) {
01023 ncnames = argc - 8;
01024 cnames = (char **) nargv + 8;
01025 } else {
01026
01027 csv_getline(csv, 0);
01028 if (csv->ncols < 1) {
01029 csv_close(csv);
01030 *errp = sqlite3_mprintf("unable to get column names");
01031 goto cleanup;
01032 }
01033 csv_rewind(csv);
01034 ncnames = csv_ncols(csv);
01035 cnames = 0;
01036 }
01037 colmap = sqlite3_malloc(sizeof (int) * ncnames);
01038 if (!colmap) {
01039 csv_close(csv);
01040 *errp = sqlite3_mprintf("out of memory");
01041 goto cleanup;
01042 }
01043 memset(colmap, 0, sizeof (int) * ncnames);
01044 vtab = sqlite3_malloc(sizeof (csv_vtab) + ncnames);
01045 if (!vtab) {
01046 csv_close(csv);
01047 *errp = sqlite3_mprintf("out of memory");
01048 goto cleanup;
01049 }
01050 memset(vtab, 0, sizeof (*vtab));
01051 vtab->convert = 0;
01052 if (argc > 5) {
01053 vtab->convert = strtol(nargv[5], 0, 10);
01054 if (row1 && (vtab->convert & 4)) {
01055 conv_names(cnames, ncnames);
01056 }
01057 }
01058 vtab->csv = csv;
01059 append(&schema, "CREATE TABLE x(", 0);
01060 for (i = 0; cnames && (i < ncnames); i++) {
01061 if (!cnames[i] || (cnames[i][0] == '\0')) {
01062 continue;
01063 }
01064 k = strlen(cnames[i]);
01065 if ((k > 7) && (strncasecmp("column_", cnames[i], 7) == 0)) {
01066 char c;
01067
01068 if (sscanf(cnames[i] + 7, "%d%c", &k, &c) == 1) {
01069 colmap[i] = k;
01070 }
01071 }
01072 }
01073 for (i = 0; i < ncnames; i++) {
01074 vtab->coltypes[i] = SQLITE_TEXT;
01075 if (!cnames || !cnames[i] || (cnames[i][0] == '\0')) {
01076 int want = i + 1;
01077 char colname[64];
01078
01079 while (1) {
01080 for (k = 0; k < ncnames; k++) {
01081 if ((k != i) && (colmap[k] == want)) {
01082 want++;
01083 break;
01084 }
01085 }
01086 if (k >= ncnames) {
01087 colmap[i] = want;
01088 break;
01089 }
01090 }
01091 sprintf(colname, "column_%d", colmap[i]);
01092 append(&schema, colname, '"');
01093 } else if (row1 > 0) {
01094 append(&schema, cnames[i], '"');
01095 } else if (row1 < 0) {
01096 append(&schema, cnames[i], '"');
01097 if (i + 8 < argc) {
01098 char *type = nargv[i + 8];
01099
01100 append(&schema, " ", 0);
01101 append(&schema, type, 0);
01102 vtab->coltypes[i] = maptype(type);
01103 }
01104 } else {
01105 char *type = cnames[i];
01106
01107 append(&schema, cnames[i], 0);
01108 while (*type && !strchr(" \t", *type)) {
01109 type++;
01110 }
01111 while (*type && strchr(" \t", *type)) {
01112 type++;
01113 }
01114 vtab->coltypes[i] = maptype(type);
01115 }
01116 if (i < ncnames - 1) {
01117 append(&schema, ",", 0);
01118 }
01119 }
01120 append(&schema, ")", 0);
01121 rc = sqlite3_declare_vtab(db, schema);
01122 if (rc != SQLITE_OK) {
01123 csv_close(csv);
01124 *errp = sqlite3_mprintf("table definition failed, error %d, "
01125 "schema '%s'", rc, schema);
01126 goto cleanup;
01127 }
01128 *vtabp = &vtab->vtab;
01129 *errp = 0;
01130 vtab = 0;
01131 goto cleanup;
01132 }
01133
01145 static int
01146 csv_vtab_create(sqlite3* db, void *aux, int argc,
01147 const char *const *argv,
01148 sqlite3_vtab **vtabp, char **errp)
01149 {
01150 return csv_vtab_connect(db, aux, argc, argv, vtabp, errp);
01151 }
01152
01159 static int
01160 csv_vtab_disconnect(sqlite3_vtab *vtab)
01161 {
01162 csv_vtab *tab = (csv_vtab *) vtab;
01163
01164 csv_close(tab->csv);
01165 sqlite3_free(tab);
01166 return SQLITE_OK;
01167 }
01168
01175 static int
01176 csv_vtab_destroy(sqlite3_vtab *vtab)
01177 {
01178 return csv_vtab_disconnect(vtab);
01179 }
01180
01188 static int
01189 csv_vtab_bestindex(sqlite3_vtab *vtab, sqlite3_index_info *info)
01190 {
01191 return SQLITE_OK;
01192 }
01193
01201 static int
01202 csv_vtab_open(sqlite3_vtab *vtab, sqlite3_vtab_cursor **cursorp)
01203 {
01204 csv_cursor *cur = sqlite3_malloc(sizeof (*cur));
01205 csv_vtab *tab = (csv_vtab *) vtab;
01206
01207 if (!cur) {
01208 return SQLITE_ERROR;
01209 }
01210 cur->cursor.pVtab = vtab;
01211 csv_rewind(tab->csv);
01212 cur->pos = csv_tell(tab->csv);
01213 *cursorp = &cur->cursor;
01214 return SQLITE_OK;
01215 }
01216
01223 static int
01224 csv_vtab_close(sqlite3_vtab_cursor *cursor)
01225 {
01226 sqlite3_free(cursor);
01227 return SQLITE_OK;
01228 }
01229
01236 static int
01237 csv_vtab_next(sqlite3_vtab_cursor *cursor)
01238 {
01239 csv_cursor *cur = (csv_cursor *) cursor;
01240 csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01241
01242 cur->pos = csv_tell(tab->csv);
01243 csv_getline(tab->csv, 0);
01244 return SQLITE_OK;
01245 }
01246
01257 static int
01258 csv_vtab_filter(sqlite3_vtab_cursor *cursor, int idxNum,
01259 const char *idxStr, int argc, sqlite3_value **argv)
01260 {
01261 csv_cursor *cur = (csv_cursor *) cursor;
01262 csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01263
01264 csv_rewind(tab->csv);
01265 return csv_vtab_next(cursor);
01266 }
01267
01274 static int
01275 csv_vtab_eof(sqlite3_vtab_cursor *cursor)
01276 {
01277 csv_cursor *cur = (csv_cursor *) cursor;
01278 csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01279
01280 return csv_eof(tab->csv);
01281 }
01282
01291 static int
01292 csv_vtab_column(sqlite3_vtab_cursor *cursor, sqlite3_context *ctx, int n)
01293 {
01294 csv_cursor *cur = (csv_cursor *) cursor;
01295 csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01296 char *data = csv_coldata(tab->csv, n);
01297
01298 return process_col(ctx, 0, 0, data, tab->coltypes[n], tab->convert);
01299 }
01300
01308 static int
01309 csv_vtab_rowid(sqlite3_vtab_cursor *cursor, sqlite_int64 *rowidp)
01310 {
01311 csv_cursor *cur = (csv_cursor *) cursor;
01312
01313 *rowidp = cur->pos;
01314 return SQLITE_OK;
01315 }
01316
01317 #if (SQLITE_VERSION_NUMBER > 3004000)
01318
01325 static int
01326 csv_vtab_rename(sqlite3_vtab *vtab, const char *newname)
01327 {
01328 return SQLITE_OK;
01329 }
01330
01331 #endif
01332
01337 static const sqlite3_module csv_vtab_mod = {
01338 1,
01339 csv_vtab_create,
01340 csv_vtab_connect,
01341 csv_vtab_bestindex,
01342 csv_vtab_disconnect,
01343 csv_vtab_destroy,
01344 csv_vtab_open,
01345 csv_vtab_close,
01346 csv_vtab_filter,
01347 csv_vtab_next,
01348 csv_vtab_eof,
01349 csv_vtab_column,
01350 csv_vtab_rowid,
01351 0,
01352 0,
01353 0,
01354 0,
01355 0,
01356 0,
01357 #if (SQLITE_VERSION_NUMBER > 3004000)
01358 csv_vtab_rename,
01359 #endif
01360 };
01361
01389 static void
01390 csv_import_func(sqlite3_context *ctx, int argc, sqlite3_value **argv)
01391 {
01392 csv_file *csv;
01393 int rc, i, k, ncnames, row1, convert = 0, useargs = 0, *colmap = 0;
01394 char *tname, *fname, *sql = 0, **cnames, *coltypes = 0;
01395 sqlite3 *db = (sqlite3 *) sqlite3_user_data(ctx);
01396 sqlite3_stmt *stmt = 0;
01397
01398 if (argc < 2) {
01399 sqlite3_result_error(ctx, "need at least 2 arguments", -1);
01400 return;
01401 }
01402 tname = (char *) sqlite3_value_text(argv[0]);
01403 if (!tname) {
01404 sqlite3_result_error(ctx, "table name is NULL", -1);
01405 return;
01406 }
01407 fname = (char *) sqlite3_value_text(argv[1]);
01408 if (!fname) {
01409 sqlite3_result_error(ctx, "file name is NULL", -1);
01410 return;
01411 }
01412 csv = csv_open(fname,
01413 (argc > 4) ? (char *) sqlite3_value_text(argv[4]) : 0,
01414 (argc > 5) ? (char *) sqlite3_value_text(argv[5]) : 0);
01415 if (!csv) {
01416 sqlite3_result_error(ctx, "unable to open input file", -1);
01417 cleanup:
01418 if (stmt) {
01419 sqlite3_finalize(stmt);
01420 }
01421 append_free(&sql);
01422 if (colmap) {
01423 sqlite3_free(colmap);
01424 }
01425 if (csv) {
01426 csv_close(csv);
01427 }
01428 return;
01429 }
01430 if (!csv->sep && !csv->quot) {
01431 csv_guess(csv);
01432 }
01433 csv->pos0 = 0;
01434 row1 = 0;
01435 if (argc > 2) {
01436 row1 = sqlite3_value_int(argv[2]);
01437 }
01438 if (row1) {
01439
01440 csv_getline(csv, 0);
01441 if (csv->ncols < 1) {
01442 sqlite3_result_error(ctx, "unable to get column names", -1);
01443 goto cleanup;
01444 }
01445 csv->pos0 = csv_tell(csv);
01446 csv_rewind(csv);
01447 ncnames = csv_ncols(csv);
01448 cnames = csv->cols;
01449 } else if (argc > 6) {
01450 ncnames = argc - 6;
01451 cnames = 0;
01452 useargs = 1;
01453 } else {
01454
01455 csv_getline(csv, 0);
01456 if (csv->ncols < 1) {
01457 sqlite3_result_error(ctx, "unable to get column names", -1);
01458 goto cleanup;
01459 }
01460 csv_rewind(csv);
01461 ncnames = csv_ncols(csv);
01462 cnames = 0;
01463 }
01464 convert = 0;
01465 if (argc > 3) {
01466 convert = sqlite3_value_int(argv[3]);
01467 if (row1 && (convert & 4)) {
01468 conv_names(cnames, ncnames);
01469 }
01470 }
01471
01472 append(&sql, "PRAGMA table_info(", 0);
01473 append(&sql, tname, '"');
01474 append(&sql, ")", 0);
01475 if (!sql) {
01476 oom:
01477 sqlite3_result_error(ctx, "out of memory", -1);
01478 goto cleanup;
01479 }
01480 rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
01481 append_free(&sql);
01482 if (rc != SQLITE_OK) {
01483 prepfail:
01484 sqlite3_result_error(ctx, "prepare failed", -1);
01485 goto cleanup;
01486 }
01487
01488 i = 0;
01489 rc = sqlite3_step(stmt);
01490 while (rc == SQLITE_ROW) {
01491 i++;
01492 rc = sqlite3_step(stmt);
01493 }
01494 if (rc != SQLITE_DONE) {
01495 selfail:
01496 sqlite3_result_error(ctx, "select failed", -1);
01497 goto cleanup;
01498 }
01499 if (i > 0) {
01500
01501 sqlite3_reset(stmt);
01502 ncnames = i;
01503 coltypes = sqlite3_malloc(ncnames);
01504 if (!coltypes) {
01505 goto oom;
01506 }
01507 rc = sqlite3_step(stmt);
01508 i = 0;
01509 while (rc == SQLITE_ROW) {
01510 coltypes[i++] = maptype((char *) sqlite3_column_text(stmt, 2));
01511 rc = sqlite3_step(stmt);
01512 }
01513 if (rc != SQLITE_DONE) {
01514 goto selfail;
01515 }
01516 } else {
01517
01518 sqlite3_finalize(stmt);
01519 stmt = 0;
01520 colmap = (int *) sqlite3_malloc(ncnames + sizeof (int) * ncnames);
01521 if (!colmap) {
01522 goto oom;
01523 }
01524 memset(colmap, 0, sizeof (int) * ncnames);
01525 coltypes = (char *) (colmap + ncnames);
01526 append(&sql, "CREATE TABLE ", 0);
01527 append(&sql, tname, '"');
01528 append(&sql, "(", 0);
01529 for (i = 0; cnames && (i < ncnames); i++) {
01530 if (!cnames[i] || (cnames[i][0] == '\0')) {
01531 continue;
01532 }
01533 k = strlen(cnames[i]);
01534 if ((k > 7) && (strncasecmp("column_", cnames[i], 7) == 0)) {
01535 char c;
01536
01537 if (sscanf(cnames[i] + 7, "%d%c", &k, &c) == 1) {
01538 colmap[i] = k;
01539 }
01540 }
01541 }
01542 for (i = 0; i < ncnames; i++) {
01543 int want = i + 1;
01544 char colname[64];
01545
01546 coltypes[i] = SQLITE_TEXT;
01547 if (useargs) {
01548 char *type = (char *) sqlite3_value_text(argv[i + 6]);
01549
01550 if (!type) {
01551 goto defcol;
01552 }
01553 append(&sql, type, 0);
01554 while (*type && !strchr(" \t", *type)) {
01555 type++;
01556 }
01557 while (*type && strchr(" \t", *type)) {
01558 type++;
01559 }
01560 coltypes[i] = maptype(type);
01561 } else if (!cnames || !cnames[i]) {
01562 defcol:
01563 while (1) {
01564 for (k = 0; k < ncnames; k++) {
01565 if ((k != i) && (colmap[k] == want)) {
01566 want++;
01567 break;
01568 }
01569 }
01570 if (k >= ncnames) {
01571 colmap[i] = want;
01572 break;
01573 }
01574 }
01575 sprintf(colname, "column_%d", colmap[i]);
01576 append(&sql, colname, '"');
01577 } else if (row1 > 0) {
01578 append(&sql, cnames[i], '"');
01579 } else if (row1 < 0) {
01580 append(&sql, cnames[i], '"');
01581 if (i + 6 < argc) {
01582 char *type = (char *) sqlite3_value_text(argv[i + 6]);
01583
01584 if (type) {
01585 append(&sql, " ", 0);
01586 append(&sql, type, 0);
01587 coltypes[i] = maptype(type);
01588 }
01589 }
01590 }
01591 if (i < ncnames - 1) {
01592 append(&sql, ",", 0);
01593 }
01594 }
01595 append(&sql, ")", 0);
01596 rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
01597 if (rc != SQLITE_OK) {
01598 goto prepfail;
01599 }
01600 rc = sqlite3_step(stmt);
01601 if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
01602 sqlite3_result_error(ctx, "create table failed", -1);
01603 goto cleanup;
01604 }
01605 append_free(&sql);
01606 }
01607 sqlite3_finalize(stmt);
01608 stmt = 0;
01609
01610 append(&sql, "INSERT INTO ", 0);
01611 append(&sql, tname, '"');
01612 append(&sql, " VALUES(", 0);
01613 for (i = 0; i < ncnames; i++) {
01614 append(&sql, (i < ncnames - 1) ? "?," : "?)", 0);
01615 }
01616 rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
01617 if (rc != SQLITE_OK) {
01618 goto prepfail;
01619 }
01620 append_free(&sql);
01621
01622 row1 = 0;
01623 while (csv_getline(csv, 0) != EOF) {
01624 for (i = 0; i < ncnames; i++) {
01625 char *data = csv_coldata(csv, i);
01626
01627 rc = process_col(0, stmt, i + 1, data, coltypes[i], convert);
01628 if (rc != SQLITE_OK) {
01629 goto inserr;
01630 }
01631 }
01632 rc = sqlite3_step(stmt);
01633 if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
01634 if ((rc != SQLITE_MISMATCH) && (rc != SQLITE_CONSTRAINT)) {
01635 inserr:
01636 sqlite3_result_error(ctx, "insert failed", -1);
01637 goto cleanup;
01638 }
01639 } else {
01640 row1++;
01641 }
01642 sqlite3_reset(stmt);
01643 }
01644 sqlite3_result_int(ctx, row1);
01645 goto cleanup;
01646 }
01647
01654 #ifndef STANDALONE
01655 static
01656 #endif
01657 int
01658 csv_vtab_init(sqlite3 *db)
01659 {
01660 sqlite3_create_function(db, "import_csv", -1, SQLITE_UTF8,
01661 (void *) db, csv_import_func, 0, 0);
01662 return sqlite3_create_module(db, "csvtable", &csv_vtab_mod, 0);
01663 }
01664
01665 #ifndef STANDALONE
01666
01675 int
01676 sqlite3_extension_init(sqlite3 *db, char **errmsg,
01677 const sqlite3_api_routines *api)
01678 {
01679 SQLITE_EXTENSION_INIT2(api);
01680 return csv_vtab_init(db);
01681 }
01682
01683 #endif