Back to home page

Enduro/X

 
 

    


0001 /* edb_load.c - memory-mapped database load tool */
0002 /*
0003  * Copyright 2011-2020 Howard Chu, Symas Corp.
0004  * All rights reserved.
0005  *
0006  * Redistribution and use in source and binary forms, with or without
0007  * modification, are permitted only as authorized by the OpenLDAP
0008  * Public License.
0009  *
0010  * A copy of this license is available in the file LICENSE in the
0011  * top-level directory of the distribution or, alternatively, at
0012  * <http://www.OpenLDAP.org/license.html>.
0013  */
0014 #include <stdio.h>
0015 #include <stdlib.h>
0016 #include <errno.h>
0017 #include <string.h>
0018 #include <ctype.h>
0019 #include <unistd.h>
0020 #include "exdb.h"
0021 
0022 #define PRINT   1
0023 #define NOHDR   2
0024 static int mode;
0025 
0026 static char *subname = NULL;
0027 
0028 static edb_size_t lineno;
0029 static int version;
0030 
0031 static int flags;
0032 
0033 static char *prog;
0034 
0035 static int Eof;
0036 
0037 static EDB_envinfo info;
0038 
0039 static EDB_val kbuf, dbuf;
0040 static EDB_val k0buf;
0041 
0042 #define Yu  EDB_PRIy(u)
0043 
0044 #define STRLENOF(s) (sizeof(s)-1)
0045 
0046 typedef struct flagbit {
0047     int bit;
0048     char *name;
0049     int len;
0050 } flagbit;
0051 
0052 #define S(s)    s, STRLENOF(s)
0053 
0054 flagbit dbflags[] = {
0055     { EDB_REVERSEKEY, S("reversekey") },
0056     { EDB_DUPSORT, S("dupsort") },
0057     { EDB_INTEGERKEY, S("integerkey") },
0058     { EDB_DUPFIXED, S("dupfixed") },
0059     { EDB_INTEGERDUP, S("integerdup") },
0060     { EDB_REVERSEDUP, S("reversedup") },
0061     { 0, NULL, 0 }
0062 };
0063 
0064 static void readhdr(void)
0065 {
0066     char *ptr;
0067 
0068     flags = 0;
0069     while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) {
0070         lineno++;
0071         if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) {
0072             version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION="));
0073             if (version > 3) {
0074                 fprintf(stderr, "%s: line %"Yu": unsupported VERSION %d\n",
0075                     prog, lineno, version);
0076                 exit(EXIT_FAILURE);
0077             }
0078         } else if (!strncmp(dbuf.mv_data, "HEADER=END", STRLENOF("HEADER=END"))) {
0079             break;
0080         } else if (!strncmp(dbuf.mv_data, "format=", STRLENOF("format="))) {
0081             if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print")))
0082                 mode |= PRINT;
0083             else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) {
0084                 fprintf(stderr, "%s: line %"Yu": unsupported FORMAT %s\n",
0085                     prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT="));
0086                 exit(EXIT_FAILURE);
0087             }
0088         } else if (!strncmp(dbuf.mv_data, "database=", STRLENOF("database="))) {
0089             ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
0090             if (ptr) *ptr = '\0';
0091             if (subname) free(subname);
0092             subname = strdup((char *)dbuf.mv_data+STRLENOF("database="));
0093         } else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) {
0094             if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree")))  {
0095                 fprintf(stderr, "%s: line %"Yu": unsupported type %s\n",
0096                     prog, lineno, (char *)dbuf.mv_data+STRLENOF("type="));
0097                 exit(EXIT_FAILURE);
0098             }
0099         } else if (!strncmp(dbuf.mv_data, "mapaddr=", STRLENOF("mapaddr="))) {
0100             int i;
0101             ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
0102             if (ptr) *ptr = '\0';
0103             i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr);
0104             if (i != 1) {
0105                 fprintf(stderr, "%s: line %"Yu": invalid mapaddr %s\n",
0106                     prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr="));
0107                 exit(EXIT_FAILURE);
0108             }
0109         } else if (!strncmp(dbuf.mv_data, "mapsize=", STRLENOF("mapsize="))) {
0110             int i;
0111             ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
0112             if (ptr) *ptr = '\0';
0113             i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="),
0114                 "%" EDB_SCNy(u), &info.me_mapsize);
0115             if (i != 1) {
0116                 fprintf(stderr, "%s: line %"Yu": invalid mapsize %s\n",
0117                     prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize="));
0118                 exit(EXIT_FAILURE);
0119             }
0120         } else if (!strncmp(dbuf.mv_data, "maxreaders=", STRLENOF("maxreaders="))) {
0121             int i;
0122             ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
0123             if (ptr) *ptr = '\0';
0124             i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders);
0125             if (i != 1) {
0126                 fprintf(stderr, "%s: line %"Yu": invalid maxreaders %s\n",
0127                     prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders="));
0128                 exit(EXIT_FAILURE);
0129             }
0130         } else {
0131             int i;
0132             for (i=0; dbflags[i].bit; i++) {
0133                 if (!strncmp(dbuf.mv_data, dbflags[i].name, dbflags[i].len) &&
0134                     ((char *)dbuf.mv_data)[dbflags[i].len] == '=') {
0135                     flags |= dbflags[i].bit;
0136                     break;
0137                 }
0138             }
0139             if (!dbflags[i].bit) {
0140                 ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size);
0141                 if (!ptr) {
0142                     fprintf(stderr, "%s: line %"Yu": unexpected format\n",
0143                         prog, lineno);
0144                     exit(EXIT_FAILURE);
0145                 } else {
0146                     *ptr = '\0';
0147                     fprintf(stderr, "%s: line %"Yu": unrecognized keyword ignored: %s\n",
0148                         prog, lineno, (char *)dbuf.mv_data);
0149                 }
0150             }
0151         }
0152     }
0153 }
0154 
0155 static void badend(void)
0156 {
0157     fprintf(stderr, "%s: line %"Yu": unexpected end of input\n",
0158         prog, lineno);
0159 }
0160 
/*
 * Convert the two hexadecimal digits at c2[0] and c2[1] into a byte value.
 *
 * The caller must already have verified both characters with isxdigit();
 * no validation is done here.  Relies on ASCII: masking with 0x4f keeps the
 * low nibble plus bit 6, so '0'..'9' become 0..9 while 'A'..'F' and 'a'..'f'
 * both become 0x41..0x46, which the subtraction of 55 maps to 10..15.
 *
 * Returns the decoded value in the range 0..255.
 */
static int unhex(const unsigned char *c2)
{
    int hi = c2[0] & 0x4f;
    int lo = c2[1] & 0x4f;

    if (hi & 0x40)
        hi -= 55;
    if (lo & 0x40)
        lo -= 55;

    return (hi << 4) | lo;
}
0174 
0175 static int readline(EDB_val *out, EDB_val *buf)
0176 {
0177     unsigned char *c1, *c2, *end;
0178     size_t len, l2;
0179     int c;
0180 
0181     if (!(mode & NOHDR)) {
0182         c = fgetc(stdin);
0183         if (c == EOF) {
0184             Eof = 1;
0185             return EOF;
0186         }
0187         if (c != ' ') {
0188             lineno++;
0189             if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
0190 badend:
0191                 Eof = 1;
0192                 badend();
0193                 return EOF;
0194             }
0195             if (c == 'D' && !strncmp(buf->mv_data, "ATA=END", STRLENOF("ATA=END")))
0196                 return EOF;
0197             goto badend;
0198         }
0199     }
0200     if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
0201         Eof = 1;
0202         return EOF;
0203     }
0204     lineno++;
0205 
0206     c1 = buf->mv_data;
0207     len = strlen((char *)c1);
0208     l2 = len;
0209 
0210     /* Is buffer too short? */
0211     while (c1[len-1] != '\n') {
0212         buf->mv_data = realloc(buf->mv_data, buf->mv_size*2);
0213         if (!buf->mv_data) {
0214             Eof = 1;
0215             fprintf(stderr, "%s: line %"Yu": out of memory, line too long\n",
0216                 prog, lineno);
0217             return EOF;
0218         }
0219         c1 = buf->mv_data;
0220         c1 += l2;
0221         if (fgets((char *)c1, buf->mv_size+1, stdin) == NULL) {
0222             Eof = 1;
0223             badend();
0224             return EOF;
0225         }
0226         buf->mv_size *= 2;
0227         len = strlen((char *)c1);
0228         l2 += len;
0229     }
0230     c1 = c2 = buf->mv_data;
0231     len = l2;
0232     c1[--len] = '\0';
0233     end = c1 + len;
0234 
0235     if (mode & PRINT) {
0236         while (c2 < end) {
0237             if (*c2 == '\\') {
0238                 if (c2[1] == '\\') {
0239                     *c1++ = *c2;
0240                 } else {
0241                     if (c2+3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) {
0242                         Eof = 1;
0243                         badend();
0244                         return EOF;
0245                     }
0246                     *c1++ = unhex(++c2);
0247                 }
0248                 c2 += 2;
0249             } else {
0250                 /* copies are redundant when no escapes were used */
0251                 *c1++ = *c2++;
0252             }
0253         }
0254     } else {
0255         /* odd length not allowed */
0256         if (len & 1) {
0257             Eof = 1;
0258             badend();
0259             return EOF;
0260         }
0261         while (c2 < end) {
0262             if (!isxdigit(*c2) || !isxdigit(c2[1])) {
0263                 Eof = 1;
0264                 badend();
0265                 return EOF;
0266             }
0267             *c1++ = unhex(c2);
0268             c2 += 2;
0269         }
0270     }
0271     c2 = out->mv_data = buf->mv_data;
0272     out->mv_size = c1 - c2;
0273 
0274     return 0;
0275 }
0276 
0277 static void usage(void)
0278 {
0279     fprintf(stderr, "usage: %s [-V] [-a] [-f input] [-n] [-s name] [-N] [-T] dbpath\n", prog);
0280     exit(EXIT_FAILURE);
0281 }
0282 
0283 static int greater(const EDB_val *a, const EDB_val *b)
0284 {
0285     return 1;
0286 }
0287 
0288 int main(int argc, char *argv[])
0289 {
0290     int i, rc;
0291     EDB_env *env;
0292     EDB_txn *txn;
0293     EDB_cursor *mc;
0294     EDB_dbi dbi;
0295     char *envname;
0296     int envflags = EDB_NOSYNC, putflags = 0;
0297     int dohdr = 0, append = 0;
0298     EDB_val prevk;
0299 
0300     prog = argv[0];
0301 
0302     if (argc < 2) {
0303         usage();
0304     }
0305 
0306     /* -a: append records in input order
0307      * -f: load file instead of stdin
0308      * -n: use NOSUBDIR flag on env_open
0309      * -s: load into named subDB
0310      * -N: use NOOVERWRITE on puts
0311      * -T: read plaintext
0312      * -V: print version and exit
0313      */
0314     while ((i = getopt(argc, argv, "af:ns:NTV")) != EOF) {
0315         switch(i) {
0316         case 'V':
0317             printf("%s\n", EDB_VERSION_STRING);
0318             exit(0);
0319             break;
0320         case 'a':
0321             append = 1;
0322             break;
0323         case 'f':
0324             if (freopen(optarg, "r", stdin) == NULL) {
0325                 fprintf(stderr, "%s: %s: reopen: %s\n",
0326                     prog, optarg, strerror(errno));
0327                 exit(EXIT_FAILURE);
0328             }
0329             break;
0330         case 'n':
0331             envflags |= EDB_NOSUBDIR;
0332             break;
0333         case 's':
0334             subname = strdup(optarg);
0335             break;
0336         case 'N':
0337             putflags = EDB_NOOVERWRITE|EDB_NODUPDATA;
0338             break;
0339         case 'T':
0340             mode |= NOHDR | PRINT;
0341             break;
0342         default:
0343             usage();
0344         }
0345     }
0346 
0347     if (optind != argc - 1)
0348         usage();
0349 
0350     dbuf.mv_size = 4096;
0351     dbuf.mv_data = malloc(dbuf.mv_size);
0352 
0353     if (!(mode & NOHDR))
0354         readhdr();
0355 
0356     envname = argv[optind];
0357     rc = edb_env_create(&env);
0358     if (rc) {
0359         fprintf(stderr, "edb_env_create failed, error %d %s\n", rc, edb_strerror(rc));
0360         return EXIT_FAILURE;
0361     }
0362 
0363     edb_env_set_maxdbs(env, 2);
0364 
0365     if (info.me_maxreaders)
0366         edb_env_set_maxreaders(env, info.me_maxreaders);
0367 
0368     if (info.me_mapsize)
0369         edb_env_set_mapsize(env, info.me_mapsize);
0370 
0371     if (info.me_mapaddr)
0372         envflags |= EDB_FIXEDMAP;
0373 
0374     rc = edb_env_open(env, envname, envflags, 0664);
0375     if (rc) {
0376         fprintf(stderr, "edb_env_open failed, error %d %s\n", rc, edb_strerror(rc));
0377         goto env_close;
0378     }
0379 
0380     kbuf.mv_size = edb_env_get_maxkeysize(env) * 2 + 2;
0381     kbuf.mv_data = malloc(kbuf.mv_size * 2);
0382     k0buf.mv_size = kbuf.mv_size;
0383     k0buf.mv_data = (char *)kbuf.mv_data + kbuf.mv_size;
0384     prevk.mv_data = k0buf.mv_data;
0385 
0386     while(!Eof) {
0387         EDB_val key, data;
0388         int batch = 0;
0389         int appflag;
0390 
0391         if (!dohdr) {
0392             dohdr = 1;
0393         } else if (!(mode & NOHDR))
0394             readhdr();
0395         
0396         rc = edb_txn_begin(env, NULL, 0, &txn);
0397         if (rc) {
0398             fprintf(stderr, "edb_txn_begin failed, error %d %s\n", rc, edb_strerror(rc));
0399             goto env_close;
0400         }
0401 
0402         rc = edb_open(txn, subname, flags|EDB_CREATE, &dbi);
0403         if (rc) {
0404             fprintf(stderr, "edb_open failed, error %d %s\n", rc, edb_strerror(rc));
0405             goto txn_abort;
0406         }
0407         prevk.mv_size = 0;
0408         if (append) {
0409             edb_set_compare(txn, dbi, greater);
0410             if (flags & EDB_DUPSORT)
0411                 edb_set_dupsort(txn, dbi, greater);
0412         }
0413 
0414         rc = edb_cursor_open(txn, dbi, &mc);
0415         if (rc) {
0416             fprintf(stderr, "edb_cursor_open failed, error %d %s\n", rc, edb_strerror(rc));
0417             goto txn_abort;
0418         }
0419 
0420         while(1) {
0421             rc = readline(&key, &kbuf);
0422             if (rc)  /* rc == EOF */
0423                 break;
0424 
0425             rc = readline(&data, &dbuf);
0426             if (rc) {
0427                 fprintf(stderr, "%s: line %"Yu": failed to read key value\n", prog, lineno);
0428                 goto txn_abort;
0429             }
0430 
0431             if (append) {
0432                 appflag = EDB_APPEND;
0433                 if (flags & EDB_DUPSORT) {
0434                     if (prevk.mv_size == key.mv_size && !memcmp(prevk.mv_data, key.mv_data, key.mv_size))
0435                         appflag = EDB_CURRENT|EDB_APPENDDUP;
0436                     else {
0437                         memcpy(prevk.mv_data, key.mv_data, key.mv_size);
0438                         prevk.mv_size = key.mv_size;
0439                     }
0440                 }
0441             } else {
0442                 appflag = 0;
0443             }
0444             rc = edb_cursor_put(mc, &key, &data, putflags|appflag);
0445             if (rc == EDB_KEYEXIST && putflags)
0446                 continue;
0447             if (rc) {
0448                 fprintf(stderr, "edb_cursor_put failed, error %d %s\n", rc, edb_strerror(rc));
0449                 goto txn_abort;
0450             }
0451             batch++;
0452             if (batch == 100) {
0453                 rc = edb_txn_commit(txn);
0454                 if (rc) {
0455                     fprintf(stderr, "%s: line %"Yu": txn_commit: %s\n",
0456                         prog, lineno, edb_strerror(rc));
0457                     goto env_close;
0458                 }
0459                 rc = edb_txn_begin(env, NULL, 0, &txn);
0460                 if (rc) {
0461                     fprintf(stderr, "edb_txn_begin failed, error %d %s\n", rc, edb_strerror(rc));
0462                     goto env_close;
0463                 }
0464                 rc = edb_cursor_open(txn, dbi, &mc);
0465                 if (rc) {
0466                     fprintf(stderr, "edb_cursor_open failed, error %d %s\n", rc, edb_strerror(rc));
0467                     goto txn_abort;
0468                 }
0469                 if (appflag & EDB_APPENDDUP) {
0470                     EDB_val k, d;
0471                     edb_cursor_get(mc, &k, &d, EDB_LAST);
0472                 }
0473                 batch = 0;
0474             }
0475         }
0476         rc = edb_txn_commit(txn);
0477         txn = NULL;
0478         if (rc) {
0479             fprintf(stderr, "%s: line %"Yu": txn_commit: %s\n",
0480                 prog, lineno, edb_strerror(rc));
0481             goto env_close;
0482         }
0483         edb_dbi_close(env, dbi);
0484     }
0485 
0486 txn_abort:
0487     edb_txn_abort(txn);
0488 env_close:
0489     edb_env_close(env);
0490 
0491     return rc ? EXIT_FAILURE : EXIT_SUCCESS;
0492 }