NCBI C++ ToolKit
mdb_load.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* mdb_load.c - memory-mapped database load tool */
2 /*
3  * Copyright 2011-2018 Howard Chu, Symas Corp.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted only as authorized by the OpenLDAP
8  * Public License.
9  *
10  * A copy of this license is available in the file LICENSE in the
11  * top-level directory of the distribution or, alternatively, at
12  * <http://www.OpenLDAP.org/license.html>.
13  */
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <errno.h>
17 #include <string.h>
18 #include <ctype.h>
19 #include <unistd.h>
20 #include "lmdb.h"
21 
/* Bits stored in `mode`.
 * PRINT: record lines are escaped text rather than hex byte pairs
 *        (set by a "format=print" header line or by -T).
 * NOHDR: input has no dump header and no leading-space record prefix
 *        (set by -T).
 */
22 #define PRINT 1
23 #define NOHDR 2
24 static int mode;
25 
/* Name of the sub-database to load into; set by -s or by a "database="
 * header line, and passed to mdb_open(). NULL until one of those is seen.
 */
26 static char *subname = NULL;
27 
/* Number of input lines consumed so far; used in diagnostics. */
28 static size_t lineno;
/* Value parsed from the "VERSION=" header line. */
29 static int version;
30 
/* Database flags collected from header keywords; OR'ed with MDB_CREATE
 * when the database is opened.
 */
31 static int flags;
32 
/* argv[0], used as the prefix of diagnostics. */
33 static char *prog;
34 
/* Set to 1 by readline() when input is exhausted or unusable; the outer
 * loop in main() runs while this is 0.
 */
35 static int Eof;
36 
/* NOTE(review): listing line 37 is missing from this extract. The
 * cross-reference index at the end of the page lists
 * `static MDB_envinfo info` as defined at mdb_load.c:37 - confirm.
 */
38 
/* Line buffers handed to readline() for the key and the data value;
 * dbuf is also the scratch buffer used by readhdr().
 */
39 static MDB_val kbuf, dbuf;
40 
/* printf length modifier used when printing size_t values. */
41 #ifdef _WIN32
42 #define Z "I"
43 #else
44 #define Z "z"
45 #endif
46 
/* Length of a string literal, not counting the terminating NUL. */
47 #define STRLENOF(s) (sizeof(s)-1)
48 
/* Associates one header keyword with the database flag bit it enables. */
49 typedef struct flagbit {
50  int bit; /* flag value; 0 terminates a table of these entries */
51  char *name; /* keyword text as it appears in the header */
52  int len; /* length of name, excluding the terminating NUL */
/* NOTE(review): listing line 53 (the line that closes this typedef) is
 * missing from this extract; the index at the end of the page shows the
 * typedef name as `flagbit` - confirm against the upstream file.
 */
54 
/* Expands a string literal into the `name, len` initializer pair of a
 * flagbit entry.
 */
55 #define S(s) s, STRLENOF(s)
56 
/* Table of header keywords that map to database flags, terminated by an
 * entry whose bit is 0 (readhdr() scans until it sees that entry).
 * NOTE(review): listing line 57, which opens this array, is missing from
 * this extract; the index at the end of the page gives it as
 * `flagbit dbflags[]` - confirm against the upstream file.
 */
58  { MDB_REVERSEKEY, S("reversekey") },
59  { MDB_DUPSORT, S("dupsort") },
60  { MDB_INTEGERKEY, S("integerkey") },
61  { MDB_DUPFIXED, S("dupfixed") },
62  { MDB_INTEGERDUP, S("integerdup") },
63  { MDB_REVERSEDUP, S("reversedup") },
64  { 0, NULL, 0 }
65 };
66 
/*
 * Parse one dump header from stdin.
 *
 * Resets `flags` to 0, then reads lines into dbuf until a line starting
 * with "HEADER=END" is seen or fgets() reports end of input. Recognized
 * lines update `version`, `mode`, `subname`, the fields of `info`, and
 * `flags`; every line read bumps `lineno`.
 *
 * NOTE(review): the listing skips one line after each error fprintf()
 * below (listing lines 79, 89, 100, 110, 120, 130 and 146). Those
 * statements are not visible in this extract; presumably each terminates
 * the program - confirm against the upstream file.
 * NOTE(review): `lineno` is a size_t but is printed with Z "d" (a signed
 * conversion) throughout; Z "u" would match the type.
 */
67 static void readhdr(void)
68 {
69  char *ptr;
70 
71  flags = 0;
72  while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) {
73  lineno++;
74  if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) {
75  version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION="));
 /* Versions above 3 are reported as unsupported. */
76  if (version > 3) {
77  fprintf(stderr, "%s: line %" Z "d: unsupported VERSION %d\n",
78  prog, lineno, version);
80  }
81  } else if (!strncmp(dbuf.mv_data, "HEADER=END", STRLENOF("HEADER=END"))) {
82  break;
83  } else if (!strncmp(dbuf.mv_data, "format=", STRLENOF("format="))) {
 /* The keyword is matched in lower case, but the value offset is taken
  * from "FORMAT="; both literals have the same length. PRINT is only
  * ever OR'ed into mode here, never cleared.
  */
84  if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print")))
85  mode |= PRINT;
86  else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) {
87  fprintf(stderr, "%s: line %" Z "d: unsupported FORMAT %s\n",
88  prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT="));
90  }
91  } else if (!strncmp(dbuf.mv_data, "database=", STRLENOF("database="))) {
 /* Strip the newline, then replace any earlier sub-database name. */
92  ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
93  if (ptr) *ptr = '\0';
94  if (subname) free(subname);
95  subname = strdup((char *)dbuf.mv_data+STRLENOF("database="));
96  } else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) {
 /* Any type whose value does not start with "btree" is reported. */
97  if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree"))) {
98  fprintf(stderr, "%s: line %" Z "d: unsupported type %s\n",
99  prog, lineno, (char *)dbuf.mv_data+STRLENOF("type="));
101  }
102  } else if (!strncmp(dbuf.mv_data, "mapaddr=", STRLENOF("mapaddr="))) {
103  int i;
104  ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
105  if (ptr) *ptr = '\0';
106  i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr);
107  if (i != 1) {
108  fprintf(stderr, "%s: line %" Z "d: invalid mapaddr %s\n",
109  prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr="));
111  }
112  } else if (!strncmp(dbuf.mv_data, "mapsize=", STRLENOF("mapsize="))) {
113  int i;
114  ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
115  if (ptr) *ptr = '\0';
116  i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize);
117  if (i != 1) {
118  fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n",
119  prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize="));
121  }
122  } else if (!strncmp(dbuf.mv_data, "maxreaders=", STRLENOF("maxreaders="))) {
123  int i;
124  ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
125  if (ptr) *ptr = '\0';
126  i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders);
127  if (i != 1) {
128  fprintf(stderr, "%s: line %" Z "d: invalid maxreaders %s\n",
129  prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders="));
131  }
132  } else {
 /* Try the flag keywords: a table name followed directly by '=' sets
  * that flag bit. The text after '=' is not examined.
  */
133  int i;
134  for (i=0; dbflags[i].bit; i++) {
135  if (!strncmp(dbuf.mv_data, dbflags[i].name, dbflags[i].len) &&
136  ((char *)dbuf.mv_data)[dbflags[i].len] == '=') {
137  flags |= dbflags[i].bit;
138  break;
139  }
140  }
 /* Reached the terminator entry: the keyword is not a known flag. */
141  if (!dbflags[i].bit) {
142  ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size);
143  if (!ptr) {
144  fprintf(stderr, "%s: line %" Z "d: unexpected format\n",
145  prog, lineno);
147  } else {
 /* Cut the line at '=' so that only the keyword is printed. */
148  *ptr = '\0';
149  fprintf(stderr, "%s: line %" Z "d: unrecognized keyword ignored: %s\n",
150  prog, lineno, (char *)dbuf.mv_data);
151  }
152  }
153  }
154  }
155 }
156 
157 static void badend(void)
158 {
159  fprintf(stderr, "%s: line %" Z "d: unexpected end of input\n",
160  prog, lineno);
161 }
162 
/*
 * Decode the two ASCII hex digits at c2[0] and c2[1] into the byte value
 * they spell (high nibble first) and return it.
 *
 * Callers must have verified both characters with isxdigit(); no
 * validation is done here and only those two characters are read.
 */
static int unhex(const unsigned char *c2)
{
	/* Masking with 0x4f keeps the low nibble plus bit 0x40. Digits
	 * '0'-'9' become 0-9 with bit 0x40 clear; letters of either case
	 * become 0x41-0x46 with bit 0x40 set, so subtracting 'A' - 10
	 * (55 in ASCII) maps them to 10-15.
	 */
	const int letter_bias = 'A' - 10;
	int hi, lo;

	hi = c2[0] & 0x4f;
	if (hi & 0x40)
		hi -= letter_bias;

	lo = c2[1] & 0x4f;
	if (lo & 0x40)
		lo -= letter_bias;

	return (hi << 4) | lo;
}
176 
178 {
179  unsigned char *c1, *c2, *end;
180  size_t len, l2;
181  int c;
182 
183  if (!(mode & NOHDR)) {
184  c = fgetc(stdin);
185  if (c == EOF) {
186  Eof = 1;
187  return EOF;
188  }
189  if (c != ' ') {
190  lineno++;
191  if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
192 badend:
193  Eof = 1;
194  badend();
195  return EOF;
196  }
197  if (c == 'D' && !strncmp(buf->mv_data, "ATA=END", STRLENOF("ATA=END")))
198  return EOF;
199  goto badend;
200  }
201  }
202  if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
203  Eof = 1;
204  return EOF;
205  }
206  lineno++;
207 
208  c1 = buf->mv_data;
209  len = strlen((char *)c1);
210  l2 = len;
211 
212  /* Is buffer too short? */
213  while (c1[len-1] != '\n') {
214  buf->mv_data = realloc(buf->mv_data, buf->mv_size*2);
215  if (!buf->mv_data) {
216  Eof = 1;
217  fprintf(stderr, "%s: line %" Z "d: out of memory, line too long\n",
218  prog, lineno);
219  return EOF;
220  }
221  c1 = buf->mv_data;
222  c1 += l2;
223  if (fgets((char *)c1, buf->mv_size+1, stdin) == NULL) {
224  Eof = 1;
225  badend();
226  return EOF;
227  }
228  buf->mv_size *= 2;
229  len = strlen((char *)c1);
230  l2 += len;
231  }
232  c1 = c2 = buf->mv_data;
233  len = l2;
234  c1[--len] = '\0';
235  end = c1 + len;
236 
237  if (mode & PRINT) {
238  while (c2 < end) {
239  if (*c2 == '\\') {
240  if (c2[1] == '\\') {
241  c1++; c2 += 2;
242  } else {
243  if (c2+3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) {
244  Eof = 1;
245  badend();
246  return EOF;
247  }
248  *c1++ = unhex(++c2);
249  c2 += 2;
250  }
251  } else {
252  /* copies are redundant when no escapes were used */
253  *c1++ = *c2++;
254  }
255  }
256  } else {
257  /* odd length not allowed */
258  if (len & 1) {
259  Eof = 1;
260  badend();
261  return EOF;
262  }
263  while (c2 < end) {
264  if (!isxdigit(*c2) || !isxdigit(c2[1])) {
265  Eof = 1;
266  badend();
267  return EOF;
268  }
269  *c1++ = unhex(c2);
270  c2 += 2;
271  }
272  }
273  c2 = out->mv_data = buf->mv_data;
274  out->mv_size = c1 - c2;
275 
276  return 0;
277 }
278 
/*
 * Print the command-line synopsis to stderr.
 * NOTE(review): listing line 282 is missing from this extract. main()
 * calls usage() without handling a return, so that line presumably
 * terminates the program - confirm against the upstream file.
 */
279 static void usage(void)
280 {
281  fprintf(stderr, "usage: %s [-V] [-f input] [-n] [-s name] [-N] [-T] dbpath\n", prog);
283 }
284 
/*
 * Entry point: parse options, read the dump header (unless -T was given),
 * open the environment named by the last argument and load key/data
 * pairs from stdin into it, committing every 100 successful puts.
 * Returns EXIT_SUCCESS when the final rc is 0, EXIT_FAILURE otherwise.
 *
 * NOTE(review): several listing lines inside this function are missing
 * from this extract (319, 343, 355, 358, 361, 372-373, 454). The index at
 * the end of the page shows that malloc, mdb_env_set_maxdbs,
 * mdb_env_set_maxreaders, mdb_env_set_mapsize, mdb_env_get_maxkeysize and
 * mdb_env_close are referenced by this file; the exact statements are not
 * visible here - confirm against the upstream file before editing.
 */
285 int main(int argc, char *argv[])
286 {
287  int i, rc;
288  MDB_env *env;
289  MDB_txn *txn;
290  MDB_cursor *mc;
291  MDB_dbi dbi;
292  char *envname;
293  int envflags = 0, putflags = 0;
294  int dohdr = 0;
295 
296  prog = argv[0];
297 
298  if (argc < 2) {
299  usage();
300  }
301 
302  /* -f: load file instead of stdin
303  * -n: use NOSUBDIR flag on env_open
304  * -s: load into named subDB
305  * -N: use NOOVERWRITE on puts
306  * -T: read plaintext
307  * -V: print version and exit
308  */
309  while ((i = getopt(argc, argv, "f:ns:NTV")) != EOF) {
310  switch(i) {
311  case 'V':
312  printf("%s\n", MDB_VERSION_STRING);
313  exit(0);
314  break;
315  case 'f':
 /* Redirect stdin to the named file so every reader below can keep
  * using stdin.
  * NOTE(review): listing line 319 (after the error message) is missing.
  */
316  if (freopen(optarg, "r", stdin) == NULL) {
317  fprintf(stderr, "%s: %s: reopen: %s\n",
318  prog, optarg, strerror(errno));
320  }
321  break;
322  case 'n':
323  envflags |= MDB_NOSUBDIR;
324  break;
325  case 's':
326  subname = strdup(optarg);
327  break;
328  case 'N':
329  putflags = MDB_NOOVERWRITE|MDB_NODUPDATA;
330  break;
331  case 'T':
332  mode |= NOHDR | PRINT;
333  break;
334  default:
335  usage();
336  }
337  }
338 
 /* Exactly one positional argument (the environment path) must remain. */
339  if (optind != argc - 1)
340  usage();
341 
 /* NOTE(review): listing line 343 is missing; dbuf.mv_data must be given
  * storage before readhdr() reads into it - presumably allocated here.
  */
342  dbuf.mv_size = 4096;
344 
345  if (!(mode & NOHDR))
346  readhdr();
347 
348  envname = argv[optind];
349  rc = mdb_env_create(&env);
350  if (rc) {
351  fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc));
352  return EXIT_FAILURE;
353  }
354 
 /* NOTE(review): listing lines 355, 358 and 361 are missing. As shown,
  * the first two `if` statements below have lost their bodies; they
  * presumably apply the header's maxreaders and mapsize values to env.
  */
356 
357  if (info.me_maxreaders)
359 
360  if (info.me_mapsize)
362 
363  if (info.me_mapaddr)
364  envflags |= MDB_FIXEDMAP;
365 
366  rc = mdb_env_open(env, envname, envflags, 0664);
367  if (rc) {
368  fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc));
369  goto env_close;
370  }
371 
 /* NOTE(review): listing lines 372-373 are missing; kbuf is passed to
  * readline() below and is presumably sized and allocated here.
  */
374 
 /* One iteration per dump section (header plus records). */
375  while(!Eof) {
376  MDB_val key, data;
377  int batch = 0;
378 
 /* The first header was already consumed above; read a fresh one only
  * for the second and later sections.
  */
379  if (!dohdr) {
380  dohdr = 1;
381  } else if (!(mode & NOHDR))
382  readhdr();
383 
384  rc = mdb_txn_begin(env, NULL, 0, &txn);
385  if (rc) {
386  fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
387  goto env_close;
388  }
389 
390  rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi);
391  if (rc) {
392  fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc));
393  goto txn_abort;
394  }
395 
396  rc = mdb_cursor_open(txn, dbi, &mc);
397  if (rc) {
398  fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
399  goto txn_abort;
400  }
401 
402  while(1) {
403  rc = readline(&key, &kbuf);
404  if (rc) /* rc == EOF */
405  break;
406 
 /* A key line without a following data line is an error. */
407  rc = readline(&data, &dbuf);
408  if (rc) {
409  fprintf(stderr, "%s: line %" Z "d: failed to read key value\n", prog, lineno);
410  goto txn_abort;
411  }
412 
413  rc = mdb_cursor_put(mc, &key, &data, putflags);
 /* With -N, an already-existing entry is skipped rather than treated
  * as a failure.
  */
414  if (rc == MDB_KEYEXIST && putflags)
415  continue;
416  if (rc) {
417  fprintf(stderr, "mdb_cursor_put failed, error %d %s\n", rc, mdb_strerror(rc));
418  goto txn_abort;
419  }
420  batch++;
 /* Commit after every 100 successful puts, then start a new
  * transaction and cursor for the next batch.
  */
421  if (batch == 100) {
422  rc = mdb_txn_commit(txn);
423  if (rc) {
424  fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n",
425  prog, lineno, mdb_strerror(rc));
426  goto env_close;
427  }
428  rc = mdb_txn_begin(env, NULL, 0, &txn);
429  if (rc) {
430  fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
431  goto env_close;
432  }
433  rc = mdb_cursor_open(txn, dbi, &mc);
434  if (rc) {
435  fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
436  goto txn_abort;
437  }
438  batch = 0;
439  }
440  }
 /* Section finished: commit the remainder. txn is cleared so that the
  * fall-through into txn_abort after the outer loop passes NULL.
  */
441  rc = mdb_txn_commit(txn);
442  txn = NULL;
443  if (rc) {
444  fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n",
445  prog, lineno, mdb_strerror(rc));
446  goto env_close;
447  }
448  mdb_dbi_close(env, dbi);
449  }
450 
 /* Reached both on error (live txn) and on normal completion (txn is
  * NULL at that point).
  */
451 txn_abort:
452  mdb_txn_abort(txn);
 /* NOTE(review): listing line 454 is missing; presumably the environment
  * is closed here - confirm.
  */
453 env_close:
455 
456  return rc ? EXIT_FAILURE : EXIT_SUCCESS;
457 }
#define EXIT_SUCCESS
Definition: common.h:39
std::ofstream out("events_result.xml")
main entry point for tests
#define EXIT_FAILURE
Definition: fastme.h:73
static HENV env
Definition: transaction2.c:38
#define getopt
Definition: replacements.h:157
#define optarg
#define optind
char data[12]
Definition: iconv.c:80
#define NULL
Definition: ncbistd.hpp:225
#define MDB_VERSION_STRING
The full library version as a C string.
Definition: lmdb.h:222
#define MDB_KEYEXIST
key/data pair already exists
Definition: lmdb.h:405
int mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data, unsigned int flags)
Store by cursor.
Definition: mdb.c:6540
int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode)
Open an environment handle.
Definition: mdb.c:4959
void mdb_env_close(MDB_env *env)
Close the environment and release the memory map.
Definition: mdb.c:5156
void mdb_dbi_close(MDB_env *env, MDB_dbi dbi)
Close a database handle.
Definition: mdb.c:9867
void mdb_txn_abort(MDB_txn *txn)
Abandon all the operations of the transaction instead of saving them.
Definition: mdb.c:3061
int mdb_txn_commit(MDB_txn *txn)
Commit all the operations of a transaction into the database.
Definition: mdb.c:3448
char * mdb_strerror(int err)
Return a string describing a given error code.
Definition: mdb.c:1479
int mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **cursor)
Create a cursor handle.
Definition: mdb.c:7634
int mdb_env_set_mapsize(MDB_env *env, size_t size)
Set the size of the memory map to use for this environment.
Definition: mdb.c:4060
int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs)
Set the maximum number of named databases for the environment.
Definition: mdb.c:4094
int mdb_env_create(MDB_env **env)
Create an LMDB environment handle.
Definition: mdb.c:3951
int mdb_env_get_maxkeysize(MDB_env *env)
Get the maximum size of keys and MDB_DUPSORT data we can write.
Definition: mdb.c:10079
#define mdb_open(txn, name, flags, dbi)
Compat with version <= 0.9.4, avoid clash with libmdb from MDB Tools project.
Definition: lmdb.h:1056
int mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **txn)
Create a transaction for use with the environment.
Definition: mdb.c:2829
int mdb_env_set_maxreaders(MDB_env *env, unsigned int readers)
Set the maximum number of threads/reader slots for the environment.
Definition: mdb.c:4103
#define MDB_INTEGERKEY
numeric keys in native byte order: either unsigned int or size_t.
Definition: lmdb.h:317
#define MDB_DUPFIXED
with MDB_DUPSORT, sorted dup items have fixed size
Definition: lmdb.h:319
#define MDB_INTEGERDUP
with MDB_DUPSORT, dups are MDB_INTEGERKEY-style integers
Definition: lmdb.h:321
#define MDB_DUPSORT
use sorted duplicates
Definition: lmdb.h:314
#define MDB_REVERSEKEY
use reverse string keys
Definition: lmdb.h:312
#define MDB_REVERSEDUP
with MDB_DUPSORT, use reverse string dups
Definition: lmdb.h:323
#define MDB_CREATE
create DB if not already existing
Definition: lmdb.h:325
#define MDB_FIXEDMAP
mmap at a fixed address (experimental)
Definition: lmdb.h:285
#define MDB_NOSUBDIR
no environment directory
Definition: lmdb.h:287
unsigned int me_maxreaders
max reader slots in the environment
Definition: lmdb.h:472
#define MDB_NOOVERWRITE
For put: Don't write if the key already exists.
Definition: lmdb.h:332
size_t me_mapsize
Size of the data memory map.
Definition: lmdb.h:469
#define MDB_NODUPDATA
Only for MDB_DUPSORT. For put: don't write if the key and data pair already exist.
Definition: lmdb.h:337
void * me_mapaddr
Address of map, if fixed.
Definition: lmdb.h:468
size_t mv_size
size of the data item
Definition: lmdb.h:258
void * mv_data
address of the data item
Definition: lmdb.h:259
unsigned int MDB_dbi
A handle for an individual database in the DB environment.
Definition: lmdb.h:241
exit(2)
char * buf
int i
int len
int main(int argc, char *argv[])
Definition: mdb_load.c:285
static int unhex(unsigned char *c2)
Definition: mdb_load.c:163
static size_t lineno
Definition: mdb_load.c:28
static int mode
Definition: mdb_load.c:24
static int Eof
Definition: mdb_load.c:35
static MDB_val kbuf
Definition: mdb_load.c:39
#define STRLENOF(s)
Definition: mdb_load.c:47
flagbit dbflags[]
Definition: mdb_load.c:57
#define Z
Definition: mdb_load.c:44
static void readhdr(void)
Definition: mdb_load.c:67
#define NOHDR
Definition: mdb_load.c:23
static MDB_envinfo info
Definition: mdb_load.c:37
#define PRINT
Definition: mdb_load.c:22
#define S(s)
Definition: mdb_load.c:55
static int readline(MDB_val *out, MDB_val *buf)
Definition: mdb_load.c:177
static void badend(void)
Definition: mdb_load.c:157
static int version
Definition: mdb_load.c:29
static MDB_val dbuf
Definition: mdb_load.c:39
struct flagbit flagbit
static int flags
Definition: mdb_load.c:31
static char * prog
Definition: mdb_load.c:33
static void usage(void)
Definition: mdb_load.c:279
static char * subname
Definition: mdb_load.c:26
static void env_close(MDB_env *env) noexcept
Definition: lmdb++.h:381
static void txn_abort(MDB_txn *txn) noexcept
Definition: lmdb++.h:594
const struct ncbi::grid::netcache::search::fields::KEY key
int strncmp(const char *str1, const char *str2, size_t count)
Definition: odbc_utils.hpp:133
#define strdup
Definition: ncbi_ansi_ext.h:70
int isxdigit(Uchar c)
Definition: ncbictype.hpp:71
char * strerror(int n)
Definition: pcregrep.c:835
Cursors are used for all DB operations.
Definition: mdb.c:1184
The database environment.
Definition: mdb.c:1259
Information about the environment.
Definition: lmdb.h:467
A database transaction.
Definition: mdb.c:1084
Generic structure used for passing keys and data in and out of the database.
Definition: lmdb.h:257
int bit
Definition: mdb_dump.c:33
char * name
Definition: mdb_dump.c:34
int len
Definition: mdb_load.c:52
void free(voidpf ptr)
voidp malloc(uInt size)
Modified on Tue Apr 23 07:39:15 2024 by modify_doxy.py rev. 669887