NCBI C++ ToolKit
lds2_indexer.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: lds2_indexer.cpp 92174 2020-12-22 17:47:25Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Maxim Didenko
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbienv.hpp>
36 #include <corelib/ncbiargs.hpp>
37 
38 #include <objtools/lds2/lds2.hpp>
39 
40 using namespace ncbi;
41 using namespace objects;
42 
43 
44 /////////////////////////////////////////////////////////////////////////////
45 //
46 // Demo application
47 //
48 
49 
51 {
52 public:
53  virtual void Init(void);
54  virtual int Run (void);
55 
56 private:
57 private:
58 };
59 
60 
61 #define GB_RELEASE_MODE_NONE "none"
62 #define GB_RELEASE_MODE_GUESS "guess"
63 #define GB_RELEASE_MODE_FORCE "force"
64 
65 
67 {
68  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
69 
70  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
71  "LDS2 Indexer", false);
72 
73  arg_desc->AddKey("source", "path_name",
74  "Paht to the directory with source data files",
76 
77  arg_desc->AddOptionalKey("db", "db_name",
78  "Path to the LDS2 database file",
80 
81  arg_desc->AddFlag("norecursive",
82  "Do not search for source files recursively");
83 
84  arg_desc->AddOptionalKey("gb_release", "gb_release_mode",
85  "Mode of GB release file detection",
87  arg_desc->SetConstraint("gb_release",
88  &(*new CArgAllow_Strings,
92 
93 /*
94  arg_desc->AddFlag("abs_path",
95  "Use absolute path to data files (default)");
96  arg_desc->AddFlag("keep_path",
97  "Keep original path to data files");
98  arg_desc->AddFlag("keep_other",
99  "Keep files outside source dir indexed.");
100 
101 */
102 
103  arg_desc->AddOptionalKey("group_aligns", "group_size",
104  "Group standalone seq-aligns into blobs",
106 
107  arg_desc->AddOptionalKey("dump_table", "table_name",
108  "Dump LDS2 table content",
110  arg_desc->AddDefaultKey("dump_file", "file_name",
111  "Dump destination", CArgDescriptions::eOutputFile, "-");
112  SetupArgDescriptions(arg_desc.release());
113 }
114 
115 
117 {
118  string lds2_section_name = "lds2";
119  const CNcbiRegistry& reg = GetConfig();
120  string db_path = reg.Get(lds2_section_name, "Path",
122  string source_path = reg.Get(lds2_section_name, "Source",
124 
125  const CArgs& args = GetArgs();
126  if (args["source"]) {
127  source_path = args["source"].AsString();
128  }
129 
130  if (args["db"]) {
131  db_path = args["db"].AsString();
132  }
133  else {
134  db_path = CDirEntry::ConcatPath(source_path, "lds2.db");
135  }
136 
137  CLDS2_Manager mgr(db_path);
138 
139  if ( args["gb_release"] ) {
140  string mode = args["gb_release"].AsString();
141  if ( mode == GB_RELEASE_MODE_NONE ) {
143  }
144  if ( mode == GB_RELEASE_MODE_GUESS ) {
146  }
147  if ( mode == GB_RELEASE_MODE_FORCE ) {
149  }
150  }
151 
152  if ( args["group_aligns"] ) {
153  mgr.SetSeqAlignGroupSize(args["group_aligns"].AsInteger());
154  }
155 
156  if ( args["dump_table"] ) {
157  mgr.GetDatabase()->Dump(args["dump_table"].AsString(), args["dump_file"].AsOutputFile());
158  }
159  else {
160  mgr.AddDataDir(source_path, args["norecursive"] ?
162  mgr.UpdateData();
163  }
164 
165 
166 
167 /*
168  if ( args["keep_path"] ) {
169  if ( args["abs_path"] ) {
170  ERR_FATAL("Conflicting options: -abs_path and -keep_path");
171  }
172  flags = (flags & ~CLDS_Manager::fPathMask) |
173  CLDS_Manager::fOriginalPath;
174  }
175  if ( args["abs_path"] ) {
176  flags = (flags & ~CLDS_Manager::fPathMask) |
177  CLDS_Manager::fAbsolutePath;
178  }
179 
180  if ( args["keep_other"] ) {
181  flags = (flags & ~CLDS_Manager::fOtherFilesMask) |
182  CLDS_Manager::fKeepOtherFiles;
183  }
184 
185 */
186 
187  return 0;
188 }
189 
190 
191 
192 /////////////////////////////////////////////////////////////////////////////
193 // MAIN
194 
195 
// Program entry point: builds a temporary CLDS2IndexerApplication, passes the
// command-line arguments (argc, argv) to its AppMain(), and returns the value
// AppMain() yields as the result of main().
196 int main(int argc, const char* argv[])
197 {
198  return CLDS2IndexerApplication().AppMain(argc, argv);
199 }
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
virtual void Init(void)
virtual int Run(void)
void Dump(const string &table, CNcbiOstream &out)
Dump the selected table (use empty string to dump table names or * to dump all tables).
Definition: lds2_db.cpp:1060
Class for managing LDS2 database and related data files.
Definition: lds2.hpp:46
@ eGB_Force
Split all top-level bioseq-sets into seq-entries.
Definition: lds2.hpp:102
@ eGB_Guess
Try to autodetect and split GB release bioseq-sets.
Definition: lds2.hpp:101
@ eGB_Ignore
Do not split bioseq-sets (default)
Definition: lds2.hpp:100
void AddDataDir(const string &data_dir, EDirMode mode=eDir_Recurse)
Add data directory.
Definition: lds2.cpp:930
void SetSeqAlignGroupSize(int sz)
Definition: lds2.hpp:126
CLDS2_Database * GetDatabase(void)
Get the current database object.
Definition: lds2.hpp:59
void UpdateData(void)
Rescan all indexed files, check for modifications, update the database.
Definition: lds2.cpp:1016
@ eDir_Recurse
Automatically scan sub-directories (default).
Definition: lds2.hpp:73
@ eDir_NoRecurse
Do not parse sub-dirs automatically.
Definition: lds2.hpp:72
void SetGBReleaseMode(EGBReleaseMode mode)
Definition: lds2.hpp:106
CNcbiRegistry –.
Definition: ncbireg.hpp:913
Include a standard set of the NCBI C++ Toolkit most basic headers.
static void Init(void)
Definition: cursor6.c:76
#define CNcbiApplication
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
static string ConcatPath(const string &first, const string &second)
Concatenate two parts of the path for the current OS.
Definition: ncbifile.cpp:776
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
Definition: ncbireg.cpp:262
@ fTruncate
Leading, trailing blanks can be truncated.
Definition: ncbireg.hpp:87
void Run(void)
Enter the main loop.
#define GB_RELEASE_MODE_GUESS
#define GB_RELEASE_MODE_NONE
#define GB_RELEASE_MODE_FORCE
int main(int argc, const char *argv[])
mdb_mode_t mode
Definition: lmdb++.h:38
Magic spell ;-) needed for some weird compilers... very empiric.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
#define GetArgs
Avoid preprocessor name clash with the NCBI C Toolkit.
Definition: ncbiapp_api.hpp:54
Defines command line argument related classes.
Defines unified interface to application:
Modified on Sat May 11 13:53:07 2024 by modify_doxy.py rev. 669887