NCBI C++ ToolKit
lds2.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef LDS2_HPP__
2 #define LDS2_HPP__
3 /* $Id: lds2.hpp 54723 2012-06-06 16:01:27Z grichenk $
4  * ===========================================================================
5  *
6  * PUBLIC DOMAIN NOTICE
7  * National Center for Biotechnology Information
8  *
9  * This software/database is a "United States Government Work" under the
10  * terms of the United States Copyright Act. It was written as part of
11  * the author's official duties as a United States Government employee and
12  * thus cannot be copyrighted. This software/database is freely available
13  * to the public for use. The National Library of Medicine and the U.S.
14  * Government have not placed any restriction on its use or reproduction.
15  *
16  * Although all reasonable efforts have been taken to ensure the accuracy
17  * and reliability of the software and data, the NLM and the U.S.
18  * Government do not and cannot warrant the performance or results that
19  * may be obtained by using this software or data. The NLM and the U.S.
20  * Government disclaim all warranties, express or implied, including
21  * warranties of performance, merchantability or fitness for any particular
22  * purpose.
23  *
24  * Please cite the author in any work or product based on this material.
25  *
26  * ===========================================================================
27  *
28  * Author: Aleksey Grichenko
29  *
30  * File Description: LDS v.2 data manager.
31  *
32  */
33 
34 #include <corelib/ncbiobj.hpp>
38 
39 
42 
43 
44 /// Class for managing LDS2 database and related data files.
46 {
47 public:
48  /// Create LDS2 manager for the specified db file.
49  /// If the file does not exist, it will be created only after adding
50  /// at least one data file and indexing it.
51  CLDS2_Manager(const string& db_file);
52 
53  virtual ~CLDS2_Manager(void);
54 
55  /// Get currently selected database name.
56  const string& GetDbFile(void) const;
57 
58  /// Get the current database object.
59  CLDS2_Database* GetDatabase(void) { return m_Db.GetPointerOrNull(); }
60 
61  /// Select new database. If the database does not yet exist,
62  /// it is not created immediately. The list of data files is
63  /// cleared.
64  void SetDbFile(const string& db_file);
65 
66  /// Add new data file to the list. This will not parse and index the
67  /// new file - call UpdateData().
68  void AddDataFile(const string& data_file);
69 
70  /// Directory parsing mode while indexing files
71  enum EDirMode {
72  eDir_NoRecurse, ///< Do not parse sub-dirs automatically.
73  eDir_Recurse ///< Automatically scan sub-directories (default).
74  };
75 
76  /// Add data directory. All files in the directory are added to the list.
77  /// If the mode is eDir_Recurse, also adds all subdirectories.
78  /// Call UpdateData to parse and index the files.
79  void AddDataDir(const string& data_dir, EDirMode mode = eDir_Recurse);
80 
81  /// Register a URL handler. Handlers make it possible to use special
82  /// storage types like compressed files, ftp or http locations etc.
83  /// The same handler must be registered in the data loader
84  /// when using LDS2 to fetch data. The default handlers "file" and
85  /// "gzipfile" for local files are registered automatically.
86  void RegisterUrlHandler(CLDS2_UrlHandler_Base* handler);
87 
88  /// Add a URL. The handler is used to access the URL and must be
89  /// registered in the manager before adding the URL.
90  void AddDataUrl(const string& url, const string& handler_name);
91 
92  /// Remove all data from the database
93  void ResetData(void);
94 
95  /// Rescan all indexed files, check for modifications, update the database.
96  void UpdateData(void);
97 
98  /// Control indexing of GB releases (bioseq-sets).
100  eGB_Ignore, ///< Do not split bioseq-sets (default)
101  eGB_Guess, ///< Try to autodetect and split GB release bioseq-sets
102  eGB_Force ///< Split all top-level bioseq-sets into seq-entries
103  };
104 
105  EGBReleaseMode GetGBReleaseMode(void) const { return m_GBReleaseMode; }
106  void SetGBReleaseMode(EGBReleaseMode mode) { m_GBReleaseMode = mode; }
107 
108  /// Control seq-id conflict resolving during file parsing.
110  /// Ignore bioseqs with duplicate ids, store just the first one.
112  /// Store all bioseqs regardless of seq-id conflicts (default).
113  /// The conflict may be resolved later by data loader.
115  /// Throw exception on bioseqs with duplicate seq-ids.
116  eDuplicate_Throw
117  };
118 
119  EDuplicateIdMode GetDuplicateIdMode(void) const { return m_DupIdMode; }
120  void SetDuplicateIdMode(EDuplicateIdMode mode) { m_DupIdMode = mode; }
121 
122  /// Control grouping of standalone seq-aligns into bigger blobs.
123  /// If set to 0 or 1, no grouping is performed, each seq-align
124  /// becomes a separate blob.
125  int GetSeqAlignGroupSize(void) const { return m_SeqAlignGroupSize; }
126  void SetSeqAlignGroupSize(int sz) { m_SeqAlignGroupSize = sz; }
127 
128  /// Error handling while indexing files.
129  /// NOTE: Only a few kinds of errors can be ignored (unsupported
130  /// file format or object type, broken data file etc.).
131  enum EErrorMode {
132  eError_Silent, ///< Try to ignore errors, continue indexing.
133  eError_Report, ///< Print error messages, but do not fail (default).
134  eError_Throw ///< Throw exceptions on errors.
135  };
136 
137  EErrorMode GetErrorMode(void) const { return m_ErrorMode; }
138  void SetErrorMode(EErrorMode mode) { m_ErrorMode = mode; }
139 
140  /// Fasta reader settings
141  CFastaReader::TFlags GetFastaFlags(void) const { return m_FastaFlags; }
142  void SetFastaFlags(CFastaReader::TFlags flags) { m_FastaFlags = flags; }
143 
144 private:
146 
147  // Check for gzip file.
148  bool x_IsGZipFile(const SLDS2_File& file_info);
149 
150  // Find handler for the file.
151  CLDS2_UrlHandler_Base* x_GetUrlHandler(const SLDS2_File& file_info);
152  // Get file info and handler
153  SLDS2_File x_GetFileInfo(const string& file_name,
155  void x_ParseFile(const SLDS2_File& info,
157 
158  // All registered handlers by name.
160  // List of URLs which require special handlers.
162 
172 };
173 
174 
// Inline accessor: returns the database file name as reported by the
// current database object held in m_Db.
175 inline
176 const string& CLDS2_Manager::GetDbFile(void) const
177 {
178  _ASSERT(m_Db); // m_Db is dereferenced on the next line, so it must be set here
179  return m_Db->GetDbFile(); // forwards to the database object's own GetDbFile()
180 }
181 
182 
185 
186 #endif // LDS2_HPP__
const string & GetDbFile(void) const
Get database file name.
Definition: lds2_db.hpp:264
Class for managing LDS2 database and related data files.
Definition: lds2.hpp:46
EErrorMode
Error handling while indexing files.
Definition: lds2.hpp:131
@ eError_Silent
Try to ignore errors, continue indexing.
Definition: lds2.hpp:132
@ eError_Report
Print error messages, but do not fail (default).
Definition: lds2.hpp:133
THandlersByUrl m_HandlersByUrl
Definition: lds2.hpp:165
EGBReleaseMode
Control indexing of GB releases (bioseq-sets).
Definition: lds2.hpp:99
@ eGB_Guess
Try to autodetect and split GB release bioseq-sets.
Definition: lds2.hpp:101
@ eGB_Ignore
Do not split bioseq-sets (default)
Definition: lds2.hpp:100
EDuplicateIdMode GetDuplicateIdMode(void) const
Definition: lds2.hpp:119
const string & GetDbFile(void) const
Get currently selected database name.
Definition: lds2.hpp:176
EGBReleaseMode m_GBReleaseMode
Definition: lds2.hpp:166
EErrorMode m_ErrorMode
Definition: lds2.hpp:168
map< string, CRef< CLDS2_UrlHandler_Base > > THandlers
Definition: lds2.hpp:159
void SetErrorMode(EErrorMode mode)
Definition: lds2.hpp:138
void SetFastaFlags(CFastaReader::TFlags flags)
Definition: lds2.hpp:142
void SetSeqAlignGroupSize(int sz)
Definition: lds2.hpp:126
CLDS2_Database * GetDatabase(void)
Get the current database object.
Definition: lds2.hpp:59
THandlers m_Handlers
Definition: lds2.hpp:170
EDirMode
Directory parsing mode while indexing files.
Definition: lds2.hpp:71
@ eDir_NoRecurse
Do not parse sub-dirs automatically.
Definition: lds2.hpp:72
EDuplicateIdMode m_DupIdMode
Definition: lds2.hpp:167
CFastaReader::TFlags GetFastaFlags(void) const
Fasta reader settings.
Definition: lds2.hpp:141
int m_SeqAlignGroupSize
Definition: lds2.hpp:171
void SetGBReleaseMode(EGBReleaseMode mode)
Definition: lds2.hpp:106
CFastaReader::TFlags m_FastaFlags
Definition: lds2.hpp:169
TFiles m_Files
Definition: lds2.hpp:164
EGBReleaseMode GetGBReleaseMode(void) const
Definition: lds2.hpp:105
CRef< CLDS2_Database > m_Db
Definition: lds2.hpp:163
void SetDuplicateIdMode(EDuplicateIdMode mode)
Definition: lds2.hpp:120
int GetSeqAlignGroupSize(void) const
Control grouping of standalone seq-aligns into bigger blobs.
Definition: lds2.hpp:125
EErrorMode GetErrorMode(void) const
Definition: lds2.hpp:137
EDuplicateIdMode
Control seq-id conflict resolving during file parsing.
Definition: lds2.hpp:109
@ eDuplicate_Skip
Ignore bioseqs with duplicate ids, store just the first one.
Definition: lds2.hpp:111
@ eDuplicate_Store
Store all bioseqs regardless of seq-id conflicts (default).
Definition: lds2.hpp:114
CLDS2_Database::TStringSet TFiles
Definition: lds2.hpp:145
map< string, string > THandlersByUrl
Definition: lds2.hpp:161
Base class for URL handler.
CObject –.
Definition: ncbiobj.hpp:180
Definition: map.hpp:338
void(*)(CSeq_entry_Handle seh, IWorkbench *wb, const CSerialObject &obj) handler
static uch flags
const char * file_name[]
Operators to edit gaps in sequences.
long TFlags
binary OR of EFlags
Definition: fasta.hpp:117
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_LDS2_EXPORT
Definition: ncbi_export.h:584
static MDB_envinfo info
Definition: mdb_load.c:37
mdb_mode_t mode
Definition: lmdb++.h:38
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
BOOL UpdateData(HWND hDlg, CProjBulderApp *pApp, BOOL bGet)
Definition: ptb_gui.cpp:62
LDS2 database.
Definition: lds2_db.hpp:57
#define _ASSERT
Modified on Wed Apr 17 13:08:46 2024 by modify_doxy.py rev. 669887