NCBI C++ ToolKit
blast_scope_src.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* ===========================================================================
2  *
3  * PUBLIC DOMAIN NOTICE
4  * National Center for Biotechnology Information
5  *
6  * This software/database is a "United States Government Work" under the
7  * terms of the United States Copyright Act. It was written as part of
8  * the author's official duties as a United States Government employee and
9  * thus cannot be copyrighted. This software/database is freely available
10  * to the public for use. The National Library of Medicine and the U.S.
11  * Government have not placed any restriction on its use or reproduction.
12  *
13  * Although all reasonable efforts have been taken to ensure the accuracy
14  * and reliability of the software and data, the NLM and the U.S.
15  * Government do not and cannot warrant the performance or results that
16  * may be obtained by using this software or data. The NLM and the U.S.
17  * Government disclaim all warranties, express or implied, including
18  * warranties of performance, merchantability or fitness for any particular
19  * purpose.
20  *
21  * Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  * Author: Christiam Camacho
26  *
27  */
28 
29 /** @file blast_scope_src.cpp
30  * Defines CBlastScopeSource class to create properly configured CScope
31  * objects.
32  */
33 
34 #include <ncbi_pch.hpp>
36 
37 #include <objmgr/scope.hpp>
42 
44 BEGIN_SCOPE(blast)
46 
/// Default protein BLAST database to use for the BLAST DB data loaders.
47 const char* SDataLoaderConfig::kDefaultProteinBlastDb = "nr";
/// Default nucleotide BLAST database to use for the BLAST DB data loaders.
48 const char* SDataLoaderConfig::kDefaultNucleotideBlastDb = "nt";
49 
/// Initializes the configuration: decodes the loader option flags, records the
/// BLAST database name (when one is given) and the molecule type, and then
/// applies any overrides found in the application's registry.
/// @param dbname BLAST database name; an empty value leaves m_BlastDbName
/// untouched
/// @param load_proteins stored in m_IsLoadingProteins
50 /// @note the data loaders can be overridden at runtime by the DATA_LOADERS entry
51 /// in the BLAST section of the NCBI configuration file. Allowed values are
52 /// blastdb, genbank, and none. If this is changed, please update the
53 /// BLAST+ user manual.
/// NOTE(review): this listing omits the first signature line (which declares
/// `options`) and the line that declares `app`; confirm against the real file.
54 void
56  const string& dbname,
57  bool load_proteins)
58 {
59  m_UseFixedSizeSlices = true;
// Each loader is enabled only if its bit is set in `options`.
60  m_UseBlastDbs = (options & eUseBlastDbDataLoader) ? true : false;
61  m_UseGenbank = (options & eUseGenbankDataLoader) ? true : false;
62  if ( !dbname.empty() ) {
63  m_BlastDbName.assign(dbname);
64  }
65  m_IsLoadingProteins = load_proteins;
66 
// Registry overrides are applied only when `app` is non-null; the loader
// switches are read first because the database lookup depends on them.
68  if (app) {
69  const CNcbiRegistry& registry = app->GetConfig();
70  x_LoadDataLoadersConfig(registry);
71  x_LoadBlastDbDataLoaderConfig(registry);
72  }
73 }
74 
/// Applies the DATA_LOADERS entry of the BLAST registry section to
/// m_UseBlastDbs and m_UseGenbank.
/// The entry can only switch loaders off: a loader whose name does not occur
/// in the value (case-insensitive substring search) is disabled, and a value
/// containing "none" disables both. If the entry is absent, both flags keep
/// their current values. The resulting flags are traced in either case.
/// NOTE(review): the signature line is not part of this listing; `registry`
/// is the registry object queried below.
75 void
77 {
78  static const string kDataLoadersConfig("DATA_LOADERS");
79 
80  if (registry.HasEntry("BLAST", kDataLoadersConfig)) {
81  const string& kLoaders = registry.Get("BLAST", kDataLoadersConfig);
82  if (NStr::FindNoCase(kLoaders, "blastdb") == NPOS) {
83  m_UseBlastDbs = false;
84  }
85  if (NStr::FindNoCase(kLoaders, "genbank") == NPOS) {
86  m_UseGenbank = false;
87  }
// "none" wins over any loader names that also appear in the value.
88  if (NStr::FindNoCase(kLoaders, "none") != NPOS) {
89  m_UseBlastDbs = false;
90  m_UseGenbank = false;
91  }
92  }
93  _TRACE("Using data loaders: blastdb " << boolalpha << m_UseBlastDbs
94  << "; genbank " << boolalpha << m_UseGenbank);
95 }
96 
/// Determines the BLAST database name used by the BLAST DB data loader.
/// - If BLAST DB loaders are disabled, the name is cleared.
/// - If a name is already set, it is kept.
/// - Otherwise the name is read from the BLAST registry section, using
///   BLASTDB_PROT_DATA_LOADER or BLASTDB_NUCL_DATA_LOADER depending on
///   m_IsLoadingProteins.
/// NOTE(review): the signature line and the body of the else branch are not
/// part of this listing. The closing assertion requires a non-empty name on
/// both paths, so the else branch presumably assigns a default database name
/// (kDefaultProteinBlastDb / kDefaultNucleotideBlastDb) -- confirm.
97 void
99 {
100  if ( !m_UseBlastDbs ) {
101  m_BlastDbName.clear();
102  return;
103  }
104 
105  // if the database was already specified via the API, don't override it.
106  if ( !m_BlastDbName.empty() ) {
107  return;
108  }
109 
110  static const string kProtBlastDbLoaderConfig("BLASTDB_PROT_DATA_LOADER");
111  static const string kNuclBlastDbLoaderConfig("BLASTDB_NUCL_DATA_LOADER");
112 
// Pick the registry key that matches the molecule type being loaded.
113  const string& config_param = m_IsLoadingProteins
114  ? kProtBlastDbLoaderConfig
115  : kNuclBlastDbLoaderConfig;
116 
117  if (registry.HasEntry("BLAST", config_param)) {
118  m_BlastDbName = registry.Get("BLAST", config_param);
119  } else {
120  _ASSERT(m_BlastDbName.empty());
124  }
125  _ASSERT( !m_BlastDbName.empty() );
126 }
127 
/// Constructs the scope source from a protein/nucleotide flag.
/// @param load_proteins forwarded to the m_Config constructor
/// @param objmgr object manager to use; when null, the instance returned by
/// CObjectManager::GetInstance() is used instead
/// NOTE(review): the statements between the m_ObjMgr assignment and the
/// closing brace are not part of this listing (presumably the data loader
/// initialization calls) -- confirm against the real file.
128 CBlastScopeSource::CBlastScopeSource(bool load_proteins /* = true */,
129  CObjectManager* objmgr /* = NULL */)
130  : m_Config(load_proteins)
131 {
132  m_ObjMgr.Reset(objmgr ? objmgr : CObjectManager::GetInstance());
138 }
139 
/// Constructor taking an explicit configuration: m_Config is copied from
/// `config`, and the object manager is `objmgr` or, when that is null, the
/// instance returned by CObjectManager::GetInstance().
/// NOTE(review): the first signature line (declaring `config`) and the
/// statements after the m_ObjMgr assignment are not part of this listing.
141  CObjectManager* objmgr /* = NULL */)
142  : m_Config(config)
143 {
144  m_ObjMgr.Reset(objmgr ? objmgr : CObjectManager::GetInstance());
150 }
151 
/// Constructor taking a BLAST database handle: m_Config is built from a bool
/// that is true exactly when db_handle->GetSequenceType() equals
/// CSeqDB::eProtein. The object manager is `objmgr` or, when that is null,
/// the instance returned by CObjectManager::GetInstance().
/// Note that db_handle is dereferenced in the initializer list, before any
/// emptiness check.
/// NOTE(review): the first signature line (declaring `db_handle`) and the
/// statements after the m_ObjMgr assignment are not part of this listing.
153  CObjectManager* objmgr /* = NULL */)
154  : m_Config(static_cast<bool>(db_handle->GetSequenceType() == CSeqDB::eProtein))
155 {
156  m_ObjMgr.Reset(objmgr ? objmgr : CObjectManager::GetInstance());
159 }
160 
/// Initializes the BLAST database data loader and stores its name in
/// m_BlastDbLoaderName. Does nothing when m_Config.m_UseBlastDbs is false.
/// A first registration is attempted; if it throws CSeqDBException, a second
/// (remote, per the messages below) registration is attempted. A failure of
/// the second attempt is reported as a warning and not rethrown. Exceptions
/// of other types are not caught here.
/// NOTE(review): the first signature line and both registration calls (the
/// expressions ending in `.GetLoader()->GetName()`) are not part of this
/// listing -- confirm the details against the real file.
161 void
163  EDbType dbtype)
164 {
165  if ( !m_Config.m_UseBlastDbs ) {
166  return;
167  }
168  try {
172  .GetLoader()->GetName();
173  _ASSERT( !m_BlastDbLoaderName.empty() );
174  _TRACE("Registered BLAST DB data loader '" << m_BlastDbLoaderName
175  << "' as non-default");
176  } catch (const CSeqDBException& e) {
177  // if the database isn't found, ignore the exception as the
178  // remote BLAST database data loader will be tried next
// The message test below only controls the trace output; the fallback
// attempt that follows runs for every CSeqDBException.
179  if (e.GetMsg().find("No alias or index file found ") != NPOS) {
180  _TRACE("Error initializing local BLAST database "
181  << "data loader: '" << e.GetMsg() << "'");
182  }
183  try {
187  .GetLoader()->GetName();
188  _ASSERT( !m_BlastDbLoaderName.empty() );
189  _TRACE("Registered BLAST DB data loader '" << m_BlastDbLoaderName
190  << "' as non-default");
191  } catch (const CSeqDBException& e) {
192  ERR_POST(Warning << "Error initializing remote BLAST database "
193  << "data loader: " << e.GetMsg());
194  _TRACE("Error initializing remote BLAST database "
195  << "data loader: '" << e.GetMsg() << "'");
196  }
197  }
198 }
199 
/// Initializes the BLAST database data loader from a database handle.
/// Does nothing when m_Config.m_UseBlastDbs is false; posts a warning and
/// registers nothing when db_handle is empty. Otherwise a registration using
/// the handle is attempted; on any std exception a warning is posted and a
/// second registration is attempted using the handle's database name list and
/// a database type derived from its sequence type. A CSeqDBException from the
/// second attempt is reported as a warning and not rethrown.
/// NOTE(review): the signature line and parts of both registration calls are
/// not part of this listing; presumably this is the db_handle overload of
/// x_InitBlastDatabaseDataLoader -- confirm against the real file.
200 void
202 {
203  if ( !m_Config.m_UseBlastDbs ) {
204  return;
205  }
206 
207  if (db_handle.Empty()) {
208  ERR_POST(Warning << "No BLAST database handle provided");
209  } else {
210  try {
211 
213  (*m_ObjMgr, db_handle, m_Config.m_UseFixedSizeSlices,
215  _ASSERT( !m_BlastDbLoaderName.empty() );
216  _TRACE("Registered BLAST DB data loader '" << m_BlastDbLoaderName
217  << "' as non-default");
218 
219  } catch (const exception& e) {
220 
221  // in case of error when initializing the BLAST database, just
222  // ignore the exception as the remote BLAST database data loader
223  // will be the fallback (just issue a warning)
224  ERR_POST(Warning << "Error initializing local BLAST database data "
225  << "loader: '" << e.what() << "'");
// The fallback registers by database name, so the database type has to be
// derived from the handle's sequence type.
226  const CBlastDbDataLoader::EDbType dbtype =
227  db_handle->GetSequenceType() == CSeqDB::eProtein
230  try {
233  (*m_ObjMgr, db_handle->GetDBNameList(), dbtype,
237  .GetLoader()->GetName();
238  _ASSERT( !m_BlastDbLoaderName.empty() );
239  _TRACE("Registered BLAST DB data loader '" << m_BlastDbLoaderName
240  << "' as non-default");
241  } catch (const CSeqDBException& e) {
242  ERR_POST(Warning << "Error initializing remote BLAST database "
243  << "data loader: " << e.GetMsg());
244  }
245  }
246  }
247 }
248 
/// Initializes the Genbank data loader and stores its name in m_GbLoaderName.
/// Does nothing when m_Config.m_UseGenbank is false. A CId2Reader is created
/// with connection pre-opening turned off. If a CException is thrown,
/// m_GbLoaderName is emptied and a warning is posted; the exception is not
/// rethrown.
/// NOTE(review): the signature line and the beginning of the registration
/// call (the expression ending in `.GetLoader()->GetName()`) are not part of
/// this listing -- confirm against the real file.
249 void
251 {
252  if ( !m_Config.m_UseGenbank ) {
253  return;
254  }
255 
256  try {
257  CRef<CReader> reader(new CId2Reader);
258  reader->SetPreopenConnection(false);
261  .GetLoader()->GetName();
262  _TRACE("Registered Genbank data loader '" << m_GbLoaderName
263  << "' as non-default");
264  } catch (const CException& e) {
// An empty loader name makes AddDataLoaders skip the Genbank loader.
265  m_GbLoaderName.erase();
266  ERR_POST(Warning << "Error initializing Genbank data loader: "
267  << e.GetMsg());
268  }
269 }
270 
271 /// Counts the number of BLAST database data loaders registered in the object
272 /// manager. This is needed so that the priorities of the BLAST databases can
273 /// be adjusted accordingly when multiple BLAST database data loaders are added
274 /// to CScope objects (@sa AddDataLoaders)
/// A loader is counted when its registered name contains the search string
/// for the calling thread: "BLASTDB_" when CThread::GetSelf() returns 0,
/// otherwise "BLASTDB_THREAD<id>_".
/// @return number of matching loader names
/// NOTE(review): the signature line and the lines that declare and fill
/// `loader_names` are not part of this listing -- confirm against the real
/// file.
275 
277 {
278  int retval = 0;
281  static const string kPrefix = "BLASTDB_";
282  static const string kPrefixThread = kPrefix + "THREAD";
283  int t_id = CThread::GetSelf();
284  string prefix = kPrefix;
285  if (t_id != 0) {
286  prefix = kPrefixThread + NStr::IntToString(t_id) + "_";
287  }
// NStr::Find matches the string anywhere in the name, not only at its start.
288  ITERATE(CObjectManager::TRegisteredNames, loader_name, loader_names) {
289  if (NStr::Find(*loader_name, prefix) != NPOS) {
290  retval++;
291  }
292  }
293  return retval;
294 }
295 
/// Adds the data loaders configured in this object to the provided scope.
/// The BLAST database loader is added with blastdb_loader_priority and the
/// Genbank loader with kGenbankLoaderPriority; each is added only if its
/// loader name is non-empty.
/// NOTE(review): the signature line, the expression that computes
/// blastdb_loader_priority and the tail of the second _TRACE statement are
/// not part of this listing -- confirm against the real file.
296 void
298 {
299  const int blastdb_loader_priority =
301 
302  // Note that these priorities are needed so that the CScope::AddXXX methods
303  // don't need a specific priority (the default will be fine).
304  if (!m_BlastDbLoaderName.empty()) {
305  _TRACE("Adding " << m_BlastDbLoaderName << " at priority " <<
306  blastdb_loader_priority);
307  scope->AddDataLoader(m_BlastDbLoaderName, blastdb_loader_priority);
308  }
309  if (!m_GbLoaderName.empty()) {
310  _TRACE("Adding " << m_GbLoaderName << " at priority " <<
312  scope->AddDataLoader(m_GbLoaderName, kGenbankLoaderPriority);
313  }
314 }
315 
/// Creates a new CScope on this object's object manager, adds the configured
/// data loaders to it via AddDataLoaders, and returns it.
/// NOTE(review): the signature line is not part of this listing.
317 {
318  CRef<CScope> retval(new CScope(*m_ObjMgr));
319  AddDataLoaders(retval);
320  return retval;
321 }
322 
/// Forgets the BLAST database data loader name: when m_BlastDbLoaderName is
/// non-empty it is cleared; otherwise nothing happens.
/// NOTE(review): the signature line and one statement inside the `if` are not
/// part of this listing; presumably that statement revokes the loader from
/// the object manager before the name is cleared -- confirm.
323 void
325 {
326  if (!m_BlastDbLoaderName.empty()) {
328  m_BlastDbLoaderName.clear();
329  }
330 }
331 
/// Equality operator: returns true for self-comparison, false as soon as one
/// of the compared members differs (m_UseGenbank, m_UseBlastDbs,
/// m_BlastDbName), and true otherwise.
/// NOTE(review): the signature line and the condition of the third comparison
/// are not part of this listing, so a further member is presumably compared
/// there -- confirm against the real file.
332 bool
334 {
// Same object: equal without comparing members.
335  if (this == &rhs) {
336  return true;
337  }
338  if (m_UseGenbank != rhs.m_UseGenbank) {
339  return false;
340  }
341  if (m_UseBlastDbs != rhs.m_UseBlastDbs) {
342  return false;
343  }
345  return false;
346  }
347  if (m_BlastDbName != rhs.m_BlastDbName) {
348  return false;
349  }
350  return true;
351 }
352 
/// Inequality operator: the exact negation of operator==.
/// NOTE(review): the signature line is not part of this listing.
353 bool
355 {
356  return !(*this == rhs);
357 }
358 
359 END_SCOPE(blast)
static const string kPrefixThread
Definition: bdbloader.cpp:151
Data loader implementation that uses the blast databases remotely.
USING_SCOPE(objects)
static int s_CountBlastDbDataLoaders()
Counts the number of BLAST database data loaders registered in the object manager.
Declares CBlastScopeSource class to create properly configured CScope objects to invoke the BLAST dat...
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: bdbloader.cpp:52
EDbType
Describes the type of blast database to use.
Definition: bdbloader.hpp:57
@ eNucleotide
nucleotide database
Definition: bdbloader.hpp:58
@ eProtein
protein database
Definition: bdbloader.hpp:59
void AddDataLoaders(CRef< objects::CScope > scope)
Add the data loader configured in the object to the provided scope.
CBlastScopeSource(bool load_proteins=true, CObjectManager *objmgr=NULL)
Constructor which only registers the Genbank data loader.
static const int kGenbankLoaderPriority
Data loader priority for Genbank data loader.
string m_GbLoaderName
Name of the Genbank data loader.
string m_BlastDbLoaderName
Name of the BLAST database data loader.
static const int kBlastDbLoaderPriority
Data loader priority for BLAST database data loader (if multiple BLAST database data loaders are regi...
CRef< objects::CObjectManager > m_ObjMgr
Our reference to the object manager.
SDataLoaderConfig m_Config
The configuration for this object.
void x_InitGenbankDataLoader()
Initialize the Genbank data loader.
void x_InitBlastDatabaseDataLoader(const string &dbname, EDbType dbtype)
Initializes the BLAST database data loader.
CRef< objects::CScope > NewScope()
Create a new, properly configured CScope.
void RevokeBlastDbDataLoader()
Removes the BLAST database data loader from the object manager.
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
CNcbiRegistry –.
Definition: ncbireg.hpp:913
CObjectManager –.
void SetPreopenConnection(bool preopen=true)
Definition: reader.cpp:207
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
CScope –.
Definition: scope.hpp:92
CSeqDBException.
Definition: seqdbcommon.hpp:73
CSeqDB.
Definition: seqdb.hpp:161
const string & GetDBNameList() const
Get list of database names.
Definition: seqdb.cpp:760
ESeqType GetSequenceType() const
Returns the type of database opened - protein or nucleotide.
Definition: seqdb.cpp:427
@ eProtein
Definition: seqdb.hpp:174
static CMemoryRegistry registry
Definition: cn3d_tools.cpp:81
#define bool
Definition: bool.h:34
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define _TRACE(message)
Definition: ncbidbg.hpp:122
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
TLoader * GetLoader(void) const
Get pointer to the loader.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void GetRegisteredNames(TRegisteredNames &names)
Get names of all registered data loaders.
vector< string > TRegisteredNames
bool RevokeDataLoader(CDataLoader &loader)
Revoke previously registered data loader.
@ kPriority_NotSet
Deprecated: use kPriority_Default instead.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
Definition: ncbireg.cpp:262
virtual bool HasEntry(const string &section, const string &name=kEmptyStr, TFlags flags=0) const
Definition: ncbireg.cpp:290
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2993
#define NPOS
Definition: ncbistr.hpp:133
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static TID GetSelf(void)
Definition: ncbithr.cpp:515
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
static string kPrefix
Definition: id2info.cpp:146
The Object manager core.
static const char * prefix[]
Definition: pcregrep.c:405
Uint4 GetSequenceType(const CBioseq_Handle &bsh)
Return a (corrected) set of flags identifying the sequence type.
Definition: sequtils.cpp:42
Configuration structure for the CBlastScopeSource.
bool operator!=(const SDataLoaderConfig &rhs) const
Inequality operator.
bool m_UseGenbank
Use the Genbank data loader.
static const char * kDefaultProteinBlastDb
Default protein BLAST database to use for the BLAST DB data loaders.
string m_BlastDbName
Name of the BLAST database to use (non-empty if m_UseBlastDbs is true)
void x_LoadDataLoadersConfig(const CNcbiRegistry &registry)
Load the DATA_LOADERS configuration value from the config file.
static const char * kDefaultNucleotideBlastDb
Default nucleotide BLAST database to use for the BLAST DB data loaders.
bool operator==(const SDataLoaderConfig &rhs) const
Equality operator.
bool m_UseBlastDbs
Use the BLAST database data loaders.
bool m_UseFixedSizeSlices
Argument to configure BLAST database data loader.
void x_LoadBlastDbDataLoaderConfig(const CNcbiRegistry &registry)
Load the BLAST database configured to search for the blastdb DATA_LOADERS option from the config file...
EConfigOpts
Configuration options for the BlastScopeSource.
bool m_IsLoadingProteins
Is this intended to load protein sequences.
#define _ASSERT
#define const
Definition: zconf.h:232
Modified on Mon May 20 04:59:36 2024 by modify_doxy.py rev. 669887