NCBI C++ ToolKit
uniform_search.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* ===========================================================================
2  *
3  * PUBLIC DOMAIN NOTICE
4  * National Center for Biotechnology Information
5  *
6  * This software/database is a "United States Government Work" under the
7  * terms of the United States Copyright Act. It was written as part of
8  * the author's official duties as a United States Government employee and
9  * thus cannot be copyrighted. This software/database is freely available
10  * to the public for use. The National Library of Medicine and the U.S.
11  * Government have not placed any restriction on its use or reproduction.
12  *
13  * Although all reasonable efforts have been taken to ensure the accuracy
14  * and reliability of the software and data, the NLM and the U.S.
15  * Government do not and cannot warrant the performance or results that
16  * may be obtained by using this software or data. The NLM and the U.S.
17  * Government disclaim all warranties, express or implied, including
18  * warranties of performance, merchantability or fitness for any particular
19  * purpose.
20  *
21  * Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  * Author: Christiam Camacho
26  *
27  */
28 
29 /** @file uniform_search.cpp
30  * Implementation of the uniform BLAST search interface auxiliary classes
31  */
32 
33 #include <ncbi_pch.hpp>
36 
39 
40 /** @addtogroup AlgoBlast
41  *
42  * @{
43  */
44 
47 BEGIN_SCOPE(blast)
48 
50  : m_DbName(dbname), m_MolType(mol_type), m_GiListSet(false),
51  m_FilteringAlgorithmId(-1), m_MaskType(eNoSubjMasking),
52  m_NeedsFilteringTranslation(false), m_DbInitialized(false)
53 {}
54 
56  const string& entrez_query)
57  : m_DbName(dbname), m_MolType(mol_type),
58  m_EntrezQueryLimitation(entrez_query), m_GiListSet(false),
59  m_FilteringAlgorithmId(-1), m_MaskType(eNoSubjMasking),
60  m_NeedsFilteringTranslation(false), m_DbInitialized(false)
61 {}
62 
63 void
65 {
66  m_DbName = dbname;
67 }
68 
69 string
71 {
72  return m_DbName;
73 }
74 
75 void
77 {
78  m_MolType = mol_type;
79 }
80 
83 {
84  return m_MolType;
85 }
86 
87 void
88 CSearchDatabase::SetEntrezQueryLimitation(const string& entrez_query)
89 {
90  m_EntrezQueryLimitation = entrez_query;
91 }
92 
93 string
95 {
97 }
98 
99 void
101 {
102  if (m_GiListSet) NCBI_THROW(CBlastException, eInvalidArgument,
103  "Cannot have more than one type of id list filtering.");
104  m_GiListSet = true;
105  m_GiList.Reset(gilist);
106 }
107 
108 const CRef<CSeqDBGiList>&
110 {
111  return m_GiList;
112 }
113 
116 {
118  if (!m_GiList.Empty() && !m_GiList->Empty()) {
119  m_GiList->GetGiList(retval);
120  }
121  return retval;
122 }
123 
124 void
126 {
127  if (m_GiListSet) NCBI_THROW(CBlastException, eInvalidArgument,
128  "Cannot have more than one type of id list filtering.");
129  m_GiListSet = true;
130  m_NegativeGiList.Reset(gilist);
131 }
132 
133 const CRef<CSeqDBGiList>&
135 {
136  return m_NegativeGiList;
137 }
138 
141 {
144  m_NegativeGiList->GetGiList(retval);
145  }
146  return retval;
147 }
148 
149 static bool s_IsNumericId(const string &id)
150 {
151  Int4 nid(-1);
152  return NStr::StringToNumeric(id, &nid, NStr::fConvErr_NoThrow, 10);
153 }
154 
/// Selects subject masking from a textual filtering-algorithm identifier.
///
/// Records @a mask_type in m_MaskType. When @a mask_type is eNoSubjMasking
/// the function returns without examining @a filt_algorithm. Otherwise a
/// string accepted by s_IsNumericId() is converted with NStr::StringToInt()
/// and stored in m_FilteringAlgorithmId, while any other string is stored
/// unchanged in m_FilteringAlgorithmString.
///
/// @param filt_algorithm  numeric ID or string identifier of the algorithm
/// @param mask_type       subject masking type to record
///
/// NOTE(review): the listing numbers embedded in this block skip 160, 162,
/// 167 and 170, so statements appear to have been lost when this page was
/// extracted. Confirm against the repository copy before treating this
/// description as complete.
155 void
156 CSearchDatabase::SetFilteringAlgorithm(const string &filt_algorithm,
157  ESubjectMaskingType mask_type)
158 {
159  m_MaskType = mask_type;
 // With masking disabled the algorithm identifier is not consulted.
161  if (mask_type == eNoSubjMasking) {
163  return;
164  }
 // s_IsNumericId() has already parsed the string as a base-10 Int4 here.
165  if (s_IsNumericId(filt_algorithm)) {
166  m_FilteringAlgorithmId = NStr::StringToInt(filt_algorithm);
168  } else {
 // Non-numeric identifiers are kept as text.
169  m_FilteringAlgorithmString = filt_algorithm;
171  }
172 }
173 
174 void
176 {
177  SetFilteringAlgorithm(filt_algorithm_id, eSoftSubjMasking);
178 }
179 
180 void
182  ESubjectMaskingType mask_type)
183 {
184  m_MaskType = mask_type;
186  if (mask_type == eNoSubjMasking) {
188  return;
189  }
190  m_FilteringAlgorithmId = filt_algorithm_id;
193 }
194 
195 int
197 {
198  if (m_MaskType == eNoSubjMasking) return -1;
199  try {
202  }
203  return m_FilteringAlgorithmId;
204  } catch (const CSeqDBException&) {
205  return -1;
206  }
207 }
208 
209 string
211 {
212  if (m_MaskType == eNoSubjMasking) return "";
214 }
215 
218 {
219  return m_MaskType;
220 }
221 
222 void
224 {
225  if (m_MaskType == eNoSubjMasking) return;
226  if (!m_DbInitialized) {
227  x_InitializeDb();
228  }
232 }
233 
234 void
236 {
237  m_SeqDb.Reset(seqdb);
238  m_DbInitialized = true;
239 }
240 
243 {
244  if (!m_DbInitialized) {
245  x_InitializeDb();
246  }
247  return m_SeqDb;
248 }
249 
250 void
252 {
254  if (! m_GiList.Empty() && ! m_GiList->Empty()) {
255  m_SeqDb.Reset(new CSeqDB(m_DbName, seq_type, m_GiList));
256 
257  } else if (! m_NegativeGiList.Empty() && ! m_NegativeGiList->Empty()) {
258 
259  if(m_NegativeGiList->GetNumGis() > 0) {
260  vector<TGi> gis;
262  CSeqDBIdSet idset(gis, CSeqDBIdSet::eGi, false);
263  m_SeqDb.Reset(new CSeqDB(m_DbName, seq_type, idset));
264  }
265  else if (m_NegativeGiList->GetNumSis() > 0){
266  vector<string> sis;
268 
271  seqIds->ReserveSis(sis.size());
272  ITERATE(vector<string>, iter, sis) {
273  seqIds->AddSi(*iter);
274  }
275  m_SeqDb.Reset(new CSeqDB(m_DbName, seq_type, seqIds));
276  }
277  else if (m_NegativeGiList->GetNumTaxIds() > 0) {
280  m_SeqDb.Reset(new CSeqDB(m_DbName, seq_type, taxIds.GetPointer()));
281  } else if(m_NegativeGiList->GetNumPigs() > 0) {
283  vector<TPig> pig_list;
284  m_NegativeGiList->GetPigList(pig_list);
285  pigs->SetPigList(pig_list);
286  m_SeqDb.Reset(new CSeqDB(m_DbName, seq_type, pigs.GetPointer()));
287  }
288 
289  }
290  else {
291  m_SeqDb.Reset(new CSeqDB(m_DbName, seq_type));
292 
293  }
294 
297  m_DbInitialized = true;
298 }
299 
300 void
302 {
303  if (m_FilteringAlgorithmId <= 0 || m_SeqDb.Empty()) {
304  return;
305  }
306 
307  vector<int> supported_algorithms;
308  m_SeqDb->GetAvailableMaskAlgorithms(supported_algorithms);
309  if (find(supported_algorithms.begin(),
310  supported_algorithms.end(),
311  m_FilteringAlgorithmId) == supported_algorithms.end()) {
312  CNcbiOstrstream oss;
313  oss << "Masking algorithm ID " << m_FilteringAlgorithmId << " is "
314  << "not supported in " <<
315  (IsProtein() ? "protein" : "nucleotide") << " '"
316  << GetDatabaseName() << "' BLAST database";
317  string msg = CNcbiOstrstreamToString(oss);
318  NCBI_THROW(CBlastException, eInvalidOptions, msg);
319  }
320 }
321 
322 END_SCOPE(blast)
324 
325 /* @} */
ESubjectMaskingType
Define the possible subject masking types.
Definition: blast_def.h:235
@ eNoSubjMasking
Definition: blast_def.h:236
@ eSoftSubjMasking
Definition: blast_def.h:237
Defines BLAST error codes (user errors included)
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage: see the class documentation in ncbistre.hpp.
Definition: ncbistre.hpp:802
Blast Search Subject.
CSeqDBException.
Definition: seqdbcommon.hpp:73
CSeqDBGiList.
int GetNumGis() const
Get the number of GIs in the array.
int GetNumTaxIds() const
int GetNumSis() const
Get the number of Seq-ids in the array.
void GetPigList(vector< TPig > &pigs) const
void GetGiList(vector< TGi > &gis) const
Get the gi list.
int GetNumPigs() const
void GetSiList(vector< string > &sis) const
TODO Get the seqid list?
const SBlastSeqIdListInfo & GetListInfo()
set< TTaxId > & GetTaxIdsList()
bool Empty() const
Return false if there are elements present.
SeqDB ID list for performing boolean set operations.
CSeqDBNegativeList.
void AddTaxIds(const set< TTaxId > &tax_ids)
void AddSi(const string &si)
Add a new SeqId to the list.
void SetPigList(const vector< TPig > &new_list)
void SetListInfo(const SBlastSeqIdListInfo &list_info)
void ReserveSis(size_t n)
CSeqDB.
Definition: seqdb.hpp:161
void GetAvailableMaskAlgorithms(vector< int > &algorithms)
Get a list of algorithm IDs for which mask data exists.
Definition: seqdb.cpp:1227
ESeqType
Sequence types (eUnknown tries protein, then nucleotide).
Definition: seqdb.hpp:173
@ eNucleotide
Definition: seqdb.hpp:175
@ eProtein
Definition: seqdb.hpp:174
int GetMaskAlgorithmId(const string &algo_name) const
Get the numeric algorithm ID for a string.
Definition: seqdb.cpp:1232
#define false
Definition: bool.h:36
void SetEntrezQueryLimitation(const string &entrez_query)
Mutator for the entrez query.
string GetDatabaseName() const
Accessor for the database name.
void SetMoleculeType(EMoleculeType mol_type)
Mutator for the molecule type.
bool m_NeedsFilteringTranslation
void SetNegativeGiList(CSeqDBGiList *gilist)
Mutator for the negative gi list.
EMoleculeType GetMoleculeType() const
Accessor for the molecule type.
string m_EntrezQueryLimitation
entrez query
string m_FilteringAlgorithmString
filtering to apply to database sequences
int GetFilteringAlgorithm() const
Accessor for the filtering algorithm ID.
const TGiList GetNegativeGiListLimitation() const
CRef< CSeqDBGiList > m_GiList
gi list
const CRef< CSeqDBGiList > & GetNegativeGiList() const
Accessor for the negative gi list.
CRef< CSeqDB > GetSeqDb() const
Obtain a reference to the database.
CRef< CSeqDB > m_SeqDb
string GetFilteringAlgorithmKey() const
Accessor for the filtering algorithm key.
bool IsProtein() const
Determine whether this database contains protein sequences or not.
void x_TranslateFilteringAlgorithm() const
Translate string algorithm id to numeric id.
void SetDatabaseName(const string &dbname)
Mutator for the database name.
string m_DbName
database name
EMoleculeType m_MolType
molecule type
vector< TGi > TGiList
Define a list of gis.
static bool s_IsNumericId(const string &id)
CRef< CSeqDBGiList > m_NegativeGiList
negative gi list
ESubjectMaskingType m_MaskType
ESubjectMaskingType GetMaskType() const
void x_InitializeDb() const
Initialize the database.
void SetSeqDb(CRef< CSeqDB > seqdb)
Mutator for the seqdb.
const CRef< CSeqDBGiList > & GetGiList() const
Accessor for the gi list.
const TGiList GetGiListLimitation() const
CSearchDatabase(const string &dbname, EMoleculeType mol_type)
Simple constructor.
void x_ValidateMaskingAlgorithm() const
Validates that the specified filtering algorithm ID is supported by the specified BLAST database.
void SetFilteringAlgorithm(int filt_algorithm_id)
Temporary fix for backwards compatibility with other 6.0 SCs.
EMoleculeType
Molecule of the BLAST database.
string GetEntrezQueryLimitation() const
Accessor for the entrez query.
void SetGiList(CSeqDBGiList *gilist)
Mutator for the gi list.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static TNumeric StringToNumeric(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to a numeric value.
Definition: ncbistr.hpp:330
@ fConvErr_NoThrow
Do not throw an exception on error.
Definition: ncbistr.hpp:285
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
#define _ASSERT
Uniform BLAST Search Interface.
#define const
Definition: zconf.h:232
Modified on Fri Sep 20 14:58:05 2024 by modify_doxy.py rev. 669887