NCBI C++ ToolKit
uniform_search_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: uniform_search_unit_test.cpp 91986 2020-12-17 15:27:06Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Christiam Camacho
27  *
28  */
29 
30 /** @file uniform_search_unit_test.cpp
31  * Unit tests for the uniform search API
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/test_boost.hpp>
36 
40 
42 
43 // needed for objmgr dependent tests of query data interface
44 #include "test_objmgr.hpp"
45 #include "blast_test_util.hpp"
48 
49 // Object utils
51 
52 #include <util/random_gen.hpp>
53 
54 // SeqAlign comparison includes
55 #include "seqalign_cmp.hpp"
56 #include "seqalign_set_convert.hpp"
57 
58 #ifndef SKIP_DOXYGEN_PROCESSING
59 
60 using namespace std;
61 using namespace ncbi;
62 using namespace ncbi::objects;
63 using namespace ncbi::blast;
64 
// Ordering functor for CRef<CSearchMessage> handles: it compares the messages
// the handles refer to (via operator< on the dereferenced objects) instead of
// comparing the handles themselves. Both handles are dereferenced
// unconditionally, so neither may be empty.
// NOTE(review): the declaration line (listing line 65) is missing from this
// view; the tests below instantiate it as TQueryMessagesComparator - confirm.
66 {
67  bool operator() (const CRef<CSearchMessage>& a,
68  const CRef<CSearchMessage>& b) const {
69  return *a < *b;
70  }
71 };
72 
73 
/// Runs a protein search with two queries through the uniform search
/// interface obtained from @a factory and returns a copy of the result set.
///
/// @param factory  Source of the ISeqSearch object and of the eBlastp
///                 options handle used for the search.
/// @param impl     Not referenced by any of the lines visible in this listing.
/// @return A copy of the CSearchResultSet that Run() produced.
///
/// NOTE(review): listing lines 80 and 82 are missing from this view; they
/// presumably declare `subject` on top of the "ecoli.aa" database - confirm
/// against the original file.
74  static CSearchResultSet
75  RunMultipleProteinSearch(ISearchFactory& factory, const string& impl)
76  {
77  // Obtain the search components from the factory
78  CRef<ISeqSearch> uniform_search = factory.GetSeqSearch();
79  CRef<CBlastOptionsHandle> options = factory.GetOptions(eBlastp);
81  (new CSearchDatabase("ecoli.aa",
83 
84  // Set up the queries
85  TSeqLocVector queries;
86  CSeq_id query_id0(CSeq_id::e_Gi, 129295);
87  unique_ptr<SSeqLoc> sl0(CTestObjMgr::Instance().CreateSSeqLoc(query_id0));
88  queries.push_back(*sl0); // the vector stores its own copy of the SSeqLoc
89  CSeq_id query_id1(CSeq_id::e_Gi, 129296);
90  unique_ptr<SSeqLoc> sl1(CTestObjMgr::Instance().CreateSSeqLoc(query_id1));
91  queries.push_back(*sl1);
92  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(queries));
93 
   // Search parameters applied to the options handle before it is attached
94  options->SetEvalueThreshold(1.0);
95  options->SetHitlistSize(25);
96 
97  // Configure and run the uniform search object
98  uniform_search->SetQueryFactory(query_factory);
99  uniform_search->SetSubject(subject);
100  uniform_search->SetOptions(options);
   // Run() hands back a handle; dereference it and copy the result set out
101  CSearchResultSet retval = *uniform_search->Run();
102  return retval;
103  }
104 
105 BOOST_AUTO_TEST_SUITE(uniform_search);
106 
/// Messages that differ only in severity must sort by severity.
/// Four messages sharing error id 1 and text "test" are inserted in the order
/// Fatal, Info, Error, Warning, sorted with TQueryMessagesComparator, and each
/// adjacent pair is then required to have strictly increasing severity.
107 BOOST_AUTO_TEST_CASE(SortSearchMessages_DifferentSeverity) {
108  TQueryMessages messages;
109  messages.push_back(CRef<CSearchMessage>
110  (new CSearchMessage(eBlastSevFatal, 1, "test")));
111  messages.push_back(CRef<CSearchMessage>
112  (new CSearchMessage(eBlastSevInfo, 1, "test")));
113  messages.push_back(CRef<CSearchMessage>
114  (new CSearchMessage(eBlastSevError, 1, "test")));
115  messages.push_back(CRef<CSearchMessage>
116  (new CSearchMessage(eBlastSevWarning, 1, "test")));
117 
118  sort(messages.begin(), messages.end(), TQueryMessagesComparator());
119 
   // size() - 1 cannot wrap around here: exactly four messages were pushed
120  for (size_t i = 0; i < messages.size() - 1; i++) {
121  BOOST_REQUIRE(messages[i]->GetSeverity() <
122  messages[i+1]->GetSeverity());
123  }
124 }
125 
/// Messages that differ in error id must sort by error id.
/// Ten messages are built with an error id taken from CRandom::GetRand() and
/// the text "test"; after sorting with TQueryMessagesComparator each adjacent
/// pair is required to have strictly increasing error ids.
/// NOTE(review): listing line 131 (the start of the CSearchMessage constructor
/// call, including its severity argument) is missing from this view.
/// NOTE(review): the strict '<' check relies on the ten generated ids being
/// distinct - confirm that the generator's sequence guarantees this.
126 BOOST_AUTO_TEST_CASE(SortSearchMessages_DifferentErrorId) {
127  CRandom random_gen;
128  TQueryMessages messages;
129  for (int i = 0; i < 10; i++) {
130  messages.push_back(CRef<CSearchMessage>
132  random_gen.GetRand(),
133  "test")));
134  }
135 
136  sort(messages.begin(), messages.end(), TQueryMessagesComparator());
137 
   // size() - 1 cannot wrap around here: ten messages were pushed above
138  for (size_t i = 0; i < messages.size() - 1; i++) {
139  BOOST_REQUIRE(messages[i]->GetErrorId() <
140  messages[i+1]->GetErrorId());
141  }
142 }
143 
/// Messages that differ only in their text must sort by text.
/// Ten messages with severity eBlastSevInfo and error id 2 get the text
/// "test" followed by the decimal form of a CRandom::GetRand() value; after
/// sorting with TQueryMessagesComparator each adjacent pair is required to
/// satisfy GetMessage()[i] < GetMessage()[i+1].
/// NOTE(review): the strict '<' check relies on the ten generated texts being
/// distinct - confirm that the generator's sequence guarantees this.
144 BOOST_AUTO_TEST_CASE(SortSearchMessages_DifferentMessage) {
145  CRandom random_gen;
146  TQueryMessages messages;
147 
148  for (int i = 0; i < 10; i++) {
149  string msg("test");
150  msg += NStr::IntToString(random_gen.GetRand());
151  messages.push_back(CRef<CSearchMessage>
152  (new CSearchMessage(eBlastSevInfo, 2, msg)));
153  }
154 
155  sort(messages.begin(), messages.end(), TQueryMessagesComparator());
156 
   // Presumably a lexicographic string comparison, not a numeric one on the
   // random suffix - verify against GetMessage()'s return type.
157  for (size_t i = 0; i < messages.size() - 1; i++) {
158  BOOST_REQUIRE(messages[i]->GetMessage() <
159  messages[i+1]->GetMessage());
160  }
161 }
162 
/// Checks operator< on CSearchMessage one field at a time against a baseline
/// message m1 (eBlastSevWarning, error id 2, text "hello").
163 BOOST_AUTO_TEST_CASE(PartialOrderSearchMessages) {
164  const EBlastSeverity kSev = eBlastSevWarning;
165  const int kErrorId = 2;
166  const string kMsg("hello");
167 
168  CSearchMessage m1(kSev, kErrorId, kMsg);
169  CSearchMessage m1_copy(kSev, kErrorId, kMsg);
170  CSearchMessage m2(kSev, kErrorId+2, kMsg);
171 
172  BOOST_REQUIRE(!(m1 < m1_copy)); // a message is never less than an identical one
173  BOOST_REQUIRE(m1 < m2); // only the error id differs (2 vs 4)
174 
175  CSearchMessage m3(eBlastSevFatal, kErrorId, kMsg);
176  BOOST_REQUIRE(m1 < m3); // only the severity differs (warning vs fatal)
177 
178  CSearchMessage m4(kSev, kErrorId, string(kMsg + " world"));
179  BOOST_REQUIRE(m1 < m4); // only the text differs ("hello" vs "hello world")
180 }
181 
/// Builds a CSearchResultSet for three queries from Seq-align-sets read out of
/// data/empty_result_set.asn and checks that only the middle query reports
/// having no alignments.
/// Throws std::runtime_error if the input file cannot be opened.
182 BOOST_AUTO_TEST_CASE(EmptyAlignmentInCSearchResultSet) {
183  const string kFname("data/empty_result_set.asn");
184  const size_t kNumQueries = 3;
185  const int gis[kNumQueries] = { 555, 115988564, 3090 };
186 
187  CSearchResultSet::TQueryIdVector queries(kNumQueries);
188  TSeqAlignVector alignments(kNumQueries);
189  TSearchMessages messages;
190  messages.resize(kNumQueries); // one entry per query
191 
192  ifstream input(kFname.c_str());
193  if ( !input ) {
194  throw runtime_error("Failed to read " + kFname);
195  }
196 
   // Read kNumQueries consecutive Seq-align-sets from the same stream and
   // pair each with a GI-based query id taken from gis[]
197  for (size_t i = 0; i < kNumQueries; i++) {
198  alignments[i].Reset(new CSeq_align_set);
199  input >> MSerial_AsnText >> *alignments[i];
200  queries[i].Reset(new CSeq_id(CSeq_id::e_Gi, gis[i]));
201  }
202 
203  CSearchResultSet results(queries, alignments, messages);
204  BOOST_REQUIRE_EQUAL(kNumQueries, results.GetNumResults());
205 
   // Only the second query is expected to have no alignments
206  BOOST_REQUIRE(results[0].HasAlignments());
207  BOOST_REQUIRE(!results[1].HasAlignments());
208  BOOST_REQUIRE(results[2].HasAlignments());
209 }
210 
/// Checks operator== and operator!= on CSearchMessage: two messages built from
/// the same severity, error id and text compare equal, and changing only the
/// error id makes them compare unequal.
211 BOOST_AUTO_TEST_CASE(EqualitySearchMessages) {
212  const EBlastSeverity kSev = eBlastSevWarning;
213  const int kErrorId = 2;
214  const string kMsg("hello");
215  CSearchMessage m1(kSev, kErrorId, kMsg);
216  CSearchMessage m2(kSev, kErrorId, kMsg);
217 
218  BOOST_REQUIRE(m1 == m2);
219 
220  CSearchMessage m3(kSev, kErrorId+1, kMsg); // differs from m1 in error id only
221  BOOST_REQUIRE(m1 != m3);
222 }
223 
/// Runs RunMultipleProteinSearch() once with a CLocalSearchFactory and once
/// with a CRemoteSearchFactory and requires each run to return a result set
/// containing at least one result.
/// NOTE(review): the remote run presumably needs network access to a BLAST
/// service - confirm before running this test offline.
224 BOOST_AUTO_TEST_CASE(MultipleProteinSearch) {
225  CLocalSearchFactory local_factory;
226  CSearchResultSet local_results =
227  RunMultipleProteinSearch(local_factory, "Local");
228  BOOST_REQUIRE(local_results.GetNumResults() > 0);
229 
230  CRemoteSearchFactory remote_factory;
231  CSearchResultSet remote_results =
232  RunMultipleProteinSearch(remote_factory, "Remote");
233  BOOST_REQUIRE(remote_results.GetNumResults() > 0);
234 }
235 
/// Once a GI list has been set on the database object, setting a negative GI
/// list on the same object must throw CBlastException.
/// NOTE(review): listing line 241, which declares `db`, is missing from this
/// view; it is presumably a CSearchDatabase - confirm.
236 BOOST_AUTO_TEST_CASE(SearchDatabase_RestrictionGiList)
237 {
238  CSeqDBGiList gis;
239  gis.AddGi(GI_CONST(1));
240  gis.AddGi(GI_CONST(5));
242  db.SetGiList(&gis);
243  BOOST_REQUIRE_THROW(db.SetNegativeGiList(&gis), CBlastException);
244 }
245 
/// Once a negative GI list has been set on the database object, setting a
/// (positive) GI list on the same object must throw CBlastException.
/// NOTE(review): listing line 251, which declares `db`, is missing from this
/// view; it is presumably a CSearchDatabase - confirm.
246 BOOST_AUTO_TEST_CASE(SearchDatabase_Restriction)
247 {
248  CSeqDBGiList gis;
249  gis.AddGi(GI_CONST(1));
250  gis.AddGi(GI_CONST(5));
252  db.SetNegativeGiList(&gis);
253  BOOST_REQUIRE_THROW(db.SetGiList(&gis), CBlastException);
254 }
255 
257 
258 #endif /* SKIP_DOXYGEN_PROCESSING */
User-defined methods of the data storage class.
EBlastSeverity
Blast error message severities.
Definition: blast_message.h:55
@ eBlastSevError
Definition: blast_message.h:58
@ eBlastSevInfo
Definition: blast_message.h:56
@ eBlastSevFatal
Definition: blast_message.h:59
@ eBlastSevWarning
Definition: blast_message.h:57
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
@ eBlastp
Protein-Protein.
Definition: blast_types.hpp:59
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
Defines BLAST error codes (user errors included)
Factory for CLocalSearch.
NCBI C++ Object Manager dependent implementation of IQueryFactory.
CRandom::
Definition: random_gen.hpp:66
CRef –.
Definition: ncbiobj.hpp:618
Factory for CRemoteSearch.
Blast Search Subject.
Error or Warning Message from search.
Search Results for All Queries.
CSeqDBGiList.
void AddGi(TGi gi)
Add a new GI to the list.
static CTestObjMgr & Instance()
Definition: test_objmgr.cpp:69
Factory for ISearch.
Class for the messages for an individual query sequence.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
void SetEvalueThreshold(double eval)
Sets EvalueThreshold.
void SetNegativeGiList(CSeqDBGiList *gilist)
Mutator for the negative gi list.
virtual CRef< ISeqSearch > GetSeqSearch()=0
Create a new search object with a sequence-based query.
virtual CRef< CBlastOptionsHandle > GetOptions(EProgram program)=0
Create a CBlastOptionsHandle.
vector< CConstRef< objects::CSeq_id > > TQueryIdVector
List of query ids.
void SetHitlistSize(int s)
Sets HitlistSize.
size_type GetNumResults() const
Return the number of results contained by this object.
void SetGiList(CSeqDBGiList *gilist)
Mutator for the gi list.
@ eBlastDbIsProtein
protein
#define GI_CONST(gi)
Definition: ncbimisc.hpp:1087
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
TValue GetRand(void)
Get the next random number in the interval [0..GetMax()] (inclusive)
Definition: random_gen.hpp:238
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5086
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
static int input()
int i
Implementation of the uniform BLAST search interface for searching locally installed BLAST databases.
constexpr auto sort(_Init &&init)
Magic spell ;-) needed for some weird compilers... very empiric.
unsigned int a
Definition: ncbi_localip.c:102
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Utilities to develop and debug unit tests for BLAST.
static int * results[]
Remote implementation of the uniform BLAST search interface.
API to compare CSeq-aligns produced by BLAST.
Converts a Seq-align-set into a neutral seqalign for use with the CSeqAlignCmp class.
Defines BLAST database access classes.
Defines exception class and several constants for SeqDB.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
static string subject
Utility stuff for more convenient using of Boost.Test library.
Uniform BLAST Search Interface.
BOOST_AUTO_TEST_SUITE(uniform_search)
static CSearchResultSet RunMultipleProteinSearch(ISearchFactory &factory, const string &impl)
BOOST_AUTO_TEST_CASE(SortSearchMessages_DifferentSeverity)
Modified on Wed Sep 04 15:04:41 2024 by modify_doxy.py rev. 669887