NCBI C++ ToolKit
prelimsearch_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: prelimsearch_unit_test.cpp 91997 2020-12-17 15:27:19Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  * File Description:
29  * Unit test module for the preliminary stage of the BLAST search.
30  *
31  * ===========================================================================
32  */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
35 
36 #include <algo/blast/api/uniform_search.hpp> // for CSearchDatabase
41 #include "blast_test_util.hpp"
42 #include "test_objmgr.hpp"
43 
44 using namespace std;
45 using namespace ncbi;
46 using namespace ncbi::objects;
47 using namespace ncbi::blast;
48 
50  BlastHSPStream* hsp_stream,
51  CConstRef<CBlastOptions> options) {
52 
// Validates the preliminary-search output of the short protein query tests.
// `blaster` converts `hsp_stream` into a BlastHSPResults structure, which is
// then checked field by field. `options` is accepted but not read here.
53  CBlastHSPResults hsp_results
54  (blaster.ComputeBlastHSPResults(hsp_stream));
55 
// One query is expected, and its hit list must hold 24 HSP lists.
56  BOOST_REQUIRE_EQUAL((Int4)1, hsp_results->num_queries);
57  BOOST_REQUIRE(hsp_results->hitlist_array[0]);
58  BOOST_REQUIRE_EQUAL((Int4)24,
59  hsp_results->hitlist_array[0]->hsplist_count);
60  BOOST_REQUIRE(hsp_results->hitlist_array[0]->hsplist_array[0]);
// Only the first HSP list is inspected: oid 0 with exactly one HSP.
61  BlastHSPList* hsp_list =
62  hsp_results->hitlist_array[0]->hsplist_array[0];
63  BOOST_REQUIRE(hsp_list);
64  BOOST_REQUIRE_EQUAL((Int4)0, hsp_list[0].oid);
65  BOOST_REQUIRE_EQUAL((Int4)1, hsp_list[0].hspcnt);
66  BOOST_REQUIRE(hsp_list[0].hsp_array[0]);
// That HSP must score 103 and cover 0..21 on the query.
67  BOOST_REQUIRE_EQUAL((Int4)103, hsp_list[0].hsp_array[0]->score);
68  BOOST_REQUIRE_EQUAL((Int4)0, hsp_list[0].hsp_array[0]->query.offset);
69  BOOST_REQUIRE_EQUAL((Int4)21, hsp_list[0].hsp_array[0]->query.end);
// Subject side of the same HSP: both the start and the end are verified.
// NOTE(review): the expected end of 21 assumes the alignment is ungapped, so
// the subject extent mirrors the query extent - confirm against the test db.
70  BOOST_REQUIRE_EQUAL((Int4)0,
71  hsp_list[0].hsp_array[0]->subject.offset);
72  BOOST_REQUIRE_EQUAL((Int4)21,
73  hsp_list[0].hsp_array[0]->subject.end);
74 
75 }
76 
77 BOOST_AUTO_TEST_SUITE(prelimsearch)
78 
// Runs the preliminary BLAST stage for a single query (GI 1786182) with the
// default thread configuration and validates the resulting HSP stream.
// NOTE(review): the declarations of `q` and `dbinfo`, the initializer of
// `options_handle`, and the callee name of the final statement are not
// present in this listing - confirm against the original source file.
79 BOOST_AUTO_TEST_CASE(ShortProteinSearch) {
80  CSeq_id id(CSeq_id::e_Gi, 1786182);
// Wrap the query set in an object-manager based query factory.
82  q.AddQuery(CTestObjMgr::Instance().CreateBlastSearchQuery(id));
83  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(q));
84 
85  // Create the options
86  CRef<CBlastOptionsHandle> options_handle
88  CRef<CBlastOptions> options(&options_handle->SetOptions());
89  options->SetSegFiltering(false); // allow hits to be found
90 
91  // Create the database description (by default will use CSeqDB)
93 
// No thread count is set, so the search must report one thread and must not
// consider itself multi-threaded.
94  CBlastPrelimSearch prelim_search(query_factory, options, dbinfo);
95  BOOST_REQUIRE(prelim_search.GetNumberOfThreads() == 1);
96  BOOST_REQUIRE(prelim_search.IsMultiThreaded() == false);
97 
// Run the search; the returned internal data must be non-null.
98  CRef<SInternalData> results = prelim_search.Run();
99  BOOST_REQUIRE(results.GetPointer() != 0);
100 
// Both the HSP stream and the diagnostics must have been populated.
101  BOOST_REQUIRE(results->m_HspStream != 0);
102  BOOST_REQUIRE(results->m_Diagnostics != 0);
103 
// Pass the searcher, the raw HSP stream pointer and the options on for
// detailed checking of the hits.
105  (prelim_search, results->m_HspStream->GetPointer(), options);
106 }
107 
// Same scenario as the single-threaded short protein search (GI 1786182),
// but with the preliminary search configured to use two threads.
// NOTE(review): the declarations of `q` and `dbinfo`, the initializer of
// `options_handle`, and the callee name of the final statement are not
// present in this listing - confirm against the original source file.
108 BOOST_AUTO_TEST_CASE(ShortProteinSearchMT) {
109  CSeq_id id(CSeq_id::e_Gi, 1786182);
// Wrap the query set in an object-manager based query factory.
111  q.AddQuery(CTestObjMgr::Instance().CreateBlastSearchQuery(id));
112  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(q));
113 
114  // Create the options
115  CRef<CBlastOptionsHandle> options_handle
117  CRef<CBlastOptions> options(&options_handle->SetOptions());
118  options->SetSegFiltering(false); // allow hits to be found
119 
120  // Create the database description (by default will use CSeqDB)
122 
// Request two threads and confirm that the accessor and the multi-threaded
// flag both reflect the setting before the search runs.
123  CBlastPrelimSearch prelim_search(query_factory, options, dbinfo);
124  prelim_search.SetNumberOfThreads(2);
125  BOOST_REQUIRE(prelim_search.GetNumberOfThreads() == 2);
126  BOOST_REQUIRE(prelim_search.IsMultiThreaded() == true);
127 
// Run the search; the returned internal data must be non-null.
128  CRef<SInternalData> results = prelim_search.Run();
129  BOOST_REQUIRE(results.GetPointer() != 0);
130 
// Both the HSP stream and the diagnostics must have been populated.
131  BOOST_REQUIRE(results->m_HspStream != 0);
132  BOOST_REQUIRE(results->m_Diagnostics != 0);
133 
// The same hit expectations as the single-threaded test are applied to the
// multi-threaded results.
135  (prelim_search, results->m_HspStream->GetPointer(), options);
136 }
137 
138 // This tests a problem that occurred when a chunk consisted of only N's, so that
139 // Karlin-Altschul statistics were not calculated. This is a test for SB-546.
// NOTE(review): the declaration of `kStrand` and the initializer of
// `options_handle` are not present in this listing - confirm against the
// original source file.
140 BOOST_AUTO_TEST_CASE(SplitNucleotideQuery) {
// Query: range 0..5000000 of GI 224384753, passed as a one-element
// TSeqLocVector to the query factory.
141  CSeq_id q_id(CSeq_id::e_Gi, 224384753);
142  const TSeqRange kRange(0, 5000000);
144  unique_ptr<SSeqLoc> q_ssl(CTestObjMgr::Instance().CreateSSeqLoc(q_id, kRange, kStrand));
145  TSeqLocVector q_tsl;
146  q_tsl.push_back(*q_ssl);
147  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(q_tsl));
148 
149 
// Subject: the nucleotide database "data/nt.41646578".
150  CSearchDatabase dbinfo("data/nt.41646578", CSearchDatabase::eBlastDbIsNucleotide);
151 
152  // Create the options
153  CRef<CBlastOptionsHandle> options_handle
155  CRef<CBlastOptions> options(&options_handle->SetOptions());
156 
157  // Setting the chunk size low means we hit an area of all N's pretty quickly.
// The CHUNK_SIZE environment variable is set through a scoped
// CAutoEnvironmentVariable object that lives until the end of the test body.
158  CAutoEnvironmentVariable tmp_env("CHUNK_SIZE", "40000");
159 
160  CBlastPrelimSearch prelim_search(query_factory, options, dbinfo);
161 
162  // The main thing here is that an exception is NOT thrown.
// Beyond not throwing, only the presence of the result objects is checked;
// no individual hit is inspected.
163  CRef<SInternalData> results = prelim_search.Run();
164  BOOST_REQUIRE(results.GetPointer() != 0);
165  BOOST_REQUIRE(results->m_HspStream != 0);
166  BOOST_REQUIRE(results->m_Diagnostics != 0);
167 }
168 
// Runs the preliminary search through the Run() overload that fills a vector
// of CStd_seg lists, then checks the first segment produced for the query.
// NOTE(review): the declaration of `kStrand` and the initializer of
// `options_handle` (which selects the program) are not present in this
// listing - confirm against the original source file.
169 BOOST_AUTO_TEST_CASE(BuildCStd_seg_blastn) {
// Query: range 54..560 of GI 41646578, passed as a one-element TSeqLocVector.
170  CSeq_id q_id(CSeq_id::e_Gi, 41646578);
171  const TSeqRange kRange(54, 560);
173  unique_ptr<SSeqLoc> q_ssl(CTestObjMgr::Instance().CreateSSeqLoc(q_id, kRange, kStrand));
174  TSeqLocVector q_tsl;
175  q_tsl.push_back(*q_ssl);
176  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(q_tsl));
177 
178 
// Subject: the nucleotide database "data/nt.41646578".
179  CSearchDatabase dbinfo("data/nt.41646578", CSearchDatabase::eBlastDbIsNucleotide);
180 
181  // Create the options
182  CRef<CBlastOptionsHandle> options_handle
184  CRef<CBlastOptions> options(&options_handle->SetOptions());
185 
186  CBlastPrelimSearch prelim_search(query_factory, options, dbinfo);
187 
// Output container: one list of segments per outer vector element.
188  std::vector<std::list<CRef<CStd_seg> > > l;
189  prelim_search.Run(l);
190 
// Exactly one list is expected, holding at least one segment.
191  BOOST_REQUIRE(l.size() == 1);
192  BOOST_REQUIRE(l[0].size() >= 1);
// First segment: row 0 must span 0..506 and row 1 must span 54..560, the
// same length as the 54..560 query range.
// NOTE(review): which row is the query and which the subject is not stated
// here - confirm.
193  CRef<CStd_seg> & seg = l[0].front();
194  BOOST_REQUIRE(seg->GetSeqStart(0) == 0);
195  BOOST_REQUIRE(seg->GetSeqStop(0) == 506);
196  BOOST_REQUIRE(seg->GetSeqStart(1) == 54);
197  BOOST_REQUIRE(seg->GetSeqStop(1) == 560);
// The first id must render as "41646578", and both locations must be on the
// plus strand.
198  const vector<CRef<CSeq_id> > & id = seg->GetIds();
199  BOOST_REQUIRE(id[0]->GetSeqIdString() == "41646578");
200  const vector<CRef<CSeq_loc> > & loc = seg->GetLoc();
201  BOOST_REQUIRE(loc[0]->GetStrand() == eNa_strand_plus);
202  BOOST_REQUIRE(loc[1]->GetStrand() == eNa_strand_plus);
203 }
204 
205 
// Counterpart of the blastn CStd_seg test: the visible inputs are the same,
// but more than one segment is required in the result list.
// NOTE(review): the declaration of `kStrand` and the initializer of
// `options_handle` (which selects the program) are not present in this
// listing - confirm against the original source file.
206 BOOST_AUTO_TEST_CASE(BuildCStd_seg_tblastx) {
// Query: range 54..560 of GI 41646578, passed as a one-element TSeqLocVector.
207  CSeq_id q_id(CSeq_id::e_Gi, 41646578);
208  const TSeqRange kRange(54, 560);
210  unique_ptr<SSeqLoc> q_ssl(CTestObjMgr::Instance().CreateSSeqLoc(q_id, kRange, kStrand));
211  TSeqLocVector q_tsl;
212  q_tsl.push_back(*q_ssl);
213  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(q_tsl));
214 
215 
// Subject: the nucleotide database "data/nt.41646578".
216  CSearchDatabase dbinfo("data/nt.41646578", CSearchDatabase::eBlastDbIsNucleotide);
217 
218  // Create the options
219  CRef<CBlastOptionsHandle> options_handle
221  CRef<CBlastOptions> options(&options_handle->SetOptions());
222 
223  CBlastPrelimSearch prelim_search(query_factory, options, dbinfo);
224 
// Output container: one list of segments per outer vector element.
225  std::vector<std::list<CRef<CStd_seg> > > l;
226  prelim_search.Run(l);
227 
// Exactly one list is expected, and it must hold more than one segment
// (strictly greater than one, unlike the blastn test's ">= 1").
228  BOOST_REQUIRE(l.size() == 1);
229  BOOST_REQUIRE(l[0].size() > 1);
// First segment: row 0 must span 0..506 and row 1 must span 54..560.
// NOTE(review): which row is the query and which the subject is not stated
// here - confirm.
230  CRef<CStd_seg> & seg = l[0].front();
231  BOOST_REQUIRE(seg->GetSeqStart(0) == 0);
232  BOOST_REQUIRE(seg->GetSeqStop(0) == 506);
233  BOOST_REQUIRE(seg->GetSeqStart(1) == 54);
234  BOOST_REQUIRE(seg->GetSeqStop(1) == 560);
// The first id must render as "41646578", and both locations must be on the
// plus strand.
235  const vector<CRef<CSeq_id> > & id = seg->GetIds();
236  BOOST_REQUIRE(id[0]->GetSeqIdString() == "41646578");
237  const vector<CRef<CSeq_loc> > & loc = seg->GetLoc();
238  BOOST_REQUIRE(loc[0]->GetStrand() == eNa_strand_plus);
239  BOOST_REQUIRE(loc[1]->GetStrand() == eNa_strand_plus);
240 }
241 
242 
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
@ eTblastx
Translated nucl-Translated nucl.
Definition: blast_types.hpp:62
@ eBlastp
Protein-Protein.
Definition: blast_types.hpp:59
@ eMegablast
Nucl-Nucl (traditional megablast)
Definition: blast_types.hpp:65
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
CAutoEnvironmentVariable –.
Definition: ncbienv.hpp:179
Wrapper class for BlastHSPResults .
Definition: blast_aux.hpp:343
Search class to perform the preliminary stage of the BLAST search.
Query Vector.
Definition: sseqloc.hpp:276
void AddQuery(CRef< CBlastSearchQuery > q)
Add a query to the set.
Definition: sseqloc.hpp:293
NCBI C++ Object Manager dependant implementation of IQueryFactory.
Blast Search Subject.
static CTestObjMgr & Instance()
Definition: test_objmgr.cpp:69
string GetSeqIdString(const CSeq_id &id)
Definition: compartp.cpp:100
size_t GetNumberOfThreads(void) const
Accessor for the number of threads to use.
CRef< SInternalData > Run()
Borrow the internal data and results.
bool IsMultiThreaded(void) const
Returns true if more than 1 thread is specified.
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
virtual void SetNumberOfThreads(size_t nthreads)
@inheritDoc
CRef< TBlastDiagnostics > m_Diagnostics
Diagnostic output from preliminary and traceback stages.
void SetSegFiltering(bool val=true)
CRef< TBlastHSPStream > m_HspStream
HSP output of the preliminary stage goes here.
BlastHSPResults * ComputeBlastHSPResults(BlastHSPStream *stream, Uint4 max_num_hsps=0, bool *rm_hsps=NULL, vector< bool > *rm_hsps_info=NULL) const
Return HSPs in a structure other than the HSPStream? Provide conversion? How to combine this with CBl...
@ eBlastDbIsNucleotide
nucleotide
@ eBlastDbIsProtein
protein
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:998
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
const string kStrand
const struct ncbi::grid::netcache::search::fields::SIZE size
Magic spell ;-) needed for some weird compilers... very empirical.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Utilities to develop and debug unit tests for BLAST.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
void x_ValidateResultsForShortProteinSearch(CBlastPrelimSearch &blaster, BlastHSPStream *hsp_stream, CConstRef< CBlastOptions > options)
BOOST_AUTO_TEST_CASE(ShortProteinSearch)
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
Implementation of the BlastSeqSrc interface using the C++ BLAST databases API.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
The structure to hold all HSPs for a given sequence after the gapped alignment.
Definition: blast_hits.h:153
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Definition: blast_hits.h:185
Int4 num_queries
Number of query sequences.
Definition: blast_hits.h:184
Default implementation of BlastHSPStream.
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Definition: blast_hits.h:176
Int4 hsplist_count
Filled size of the HSP lists array.
Definition: blast_hits.h:170
static string subject
static string query
Utility stuff for more convenient using of Boost.Test library.
Uniform BLAST Search Interface.
Modified on Wed May 08 12:06:18 2024 by modify_doxy.py rev. 669887