NCBI C++ ToolKit
search_strategy_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: search_strategy_unit_test.cpp 92014 2020-12-17 15:27:35Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Tom Madden
27 *
28 * File Description:
29 * Unit test module to test search_strategy.cpp
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
46 
47 #include "test_objmgr.hpp"
48 #include <serial/serial.hpp>
49 #include <serial/objostr.hpp>
50 #include <serial/exception.hpp>
51 #include <util/range.hpp>
52 
53 
54 using namespace std;
55 using namespace ncbi;
56 using namespace ncbi::objects;
57 using namespace ncbi::blast;
58 
59 BOOST_AUTO_TEST_SUITE(search_strategy)
60 
62 {
63  CRef<CBlast4_request> empty_request(new CBlast4_request);
64  BOOST_REQUIRE_THROW(CImportStrategy import_strat(empty_request), CBlastException);
65 }
66 
// Imports the saved search strategy in data/ss.asn and checks the reported
// service, program, task and DB filtering ID, the query representation
// (a Seq-loc list, not a PSSM) and a few option values (hit list size,
// culling limit, match reward, mismatch penalty).
67 BOOST_AUTO_TEST_CASE(testMegablast)
68 {
69  const char* fname = "data/ss.asn";
70  ifstream in(fname);
    // Stop here if the test data file could not be opened.
71  BOOST_REQUIRE(in);
    // NOTE(review): original line 72 is missing from this listing; `request`
    // is presumably declared there from `in` -- confirm against the repository.
73  CImportStrategy import_strat(request);
74  BOOST_REQUIRE(import_strat.GetService() == "megablast");
75  BOOST_REQUIRE(import_strat.GetProgram() == "blastn");
76  BOOST_REQUIRE(import_strat.GetTask() == "megablast");
77  BOOST_REQUIRE(import_strat.GetDBFilteringID() == 40);
78 
    // NOTE(review): original line 79 (presumably the declaration of `query`)
    // is missing from this listing.
80  BOOST_REQUIRE(query->IsPssm() == false);
81  BOOST_REQUIRE(query->IsSeq_loc_list() == true);
82 
83  CRef<blast::CBlastOptionsHandle> opts_handle = import_strat.GetOptionsHandle();
84  BOOST_REQUIRE_EQUAL(opts_handle->GetHitlistSize(), 500);
85  BOOST_REQUIRE_EQUAL(opts_handle->GetCullingLimit(), 0);
    // NOTE(review): the dynamic_cast result is dereferenced without a null
    // check; the PSI-BLAST tests in this file BOOST_REQUIRE the pointer first.
86  CBlastNucleotideOptionsHandle* blastn_opts = dynamic_cast<CBlastNucleotideOptionsHandle*> (&*opts_handle);
87  BOOST_REQUIRE_EQUAL(blastn_opts->GetMatchReward(), 1);
88  BOOST_REQUIRE_EQUAL(blastn_opts->GetMismatchPenalty(), -2);
89 }
90 
92 {
93  const char* fname = "data/ss.blastp.asn";
94  ifstream in(fname);
95  BOOST_REQUIRE(in);
97  CImportStrategy import_strat(request);
98  BOOST_REQUIRE(import_strat.GetService() == "plain");
99  BOOST_REQUIRE(import_strat.GetProgram() == "blastp");
100 
102  BOOST_REQUIRE(query->IsPssm() == false);
103  BOOST_REQUIRE(query->IsSeq_loc_list() == false);
104  const CBioseq_set& bss = query->GetBioseq_set();
105  list<CRef<CSeq_entry> > seq_entry = bss.GetSeq_set();
106  BOOST_REQUIRE(seq_entry.front()->GetSeq().GetLength() == 232);
107 
109  BOOST_REQUIRE(subject->IsDatabase() == true);
110  BOOST_REQUIRE(subject->GetDatabase() == "refseq_protein");
111 
112  CRef<blast::CBlastOptionsHandle> opts_handle = import_strat.GetOptionsHandle();
113  BOOST_REQUIRE_EQUAL(opts_handle->GetHitlistSize(), 500);
114  BOOST_REQUIRE_EQUAL(opts_handle->GetCullingLimit(), 0);
115  CBlastAdvancedProteinOptionsHandle* blastp_opts = dynamic_cast<CBlastAdvancedProteinOptionsHandle*> (&*opts_handle);
116  BOOST_REQUIRE(!strcmp("BLOSUM62", blastp_opts->GetMatrixName()));
117  BOOST_REQUIRE_EQUAL(3, blastp_opts->GetWordSize());
118 
119 }
120 
// Imports the saved strategy in data/ss.bl2seq.asn and checks that it is a
// "plain" blastn search whose query is a Bioseq-set (first entry of length
// 2772) and whose subject is a single sequence of length 9180 rather than a
// database, then checks a few option values.
121 BOOST_AUTO_TEST_CASE(testBlastnBl2seq)
122 {
123  const char* fname = "data/ss.bl2seq.asn";
124  ifstream in(fname);
     // Stop here if the test data file could not be opened.
125  BOOST_REQUIRE(in);
     // NOTE(review): original line 126 is missing from this listing; `request`
     // is presumably declared there -- confirm against the repository.
127  CImportStrategy import_strat(request);
128  BOOST_REQUIRE(import_strat.GetService() == "plain");
129  BOOST_REQUIRE(import_strat.GetProgram() == "blastn");
130  BOOST_REQUIRE(import_strat.GetTask() == "blastn");
131 
     // NOTE(review): original line 132 (presumably the declaration of `query`)
     // is missing from this listing.
133  BOOST_REQUIRE(query->IsPssm() == false);
134  BOOST_REQUIRE(query->IsSeq_loc_list() == false);
     // Neither PSSM nor Seq-loc list was selected, so the Bioseq-set variant
     // is read next.
135  const CBioseq_set& bss = query->GetBioseq_set();
136  list<CRef<CSeq_entry> > seq_entry = bss.GetSeq_set();
137  BOOST_REQUIRE(seq_entry.front()->GetSeq().GetLength() == 2772);
138 
     // NOTE(review): original line 139 (presumably the declaration of
     // `subject`) is missing from this listing.
140  BOOST_REQUIRE(subject->IsDatabase() == false);
141  list<CRef< CBioseq> > subject_list = subject->GetSequences();
142  BOOST_REQUIRE(subject_list.size() == 1);
143  BOOST_REQUIRE(subject_list.front()->GetLength() == 9180);
144 
145  CRef<blast::CBlastOptionsHandle> opts_handle = import_strat.GetOptionsHandle();
146  BOOST_REQUIRE_EQUAL(opts_handle->GetHitlistSize(), 500);
147  BOOST_REQUIRE_EQUAL(opts_handle->GetCullingLimit(), 0);
     // NOTE(review): the dynamic_cast result is dereferenced without a null
     // check; the PSI-BLAST tests in this file BOOST_REQUIRE the pointer first.
148  CBlastNucleotideOptionsHandle* blastn_opts = dynamic_cast<CBlastNucleotideOptionsHandle*> (&*opts_handle);
149  BOOST_REQUIRE_EQUAL(blastn_opts->GetMatchReward(), 2);
150  BOOST_REQUIRE_EQUAL(blastn_opts->GetMismatchPenalty(), -3);
151 }
152 
// Imports data/webpsi.1stround.ss.asn and checks that it is reported as a
// "psiblast" task with service "plain" and program "blastp", that the query
// is a Bioseq-set (first entry of length 320), that the subject is database
// "nr", which option groups are present, and selected option values,
// including a PSI-BLAST iteration count of 1.
153 BOOST_AUTO_TEST_CASE(LoadWebPsiBlastSearchStrategyAfterRound1)
154 {
155  const char* fname = "data/webpsi.1stround.ss.asn";
156  ifstream in(fname);
     // NOTE(review): original line 157 is missing from this listing; `request`
     // is presumably declared there. Unlike testMegablast, no BOOST_REQUIRE(in)
     // is visible here, so a missing data file does not appear to be reported
     // at this point -- confirm.
158  CImportStrategy import_strat(request);
159  BOOST_REQUIRE_EQUAL(import_strat.GetTask(), "psiblast");
160  BOOST_REQUIRE_EQUAL(import_strat.GetService(), "plain");
161  BOOST_REQUIRE_EQUAL(import_strat.GetProgram(), "blastp");
162 
     // NOTE(review): original line 163 (presumably the declaration of `query`)
     // is missing from this listing.
164  BOOST_REQUIRE(query->IsPssm() == false);
165  BOOST_REQUIRE(query->IsSeq_loc_list() == false);
166  const CBioseq_set& bss = query->GetBioseq_set();
167  list<CRef<CSeq_entry> > seq_entry = bss.GetSeq_set();
168  BOOST_REQUIRE(seq_entry.front()->GetSeq().GetLength() == 320);
169 
     // NOTE(review): original line 170 (presumably the declaration of
     // `subject`) is missing from this listing.
171  BOOST_REQUIRE_EQUAL(subject->IsDatabase(), true);
172  BOOST_REQUIRE_EQUAL(subject->GetDatabase(), "nr");
173 
     // Algorithm and web-format options are expected to be present; program
     // options are expected to be absent for this strategy.
174  BOOST_REQUIRE(import_strat.GetAlgoOptions() != NULL);
175  BOOST_REQUIRE(import_strat.GetProgramOptions() == NULL);
176  BOOST_REQUIRE(import_strat.GetWebFormatOptions() != NULL);
177 
178  CRef<blast::CBlastOptionsHandle> opts_handle = import_strat.GetOptionsHandle();
179  BOOST_REQUIRE_EQUAL(opts_handle->GetHitlistSize(), 500);
180  BOOST_REQUIRE_EQUAL(opts_handle->GetWindowSize(), 40);
181  BOOST_REQUIRE_EQUAL(opts_handle->GetMaskAtHash(), false);
182  BOOST_REQUIRE_EQUAL(opts_handle->GetGappedMode(), true);
     // The handle must really be a PSI-BLAST handle before its specific
     // getters are used.
183  CPSIBlastOptionsHandle* psi_opts = dynamic_cast<CPSIBlastOptionsHandle*> (&*opts_handle);
184  BOOST_REQUIRE(psi_opts != NULL);
185  BOOST_REQUIRE_EQUAL(psi_opts->GetGapExtensionCost(), 1);
186  BOOST_REQUIRE_EQUAL(psi_opts->GetGapOpeningCost(), 11);
187  BOOST_REQUIRE_EQUAL(psi_opts->GetMatrixName(), "BLOSUM62");
188  BOOST_REQUIRE_EQUAL(import_strat.GetPsiNumOfIterations(), 1);
189 }
190 
// Imports data/webpsi.2ndround.ss.asn and checks that it is reported as a
// "psiblast" task with service "psi", that the query is a PSSM carrying a
// query sequence of length 320, that the subject is database "nr", which
// option groups are present, and selected option values, including a
// PSI-BLAST iteration count of 2.
191 BOOST_AUTO_TEST_CASE(LoadWebPsiBlastSearchStrategyAfterRound2)
192 {
193  const char* fname = "data/webpsi.2ndround.ss.asn";
194  ifstream in(fname);
     // NOTE(review): original line 195 is missing from this listing; `request`
     // is presumably declared there. No BOOST_REQUIRE(in) is visible in this
     // test -- confirm.
196  CImportStrategy import_strat(request);
197  BOOST_REQUIRE_EQUAL(import_strat.GetTask(), "psiblast");
198  BOOST_REQUIRE_EQUAL(import_strat.GetService(), "psi");
199  BOOST_REQUIRE_EQUAL(import_strat.GetProgram(), "blastp");
200 
     // NOTE(review): original line 201 (presumably the declaration of `query`)
     // is missing from this listing.
202  BOOST_REQUIRE(query->IsPssm() == true);
203  BOOST_REQUIRE(query->IsSeq_loc_list() == false);
204  BOOST_REQUIRE(query->IsBioseq_set() == false);
205  const CPssmWithParameters& pssm = query->GetPssm();
206  BOOST_REQUIRE_EQUAL(pssm.HasQuery(), true);
207  BOOST_REQUIRE_EQUAL(pssm.GetQuery().IsSeq(), true);
     // The same expected length is checked on the embedded query sequence and
     // on the PSSM itself.
208  const TSeqPos kQueryLength(320);
209  BOOST_REQUIRE_EQUAL(pssm.GetQuery().GetSeq().GetLength(), kQueryLength);
210  BOOST_REQUIRE_EQUAL(pssm.GetPssm().GetQueryLength(), kQueryLength);
211 
     // NOTE(review): original line 212 (presumably the declaration of
     // `subject`) is missing from this listing.
213  BOOST_REQUIRE_EQUAL(subject->IsDatabase(), true);
214  BOOST_REQUIRE_EQUAL(subject->GetDatabase(), "nr");
215 
216  BOOST_REQUIRE(import_strat.GetAlgoOptions() != NULL);
217  BOOST_REQUIRE(import_strat.GetProgramOptions() == NULL);
218  BOOST_REQUIRE(import_strat.GetWebFormatOptions() != NULL);
219 
220  CRef<blast::CBlastOptionsHandle> opts_handle = import_strat.GetOptionsHandle();
221  BOOST_REQUIRE_EQUAL(opts_handle->GetHitlistSize(), 500);
222  BOOST_REQUIRE_EQUAL(opts_handle->GetWindowSize(), 40);
223  BOOST_REQUIRE_EQUAL(opts_handle->GetMaskAtHash(), false);
224  BOOST_REQUIRE_EQUAL(opts_handle->GetGappedMode(), true);
225  CPSIBlastOptionsHandle* psi_opts = dynamic_cast<CPSIBlastOptionsHandle*> (&*opts_handle);
226  BOOST_REQUIRE(psi_opts != NULL);
227  BOOST_REQUIRE_EQUAL(psi_opts->GetGapExtensionCost(), 1);
228  BOOST_REQUIRE_EQUAL(psi_opts->GetGapOpeningCost(), 11);
229  BOOST_REQUIRE_EQUAL(psi_opts->GetMatrixName(), "BLOSUM62");
     // NOTE(review): exact equality on a double; a tolerance-based check may be
     // more robust if the value is ever computed rather than stored verbatim.
230  BOOST_REQUIRE_EQUAL(psi_opts->GetInclusionThreshold(), 0.001);
231  BOOST_REQUIRE_EQUAL(psi_opts->GetPseudoCount(), 0);
232  BOOST_REQUIRE_EQUAL(import_strat.GetPsiNumOfIterations(), 2);
233 }
234 
235 // Created with command line psiblast, one iteration
// Imports data/phi.ss.asn and checks that it is reported as a "phiblastp"
// task with service "plain" and program "blastp", that the query is a
// Bioseq-set (first entry of length 549), that the subject is database
// "ecoli", and selected option values including the PHI pattern.
236 BOOST_AUTO_TEST_CASE(LoadPhiBlastSearchStrategy)
237 {
238  const char* fname = "data/phi.ss.asn";
239  ifstream in(fname);
     // NOTE(review): original line 240 is missing from this listing; `request`
     // is presumably declared there. No BOOST_REQUIRE(in) is visible in this
     // test -- confirm.
241  CImportStrategy import_strat(request);
242  BOOST_REQUIRE_EQUAL(import_strat.GetTask(), "phiblastp");
243  BOOST_REQUIRE_EQUAL(import_strat.GetService(), "plain");
244  BOOST_REQUIRE_EQUAL(import_strat.GetProgram(), "blastp");
245 
     // NOTE(review): original line 246 (presumably the declaration of `query`)
     // is missing from this listing.
247  BOOST_REQUIRE(query->IsPssm() == false);
248  BOOST_REQUIRE(query->IsSeq_loc_list() == false);
249  const CBioseq_set& bss = query->GetBioseq_set();
250  list<CRef<CSeq_entry> > seq_entry = bss.GetSeq_set();
251  BOOST_REQUIRE(seq_entry.front()->GetSeq().GetLength() == 549);
252 
     // NOTE(review): original line 253 (presumably the declaration of
     // `subject`) is missing from this listing.
254  BOOST_REQUIRE_EQUAL(subject->IsDatabase(), true);
255  BOOST_REQUIRE_EQUAL(subject->GetDatabase(), "ecoli");
256 
257  BOOST_REQUIRE(import_strat.GetAlgoOptions() != NULL);
258  BOOST_REQUIRE(import_strat.GetProgramOptions() == NULL);
259  BOOST_REQUIRE(import_strat.GetWebFormatOptions() != NULL);
260 
261  CRef<blast::CBlastOptionsHandle> opts_handle = import_strat.GetOptionsHandle();
262  BOOST_REQUIRE_EQUAL(opts_handle->GetHitlistSize(), 500);
263  BOOST_REQUIRE_EQUAL(opts_handle->GetWindowSize(), 40);
264  BOOST_REQUIRE_EQUAL(opts_handle->GetMaskAtHash(), false);
265  BOOST_REQUIRE_EQUAL(opts_handle->GetGappedMode(), true);
     // These two values are read through the generic options object rather
     // than through the PHI-BLAST handle obtained below.
     // NOTE(review): exact equality on a double (0.002); a tolerance-based
     // check may be more robust.
266  BOOST_REQUIRE_EQUAL(opts_handle->GetOptions().GetInclusionThreshold(), 0.002);
267  BOOST_REQUIRE_EQUAL(opts_handle->GetOptions().GetPseudoCount(), 0);
     // NOTE(review): despite its name, `psi_opts` holds a PHI-BLAST protein
     // options handle here.
268  CPHIBlastProtOptionsHandle* psi_opts = dynamic_cast<CPHIBlastProtOptionsHandle*> (&*opts_handle);
269  BOOST_REQUIRE(psi_opts != NULL);
270  BOOST_REQUIRE_EQUAL(psi_opts->GetGapExtensionCost(), 1);
271  BOOST_REQUIRE_EQUAL(psi_opts->GetGapOpeningCost(), 11);
272  BOOST_REQUIRE_EQUAL(psi_opts->GetMatrixName(), "BLOSUM62");
273  BOOST_REQUIRE_EQUAL(psi_opts->GetPHIPattern(), "[GA]xxxxGK[ST]");
274  BOOST_REQUIRE_EQUAL(import_strat.GetPsiNumOfIterations(), 1);
275 }
276 
277 /*
278  * Export Search Strategy Tests
279  */
280 
281 // Test that when the query carries no range restriction, neither the
282 // RequiredStart nor the RequiredEnd field is sent over the network
// The query is GI 555 without a range and the target database is "nt". The
// exported request must be a queue-search request whose program options and
// algorithm options contain neither parameter.
283 BOOST_AUTO_TEST_CASE(ExportStrategy_FullQuery) {
284  CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 555));
285  unique_ptr<blast::SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(*id));
286  TSeqLocVector queries(1, *sl.get());
287  CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(queries));
288  const string kDbName("nt");
     // NOTE(review): the constructor call below continues on original line
     // 290, which is missing from this listing.
289  CSearchDatabase db(kDbName,
291 
     // NOTE(review): target_db refers to the stack object `db`; presumably the
     // toolkit's reference counting tolerates non-heap objects -- confirm.
292  CRef<CSearchDatabase> target_db(&db);
     // NOTE(review): original lines 293-294 (presumably where `opts` is
     // created) are missing from this listing.
295 
296  CExportStrategy exp_ss(qf, opts, target_db);
     // NOTE(review): original line 297 (presumably the declaration of `ss`)
     // is missing from this listing.
298  BOOST_REQUIRE(ss.NotEmpty());
299 
300  bool found_query_range = false;
301 
302  const CBlast4_request_body& body = ss->GetBody();
303  BOOST_REQUIRE(body.IsQueue_search());
304  const CBlast4_queue_search_request& qsr = body.GetQueue_search();
305 
306  // These are the parameters that we are looking for
307  vector<string> param_names;
308  param_names.push_back(CBlast4Field::GetName(eBlastOpt_RequiredStart));
309  param_names.push_back(CBlast4Field::GetName(eBlastOpt_RequiredEnd));
310 
311  // Get the program options
312  if (qsr.CanGetProgram_options()) {
313  const CBlast4_parameters& prog_options = qsr.GetProgram_options();
     // Finding either parameter is enough to fail the test, so stop at the
     // first hit.
314  ITERATE(vector<string>, pname, param_names) {
315  CRef<CBlast4_parameter> p = prog_options.GetParamByName(*pname);
316  if (p.NotEmpty()) {
317  found_query_range = true;
318  break;
319  }
320  }
321  }
322  BOOST_REQUIRE(found_query_range == false);
323 
324  // (check also the algorithm options, just in case they ever get misplaced)
325  if (qsr.CanGetAlgorithm_options()) {
326  const CBlast4_parameters& algo_options = qsr.GetAlgorithm_options();
327  ITERATE(vector<string>, pname, param_names) {
328  CRef<CBlast4_parameter> p = algo_options.GetParamByName(*pname);
329  if (p.NotEmpty()) {
330  found_query_range = true;
331  break;
332  }
333  }
334  }
335  BOOST_REQUIRE(found_query_range == false);
336 
337  // just as a bonus, check the database
338  BOOST_REQUIRE(qsr.CanGetSubject());
339  BOOST_REQUIRE(qsr.GetSubject().GetDatabase() == kDbName);
340 }
341 
342 // Test that when a query with a range restriction is provided, the appropriate
343 // RequiredEnd and RequiredStart fields are sent over the network
// The query is GI 555 restricted to the range (1,200) and the target database
// is "nt". The parameters must appear in the program options with values
// matching the range, and must not appear in the algorithm options.
344 BOOST_AUTO_TEST_CASE(ExportStrategy_QueryWithRange) {
345  CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 555));
346  TSeqRange query_range(1,200);
347  unique_ptr<blast::SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(*id,
348  query_range));
349  TSeqLocVector queries(1, *sl.get());
350  CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(queries));
351  const string kDbName("nt");
     // NOTE(review): the constructor call below continues on original line
     // 353, which is missing from this listing.
352  CSearchDatabase db(kDbName,
354 
     // NOTE(review): target_db refers to the stack object `db`; presumably the
     // toolkit's reference counting tolerates non-heap objects -- confirm.
355  CRef<CSearchDatabase> target_db(&db);
     // NOTE(review): original lines 356-357 (presumably where `opts` is
     // created) are missing from this listing.
358 
359  CExportStrategy exp_ss(qf, opts, target_db);
     // NOTE(review): original line 360 (presumably the declaration of `ss`)
     // is missing from this listing.
361  BOOST_REQUIRE(ss.NotEmpty());
362 
363  bool found_query_range = false;
364 
365  const CBlast4_request_body& body = ss->GetBody();
366  BOOST_REQUIRE(body.IsQueue_search());
367  const CBlast4_queue_search_request& qsr = body.GetQueue_search();
368 
369  // These are the parameters that we are looking for
370  vector<string> param_names;
371  param_names.push_back(CBlast4Field::GetName(eBlastOpt_RequiredStart));
372  param_names.push_back(CBlast4Field::GetName(eBlastOpt_RequiredEnd));
373 
374  // Get the program options
375  if (qsr.CanGetProgram_options()) {
376  const CBlast4_parameters& prog_options = qsr.GetProgram_options();
377  ITERATE(vector<string>, pname, param_names) {
378  CRef<CBlast4_parameter> p = prog_options.GetParamByName(*pname);
379  if (p.NotEmpty()) {
380  BOOST_REQUIRE(p->CanGetValue());
381  found_query_range = true;
     // NOTE(review): original lines 382 and 386 are missing from this listing.
     // They presumably hold the branch headers that choose between the
     // GetFrom() and GetTo() comparisons below, which is why the visible
     // braces do not balance -- confirm against the repository.
383  BOOST_REQUIRE_EQUAL((int)query_range.GetFrom(),
384  (int)p->GetValue().GetInteger());
385  }
387  BOOST_REQUIRE_EQUAL((int)query_range.GetTo(),
388  (int)p->GetValue().GetInteger());
389  }
390  }
391  }
392  }
393  BOOST_REQUIRE(found_query_range == true);
394 
     // Reset the flag so the algorithm-options check starts from a clean state.
395  found_query_range = false;
396  // Check that this option is NOT specified in the algorithm options
397  if (qsr.CanGetAlgorithm_options()) {
398  const CBlast4_parameters& algo_options = qsr.GetAlgorithm_options();
399  ITERATE(vector<string>, pname, param_names) {
400  CRef<CBlast4_parameter> p = algo_options.GetParamByName(*pname);
401  if (p.NotEmpty()) {
402  found_query_range = true;
403  break;
404  }
405  }
406  }
407  BOOST_REQUIRE(found_query_range == false);
408 
409  // just as a bonus, check the database
410  BOOST_REQUIRE(qsr.CanGetSubject());
411  BOOST_REQUIRE(qsr.GetSubject().GetDatabase() == kDbName);
412 }
413 
414 // Test that when no identifier is provided for the sequence data, a Bioseq
415 // should be submitted
// The query is read from data/seq_entry_lcl_id.asn and added to a scope. The
// exported request must be a queue-search request whose queries are a
// Bioseq-set (not a PSSM or Seq-loc list), with as many queries as were
// supplied.
416 BOOST_AUTO_TEST_CASE(ExportStrategy_QueryWithLocalIds) {
417 
418  CSeq_entry seq_entry;
     // NOTE(review): the stream state is not checked before or after reading,
     // unlike testMegablast's BOOST_REQUIRE(in).
419  ifstream in("data/seq_entry_lcl_id.asn");
420  in >> MSerial_AsnText >> seq_entry;
     // GetFirstId() returns a pointer to const; the const_cast strips that so
     // the id can be bound to a non-const reference.
421  CSeq_id& id = const_cast<CSeq_id&>(*seq_entry.GetSeq().GetFirstId());
422  in.close();
423 
     // NOTE(review): original line 424 (presumably the declaration of `scope`)
     // is missing from this listing.
425  scope->AddTopLevelSeqEntry(seq_entry);
426  CRef<CSeq_loc> sl(new CSeq_loc(id, (TSeqPos)0, (TSeqPos)11));
427  TSeqLocVector query_loc(1, SSeqLoc(sl, scope));
428  CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(query_loc));
429  const string kDbName("nt");
     // NOTE(review): the constructor call below continues on original line
     // 431, which is missing from this listing.
430  CSearchDatabase db(kDbName,
432 
     // NOTE(review): target_db refers to the stack object `db`; presumably the
     // toolkit's reference counting tolerates non-heap objects -- confirm.
433  CRef<CSearchDatabase> target_db(&db);
     // NOTE(review): original lines 434-435 (presumably where `opts` is
     // created) are missing from this listing.
436 
437  CExportStrategy exp_ss(qf, opts, target_db);
     // NOTE(review): original line 438 (presumably the declaration of `ss`)
     // is missing from this listing.
439  BOOST_REQUIRE(ss.NotEmpty());
440 
441  const CBlast4_request_body& body = ss->GetBody();
442  BOOST_REQUIRE(body.IsQueue_search());
443  const CBlast4_queue_search_request& qsr = body.GetQueue_search();
444  BOOST_REQUIRE(qsr.CanGetQueries());
445  const CBlast4_queries& b4_queries = qsr.GetQueries();
446  BOOST_REQUIRE_EQUAL(query_loc.size(), b4_queries.GetNumQueries());
447  BOOST_REQUIRE(b4_queries.IsBioseq_set());
448  BOOST_REQUIRE( !b4_queries.IsPssm() );
449  BOOST_REQUIRE( !b4_queries.IsSeq_loc_list() );
450 
451  // just as a bonus, check the database
452  BOOST_REQUIRE(qsr.CanGetSubject());
453  BOOST_REQUIRE(qsr.GetSubject().GetDatabase() == kDbName);
454 }
455 
456 // Test that when GIs are provided as the queries, no bioseq
457 // should be submitted, instead a list of seqlocs should be sent
// Two (GI, length) pairs are used. One Seq-entry per pair is read from
// data/seq_entry_gis.asn and added to the scope. The exported queries must be
// a Seq-loc list (not a Bioseq-set or PSSM), with one query per pair.
458 BOOST_AUTO_TEST_CASE(ExportStrategy_QueryWithGIs) {
459 
     // NOTE(review): original line 460 (presumably the declaration of `scope`)
     // is missing from this listing.
461  typedef pair<int, int> TGiLength;
462  vector<TGiLength> gis;
463  gis.push_back(TGiLength(555, 624));
464  gis.push_back(TGiLength(556, 310));
     // NOTE(review): the stream state is never checked in this test.
465  ifstream in("data/seq_entry_gis.asn");
466  TSeqLocVector query_loc;
467 
     // Each iteration reads the next Seq-entry from the same stream.
468  ITERATE(vector<TGiLength>, gi, gis) {
469  CRef<CSeq_entry> seq_entry(new CSeq_entry);
470  in >> MSerial_AsnText >> *seq_entry;
471  scope->AddTopLevelSeqEntry(*seq_entry);
472  CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, gi->first));
     // NOTE(review): the pair's second member (named as a length by the
     // typedef) is passed as the end coordinate; if that coordinate is an
     // inclusive position this is one past the last residue -- confirm.
473  CRef<CSeq_loc> sl(new CSeq_loc(*id, 0, gi->second));
474  query_loc.push_back(SSeqLoc(sl, scope));
475  }
476  in.close();
477 
478  CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(query_loc));
479  const string kDbName("nt");
     // NOTE(review): the constructor call below continues on original line
     // 481, which is missing from this listing.
480  CSearchDatabase db(kDbName,
482 
     // NOTE(review): target_db refers to the stack object `db`; presumably the
     // toolkit's reference counting tolerates non-heap objects -- confirm.
483  CRef<CSearchDatabase> target_db(&db);
     // NOTE(review): original lines 484-485 (presumably where `opts` is
     // created) are missing from this listing.
486 
487  CExportStrategy exp_ss(qf, opts, target_db);
     // NOTE(review): original line 488 (presumably the declaration of `ss`)
     // is missing from this listing.
489  BOOST_REQUIRE(ss.NotEmpty());
490 
491 
492  const CBlast4_request_body& body = ss->GetBody();
493  BOOST_REQUIRE(body.IsQueue_search());
494  const CBlast4_queue_search_request& qsr = body.GetQueue_search();
495  BOOST_REQUIRE(qsr.CanGetQueries());
496  const CBlast4_queries& b4_queries = qsr.GetQueries();
497  BOOST_REQUIRE_EQUAL(query_loc.size(), b4_queries.GetNumQueries());
498  BOOST_REQUIRE( !b4_queries.IsBioseq_set() );
499  BOOST_REQUIRE( !b4_queries.IsPssm() );
500  BOOST_REQUIRE( b4_queries.IsSeq_loc_list() );
501 
502  // just as a bonus, check the database
503  BOOST_REQUIRE(qsr.CanGetSubject());
504  BOOST_REQUIRE(qsr.GetSubject().GetDatabase() == kDbName);
505 }
506 
// Builds a CExportStrategy from an options handle alone and checks that the
// program and service in the exported queue-search request match what the
// options object reports through GetRemoteProgramAndService_Blast3().
507 BOOST_AUTO_TEST_CASE(ExportStrategy_CBlastOptions)
508 {
509  CRef<CBlastOptionsHandle> optsHandle;
     // NOTE(review): original line 510 is missing from this listing; it
     // presumably assigns optsHandle, which is declared empty above and
     // dereferenced on the next line -- confirm against the repository.
511  const CBlastOptions& opts = optsHandle->SetOptions();
512 
513  CExportStrategy exp_ss(optsHandle);
     // NOTE(review): original line 514 (presumably the declaration of `ss`)
     // is missing from this listing.
515  BOOST_REQUIRE(ss.NotEmpty());
516 
517  const CBlast4_request_body& body = ss->GetBody();
518  BOOST_REQUIRE(body.IsQueue_search());
519  const CBlast4_queue_search_request& qsr = body.GetQueue_search();
520 
     // Ask the options object for the expected names; both are filled in
     // through the reference parameters.
521  string program;
522  string service;
523  opts.GetRemoteProgramAndService_Blast3(program, service);
524 
525  BOOST_REQUIRE(qsr.GetProgram() == program);
526  BOOST_REQUIRE(qsr.GetService() == service);
527 
528 }
529 
// Imports data/ss.filter.asn and checks that it is a megablast/blastn
// strategy with both algorithm and program options present, hard subject
// masking, DB filtering key "unit_test_mask", and mask-at-hash and gapped
// mode enabled.
530 BOOST_AUTO_TEST_CASE(LoadHardMaskDBSearchStrategy)
531 {
532  const char* fname = "data/ss.filter.asn";
533  ifstream in(fname);
     // NOTE(review): original line 534 is missing from this listing; `request`
     // is presumably declared there. No BOOST_REQUIRE(in) is visible in this
     // test -- confirm.
535  CImportStrategy import_strat(request);
536  BOOST_REQUIRE_EQUAL(import_strat.GetService(), "megablast");
537  BOOST_REQUIRE_EQUAL(import_strat.GetProgram(), "blastn");
538 
539  BOOST_REQUIRE(import_strat.GetAlgoOptions() != NULL);
540  BOOST_REQUIRE(import_strat.GetProgramOptions() != NULL);
541  BOOST_REQUIRE(import_strat.GetSubjectMaskingType() == eHardSubjMasking);
542  BOOST_REQUIRE(import_strat.GetDBFilteringKey() == "unit_test_mask");
543 
544  CRef<blast::CBlastOptionsHandle> opts_handle = import_strat.GetOptionsHandle();
545  BOOST_REQUIRE_EQUAL(opts_handle->GetMaskAtHash(), true);
546  BOOST_REQUIRE_EQUAL(opts_handle->GetGappedMode(), true);
547 }
548 
549 // Test that a soft subject-masking filter set on the target database is
550 // exported: the request must carry the filtering key and the masking type
// The query is GI 555 and the database is "data/nt.41646578" with filtering
// algorithm "unit_test_mask" applied as soft masking. Program options must be
// present in the exported queue-search request.
551 BOOST_AUTO_TEST_CASE(ExportStrategy_DBSoftMask) {
552  CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 555));
553  unique_ptr<blast::SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(*id));
554  TSeqLocVector queries(1, *sl.get());
555  CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(queries));
556  const string kDbName("data/nt.41646578");
     // NOTE(review): the constructor call below continues on original line
     // 558, which is missing from this listing.
557  CSearchDatabase db(kDbName,
559 
     // NOTE(review): target_db refers to the stack object `db`; presumably the
     // toolkit's reference counting tolerates non-heap objects -- confirm.
560  CRef<CSearchDatabase> target_db(&db);
561  const string mask_str ("unit_test_mask");
562  target_db->SetFilteringAlgorithm(mask_str, eSoftSubjMasking);
     // NOTE(review): original lines 563-564 (presumably where `opts` is
     // created) are missing from this listing.
565 
566  CExportStrategy exp_ss(qf, opts, target_db);
     // NOTE(review): original line 567 (presumably the declaration of `ss`)
     // is missing from this listing.
568  BOOST_REQUIRE(ss.NotEmpty());
569 
570  const CBlast4_request_body& body = ss->GetBody();
571  BOOST_REQUIRE(body.IsQueue_search());
572  const CBlast4_queue_search_request& qsr = body.GetQueue_search();
573 
574 
575  // Get the program options
576  BOOST_REQUIRE(qsr.CanGetProgram_options() == true);
577  const CBlast4_parameters& prog_options = qsr.GetProgram_options();
     // NOTE(review): original lines 578 and 581 (the declarations of `p1` and
     // `p2`, presumably lookups in prog_options) are missing from this
     // listing. Neither pointer is visibly checked before being dereferenced.
579  BOOST_REQUIRE(p1->GetValue().GetString()== mask_str);
580 
582  BOOST_REQUIRE(p2->GetValue().GetInteger()== eSoftSubjMasking);
583 
584 }
585 
586 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Declares the CBlastAdvancedProteinOptionsHandle class.
@ eHardSubjMasking
Definition: blast_def.h:238
@ eSoftSubjMasking
Definition: blast_def.h:237
Declares the CBlastNucleotideOptionsHandle class.
Declares the CBlastProteinOptionsHandle class.
@ eBlastn
Nucl-Nucl (traditional blastn)
Definition: blast_types.hpp:58
@ eBlastp
Protein-Protein.
Definition: blast_types.hpp:59
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
const CSeq_id * GetFirstId() const
Definition: Bioseq.cpp:271
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
const string & GetName() const
Get field name (key).
Definition: names.cpp:355
CRef< CBlast4_parameter > GetParamByName(const string name) const
Returns Blast4_parameter with specified name.
size_t GetNumQueries() const
Retrieve the number of queries in this object.
CBlast4_queue_search_request –.
CBlast4_request_body –.
CBlast4_request –.
Handle to the Advanced BLASTP options.
Defines BLAST error codes (user errors included)
Handle to the nucleotide-nucleotide options to the BLAST algorithm.
Encapsulates ALL the BLAST algorithm's options.
@ eRemote
To be used when running BLAST remotely.
virtual void GetRemoteProgramAndService_Blast3(string &p, string &s) const
Get the program and service name for remote blast.
Class to return parts of the CBlast4_request, or data associated with a CBlast4_request,...
NCBI C++ Object Manager dependant implementation of IQueryFactory.
Handle to the protein PHI BLAST options.
Handle to the protein-protein options to the BLAST algorithm.
const CSeq_entry & GetQuery() const
Retrieve the query sequence.
bool HasQuery() const
Has this PSSM a query in it?
SIZE_TYPE GetQueryLength() const
Return the query length or 0 if no query is available.
Definition: Pssm.cpp:62
CScope –.
Definition: scope.hpp:92
Blast Search Subject.
Definition: Seq_entry.hpp:56
static CTestObjMgr & Instance()
Definition: test_objmgr.cpp:71
string GetDBFilteringKey()
The DB filter key.
string GetService() const
Fetches service, such as psiblast, plain, megablast.
string GetTask()
Fetches task, such as "megablast", "blastn", etc.
double GetInclusionThreshold() const
Returns InclusionThreshold.
int GetMatchReward() const
Returns MatchReward.
objects::CBlast4_parameters * GetWebFormatOptions()
Options for controlling formatting (psi blast iteration number also).
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
CRef< objects::CBlast4_request > GetSearchStrategy(void)
const char * GetPHIPattern() const
Retrieves the pattern string option.
string GetProgram() const
Fetches program, one of blastn, blastp, blastx, tblastn, tblastx.
CRef< objects::CBlast4_request > ExtractBlast4Request(CNcbiIstream &in)
Extract a Blast4-request (a.k.a.
const char * GetMatrixName() const
Returns MatrixName.
int GetGapExtensionCost() const
Returns GapExtensionCost.
int GetWordSize() const
Returns WordSize.
int GetDBFilteringID()
The DB filter ID.
CRef< objects::CBlast4_queries > GetQueries()
The queries either as Bioseq, seqloc, or pssm.
unsigned int GetPsiNumOfIterations()
Get number of iteration for psi blast, return 0 if num of iterations not available.
objects::CBlast4_parameters * GetAlgoOptions()
Options specific to blast searches (e.g, threshold, expect value).
CRef< objects::CBlast4_subject > GetSubject()
Returns the target sequences.
int GetGapOpeningCost() const
Returns GapOpeningCost.
objects::CBlast4_parameters * GetProgramOptions()
Options for controlling program execution and database filtering.
int GetMismatchPenalty() const
Returns MismatchPenalty.
void SetFilteringAlgorithm(int filt_algorithm_id)
Temporary fix for backwards compatibility with other 6.0 SCs.
int GetPseudoCount() const
Returns PseudoCount.
ESubjectMaskingType GetSubjectMaskingType()
Get Subject Masking Type.
CRef< blast::CBlastOptionsHandle > GetOptionsHandle()
Builds and returns the OptionsHandle.
@ eBlastDbIsNucleotide
nucleotide
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
Definition: scope.cpp:522
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
const TSubject & GetSubject(void) const
Get the Subject member data.
bool IsSeq_loc_list(void) const
Check if variant Seq_loc_list is selected.
const TService & GetService(void) const
Get the Service member data.
const TDatabase & GetDatabase(void) const
Get the variant data.
bool IsPssm(void) const
Check if variant Pssm is selected.
bool CanGetQueries(void) const
Check if it is safe to call GetQueries method.
bool IsQueue_search(void) const
Check if variant Queue_search is selected.
const TQueue_search & GetQueue_search(void) const
Get the variant data.
bool CanGetSubject(void) const
Check if it is safe to call GetSubject method.
bool IsBioseq_set(void) const
Check if variant Bioseq_set is selected.
bool CanGetProgram_options(void) const
Check if it is safe to call GetProgram_options method.
const TProgram_options & GetProgram_options(void) const
Get the Program_options member data.
const TQueries & GetQueries(void) const
Get the Queries member data.
const TAlgorithm_options & GetAlgorithm_options(void) const
Get the Algorithm_options member data.
const TBody & GetBody(void) const
Get the Body member data.
const TProgram & GetProgram(void) const
Get the Program member data.
bool CanGetAlgorithm_options(void) const
Check if it is safe to call GetAlgorithm_options method.
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
const TPssm & GetPssm(void) const
Get the Pssm member data.
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
@ eBlastOpt_RequiredEnd
Definition: names.hpp:139
@ eBlastOpt_DbFilteringAlgorithmKey
Definition: names.hpp:133
@ eBlastOpt_RequiredStart
Definition: names.hpp:140
@ eBlastOpt_SubjectMaskingType
Definition: names.hpp:134
Magic spell ;-) needed for some weird compilers... very empiric.
int strcmp(const char *str1, const char *str2)
Definition: odbc_utils.hpp:160
std::istream & in(std::istream &in_, double &x_)
The Object manager core.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares the CPHIBlastProtOptionsHandle class.
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
Declares the CPSIBlastOptionsHandle class.
Declares the CRemoteBlast class.
Declares the CImportStrategy and CExportStrategy.
BOOST_AUTO_TEST_CASE(emptyInput)
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
static string subject
static string query
Utility stuff for more convenient using of Boost.Test library.
Modified on Thu Mar 28 17:07:03 2024 by modify_doxy.py rev. 669887