70 #define STATIC_ARRAY_SIZE(array) (sizeof(array)/sizeof(*array))
100 const string kPssmFile(
"data/pssm_freq_ratios.asn");
101 m_Pssm = TestUtil::ReadObject<CPssmWithParameters>(kPssmFile);
104 BOOST_REQUIRE(m_SeqEntry->
IsSeq());
110 m_Seq_locs.push_back(seqloc);
113 const string kSeqEntryFile(
"data/7450545.seqentry.asn");
115 TestUtil::ReadObject<CSeq_entry>(kSeqEntryFile);
120 BOOST_REQUIRE(m_SeqEntry->
IsSeq());
125 m_Seq_locs.push_back(seqloc);
142 x_ReadSeqEntriesFromFile();
163 const CSeq_id& seqid = (*itr)->GetSeq_id(1);
168 BOOST_REQUIRE(seqid.
IsOther() ||
178 if (acc != last_acc || ver != last_ver) {
188 if (new_gi != last_id) {
231 CDeltaBlast deltablast(query_factory, dbadapter, domain_dbadapter,
232 m_OptHandle, rps_opts);
239 BOOST_REQUIRE((*domain_results)[0].GetErrors().
empty());
241 const int kNumExpectedMatchingDomains = 3;
244 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingDomains,
245 x_CountNumberUniqueIds(domain_sas));
247 const size_t kNumExpectedDomainHSPs = 3;
252 expected_domain_results[0].score = 742;
253 expected_domain_results[0].evalue = 5.21679e-102;
254 expected_domain_results[0].bit_score = 2.89507e+02;
255 expected_domain_results[0].num_ident = 111;
256 int starts[] = {1, 139, 80, -1, 81, 218};
257 int lengths[] = {79, 1, 151};
259 back_inserter(expected_domain_results[0].starts));
261 back_inserter(expected_domain_results[0].lengths));
266 expected_domain_results[1].score = 713;
267 expected_domain_results[1].evalue = 1.33296e-97;
268 expected_domain_results[1].bit_score = 2.78295e+02;
269 expected_domain_results[1].num_ident = 107;
270 int starts[] = {1, 135, 6, -1, 8, 140, -1, 190, 58, 191, 80, -1, 81,
271 213, 94, -1, 95, 226, 114, -1, 116, 245, 200, -1,
273 int lengths[] = {5, 2, 50, 1, 22, 1, 13, 1, 19, 2, 84, 2, 30};
275 back_inserter(expected_domain_results[1].starts));
277 back_inserter(expected_domain_results[1].lengths));
282 expected_domain_results[2].score = 673;
283 expected_domain_results[2].evalue = 1.63233e-91;
284 expected_domain_results[2].bit_score = 2.62959e+02;
285 expected_domain_results[2].num_ident = 106;
286 int starts[] = {0, 137, 115, -1, 117, 252 };
287 int lengths[] = {115, 2, 112};
289 back_inserter(expected_domain_results[2].starts));
291 back_inserter(expected_domain_results[2].lengths));
298 qa::CSeqAlignCmpOpts opts;
299 qa::CSeqAlignCmp cmp_d(expected_domain_results, actual_domain_results, opts);
301 bool identical_results = cmp_d.Run(&errors);
303 BOOST_REQUIRE_MESSAGE(identical_results, errors);
315 BOOST_REQUIRE(results[0].GetErrors().
empty());
318 results[0].GetSeqAlign()->
Get().
front()->GetSeq_id(0).GetGi(),
321 const int kNumExpectedMatchingSeqs = 8;
324 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, x_CountNumberUniqueIds(sas));
326 const size_t kNumExpectedHSPs = 9;
331 expected_results[0].score = 861;
332 expected_results[0].evalue = 2.80386e-109;
333 expected_results[0].bit_score = 3.35656e+02;
334 expected_results[0].num_ident = 101;
335 int starts[] = {0, 941, -1, 1094, 153, 1095};
336 int lengths[] = {153, 1, 79};
338 back_inserter(expected_results[0].starts));
340 back_inserter(expected_results[0].lengths));
345 expected_results[1].score = 633;
346 expected_results[1].evalue = 3.86624e-77;
347 expected_results[1].bit_score = 2.47830e+02;
348 expected_results[1].num_ident = 73;
349 int starts[] = {0, 154, -1, 307, 153, 308};
350 int lengths[] = {153, 1, 25};
352 back_inserter(expected_results[1].starts));
354 back_inserter(expected_results[1].lengths));
359 expected_results[2].score = 645;
360 expected_results[2].evalue = 6.62145e-84;
361 expected_results[2].bit_score = 2.52452e+02;
362 expected_results[2].num_ident = 80;
363 int starts[] = {0, 190, 68, -1, 70, 258, 92, -1, 93, 280, 118, -1,
364 119, 305, 151, -1, 152, 337, 161, -1, 162, 346, -1,
366 int lengths[] = {68, 2, 22, 1, 25, 1, 32, 1, 9, 1, 21, 4, 49};
368 back_inserter(expected_results[2].starts));
370 back_inserter(expected_results[2].lengths));
375 expected_results[3].score = 53;
376 expected_results[3].evalue = 3.83197e+00;
377 expected_results[3].bit_score = 244103049e-7;
378 expected_results[3].num_ident = 7;
379 int starts[] = {127, 104, 132, -1, 134, 109};
380 int lengths[] = {5, 2, 15};
382 back_inserter(expected_results[3].starts));
384 back_inserter(expected_results[3].lengths));
390 expected_results[4].score = 51;
391 expected_results[4].evalue = 5.55808;
392 expected_results[4].bit_score = 23.6440;
393 expected_results[4].num_ident = 5;
394 int starts[] = {137, 20, 151, -1, 156, 34};
395 int lengths[] = {14, 5, 17};
397 back_inserter(expected_results[4].starts));
399 back_inserter(expected_results[4].lengths));
404 expected_results[5].score = 51;
405 expected_results[5].evalue = 6.14178;
406 expected_results[5].bit_score = 23.6440;
407 expected_results[5].num_ident = 8;
408 int starts[] = {153, 102, -1, 122, 173, 127};
409 int lengths[] = {20, 5, 12};
411 back_inserter(expected_results[5].starts));
413 back_inserter(expected_results[5].lengths));
418 expected_results[6].score = 51;
419 expected_results[6].evalue = 6.33109;
420 expected_results[6].bit_score = 23.6440;
421 expected_results[6].num_ident = 7;
422 int starts[] = {172, 305, 179, -1, 182, 312};
423 int lengths[] = {7, 3, 33};
425 back_inserter(expected_results[6].starts));
427 back_inserter(expected_results[6].lengths));
433 expected_results[7].score = 48;
434 expected_results[7].evalue = 7.91913;
435 expected_results[7].bit_score = 22.4884;
436 expected_results[7].num_ident = 5;
437 int starts[] = {155, 78};
438 int lengths[] = {18};
440 back_inserter(expected_results[7].starts));
442 back_inserter(expected_results[7].lengths));
448 expected_results[8].score = 49;
449 expected_results[8].evalue = 8.65824;
450 expected_results[8].bit_score = 22.8736;
451 expected_results[8].num_ident = 4;
452 int starts[] = {175, 11};
453 int lengths[] = {14};
455 back_inserter(expected_results[8].starts));
457 back_inserter(expected_results[8].lengths));
464 qa::CSeqAlignCmp
cmp(expected_results, actual_results, opts);
465 identical_results =
cmp.Run(&errors);
467 BOOST_REQUIRE_MESSAGE(identical_results, errors);
479 m_OptHandle->SetEvalueThreshold(5);
492 CDeltaBlast deltablast(query_factory, dbadapter, domain_dbadapter,
493 m_OptHandle, rps_opts);
500 BOOST_REQUIRE((*domain_results)[0].GetErrors().
empty());
502 const int kNumExpectedMatchingDomains = 3;
505 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingDomains,
506 x_CountNumberUniqueIds(domain_sas));
508 const size_t kNumExpectedDomainHSPs = 3;
513 expected_domain_results[0].score = 728;
514 expected_domain_results[0].evalue = 6.61511e-100;
515 expected_domain_results[0].bit_score = 2841082571e-7;
516 expected_domain_results[0].num_ident = 111;
517 int starts[] = {1, 139, 80, -1, 81, 218, 162, -1, 163, 299};
518 int lengths[] = {79, 1, 81, 1, 69};
520 back_inserter(expected_domain_results[0].starts));
522 back_inserter(expected_domain_results[0].lengths));
527 expected_domain_results[1].score = 698;
528 expected_domain_results[1].evalue = 2.17510e-95;
529 expected_domain_results[1].bit_score = 2725169055e-7;
530 expected_domain_results[1].num_ident = 107;
531 int starts[] = {1, 135, 6, -1, 8, 140, -1, 190, 58, 191, 80, -1, 81,
532 213, 94, -1, 95, 226, 114, -1, 116, 245, 200, -1,
534 int lengths[] = {5, 2, 50, 1, 22, 1, 13, 1, 19, 2, 84, 2, 30};
536 back_inserter(expected_domain_results[1].starts));
538 back_inserter(expected_domain_results[1].lengths));
543 expected_domain_results[2].score = 661;
544 expected_domain_results[2].evalue = 9.15785e-90;
545 expected_domain_results[2].bit_score = 2583366987e-7;
546 expected_domain_results[2].num_ident = 106;
547 int starts[] = {0, 137, 115, -1, 117, 252 };
548 int lengths[] = {115, 2, 112};
550 back_inserter(expected_domain_results[2].starts));
552 back_inserter(expected_domain_results[2].lengths));
559 qa::CSeqAlignCmpOpts opts;
560 qa::CSeqAlignCmp cmp_d(expected_domain_results, actual_domain_results, opts);
562 bool identical_results = cmp_d.Run(&errors);
564 BOOST_REQUIRE_MESSAGE(identical_results, errors);
576 BOOST_REQUIRE(results[0].GetErrors().
empty());
579 results[0].GetSeqAlign()->
Get().
front()->GetSeq_id(0).GetGi(),
582 const int kNumExpectedMatchingSeqs = 5;
585 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, x_CountNumberUniqueIds(sas));
587 const size_t kNumExpectedHSPs = 6;
592 expected_results[0].score = 876;
593 expected_results[0].evalue = 2.04885e-111;
594 expected_results[0].bit_score = 3414303038e-7;
595 expected_results[0].num_ident = 101;
596 int starts[] = {0, 941, -1, 1094, 153, 1095};
597 int lengths[] = {153, 1, 79};
599 back_inserter(expected_results[0].starts));
601 back_inserter(expected_results[0].lengths));
607 expected_results[1].score = 642;
608 expected_results[1].evalue = 2.54740e-78;
609 expected_results[1].bit_score = 2512936031e-7;
610 expected_results[1].num_ident = 73;
611 int starts[] = {0, 154, -1, 307, 153, 308};
612 int lengths[] = {153, 1, 25};
614 back_inserter(expected_results[1].starts));
616 back_inserter(expected_results[1].lengths));
621 expected_results[2].score = 736;
622 expected_results[2].evalue = 1.10324e-97;
623 expected_results[2].bit_score = 2875023632e-7;
624 expected_results[2].num_ident = 83;
625 int starts[] = {0, 190, 68, -1, 70, 258, 92, -1, 93, 280, 118, -1,
626 119, 305, 151, -1, 152, 337, 161, -1, 162, 346, -1,
628 int lengths[] = {68, 2, 22, 1, 25, 1, 32, 1, 9, 1, 28, 4, 42};
630 back_inserter(expected_results[2].starts));
632 back_inserter(expected_results[2].lengths));
637 expected_results[3].score = 53;
638 expected_results[3].evalue = 3.45771;
639 expected_results[3].bit_score = 2441105291e-8;
640 expected_results[3].num_ident = 4;
641 int starts[] = {139, 22, 151, -1, 156, 34};
642 int lengths[] = {12, 5, 17};
644 back_inserter(expected_results[3].starts));
646 back_inserter(expected_results[3].lengths));
652 expected_results[4].score = 52;
653 expected_results[4].evalue = 4.61874;
654 expected_results[4].bit_score = 2402585333e-8;
655 expected_results[4].num_ident = 7;
656 int starts[] = {172, 305, 179, -1, 182, 312};
657 int lengths[] = {7, 3, 33};
659 back_inserter(expected_results[4].starts));
661 back_inserter(expected_results[4].lengths));
666 expected_results[5].score = 52;
667 expected_results[5].evalue = 4.62283;
668 expected_results[5].bit_score = 2402585333e-8;
669 expected_results[5].num_ident = 7;
670 int starts[] = {127, 104, 132, -1, 134, 109};
671 int lengths[] = {5, 2, 15};
673 back_inserter(expected_results[5].starts));
675 back_inserter(expected_results[5].lengths));
681 qa::CSeqAlignCmp
cmp(expected_results, actual_results, opts);
682 identical_results =
cmp.Run(&errors);
684 BOOST_REQUIRE_MESSAGE(identical_results, errors);
698 CDeltaBlast deltablast(query_factory, dbadapter, domain_dbadapter,
704 BOOST_REQUIRE(results[0].GetErrors().
empty());
705 BOOST_REQUIRE(results[1].GetErrors().
empty());
709 results[0].GetSeqAlign()->
Get().
front()->GetSeq_id(0).GetGi(),
713 results[1].GetSeqAlign()->
Get().
front()->GetSeq_id(0).GetPir().GetName(),
719 deltablast.
GetPssm(0)->GetQuery().GetSeq().GetFirstId()->GetGi(),
723 deltablast.
GetPssm(1)->GetQuery().GetSeq().GetFirstId()->GetPir().GetName(),
733 BOOST_REQUIRE_THROW(
CDeltaBlast deltablast(query_factory, dbadapter,
734 domain_dbadapter, m_OptHandle),
747 BOOST_REQUIRE_THROW(
CDeltaBlast deltablast(query_factory, dbadapter,
748 domain_dbadapter, m_OptHandle),
760 BOOST_REQUIRE_THROW(
CDeltaBlast deltablast(query_factory, dbadapter,
761 domain_dbadapter, m_OptHandle),
773 BOOST_REQUIRE_THROW(
CDeltaBlast deltablast(query_factory, dbadapter,
774 domain_dbadapter, m_OptHandle),
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Declares the CBlastRPSOptionsHandle class.
const CSeq_id * GetFirstId() const
Defines BLAST error codes (user errors included)
Handle to the rpsblast options to the BLAST algorithm.
Handle to the protein-protein options to the BLAST algorithm.
A simple realization of the DELTA-BLAST algorithm: search domain database, compute PSSM,...
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
NCBI C++ Object Manager dependent implementation of IQueryFactory.
const CSeq_entry & GetQuery() const
Retrieve the query sequence.
bool HasQuery() const
Has this PSSM a query in it?
Search Results for All Queries.
@ eCompositionBasedStats
Composition-based statistics as in NAR 29:2994-3005, 2001.
@ eNoCompositionBasedStats
Don't use composition based statistics.
#define STATIC_ARRAY_SIZE(array)
Calculate the size of a static array.
BOOST_AUTO_TEST_CASE(TestSingleQuery_CBS)
Declares CPsiBlast, the C++ API for the PSI-BLAST engine.
Declares the CDeltaBlastOptionsHandle class.
void SetEvalueThreshold(double eval)
Sets EvalueThreshold.
void SetCompositionBasedStats(bool mode)
Set composition based statistics mode.
CConstRef< objects::CPssmWithParameters > GetPssm(int index=0) const
Accessor for PSSM computed from CDD hits and used in protein search.
CRef< CSearchResultSet > Run()
Run the DELTA-BLAST engine with one iteration.
ECompoAdjustModes GetCompositionBasedStats() const
Returns this mode, which mostly specifies whether composition of db sequence is taken into account wh...
CRef< CSearchResultSet > GetDomainResults()
Get results of conserved domain search (intermediate results)
void SetFilterString(const char *f, bool clear=true)
Sets FilterString.
@ eBlastDbIsProtein
protein
#define GI_FROM(T, value)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
static bool PreferAccessionOverGi(void)
Check if the option to prefer accession.version over GI is enabled (SeqId/PreferAccessionOverGi or SE...
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders. Add object to the scope with p...
void Reset(void)
Reset reference object.
uint32_t Uint4
4-byte (32-bit) unsigned integer
const TTag & GetTag(void) const
Get the Tag member data.
bool IsSetTag(void) const
appropriate tag Check if a value has been assigned to Tag data member.
TId GetId(void) const
Get the variant data.
void SetPssm(TPssm &value)
Assign a value to Pssm data member.
bool CanGetQuery(void) const
Check if it is safe to call GetQuery method.
const TPssm & GetPssm(void) const
Get the Pssm member data.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
bool IsGenbank(void) const
Check if variant Genbank is selected.
bool IsOther(void) const
Check if variant Other is selected.
bool IsGeneral(void) const
Check if variant General is selected.
TGi GetGi(void) const
Get the variant data.
TVersion GetVersion(void) const
Get the Version member data.
const TGeneral & GetGeneral(void) const
Get the variant data.
bool IsGi(void) const
Check if variant Gi is selected.
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
const TAccession & GetAccession(void) const
Get the Accession member data.
const TSeq & GetSeq(void) const
Get the variant data.
TSet & SetSet(void)
Select the variant.
bool IsSeq(void) const
Check if variant Seq is selected.
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
Declares class which provides internal BLAST database representations to the internal BLAST APIs.
const TYPE & Get(const CNamedParameterList *param)
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
constexpr bool empty(list< Ts... >) noexcept
Magic spell ;-) needed for some weird compilers... very empiric.
std::vector< SeqAlign > TSeqAlignSet
Vector of neutral sequence alignments.
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
API to compare CSeq-aligns produced by BLAST.
void SeqAlignSetConvert(const objects::CSeq_align_set &ss, std::vector< SeqAlign > &retval)
Converts a Seq-align-set into a neutral seqalign for use with the CSeqAlignCmp class.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
vector< CRef< CSeq_loc > > m_Seq_locs
Seq-locs for creating instances of CObjMgr_QueryFactory.
int x_CountNumberUniqueIds(CConstRef< CSeq_align_set > sas)
CRef< CPssmWithParameters > m_Pssm
CRef< CSeq_entry > m_SeqEntry
Contains a single Bioseq.
CRef< CSeq_entry > m_SeqSet
Contains a Bioseq-set with two Bioseqs, gi 7450545 and gi 129295.
CRef< CScope > m_Scope
Scope.
CRef< CSearchDatabase > m_DomainDb
CRef< CDeltaBlastOptionsHandle > m_OptHandle
void x_ReadSeqEntriesFromFile()
CRef< CSearchDatabase > m_SearchDb
~CDeltaBlastTestFixture()
Structure to represent a single sequence to be fed to BLAST.
Utility stuff for more convenient using of Boost.Test library.