76 #ifndef SKIP_DOXYGEN_PROCESSING
89 retval->
SetPacked_int().AddInterval(*
id, (*itr).GetFrom(), (*itr).GetTo());
100 vector<TSeqRange> range_vec;
102 range_vec.push_back(
TSeqRange(73011288, 73011591));
103 range_vec.push_back(
TSeqRange(73080052, 73080223));
104 range_vec.push_back(
TSeqRange(73096483, 73096589));
105 range_vec.push_back(
TSeqRange(73097765, 73097864));
106 range_vec.push_back(
TSeqRange(73113762, 73113809));
107 range_vec.push_back(
TSeqRange(73119266, 73119340));
108 range_vec.push_back(
TSeqRange(73168955, 73169141));
109 range_vec.push_back(
TSeqRange(73178294, 73178376));
110 range_vec.push_back(
TSeqRange(73220818, 73220920));
111 range_vec.push_back(
TSeqRange(73223091, 73223365));
114 ofstream o1(
"temploc1.out.asn");
122 SSeqLoc subj_seqloc(ssl1, scope);
123 subjects.push_back(subj_seqloc);
127 BOOST_CHECK_EQUAL(10, (
int) sav.size());
132 BOOST_CHECK_EQUAL((
int)303, num_ident);
137 ofstream o(
"seqalign.out.asn");
162 subjects.push_back(
SSeqLoc(local_loc, local_scope));
214 int& counter = *
reinterpret_cast<int*
>(progress_info->
user_data);
224 pair<int, int>& progress_pair =
225 *
reinterpret_cast< pair<int, int>*
>(progress_info->
user_data);
227 if (++progress_pair.first == progress_pair.second) {
237 static int num_calls = 0;
238 if (++num_calls < 3) {
264 x_drop_ungapped = 22;
270 x_drop_ungapped = 16;
276 x_drop_ungapped = 22;
280 x_drop_ungapped = 22;
288 x_drop_ungapped = 22;
298 gap_trigger = 19;
break;
300 gap_trigger = 23;
break;
302 gap_trigger = 23;
break;
304 gap_trigger = 23;
break;
306 gap_trigger = 19;
break;
315 BOOST_CHECK_EQUAL(x_drop_ungapped,
317 BOOST_CHECK_EQUAL(33, raw_cutoffs->
x_drop_gap);
324 BOOST_CHECK_EQUAL(x_drop_ungapped,
326 BOOST_CHECK_EQUAL(13, raw_cutoffs->
x_drop_gap);
333 BOOST_CHECK_EQUAL(38, raw_cutoffs->
x_drop_gap);
339 BOOST_CHECK_EQUAL(x_drop_ungapped,
350 size_t num_total_alns = num_queries * num_subjects;
353 BOOST_REQUIRE_EQUAL(result_alnvec.size(), num_total_alns);
360 vector< CConstRef<CSeq_id> > id_prev_subjects;
361 id_prev_subjects.resize(num_subjects);
363 bool prev_query_available =
false;
364 vector<bool> prev_subjects_available(num_subjects,
false);
374 for (
size_t i_query = 0; i_query < num_queries; i_query++)
376 prev_query_available =
false;
377 for (
size_t i_subject = 0; i_subject < num_subjects; i_subject++)
379 size_t i_lin_index = i_query * num_subjects + i_subject;
383 BOOST_REQUIRE(aln_set.
NotNull());
387 if (aln_set->
Get().size() > 0)
396 prev_query_available)
406 prev_subjects_available[i_subject])
410 id_prev_subjects[i_subject].GetObject()));
414 prev_subjects_available[i_subject] =
true;
415 id_prev_subjects[i_subject] = id_subject;
418 prev_query_available =
true;
419 id_prev_query = id_query;
434 BOOST_CHECK_EQUAL(314, (
int)ungapped_stats->
lookup_hits);
437 BOOST_CHECK_EQUAL(1, gapped_stats->
extensions);
441 BOOST_CHECK_EQUAL(157, (
int)ungapped_stats->
lookup_hits);
444 BOOST_CHECK_EQUAL(3, gapped_stats->
extensions);
448 BOOST_CHECK_EQUAL(30, (
int)ungapped_stats->
lookup_hits);
451 BOOST_CHECK_EQUAL(1, gapped_stats->
extensions);
455 BOOST_CHECK_EQUAL(582, (
int)ungapped_stats->
lookup_hits);
462 BOOST_CHECK_EQUAL(1, gapped_stats->
extensions);
466 BOOST_CHECK_EQUAL(282, (
int)ungapped_stats->
lookup_hits);
469 BOOST_CHECK_EQUAL(1, gapped_stats->
extensions);
473 BOOST_CHECK_EQUAL(157, (
int)ungapped_stats->
lookup_hits);
476 BOOST_CHECK_EQUAL(1, gapped_stats->
extensions);
480 BOOST_CHECK_EQUAL(5, (
int)ungapped_stats->
lookup_hits);
483 BOOST_CHECK_EQUAL(1, gapped_stats->
extensions);
487 BOOST_CHECK_EQUAL(2590, (
int)ungapped_stats->
lookup_hits);
492 BOOST_CHECK_EQUAL(18587, (
int)ungapped_stats->
lookup_hits);
497 BOOST_CHECK_EQUAL(210, (
int)ungapped_stats->
lookup_hits);
500 BOOST_CHECK_EQUAL(3, gapped_stats->
extensions);
504 BOOST_CHECK_EQUAL(15, (
int)ungapped_stats->
lookup_hits);
507 BOOST_CHECK_EQUAL(2, gapped_stats->
extensions);
511 BOOST_CHECK_EQUAL(2129, (
int)ungapped_stats->
lookup_hits);
514 BOOST_CHECK_EQUAL(8, gapped_stats->
extensions);
518 BOOST_CHECK_EQUAL(963, (
int)ungapped_stats->
lookup_hits);
521 BOOST_CHECK_EQUAL(11, gapped_stats->
extensions);
531 BOOST_CHECK_EQUAL(3579, (
int)ungapped_stats->
lookup_hits);
534 BOOST_CHECK_EQUAL(3580, (
int)ungapped_stats->
lookup_hits);
539 BOOST_CHECK_EQUAL(3939, (
int)ungapped_stats->
lookup_hits);
542 BOOST_CHECK_EQUAL(27, gapped_stats->
extensions);
546 BOOST_CHECK_EQUAL(2666, (
int)ungapped_stats->
lookup_hits);
549 BOOST_CHECK_EQUAL(2, gapped_stats->
extensions);
553 BOOST_CHECK_EQUAL(5950, (
int)ungapped_stats->
lookup_hits);
556 BOOST_CHECK_EQUAL(2, gapped_stats->
extensions);
560 BOOST_CHECK_EQUAL(108, (
int)ungapped_stats->
lookup_hits);
566 BOOST_CHECK_EQUAL(3, gapped_stats->
extensions);
570 BOOST_CHECK_EQUAL(14, (
int)ungapped_stats->
lookup_hits);
573 BOOST_CHECK_EQUAL(1, gapped_stats->
extensions);
588 BOOST_REQUIRE(sav[0].NotEmpty());
589 BOOST_REQUIRE( !sav[0]->IsEmpty() );
590 BOOST_REQUIRE(sav[0]->
Get().begin()->NotEmpty());
605 BOOST_CHECK_EQUAL(232, num_ident);
617 BOOST_CHECK_EQUAL((
size_t)1, ancillary_data.size());
618 BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() !=
NULL );
619 BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() !=
NULL );
620 BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (
Int8)0 );
630 unique_ptr<SSeqLoc> subj(
647 int score = 0, comp_adj = 0;
650 BOOST_CHECK_EQUAL(26, score);
651 BOOST_CHECK_EQUAL(2, comp_adj);
656 BOOST_CHECK_EQUAL((
size_t)1, ancillary_data.size());
657 BOOST_REQUIRE( ancillary_data.front().NotEmpty() );
658 BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() !=
NULL );
659 BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() !=
NULL );
660 BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (
Int8)0 );
668 pair<TSeqPos, TSeqPos>
range(150000, 170000);
674 BOOST_CHECK_EQUAL(11, (
int) sav[0]->
Get().
size());
677 vector < CRef< CSeq_loc > > locs = sar->
GetSegs().
GetStd().front()->GetLoc();
681 BOOST_CHECK_EQUAL(161, num_ident);
683 ofstream o(
"minus1.new.asn");
701 BOOST_CHECK_EQUAL(1, (
int) sav[0]->
Get().
size());
704 vector < CRef< CSeq_loc > > locs = sar->
GetSegs().
GetStd().front()->GetLoc();
708 BOOST_CHECK_EQUAL(11, num_ident);
710 ofstream o(
"minus2.asn");
723 unique_ptr<SSeqLoc> subj(
736 BOOST_CHECK_EQUAL(229, num_ident);
746 BOOST_CHECK_EQUAL((
size_t)1, ancillary_data.size());
747 BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() !=
NULL );
748 BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() !=
NULL );
749 BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (
Int8)0 );
758 unique_ptr<SSeqLoc> subj(
770 BOOST_CHECK_EQUAL(1, (
int)sav[0]->Size());
782 BOOST_CHECK_EQUAL(5, num_ident);
807 BOOST_CHECK_EQUAL(377, num_ident);
819 BOOST_CHECK_EQUAL((
size_t)1, ancillary_data.size());
820 BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() !=
NULL );
821 BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() !=
NULL );
822 BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (
Int8)0 );
835 unique_ptr<SSeqLoc>
query(
851 !
strcmp(
"BLASTN penalty must be negative",
858 CSeq_id qid(
"ref|NT_024524.13");
859 pair<TSeqPos, TSeqPos>
range(27886902, 27886932);
860 unique_ptr<SSeqLoc>
query(
866 unique_ptr<SSeqLoc> subj(
870 options->SetTraditionalBlastnDefaults();
871 options->SetMismatchPenalty(-1);
872 options->SetMatchReward(1);
873 options->SetGapXDropoff(100);
874 options->SetMaskAtHash(
false);
877 BOOST_REQUIRE_NO_THROW(results = blaster.
RunEx());
883 const bool kHasProteinQuery(
true);
900 unique_ptr<CAutoEnvironmentVariable> envvar1, envvar2;
907 BOOST_REQUIRE_NO_THROW(results = blaster.
Run());
908 BOOST_REQUIRE_EQUAL(3U, results->
size());
909 for (
size_t query_idx = 0; query_idx < results->
size(); query_idx++) {
910 const string& warnings((*results)[query_idx].GetWarningStrings());
911 const string& errors((*results)[query_idx].GetErrorStrings());
915 oss <<
"Forced splitting of queries: ";
917 oss <<
"No splitting of queries: ";
920 if (query_idx == 0 || query_idx == 2) {
921 oss <<
" expected no warnings/errors, got errors='" << errors <<
"'; "
922 <<
"warnings='" << warnings <<
"'";
924 BOOST_CHECK_MESSAGE(
kEmptyStr == warnings, msg);
925 BOOST_CHECK_MESSAGE(
kEmptyStr == errors, msg);
928 <<
"; instead got '" << warnings <<
"'";
943 BOOST_REQUIRE(fnptr ==
NULL);
946 try { sav = blaster.
Run(); }
948 BOOST_REQUIRE_EQUAL((
size_t)0, sav.size());
958 query_vec.push_back(*ql);
965 subj_vec.push_back(*sl);
977 CLocalBlast blaster(queries, tblastn_opts, db_adapter);
983 BOOST_CHECK(
result.HasAlignments());
985 BOOST_CHECK_EQUAL((
size_t)1, alignment->
Size());
988 BOOST_CHECK_EQUAL(
static_cast<int>(
eNa_strand_unknown),
static_cast<int>((*aln)->GetSeqStrand(0)));
990 BOOST_CHECK_EQUAL(
static_cast<int>(
eNa_strand_plus),
static_cast<int>((*aln)->GetSeqStrand(1)));
1003 BOOST_REQUIRE(fnptr ==
NULL);
1006 try { sav = blaster.
Run(); }
1008 BOOST_REQUIRE_EQUAL((
size_t)0, sav.size());
1019 BOOST_REQUIRE(fnptr ==
NULL);
1022 try { sav = blaster.
Run(); }
1024 BOOST_REQUIRE_EQUAL((
size_t)0, sav.size());
1037 BOOST_REQUIRE(fnptr ==
NULL);
1040 try { sav = blaster.
Run(); }
1042 BOOST_REQUIRE_EQUAL((
size_t)0, sav.size());
1055 BOOST_REQUIRE(fnptr ==
NULL);
1058 try { sav = blaster.
Run(); }
1060 BOOST_REQUIRE_EQUAL((
size_t)0, sav.size());
1065 CSeq_id query_id(
"gi|129295");
1073 BOOST_REQUIRE(fnptr ==
NULL);
1076 try { sav = blaster.
Run(); }
1078 BOOST_REQUIRE_EQUAL((
size_t)0, sav.size());
1082 #define ARRAY_SIZE(a) (sizeof(a)/sizeof(*a))
1088 TIntId protein_gis[] = { 6, 129295, 15606659, 4336138, 5556 };
1089 TIntId nucl_gis[] = { 272208, 272217, 272211, 272247, 272227, 272236,
1092 vector<TIntId> q_gis, s_gis;
1093 if (query_is_nucl) {
1096 back_inserter(q_gis));
1098 copy(&protein_gis[0],
1100 back_inserter(q_gis));
1106 back_inserter(s_gis));
1108 copy(&protein_gis[0],
1110 back_inserter(s_gis));
1115 ITERATE(vector<TIntId>, itr, q_gis) {
1121 queries.push_back(
SSeqLoc(loc, scope));
1125 ITERATE(vector<TIntId>, itr, s_gis) {
1131 subjects.push_back(
SSeqLoc(loc, scope));
1143 int num_callbacks_executed(0);
1146 (
void*) &num_callbacks_executed);
1147 BOOST_REQUIRE(fnptr ==
NULL);
1151 int max_interrupt_callbacks =
r.GetRand(1, num_callbacks_executed);
1152 pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
1155 (
void*)&progress_pair);
1159 try { sav = blaster->Run(); }
1161 BOOST_REQUIRE_EQUAL((
size_t)0, sav.size());
1170 int num_callbacks_executed(0);
1173 (
void*)&num_callbacks_executed);
1174 BOOST_REQUIRE(fnptr ==
NULL);
1178 int max_interrupt_callbacks =
r.GetRand(1, num_callbacks_executed);
1179 pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
1182 (
void*)&progress_pair);
1186 try { sav = blaster->Run(); }
1188 BOOST_REQUIRE_EQUAL((
size_t)0, sav.size());
1198 int num_callbacks_executed(0);
1201 (
void*) & num_callbacks_executed);
1202 BOOST_REQUIRE(fnptr ==
NULL);
1206 int max_interrupt_callbacks =
r.GetRand(1, num_callbacks_executed);
1207 pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
1210 (
void*)&progress_pair);
1214 try { sav = blaster->Run(); }
1216 BOOST_REQUIRE_EQUAL((
size_t)0, sav.size());
1225 int num_callbacks_executed(0);
1228 (
void*)&num_callbacks_executed);
1229 BOOST_REQUIRE(fnptr ==
NULL);
1233 int max_interrupt_callbacks =
r.GetRand(1, num_callbacks_executed);
1234 pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
1237 (
void*)&progress_pair);
1241 try { sav = blaster->Run(); }
1243 BOOST_REQUIRE_EQUAL((
size_t)0, sav.size());
1252 int num_callbacks_executed(0);
1255 (
void*) & num_callbacks_executed);
1256 BOOST_REQUIRE(fnptr ==
NULL);
1260 int max_interrupt_callbacks =
r.GetRand(1, num_callbacks_executed);
1261 pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
1264 (
void*)&progress_pair);
1268 try { sav = blaster->Run(); }
1270 BOOST_REQUIRE_EQUAL((
size_t)0, sav.size());
1281 BOOST_REQUIRE(fnptr ==
NULL);
1284 try { sav = blaster.
Run(); }
1286 BOOST_REQUIRE_EQUAL((
size_t)0, sav.size());
1297 BOOST_REQUIRE(fnptr ==
NULL);
1300 try { sav = blaster->Run(); }
1302 BOOST_REQUIRE_EQUAL((
size_t)0, sav.size());
1312 BOOST_REQUIRE(fnptr ==
NULL);
1315 try { sav = blaster->Run(); }
1317 BOOST_REQUIRE_EQUAL((
size_t)0, sav.size());
1322 vector<TIntId> q_gis, s_gis;
1325 q_gis.push_back(129295);
1326 q_gis.push_back(-1);
1329 s_gis.push_back(129295);
1330 s_gis.push_back(4336138);
1333 ITERATE(vector<TIntId>, itr, q_gis) {
1339 queries.push_back(
SSeqLoc(loc, scope));
1343 ITERATE(vector<TIntId>, itr, s_gis) {
1349 subjects.push_back(
SSeqLoc(loc, scope));
1358 BOOST_REQUIRE_EQUAL(subjects.size()*queries.size(), sas_v.size());
1360 BOOST_REQUIRE(m[0].
empty());
1361 BOOST_REQUIRE(!m[1].
empty());
1365 BOOST_REQUIRE(qm.front()->GetMessage().find(
"Cannot resolve") !=
1370 BOOST_REQUIRE_EQUAL(0, (
int) sas_v[2]->Size());
1374 BOOST_REQUIRE_EQUAL(q_gis.size()*s_gis.size(), res->
GetNumResults());
1383 vector<TIntId> q_gis, s_gis;
1386 q_gis.push_back(296863684);
1387 q_gis.push_back(129295);
1390 s_gis.push_back(129296);
1393 ITERATE(vector<TIntId>, itr, q_gis) {
1399 queries.push_back(
SSeqLoc(loc, scope));
1403 ITERATE(vector<TIntId>, itr, s_gis) {
1409 subjects.push_back(
SSeqLoc(loc, scope));
1418 BOOST_REQUIRE_EQUAL(subjects.size()*queries.size(), sas_v.size());
1420 BOOST_REQUIRE(!m[0].
empty());
1421 BOOST_REQUIRE(m[1].
empty());
1430 const TSeqPos kFakeBioseqLength = 12;
1432 vector<char> na_data(kFakeBioseqLength/4,
byte);
1436 fake_bioseq->
SetInst().SetLength(kFakeBioseqLength);
1437 fake_bioseq->
SetInst().SetSeq_data().SetNcbi2na().Set().swap(na_data);
1440 fake_bioseq->
SetId().push_back(fake_id);
1446 unique_ptr<SSeqLoc> sl_bad(
new SSeqLoc(*fake_loc, *scope));
1449 BOOST_REQUIRE_EQUAL(kFakeBioseqLength,
len);
1452 queries.push_back(*sl1);
1453 queries.push_back(*sl_bad);
1454 queries.push_back(*sl2);
1458 unique_ptr<SSeqLoc> subj_loc
1461 subject.push_back(*subj_loc);;
1464 opts_handle->SetMaskAtHash(
false);
1467 sas_v = bl2seq.
Run();
1470 BOOST_REQUIRE_EQUAL(sas_v.size(), m.size());
1471 BOOST_REQUIRE_EQUAL(queries.size(), sas_v.size());
1473 BOOST_REQUIRE(m[0].
empty());
1474 BOOST_REQUIRE(!m[1].
empty());
1475 BOOST_REQUIRE(m[2].
empty());
1480 BOOST_REQUIRE(qm.size() == 1);
1483 BOOST_REQUIRE((*itr)->GetMessage().find(
"Could not calculate "
1484 "ungapped Karlin-Altschul "
1491 BOOST_REQUIRE((*alignments)->GetSegs().IsDisc());
1492 BOOST_REQUIRE((*alignments)->GetSegs().GetDisc().Get().empty());
1521 SSeqLoc subj_seqloc(ssl, scope);
1525 BOOST_REQUIRE_EQUAL(0U, sav[0]->
Get().
size());
1531 const bool is_protein(
false);
1547 subj_vec.push_back(
SSeqLoc(ssl, scope));
1554 size_t num_queries = seqs->Size();
1555 size_t num_subjects = subj_vec.size();
1556 BOOST_REQUIRE_EQUAL((
size_t)1, num_queries);
1557 BOOST_REQUIRE_EQUAL((
size_t)1, num_subjects);
1560 CLocalBlast blaster(queries, opts_handle, subjects);
1563 BOOST_REQUIRE_EQUAL((num_queries*num_subjects),
1565 BOOST_REQUIRE_EQUAL((num_queries*num_subjects), results->
size());
1567 BOOST_REQUIRE_EQUAL(num_subjects,
1582 vector<TIntId> q_gis;
1592 queries.push_back(
SSeqLoc(loc, scope));
1595 BOOST_REQUIRE_THROW(
CLocalBlast blaster(query_fact, opts, subject_adapter),
1606 const string kFileName(
"data/blastp_compstats.fa");
1609 throw runtime_error(
"Failed to open " +
kFileName);
1611 throw runtime_error(
"Failed to read sequence from " +
kFileName);
1614 const string kSeqIdString1(
"lcl|1");
1623 opts_handle->SetWordSize(2);
1624 opts_handle->SetEvalueThreshold(20000);
1625 opts_handle->SetFilterString(
"F");
1626 opts_handle->SetMatrixName(
"PAM30");
1627 opts_handle->SetGapOpeningCost(9);
1628 opts_handle->SetGapExtensionCost(1);
1629 opts_handle->SetOptions().SetCompositionBasedStats(
1632 CBl2Seq blaster(ss1, *ss2, *opts_handle);
1640 unique_ptr<SSeqLoc>
query(
1642 query->genetic_code_id = 1;
1651 BOOST_REQUIRE_EQUAL(1, (
int)sar->
GetSegs().
GetStd().size());
1662 mask_vector.push_back(
TSeqRange(0, 44));
1663 mask_vector.push_back(
TSeqRange(69, 582));
1664 mask_vector.push_back(
TSeqRange(610, 834));
1665 mask_vector.push_back(
TSeqRange(854, 1000));
1676 BOOST_REQUIRE_EQUAL((
size_t)1, sav.front()->Get().size());
1684 BOOST_REQUIRE_EQUAL((
size_t)4, sav.front()->Get().size());
1690 unique_ptr<SSeqLoc> sl(
1695 CBl2Seq blaster(*sl, *sl, *opts);
1705 sav = blaster.
Run();
1706 BOOST_REQUIRE_EQUAL(1, (
int)sav.size());
1707 sar = *(sav[0]->Get().begin());
1715 sav = blaster.
Run();
1716 sar = *(sav[0]->Get().begin());
1723 CSeq_id query_id(
"gi|2655203");
1724 unique_ptr<SSeqLoc> ql(
1728 CSeq_id subject_id(
"gi|200811");
1729 unique_ptr<SSeqLoc> sl(
1737 opts->SetTraditionalMegablastDefaults();
1738 opts->SetMatchReward(1);
1739 opts->SetMismatchPenalty(-2);
1740 opts->SetGapOpeningCost(3);
1741 opts->SetGapExtensionCost(1);
1742 opts->SetWordSize(24);
1746 CBl2Seq blaster(*ql, *sl, *opts);
1750 BOOST_REQUIRE_EQUAL(1U, res->
size());
1753 BOOST_REQUIRE_EQUAL(1U, sas->
Get().size());
1757 BOOST_REQUIRE_EQUAL(832, score);
1760 BOOST_REQUIRE_EQUAL(1U, alignments.size());
1761 sa = alignments[0]->Get().front();
1763 BOOST_REQUIRE_EQUAL(832, score);
1772 ifstream in1(
"data/greedy1a.fsa");
1774 throw runtime_error(
"Failed to open file1");
1776 throw runtime_error(
"Failed to read sequence from file1");
1779 const string kSeqIdString1(
"lcl|1");
1785 ifstream in2(
"data/greedy1b.fsa");
1787 throw runtime_error(
"Failed to open file2");
1789 throw runtime_error(
"Failed to read sequence from file2");
1792 const string kSeqIdString2(
"lcl|2");
1798 handle->SetGapOpeningCost(0);
1799 handle->SetGapExtensionCost(0);
1800 handle->SetDustFiltering(
false);
1804 CBl2Seq blaster1(ss1, ss2, *handle);
1807 BOOST_REQUIRE_EQUAL(1, (
int)sav[0]->Size());
1812 BOOST_REQUIRE((*itr)->IsSetId());
1813 if ((*itr)->GetId().GetStr() ==
"score") {
1814 BOOST_REQUIRE_EQUAL(619, (*itr)->GetValue().GetInt());
1819 handle->SetMatchReward(10);
1820 handle->SetMismatchPenalty(-25);
1821 handle->SetGapXDropoff(100.0);
1822 handle->SetGapXDropoffFinal(100.0);
1824 CBl2Seq blaster2(ss1, ss2, *handle);
1825 sav = blaster2.
Run();
1826 sar = *(sav[0]->Get().begin());
1827 BOOST_REQUIRE_EQUAL(1, (
int)sav[0]->Size());
1832 BOOST_REQUIRE((*itr)->IsSetId());
1833 if ((*itr)->GetId().GetStr() ==
"score") {
1834 BOOST_REQUIRE_EQUAL(6035, (*itr)->GetValue().GetInt());
1843 CSeq_id query_id(
"gi|56384585");
1844 unique_ptr<SSeqLoc> ql(
1848 CSeq_id subject_id(
"gi|56384585");
1849 unique_ptr<SSeqLoc> sl(
1854 opts->SetTraditionalMegablastDefaults();
1855 opts->SetMatchReward(1);
1856 opts->SetMismatchPenalty(-2);
1857 opts->SetGapOpeningCost(0);
1858 opts->SetGapExtensionCost(0);
1859 opts->SetWordSize(28);
1863 CBl2Seq blaster(*ql, *sl, *opts);
1867 BOOST_REQUIRE_EQUAL(1U, res->
size());
1873 BOOST_REQUIRE_EQUAL(3794584, score);
1878 unique_ptr<SSeqLoc>
query(
1887 BOOST_REQUIRE_EQUAL(1, (
int)sar->
GetSegs().
GetStd().size());
1892 unique_ptr<SSeqLoc>
query(
1901 BOOST_REQUIRE(sav[0]->IsEmpty() ==
true);
1907 unique_ptr<SSeqLoc> sl(
1913 BOOST_REQUIRE_EQUAL(39, (
int)sar->
GetSegs().
GetStd().size());
1920 unique_ptr<SSeqLoc> sl(
1926 BOOST_REQUIRE_EQUAL(11, (
int)sar->
GetSegs().
GetStd().size());
1931 unique_ptr<SSeqLoc> sl(
1937 BOOST_REQUIRE_EQUAL(12, (
int)sar->
GetSegs().
GetStd().size());
1942 const int total_num_hsps = 49;
1943 const int num_hsps_to_check = 8;
1944 const int score_array[num_hsps_to_check] =
1945 { 947, 125, 820, 113, 624, 221, 39, 778};
1946 const int sum_n_array[num_hsps_to_check] =
1947 { 2, 2, 2, 2, 3, 3, 3, 0};
1949 unique_ptr<SSeqLoc> qsl(
1952 pair<TSeqPos, TSeqPos>
range(15185000, 15195000);
1953 unique_ptr<SSeqLoc> ssl(
1964 list< CRef<CStd_seg> >& segs = sar->
SetSegs().SetStd();
1965 BOOST_REQUIRE_EQUAL(total_num_hsps, (
int)segs.size());
1968 const vector< CRef< CScore > >& score_v = (*itr)->GetScores();
1970 BOOST_REQUIRE((*sitr)->IsSetId());
1971 if ((*sitr)->GetId().GetStr() ==
"score") {
1972 BOOST_REQUIRE_EQUAL(score_array[index],
1973 (*sitr)->GetValue().GetInt());
1974 }
else if ((*sitr)->GetId().GetStr() ==
"sum_n") {
1975 BOOST_REQUIRE_EQUAL(sum_n_array[index],
1976 (*sitr)->GetValue().GetInt());
1979 if (++index == num_hsps_to_check)
2001 unique_ptr<SSeqLoc>
query(
2006 const string kRepeatDb(
"junk");
2009 BOOST_REQUIRE(is_repeat_filtering_on);
2013 BOOST_REQUIRE_EQUAL(kRepeatDb, repeat_db);
2022 unique_ptr<SSeqLoc>
query(
2026 opts->SetTraditionalMegablastDefaults();
2027 opts->SetRepeatFiltering(
true);
2028 string repeat_db(opts->GetRepeatFilteringDB()
2029 ? opts->GetRepeatFilteringDB()
2033 const string kRepeatDb(
"repeat/repeat_9606");
2034 opts->SetRepeatFilteringDB(kRepeatDb.c_str());
2035 repeat_db.assign(opts->GetRepeatFilteringDB()
2036 ? opts->GetRepeatFilteringDB()
2038 BOOST_REQUIRE_EQUAL(kRepeatDb, repeat_db);
2040 bool is_repeat_filtering_on = opts->GetRepeatFiltering();
2041 BOOST_REQUIRE(is_repeat_filtering_on);
2052 unique_ptr<SSeqLoc>
query(
2056 opts->SetTraditionalMegablastDefaults();
2058 opts->SetWindowMaskerDatabase(kWindowMaskerDb.c_str());
2059 string wmdb(opts->GetWindowMaskerDatabase()
2060 ? opts->GetWindowMaskerDatabase() :
kEmptyStr);
2061 BOOST_REQUIRE_EQUAL(kWindowMaskerDb, wmdb);
2062 BOOST_REQUIRE_EQUAL(0, opts->GetWindowMaskerTaxId());
2072 unique_ptr<SSeqLoc>
query(
2076 opts->SetTraditionalMegablastDefaults();
2077 opts->SetWindowMaskerTaxId(9606);
2087 unique_ptr<SSeqLoc>
query(
2092 const string kWindowMaskerDb(
"Dummydb");
2096 BOOST_REQUIRE_EQUAL(kWindowMaskerDb, wmdb);
2104 unique_ptr<SSeqLoc>
query(
2109 const int kInvalidTaxId = -1;
2119 BOOST_CHECK_EQUAL(1, rv);
2121 BOOST_CHECK_EQUAL(1, rv);
2126 BOOST_CHECK_EQUAL(0, rv);
2132 unique_ptr<SSeqLoc>
query(
2136 opts->SetTraditionalMegablastDefaults();
2139 opts->SetWindowMaskerTaxId(-1);
2158 unique_ptr<SSeqLoc>
query(
2163 unique_ptr<SSeqLoc> subj(
2167 opts->SetTraditionalMegablastDefaults();
2168 opts->SetRepeatFiltering(
true);
2178 unique_ptr<SSeqLoc>
query(
2182 unique_ptr<SSeqLoc> subj(
2186 opts->SetTraditionalBlastnDefaults();
2210 sav = blaster.
Run();
2211 sar = *(sav[0]->Get().begin());
2230 sav = blaster.
Run();
2231 BOOST_REQUIRE_EQUAL(1, (
int)sav.size());
2232 sar = *(sav[0]->Get().begin());
2238 unique_ptr<SSeqLoc>
query(
2242 unique_ptr<SSeqLoc> subj(
2247 opts->SetTraditionalBlastnDefaults();
2248 CBl2Seq blaster(*subj, *subj, *opts);
2255 sav = blaster.
Run();
2256 BOOST_REQUIRE_EQUAL(2, (
int)sav[0]->Size());
2257 sar = *(sav[0]->Get().begin());
2263 unique_ptr<SSeqLoc>
query(
2267 unique_ptr<SSeqLoc> subj(
2272 opts->SetTraditionalBlastnDefaults();
2280 sav = blaster.
Run();
2281 sar = *(sav[0]->Get().begin());
2291 sequences.push_back(*sl1);
2295 sequences.push_back(*sl2);
2306 BOOST_REQUIRE_EQUAL(4, (
int)seqalign_v.size());
2307 BOOST_REQUIRE_EQUAL(2, (
int)sequences.size());
2311 BOOST_REQUIRE_EQUAL(1U, seqalign_v[0]->
Get().
size());
2312 sar = *(seqalign_v[0]->Get().begin());
2315 BOOST_REQUIRE_EQUAL((
size_t)2, seqalign_v[1]->
Get().
size());
2316 sar = *(seqalign_v[1]->Get().begin());
2318 sar = *(++(seqalign_v[1]->Get().begin()));
2321 BOOST_REQUIRE_EQUAL((
size_t)3, seqalign_v[2]->
Get().
size());
2322 sar = *(seqalign_v[2]->Get().begin());
2324 sar = *(++(seqalign_v[2]->Get().begin()));
2327 BOOST_REQUIRE_EQUAL((
size_t)1, seqalign_v[3]->
Get().
size());
2328 sar = *(seqalign_v[3]->Get().begin());
2344 unique_ptr<SSeqLoc> sl1(
2346 sequences.push_back(*sl1);
2347 BOOST_REQUIRE(sl1->mask.Empty());
2350 unique_ptr<SSeqLoc> sl2(
2352 sequences.push_back(*sl2);
2353 BOOST_REQUIRE(sl2->mask.Empty());
2357 BOOST_REQUIRE_EQUAL(2, (
int)sequences.size());
2358 BOOST_REQUIRE_EQUAL(4, (
int)seqalign_v.size());
2364 sar = *(seqalign_v[2]->Get().begin());
2376 void DoSearchWordSize4(
const char *file1,
const char *file2) {
2381 ifstream in1(file1);
2383 throw runtime_error(
"Failed to open file1");
2385 throw runtime_error(
"Failed to read sequence from file1");
2386 scope->AddTopLevelSeqEntry(*seq_entry1);
2388 const string kSeqIdString1(
"lcl|1");
2390 seqloc1->SetWhole(*id1);
2394 ifstream in2(file2);
2396 throw runtime_error(
"Failed to open file2");
2398 throw runtime_error(
"Failed to read sequence from file2");
2399 scope->AddTopLevelSeqEntry(*seq_entry2);
2401 const string kSeqIdString2(
"lcl|2");
2403 seqloc2->SetWhole(*id2);
2414 CBl2Seq blaster(ss1, ss2, handle);
2415 blaster.RunWithoutSeqalignGeneration();
2417 BOOST_REQUIRE(results !=
NULL);
2421 BOOST_REQUIRE(hsp_list->
hspcnt > 0);
2427 for (
int i = 0;
i < hsp_list->
hspcnt;
i++) {
2429 BOOST_REQUIRE(hsp !=
NULL);
2444 BOOST_REQUIRE_EQUAL(1, res->
size());
2446 BOOST_REQUIRE(
r.HasAlignments());
2453 DoSearchWordSize4(
"data/blastn_size4a.fsa",
2454 "data/blastn_size4b.fsa");
2460 DoSearchWordSize4(
"data/blastn_size4c.fsa",
2461 "data/blastn_size4d.fsa");
2474 opts->SetOutOfFrameMode();
2475 opts->SetFrameShiftPenalty(10);
2476 opts->SetFilterString(
"m;L");
2477 opts->SetEvalueThreshold(0.01);
2482 BOOST_REQUIRE_EQUAL(1, (
int)sav.size());
2484 BOOST_REQUIRE_EQUAL(2, (
int)sav[0]->Size());
2499 opts->SetOutOfFrameMode();
2500 opts->SetFrameShiftPenalty(5);
2502 opts->SetFilterString(
"L");
2507 BOOST_REQUIRE_EQUAL(3, (
int)sav[0]->Size());
2514 BOOST_REQUIRE((*itr)->IsSetId());
2515 if ((*itr)->GetId().GetStr() ==
"num_ident") {
2516 BOOST_REQUIRE_EQUAL(80, (*itr)->GetValue().GetInt());
2526 unique_ptr<SSeqLoc>
query(
2532 opts->SetOutOfFrameMode();
2533 opts->SetFrameShiftPenalty(10);
2535 opts->SetFilterString(
"m;L");
2536 opts->SetEvalueThreshold(0.01);
2541 BOOST_REQUIRE_EQUAL(2, (
int)sav[0]->Size());
2552 unique_ptr<SSeqLoc>
query(
2558 opts->SetOutOfFrameMode();
2559 opts->SetFrameShiftPenalty(10);
2560 opts->SetFilterString(
"L");
2565 BOOST_REQUIRE_EQUAL(5, (
int)sav[0]->Size());
2572 string aligned_strands) {
2575 vector< pair<TSignedSeqPos, TSignedSeqPos> > starts;
2576 starts.push_back(make_pair(7685759, 10));
2577 starts.push_back(make_pair(7685758, -1));
2578 starts.push_back(make_pair(7685718, 269));
2579 starts.push_back(make_pair(7685717, -1));
2580 starts.push_back(make_pair(7685545, 309));
2582 const size_t kNumSegments(starts.size());
2585 vector<TSeqPos> lengths;
2586 lengths.reserve(kNumSegments);
2587 lengths.push_back(259);
2588 lengths.push_back(1);
2589 lengths.push_back(40);
2590 lengths.push_back(1);
2591 lengths.push_back(172);
2594 typedef vector< pair<ENa_strand, ENa_strand> > TStrandPairs;
2595 TStrandPairs strands(kNumSegments,
2599 if (aligned_strands ==
"plus-minus") {
2600 reverse(starts.begin(), starts.end());
2601 reverse(lengths.begin(), lengths.end());
2603 swap(itr->first, itr->second);
2606 BOOST_REQUIRE_EQUAL(kNumSegments, lengths.size());
2607 BOOST_REQUIRE_EQUAL(kNumSegments, strands.size());
2611 BOOST_REQUIRE_EQUAL(1, (
int)sav[0]->Size());
2615 const size_t kNumDim(ds.
GetDim());
2616 vector< TSignedSeqPos > seg_starts = ds.
GetStarts();
2617 vector< TSeqPos> seg_lengths = ds.
GetLens();
2618 vector< ENa_strand> seg_strands = ds.
GetStrands();
2619 BOOST_REQUIRE_EQUAL(kNumSegments, seg_lengths.size());
2620 BOOST_REQUIRE_EQUAL(kNumSegments*kNumDim, seg_starts.size());
2623 for (
size_t index = 0; index < kNumSegments; ++index) {
2625 os <<
"Segment " << index <<
": expected " << lengths[index]
2626 <<
" actual " << seg_lengths[index];
2627 BOOST_REQUIRE_MESSAGE(lengths[index] == seg_lengths[index],
2631 os <<
"Segment " << index <<
": expected " << starts[index].first
2632 <<
" actual " << seg_starts[2*index];
2633 BOOST_REQUIRE_MESSAGE(starts[index].
first == seg_starts[2*index],
2636 os <<
"Segment " << index <<
": expected " << starts[index].second
2637 <<
" actual " << seg_starts[2*index];
2638 BOOST_REQUIRE_MESSAGE(starts[index].second == seg_starts[2*index+1],
2641 os <<
"Segment " << index <<
": expected " << (
int)strands[index].
first
2642 <<
" actual " << (
int)seg_strands[2*index];
2643 BOOST_REQUIRE_MESSAGE(strands[index].
first == seg_strands[2*index],
2646 os <<
"Segment " << index <<
": expected " << (
int)strands[index].second
2647 <<
" actual " << (
int)seg_strands[2*index];
2648 BOOST_REQUIRE_MESSAGE(strands[index].second == seg_strands[2*index+1],
2655 const int num_segs = 5;
2656 const int num_starts = 10;
2657 const int starts[num_starts] = { 7685759, 0, 7685758, -1, 7685718,
2658 269, 7685717, -1, 7685545, 309 };
2659 const int lengths[num_segs] = { 269, 1, 40, 1, 172 };
2663 BOOST_REQUIRE_EQUAL(1, (
int)sav[0]->Size());
2665 vector< TSignedSeqPos > seg_starts = segs_itr->GetStarts();
2666 vector< TSeqPos> seg_lengths = segs_itr->GetLens();
2667 vector< ENa_strand> seg_strands = segs_itr->GetStrands();
2668 BOOST_REQUIRE_EQUAL(num_segs, (
int)seg_lengths.size());
2669 BOOST_REQUIRE_EQUAL(num_starts, (
int)seg_starts.size());
2670 for (index = 0; index < num_segs; ++index) {
2671 BOOST_REQUIRE_EQUAL(lengths[index], (
int)seg_lengths[index]);
2672 BOOST_REQUIRE_EQUAL(starts[2*index], (
int)seg_starts[2*index]);
2673 BOOST_REQUIRE_EQUAL(starts[2*index+1], (
int)seg_starts[2*index+1]);
2681 const int num_segs = 5;
2682 const int num_starts = 10;
2683 const int starts[num_starts] = { 309, 7685545, -1, 7685717, 269, 7685718,
2684 -1, 7685758, 0, 7685759 };
2685 const int lengths[num_segs] = { 172, 1, 40, 1, 269 };
2689 BOOST_REQUIRE_EQUAL(1, (
int)sav[0]->Size());
2691 vector< TSignedSeqPos > seg_starts = segs_itr->GetStarts();
2692 vector< TSeqPos> seg_lengths = segs_itr->GetLens();
2693 vector< ENa_strand> seg_strands = segs_itr->GetStrands();
2694 BOOST_REQUIRE_EQUAL(num_segs, (
int)seg_lengths.size());
2695 BOOST_REQUIRE_EQUAL(num_starts, (
int)seg_starts.size());
2696 for (index = 0; index < num_segs; ++index) {
2697 BOOST_REQUIRE_EQUAL(lengths[index], (
int)seg_lengths[index]);
2698 BOOST_REQUIRE_EQUAL(starts[2*index], (
int)seg_starts[2*index]);
2699 BOOST_REQUIRE_EQUAL(starts[2*index+1], (
int)seg_starts[2*index+1]);
2709 pair<TSeqPos, TSeqPos>
range(7685545, 7686027);
2710 unique_ptr<SSeqLoc>
query(
2717 unique_ptr<SSeqLoc> subj(
2732 pair<TSeqPos, TSeqPos>
range(7685545, 7686027);
2733 unique_ptr<SSeqLoc>
query(
2740 unique_ptr<SSeqLoc> subj(
2745 opts->SetTraditionalBlastnDefaults();
2755 pair<TSeqPos, TSeqPos>
range(7685545, 7686027);
2756 unique_ptr<SSeqLoc>
query(
2763 unique_ptr<SSeqLoc> subj(
2768 opts->SetTraditionalBlastnDefaults();
2778 pair<TSeqPos, TSeqPos>
range(7685545, 7686027);
2779 unique_ptr<SSeqLoc>
query(
2786 unique_ptr<SSeqLoc> subj(
2791 opts->SetTraditionalBlastnDefaults();
2794 BOOST_REQUIRE(sav[0]->IsEmpty() ==
true);
2801 pair<TSeqPos, TSeqPos>
range(7685545, 7686027);
2802 unique_ptr<SSeqLoc>
query(
2809 unique_ptr<SSeqLoc> subj(
2814 opts->SetTraditionalBlastnDefaults();
2825 pair<TSeqPos, TSeqPos>
range(7685545, 7686027);
2826 unique_ptr<SSeqLoc>
query(
2833 unique_ptr<SSeqLoc> subj(
2838 opts->SetTraditionalBlastnDefaults();
2841 BOOST_REQUIRE(sav[0]->IsEmpty() ==
true);
2848 pair<TSeqPos, TSeqPos>
range(7685545, 7686027);
2849 unique_ptr<SSeqLoc>
query(
2856 unique_ptr<SSeqLoc> subj(
2861 opts->SetTraditionalBlastnDefaults();
2864 BOOST_REQUIRE(sav[0]->IsEmpty() ==
true);
2871 pair<TSeqPos, TSeqPos>
range(7685545, 7686027);
2872 unique_ptr<SSeqLoc>
query(
2879 unique_ptr<SSeqLoc> subj(
2884 opts->SetTraditionalBlastnDefaults();
2894 pair<TSeqPos, TSeqPos>
range(7685545, 7686027);
2895 unique_ptr<SSeqLoc>
query(
2902 unique_ptr<SSeqLoc> subj(
2907 opts->SetTraditionalBlastnDefaults();
2920 pair<TSeqPos, TSeqPos>
range(7685545, 7686027);
2921 unique_ptr<SSeqLoc> subj(
2926 opts->SetTraditionalBlastnDefaults();
2935 pair<TSeqPos, TSeqPos>
range(7685545, 7686027);
2936 unique_ptr<SSeqLoc>
query(
2944 opts->SetTraditionalBlastnDefaults();
2951 vector<TIntId> q_gis, s_gis;
2955 q_gis.push_back(129295);
2956 q_gis.push_back(15606659);
2959 s_gis.push_back(129295);
2961 s_gis.push_back(4336138);
2962 s_gis.push_back(15606659);
2963 s_gis.push_back(5556);
2966 ITERATE(vector<TIntId>, itr, q_gis) {
2972 queries.push_back(
SSeqLoc(loc, scope));
2976 ITERATE(vector<TIntId>, itr, s_gis) {
2982 subjects.push_back(
SSeqLoc(loc, scope));
2985 size_t num_queries = queries.size();
2986 size_t num_subjects = subjects.size();
2991 BOOST_REQUIRE_EQUAL(num_queries*num_subjects, sas_v.size());
3001 vector<TIntId> q_gis, s_gis;
3005 q_gis.push_back(129295);
3006 q_gis.push_back(15606659);
3009 s_gis.push_back(129295);
3011 s_gis.push_back(4336138);
3012 s_gis.push_back(15606659);
3013 s_gis.push_back(5556);
3016 ITERATE(vector<TIntId>, itr, q_gis) {
3022 queries.push_back(
SSeqLoc(loc, scope));
3026 ITERATE(vector<TIntId>, itr, s_gis) {
3032 subjects.push_back(
SSeqLoc(loc, scope));
3035 size_t num_queries = queries.size();
3036 size_t num_subjects = subjects.size();
3042 BOOST_REQUIRE_EQUAL((num_queries*num_subjects),
3047 for (
size_t i = 0;
i < num_queries;
i++)
3049 for (
size_t j = 0; j < num_subjects; j++)
3055 sas_v.push_back(aln_set);
3062 BOOST_REQUIRE_EQUAL(num_queries*num_subjects, sas_v.size());
3073 vector<TIntId> q_gis, s_gis;
3077 q_gis.push_back(129295);
3078 q_gis.push_back(15606659);
3081 s_gis.push_back(129295);
3083 s_gis.push_back(4336138);
3084 s_gis.push_back(15606659);
3085 s_gis.push_back(5556);
3088 ITERATE(vector<TIntId>, itr, q_gis) {
3094 query_vec.push_back(
SSeqLoc(loc, scope));
3102 ITERATE(vector<TIntId>, itr, s_gis) {
3108 subj_vec.push_back(
SSeqLoc(loc, scope));
3116 size_t num_queries = query_vec.size();
3117 size_t num_subjects = subj_vec.size();
3120 CLocalBlast blaster(queries, opts_handle, subjects);
3123 BOOST_REQUIRE_EQUAL((num_queries*num_subjects),
3125 BOOST_REQUIRE_EQUAL((num_queries*num_subjects), results->
size());
3127 BOOST_REQUIRE_EQUAL(num_subjects,
3132 for (
size_t i = 0;
i < num_queries;
i++)
3134 for (
size_t j = 0; j < num_subjects; j++)
3140 sas_v.push_back(aln_set);
3146 BOOST_REQUIRE_EQUAL(num_queries*num_subjects, sas_v.size());
3153 vector<TIntId> q_gis, s_gis;
3156 q_gis.push_back(816838863);
3159 s_gis.push_back(16130156);
3160 s_gis.push_back(15644111);
3161 s_gis.push_back(126699345);
3162 s_gis.push_back(504220075);
3163 s_gis.push_back(21222553);
3164 s_gis.push_back(24376189);
3165 s_gis.push_back(15598078);
3166 s_gis.push_back(15599919);
3167 s_gis.push_back(15597767);
3168 s_gis.push_back(16131833);
3169 s_gis.push_back(15599742);
3170 s_gis.push_back(15598387);
3171 s_gis.push_back(15600358);
3172 s_gis.push_back(24375949);
3173 s_gis.push_back(126698248);
3174 s_gis.push_back(24375956);
3175 s_gis.push_back(24375382);
3176 s_gis.push_back(126698598);
3179 ITERATE(vector<TIntId>, itr, q_gis) {
3185 query_vec.push_back(
SSeqLoc(loc, scope));
3193 ITERATE(vector<TIntId>, itr, s_gis) {
3199 subj_vec.push_back(
SSeqLoc(loc, scope));
3203 opts_handle,
true));
3207 size_t num_queries = query_vec.size();
3208 size_t num_subjects = subj_vec.size();
3211 CLocalBlast blaster(queries, opts_handle, subjects);
3214 BOOST_REQUIRE_EQUAL(num_queries,
3216 BOOST_REQUIRE_EQUAL(num_queries, results->
size());
3225 BOOST_REQUIRE_EQUAL(num_subjects+1, aln_set->
Size());
3231 unique_ptr<CBlastOptionsHandle> megablast_options_handle(
3234 BOOST_REQUIRE(megablast_options_handle->GetOptions() ==
3241 BOOST_REQUIRE(prot_options_handle.
GetOptions() !=
3245 unique_ptr<CBlastOptionsHandle> blastn_options_handle(
3247 BOOST_REQUIRE(blastn_options_handle->GetOptions() !=
3253 BOOST_REQUIRE(prot_options_handle.
GetOptions() !=
3260 unique_ptr<SSeqLoc>
query(
3266 BOOST_REQUIRE_EQUAL(1, (
int)sav.size());
3277 pair<TSeqPos, TSeqPos> qrange(7868209-1, 7868602-1);
3278 pair<TSeqPos, TSeqPos> srange(2-1, 397-1);
3279 unique_ptr<SSeqLoc>
query(
3282 unique_ptr<SSeqLoc> subj(
3289 BOOST_REQUIRE_EQUAL(1, (
int)sav.size());
3301 const int num_subjects = 15;
3302 const int results_size[num_subjects] =
3303 { 1, 1, 0, 1, 1, 1, 2, 1, 2, 0, 0, 0, 0, 2, 1 };
3304 const int query_gi = 7274302;
3305 const int gi_diff = 28;
3307 unique_ptr<SSeqLoc> sl(
3310 query.push_back(*sl);
3313 for (index = 0; index < num_subjects; ++index) {
3318 subjects.push_back(*sl);
3322 BOOST_REQUIRE_EQUAL(num_subjects, (
int)seqalign_v.size());
3328 BOOST_REQUIRE_EQUAL(results_size[index], (
int) (*itr)->Get().size());
3335 const size_t num_seqs = 19;
3336 const int gi_list[num_seqs] =
3337 { 1346057, 125527, 121064, 1711551, 125412, 128337, 2507199,
3338 1170625, 1730070, 585365, 140977, 1730069, 20455504, 125206,
3339 125319, 114152, 1706450, 1706307, 125565 };
3340 const int score_cutoff = 70;
3344 for (index = 0; index < num_seqs; ++index) {
3346 unique_ptr<SSeqLoc> sl(
3348 seq_vec.push_back(*sl);
3352 prot_opts->SetSegFiltering(
false);
3353 prot_opts->SetCutoffScore(score_cutoff);
3354 CBl2Seq blaster(seq_vec, seq_vec, *prot_opts);
3356 blaster.RunWithoutSeqalignGeneration();
3359 int qindex, sindex, qindex1, sindex1;
3360 for (qindex = 0; qindex < num_seqs; ++qindex) {
3365 qindex1 = hsp_list1->
oid;
3367 for (sindex1 = 0; sindex1 < hitlist->
hsplist_count; ++sindex1) {
3373 BOOST_REQUIRE(hsp_list2 !=
NULL);
3375 for (hindex = 0; hindex < hsp_list1->
hspcnt; ++hindex) {
3378 BOOST_REQUIRE(hindex < hsp_list2->hspcnt);
3389 BOOST_REQUIRE_EQUAL(num_seqs*num_seqs, res->
GetNumResults());
3390 BOOST_REQUIRE_EQUAL(num_seqs*num_seqs, res->
size());
3400 BOOST_REQUIRE_EQUAL((size_type)1, sas->
Get().size());
3404 BOOST_REQUIRE(qid.
Match(sid));
3408 &*seq_vec[q].scope);
3409 BOOST_REQUIRE_EQUAL(seqlen, num_ident);
3425 BOOST_REQUIRE_EQUAL(sas1->
Get().size(), sas2->
Get().size());
3426 CSeq_align_set::Tdata::const_iterator i1 = sas1->
Get().begin(),
3427 i2 = sas2->
Get().begin();
3428 for (; i1 != sas1->
Get().end(); ++i1, ++i2) {
3436 int score1 = 0, score2 = 0;
3439 BOOST_REQUIRE_EQUAL(score1, score2);
3441 double bit_score1 = .0, bit_score2 = .0;
3444 BOOST_REQUIRE_CLOSE(bit_score1, bit_score2, 10e-2);
3446 double evalue1 = .0, evalue2 = .0;
3449 BOOST_REQUIRE_CLOSE(evalue1, evalue2, 10e-5);
3457 BOOST_REQUIRE_EQUAL(num_seqs*num_seqs, alignments.size());
3458 for (
size_t q = 0; q < num_seqs; q++) {
3459 for (
size_t s = 0; s < num_seqs; s++) {
3460 size_t idx1 = q*num_seqs + s;
3462 if (idx1 == (q*num_seqs+q)) {
3464 BOOST_REQUIRE_EQUAL(1U, al1->
Get().size());
3468 BOOST_REQUIRE(qid.
Match(sid));
3472 &*seq_vec[q].scope);
3473 BOOST_REQUIRE_EQUAL(seqlen, num_ident);
3476 size_t idx2 = s*num_seqs + q;
3479 BOOST_REQUIRE_EQUAL(al1.
Empty(), al2.
Empty());
3492 int score1 = 0, score2 = 0;
3495 BOOST_REQUIRE_EQUAL(score1, score2);
3497 double bit_score1 = .0, bit_score2 = .0;
3500 BOOST_REQUIRE_CLOSE(bit_score1, bit_score2, 10e-2);
3502 double evalue1 = .0, evalue2 = .0;
3505 BOOST_REQUIRE_CLOSE(evalue1, evalue2, 10e-5);
3519 BOOST_REQUIRE(fnptr ==
NULL);
3527 BOOST_REQUIRE(fnptr == null_fnptr);
3534 sav = blaster.
Run();
3535 BOOST_REQUIRE_EQUAL(1, (
int)sav.size());
3536 sar = *(sav[0]->Get().begin());
3546 BOOST_REQUIRE(fnptr ==
NULL);
3549 BOOST_REQUIRE_EQUAL(1, (
int)sav.size());
3554 #if SEQLOC_MIX_QUERY_OK
3556 const size_t kNumInts = 20;
3557 const size_t kStarts[kNumInts] =
3558 { 838, 1838, 6542, 7459, 9246, 10431, 14807, 16336, 19563,
3559 20606, 21232, 22615, 23822, 27941, 29597, 30136, 31287,
3560 31786, 33315, 35402 };
3561 const size_t kEnds[kNumInts] =
3562 { 961, 2010, 6740, 7573, 9408, 10609, 15043, 16511, 19783,
3563 20748, 21365, 22817, 24049, 28171, 29839, 30348, 31362,
3564 31911, 33485, 37952 };
3569 for (index = 0; index < kNumInts; ++index) {
3571 next_loc->SetInt().SetFrom(kStarts[index]);
3572 next_loc->SetInt().SetTo(kEnds[index]);
3573 next_loc->SetInt().SetId(qid);
3574 qloc->SetMix().Set().push_back(next_loc);
3578 scope->AddDefaults();
3583 pair<TSeqPos, TSeqPos>
range(15595732, 15705419);
3597 pair<TSeqPos, TSeqPos>
range(662070, 662129);
3609 BOOST_REQUIRE(sas->
Get().empty());
User-defined methods of the data storage class.
static const char * kFileName
Declares the CBl2Seq (BLAST 2 Sequences) class.
void testBlastHitCounts(CBl2Seq &blaster, EBl2seqTest test_id)
CRef< CSeq_loc > s_MakePackedInt2(CRef< CSeq_id > id, vector< TSeqRange > &range_vec)
static CRef< CBl2Seq > s_SetupWithMultipleQueriesAndSubjects(bool query_is_nucl, bool subj_is_nucl, EProgram program)
Boolean do_not_interrupt(SBlastProgress *)
Returns false so that the processing never stops in spite of a callback function to interrupt the pro...
BOOST_AUTO_TEST_CASE_TIMEOUT(testInterruptBlastSetup, 3)
Boolean interrupt_after3calls(SBlastProgress *)
The interruption occurs after 3 invocations of this callback.
BOOST_AUTO_TEST_CASE(NucleotideMultipleSeqLocs1)
Boolean callback_counter(SBlastProgress *progress_info)
This callback never interrupts the BLAST search, its only purpose is to count the number of times thi...
Boolean interrupt_immediately(SBlastProgress *)
Returns true so that the processing stops upon the first invocation of this callback.
static void testWholeIntervalAlignment(TSeqAlignVector &sav)
void testRawCutoffs(CBl2Seq &blaster, EProgram program, EBl2seqTest test_id)
static void testIntervalWholeAlignment(TSeqAlignVector &sav)
Boolean interrupt_at_random(SBlastProgress *progress_info)
This callback interrupts the BLAST search after the callback has been executed the requested number o...
Boolean interrupt_on_traceback(SBlastProgress *progress_info)
The interruption occurs after starting the traceback stage.
void testResultAlignments(size_t num_queries, size_t num_subjects, TSeqAlignVector result_alnvec)
void x_TestAlignmentQuerySubjStrandCombinations(TSeqAlignVector &sav, string aligned_strands)
@ eDiscMegablast_U02544_U61969
@ eTblastn_129295_555_large_word
Boolean(* TInterruptFnPtr)(SBlastProgress *progress_info)
Prototype for function pointer to determine whether the BLAST search should proceed or be interrupted...
@ eTracebackSearch
Traceback stage.
const char * kBlastErrMsg_CantCalculateUngappedKAParams
Declares the CBlastNucleotideOptionsHandle class.
@ eGreedyScoreOnly
Greedy extension (megaBlast)
@ eGreedyTbck
Greedy extension (megaBlast)
#define kDefaultRepeatFilterDb
Default value for repeats database filtering.
@ eCompressedAaLookupTable
compressed alphabet (blastp) lookup table
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
Declares the CBlastProteinOptionsHandle class.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
@ eDatabaseSearch
Seq-aligns in the style of a database search.
@ eSequenceComparison
Seq-aligns in the BLAST 2 Sequence style (one alignment per query-subject pair)
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
@ eTblastx
Translated nucl-Translated nucl.
@ eBlastn
Nucl-Nucl (traditional blastn)
@ eBlastp
Protein-Protein.
@ eTblastn
Protein-Translated nucl.
@ eMegablast
Nucl-Nucl (traditional megablast)
@ eDiscMegablast
Nucl-Nucl using discontiguous megablast.
@ eBlastx
Translated nucl-Protein.
Declares the CBlastxOptionsHandle class.
CAutoEnvironmentVariable –.
Runs the BLAST algorithm between 2 sequences.
Defines BLAST error codes (user errors included)
Handle to the nucleotide-nucleotide options to the BLAST algorithm.
Handle to the protein-protein options to the BLAST algorithm.
Class whose purpose is to create CScope objects which have data loaders added with different prioriti...
CRef< objects::CScope > NewScope()
Create a new, properly configured CScope.
Handle to the translated nucleotide-protein options to the BLAST algorithm.
Handle to the nucleotide-nucleotide options to the discontiguous BLAST algorithm.
Base class for reading FASTA sequences.
Class to perform a BLAST search on local BLAST databases. Note that PHI-BLAST can be run using this cl...
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
NCBI C++ Object Manager-dependent implementation of IQueryFactory.
vector< CRange< TSeqPos > > TRanges
Search Results for One Query.
Tdata::size_type Size() const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
bool GetNamedScore(const string &id, int &score) const
Get score.
TSeqPos GetSeqStart(TDim row) const
static CRef< CScope > NewScope(bool with_defaults=true)
Return a new scope, possibly (by default) with default loaders, which will include the Genbank loader...
Handle to the protein-translated nucleotide options to the BLAST algorithm.
static CTestObjMgr & Instance()
Template class for iteration on objects of class C.
Class for the messages for an individual query sequence.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
@ eCompositionBasedStats
Composition-based statistics as in NAR 29:2994-3005, 2001.
@ eNoCompositionBasedStats
Don't use composition based statistics.
static const string kMismatch
Declares the CDiscNucleotideOptionsHandle class.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Operators to edit gaps in sequences.
void SetCompositionBasedStats(ECompoAdjustModes mode)
void SetEvalueThreshold(double eval)
Sets EvalueThreshold.
void SetWindowSize(int w)
void SetSubject(const SSeqLoc &subject)
Set the subject sequence.
string WindowMaskerTaxidToDb(int taxid)
Get the windowmasker file path for a given taxid.
void GetMessages(TSearchMessages &messages) const
Returns error messages/warnings.
void SetMatchReward(int r)
Sets MatchReward.
CConstRef< objects::CSeq_align_set > GetSeqAlign() const
Accessor for the Seq-align results.
void SetWindowMaskerTaxId(int taxid)
Enable window masker and select a taxid (or 0 to disable).
CRef< CSearchResultSet > Run()
Executes the search.
TInterruptFnPtr SetInterruptCallback(TInterruptFnPtr fnptr, void *user_data=NULL)
Set a function callback to be invoked by the CORE of BLAST to allow interrupting a BLAST search in pr...
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
void SetWordThreshold(double w)
Sets WordThreshold.
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
const char * GetWindowMaskerDatabase() const
Get the window masker database name (or NULL if not set).
size_type GetNumQueries() const
Return the number of unique query ID's represented by this object.
void SetTraditionalBlastnDefaults()
Sets TraditionalBlastnDefaults.
void SetMismatchPenalty(int p)
Sets MismatchPenalty.
void WindowMaskerPathReset()
Resets the path to the windowmasker data files.
void SetRepeatFilteringDB(const char *db)
Enable repeat filtering.
virtual TSeqAlignVector Run()
Perform BLAST search. Assuming N queries and M subjects, the structure of the returned vector is as fo...
size_type size() const
Identical to GetNumResults, provided to facilitate STL-style iteration.
void SetTraditionalBlastnDefaults()
NOTE: Unavailable for discontiguous megablast.
vector< value_type >::size_type size_type
size_type type definition
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
CRef< CSearchResultSet > RunEx()
Performs the same functionality as Run(), but it returns a different data type.
void SetTraditionalMegablastDefaults()
Sets TraditionalMegablastDefaults.
void SetMatrixName(const char *matrix)
Sets MatrixName.
void GetAncillaryResults(CSearchResultSet::TAncillaryVector &retval) const
Get the ancillary results for a BLAST search (to be used with the Run() method)
bool GetRepeatFiltering() const
Is repeat filtering enabled?
CBlastOptionsHandle & SetOptionsHandle()
Set the options handle.
int GetWindowMaskerTaxId() const
Get the window masker taxid (or 0 if not set).
EResultType GetResultType() const
Get the type of results contained in this object.
const char * GetRepeatFilteringDB() const
Get the repeat filtering database.
void SetWordSize(int ws)
Sets WordSize.
void SetDustFiltering(bool val)
Enable dust filtering.
void SetWindowMaskerDatabase(const char *db)
Enable window masker and select a database (or NULL to disable).
void SetMaxNumHspPerSequence(int m)
Sets MaxNumHspPerSequence.
vector< CRef< CBlastAncillaryData > > TAncillaryVector
typedef for a vector of CRef<CBlastAncillaryData>
void SetQuery(const SSeqLoc &query)
Set the query.
int WindowMaskerPathInit(const string &window_masker_path)
Initialize the path to the windowmasker data files.
size_type GetNumResults() const
Return the number of results contained by this object.
void SetLookupTableType(ELookupTableType type)
CSearchResults & GetResults(size_type qi, size_type si)
Retrieve results for a query-subject pair contained by this object.
BlastDiagnostics * GetDiagnostics() const
Retrieves the diagnostics information returned from the engine.
bool IsDbScanMode() const
Returns true if this is not a database but is database scanning mode.
bool HasAlignments() const
Return true if there are any alignments for this query.
@ eBlastDbIsNucleotide
nucleotide
@ eBlastDbIsProtein
protein
#define GI_FROM(T, value)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
const string & GetMsg(void) const
Get message string.
static string GetCwd(void)
Get the current working directory.
#define MSerial_AsnText
I/O stream manipulators –.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
void SetPacked_int(TPacked_int &v)
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc. Returns eNa_strand_other if multiple strands in...
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
bool NotNull(void) const THROWS_NONE
Check if pointer is not null – same effect as NotEmpty().
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
TObjectType & GetObject(void) const
Get object.
int64_t Int8
8-byte (64-bit) signed integer
Uint4 TValue
Type of the generated integer value and/or the seed value.
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
const TDenseg & GetDenseg(void) const
Get the variant data.
vector< CRef< CScore > > TScore
const TStarts & GetStarts(void) const
Get the Starts member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
const TLens & GetLens(void) const
Get the Lens member data.
TDim GetDim(void) const
Get the Dim member data.
const TStd & GetStd(void) const
Get the variant data.
bool IsSetScore(void) const
for whole alignment Check if a value has been assigned to Score data member.
TNumseg GetNumseg(void) const
Get the Numseg member data.
list< CRef< CSeq_align > > Tdata
const TScore & GetScore(void) const
Get the Score member data.
const TDisc & GetDisc(void) const
Get the variant data.
const TStrands & GetStrands(void) const
Get the Strands member data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
ENa_strand
strand of nucleic acid
@ eNa_strand_both
in forward orientation
@ e_Gi
GenInfo Integrated Database.
TId & SetId(void)
Assign a value to Id data member.
void SetInst(TInst &value)
Assign a value to Inst data member.
@ eRepr_raw
continuous sequence
@ eMol_na
just a nucleic acid
unsigned int
A callback function used to compare two keys in a database.
Main class to perform a BLAST search on the local machine.
Declares class which provides internal BLAST database representations to the internal BLAST APIs.
static void byte(MDB_val *v)
const TYPE & Get(const CNamedParameterList *param)
range(_Ty, _Ty) -> range< _Ty >
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
int strcmp(const char *str1, const char *str2)
Uint1 Boolean
bool replacement for C
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
BlastUngappedStats * ungapped_stat
Ungapped extension counts.
BlastRawCutoffs * cutoffs
Various raw values for the cutoffs.
BlastGappedStats * gapped_stat
Gapped extension counts.
Structure containing hit counts from the gapped stage of a BLAST search.
Int4 good_extensions
Number of HSPs below the e-value threshold after gapped extension.
Int4 extensions
Total number of gapped extensions performed.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
The structure to contain all BLAST results, for multiple queries.
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Structure holding all information about an HSP.
BlastSeg query
Query sequence info.
BlastSeg subject
Subject sequence info.
Int4 score
This HSP's raw score.
The structure to contain all BLAST results for one query sequence.
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Int4 hsplist_count
Filled size of the HSP lists array.
Structure holding raw cutoff and gap-x-drop values.
Int4 ungapped_cutoff
Minimal raw score for starting gapped extension.
Int4 x_drop_gap_final
Raw value of the x-dropoff for gapped extensions with traceback.
Int4 x_drop_gap
Raw value of the x-dropoff for preliminary gapped extensions.
Int4 x_drop_ungapped
Raw value of the x-dropoff for ungapped extensions.
Int4 gapped_start
Where the gapped extension started.
Structure containing hit counts from the ungapped stage of a BLAST search.
Int8 lookup_hits
Number of successful lookup table hits.
Int4 init_extends
Number of initial words found and extended.
Int4 good_init_extends
Number of successful initial extensions, i.e.
Progress monitoring structure.
EBlastStage stage
Stage of the BLAST search currently in progress.
void * user_data
Pointer to user-provided data.
Structure to represent a single sequence to be fed to BLAST.
Declares the CTBlastnOptionsHandle class.
Utility stuff for more convenient using of Boost.Test library.
static const char * kMatch
Interface to retrieve list of available windowmasker filtering.