58 #include <unordered_set>
71 #define NCBI_BOOST_NO_AUTO_TEST_MAIN
78 #ifndef SKIP_DOXYGEN_PROCESSING
81 #define STATIC_ARRAY_SIZE(array) (sizeof(array)/sizeof(*array))
122 BOOST_REQUIRE(it->NotEmpty());
132 BOOST_REQUIRE(!archive.
Empty());
170 if ((
int)expected_hits.size() != hitlist.
Size()) {
171 err +=
"Hitlist sizes "
180 for (
size_t i=0;
i <
min(expected_hits.size(), (
size_t)hitlist.
Size());
189 ->CompareOrdered(expected_query_id) != 0) {
191 err += header +
"Query ids " + expected_hits[
i].query
193 ->GetId()->AsFastaString() +
" do not match\n";
200 err += header +
"Subject ids "
209 if (expected_hits[
i].query_range.GetFrom()
211 || expected_hits[
i].query_range.GetTo()
214 err += header +
"Query ranges "
227 if (expected_hits[
i].subject_range.GetFrom()
229 || expected_hits[
i].subject_range.GetTo()
232 err += header +
"Subject ranges "
245 if (expected_hits[
i].score != hit->
m_Score) {
246 err += header +
"Scores "
300 bool kParseDeflines =
true;
306 "Reading FASTA sequences has failed");
313 bool kParseDeflines =
true;
377 const vector< CRef<CSeq_loc> >& q = aligner.
GetQueries();
378 BOOST_REQUIRE_EQUAL(seqlocs.size(), q.size());
379 for (
size_t i=0;
i < q.size();
i++) {
380 BOOST_CHECK(seqlocs[
i]->
GetId()->Match(*q[
i]->
GetId()));
391 const vector< CRef<CSeq_loc> >& q = aligner.
GetQueries();
392 BOOST_REQUIRE_EQUAL(bioseqs.size(), q.size());
393 for (
size_t i=0;
i < q.size();
i++) {
394 BOOST_CHECK(bioseqs[
i]->GetFirstId()->Match(*q[
i]->
GetId()));
406 BOOST_REQUIRE((*it)->GetId());
430 vector< CRef<CBioseq> > bioseqs;
448 s.
SetInt().SetId().Assign(
id);
449 BOOST_REQUIRE(s.
GetId());
462 vector< CRef<CSeq_loc> > seqlocs;
463 vector< CRef<CBioseq> > bioseqs;
482 seqlocs.push_back(m_Sequences.front());
485 BOOST_REQUIRE_EQUAL((
int)seqlocs.size(), 1);
497 BOOST_REQUIRE_EQUAL(status, 0);
498 BOOST_CHECK_THROW(aligner.
SetQueries(seqlocs, scope),
509 vector< CRef<CBioseq> > bioseqs;
512 m_Options->SetRpsDb(
"");
516 int query_index = (
int)m_Sequences.size();
517 constr[0].seq1_index = query_index;
518 constr[0].seq1_start = 0;
519 constr[0].seq1_stop = 50;
521 constr[0].seq2_index = 0;
522 constr[0].seq2_start = 0;
523 constr[0].seq2_stop = 50;
527 BOOST_REQUIRE(m_Options->Validate());
530 BOOST_CHECK_THROW(m_Aligner->SetQueries(m_Sequences,
m_Scope),
534 BOOST_CHECK_THROW(m_Aligner->SetQueries(bioseqs),
540 constr[0].seq1_index = 0;
541 constr[0].seq1_start = 0;
542 constr[0].seq1_stop = 50;
547 constr[0].seq2_index = 1;
548 constr[0].seq2_start = 0;
549 constr[0].seq2_stop = length + 1;
551 BOOST_REQUIRE(m_Options->Validate());
554 BOOST_CHECK_THROW(m_Aligner->SetQueries(m_Sequences,
m_Scope),
558 BOOST_CHECK_THROW(m_Aligner->SetQueries(bioseqs),
577 m_Aligner->SetQueries(m_Sequences,
m_Scope);
589 BOOST_CHECK(node->
GetValue().IsSetDist());
595 BOOST_CHECK(!node->
GetValue().GetLabel().empty());
602 BOOST_CHECK_EQUAL(node->
GetValue().GetId(),
607 BOOST_REQUIRE(!queries[
id]);
613 BOOST_CHECK(node->
GetValue().GetLabel().empty());
628 vector<bool> used_queries(num_queries,
false);
632 ITERATE(vector<bool>, it, used_queries) {
641 vector<bool> used_queries(num_queries,
false);
647 BOOST_CHECK((*it)->CanGetName());
648 if ((*it)->GetName() ==
"label") {
649 label_fid = (*it)->GetId();
651 if ((*it)->GetName() ==
"dist") {
652 dist_fid = (*it)->GetId();
656 BOOST_REQUIRE(label_fid >= 0);
658 BOOST_REQUIRE(dist_fid >= 0);
661 if ((*node)->GetId() == 0) {
666 BOOST_REQUIRE((*node)->CanGetFeatures());
668 bool is_dist =
false;
670 if ((*feat)->GetFeatureid() == label_fid) {
671 string label = (*feat)->GetValue();
672 const char* ptr =
label.c_str();
679 BOOST_REQUIRE(!used_queries[
id]);
680 used_queries[id] =
true;
683 if ((*feat)->GetFeatureid() == dist_fid) {
688 BOOST_CHECK(is_dist);
692 ITERATE(vector<bool>, it, used_queries) {
705 if (clusters.empty()) {
710 vector<bool> used_queries(num_queries,
false);
715 BOOST_REQUIRE(cluster->size() > 0);
716 num_elems += (
int)cluster->size();
718 int prototype = cluster->GetPrototype();
719 bool is_prototype =
false;
723 BOOST_REQUIRE(!used_queries[*elem]);
724 used_queries[*elem] =
true;
726 if (prototype == *elem) {
732 BOOST_CHECK(is_prototype);
736 BOOST_REQUIRE_EQUAL(num_elems, num_queries);
739 ITERATE(vector<bool>, it, used_queries) {
745 if (constraints.empty()) {
749 unordered_set<int> constr_queries;
751 constr_queries.insert(it->seq1_index);
752 constr_queries.insert(it->seq2_index);
754 size_t remain = constr_queries.size();
758 unordered_set<int>::const_iterator it = constr_queries.find(*elem);
759 if (it != constr_queries.end()) {
763 BOOST_CHECK_EQUAL(cluster->size(), 1u);
778 const vector<CSequence>& seqs,
780 const string& aln_ref =
"")
785 int num_queries = (
int)queries.size();
787 BOOST_REQUIRE_EQUAL(seqalign->
GetDim(), num_queries);
790 for (
int i=0;
i < num_queries;
i++) {
797 BOOST_REQUIRE_EQUAL(seqs.size(), queries.size());
798 int len = seqs[0].GetLength();
799 ITERATE(vector<CSequence>, it, seqs) {
800 BOOST_CHECK_EQUAL(it->GetLength(),
len);
804 for (
size_t i=0;
i < queries.size();
i++) {
808 int num_residues = 0;
809 const unsigned char* sequence = seqs[
i].GetSequence();
810 for (
int k=0;k < seqs[
i].GetLength();k++) {
815 BOOST_CHECK_EQUAL(query_len, num_residues);
820 if (!aln_ref.empty()) {
826 BOOST_REQUIRE_EQUAL(seqalign->
GetDim(), ref_align.
GetDim());
838 BOOST_REQUIRE_EQUAL(starts.size(), ref_starts.size());
839 BOOST_REQUIRE_EQUAL(lens.size(), ref_lens.size());
840 for (
size_t i=0;
i < starts.size();
i++) {
841 BOOST_CHECK_EQUAL(starts[
i], ref_starts[
i]);
843 for (
size_t i=0;
i < lens.size();
i++) {
844 BOOST_CHECK_EQUAL(lens[
i], ref_lens[
i]);
851 const string& ref_aln =
"")
869 BOOST_REQUIRE(m_Options->Validate());
871 m_Aligner->SetQueries(m_Sequences,
m_Scope);
875 BOOST_CHECK_EQUAL((
int)m_Aligner->GetMessages().size(), 0);
885 m_Options->SetMaxInClusterDist(0.0);
886 BOOST_REQUIRE(m_Options->Validate());
889 m_Aligner->SetQueries(m_Sequences,
m_Scope);
896 BOOST_CHECK((
int)m_Aligner->GetMessages().size() == 0);
905 m_Options->SetMaxInClusterDist(1.0);
906 BOOST_REQUIRE(m_Options->Validate());
909 m_Aligner->SetQueries(m_Sequences,
m_Scope);
927 BOOST_REQUIRE(m_Options->Validate());
930 m_Aligner->SetQueries(m_Sequences,
m_Scope);
934 BOOST_CHECK_EQUAL((
int)m_Aligner->GetMessages().size(), 0);
944 vector<CMultiAlignerOptions::SConstraint>& constr
945 = m_Options->SetUserConstraints();
948 constr[0].seq1_index = 0;
949 constr[0].seq1_start = 0;
950 constr[0].seq1_stop = 50;
952 constr[0].seq2_index = 1;
953 constr[0].seq2_start = 0;
954 constr[0].seq2_stop = 50;
956 constr[1].seq1_index = 1;
957 constr[1].seq1_start = 0;
958 constr[1].seq1_stop = 50;
960 constr[1].seq2_index = 5;
961 constr[1].seq2_start = 0;
962 constr[1].seq2_stop = 50;
964 BOOST_REQUIRE(m_Options->Validate());
967 m_Aligner->SetQueries(m_Sequences,
m_Scope);
971 BOOST_CHECK_EQUAL((
int)m_Aligner->GetMessages().size(), 0);
982 m_Options->SetMaxInClusterDist(1.0);
985 vector< CRef<CSeq_loc> > queries;
986 queries.push_back(m_Sequences[0]);
987 queries.push_back(m_Sequences[1]);
988 m_Aligner->SetQueries(queries,
m_Scope);
995 m_Options->SetMaxInClusterDist(0.01);
997 m_Aligner->SetQueries(queries,
m_Scope);
998 status = m_Aligner->Run();
1010 vector< CRef<CSeq_loc> > sequences;
1014 m_Options->SetUseQueryClusters(
true);
1034 BOOST_REQUIRE(
result->GetSegs().IsDenseg());
1041 int num_input_sequences = (
int)first_denseg.
GetDim()
1042 + second_denseg.
GetDim();
1045 BOOST_REQUIRE_EQUAL(
result->GetDim(), num_input_sequences);
1047 vector<int> first_rows, second_rows;
1053 for (;j <
result->GetDim();j++) {
1054 if (
id.Match(*
result->GetSegs().GetDenseg().GetIds()[j])) {
1055 first_rows.push_back(j);
1059 BOOST_REQUIRE(j < result->GetDim());
1065 for (;j <
result->GetDim();j++) {
1066 if (
id.Match(*
result->GetSegs().GetDenseg().GetIds()[j])) {
1067 second_rows.push_back(j);
1071 BOOST_REQUIRE(j < result->GetDim());
1079 f->RemovePureGapSegs();
1081 BOOST_REQUIRE_EQUAL(first_denseg.
GetStarts().size(),
f->GetStarts().size());
1082 BOOST_REQUIRE_EQUAL(first_denseg.
GetLens().size(),
f->GetLens().size());
1084 BOOST_REQUIRE_EQUAL(first_denseg.
GetStarts()[
i],
f->GetStarts()[
i]);
1087 BOOST_REQUIRE_EQUAL(first_denseg.
GetLens()[
i],
f->GetLens()[
i]);
1096 BOOST_REQUIRE_EQUAL(second_denseg.
GetLens().size(), s->
GetLens().size());
1111 m_Aligner->SetInputMSAs(*m_Align1, *m_Align2, repr, repr,
m_Scope);
1131 const_cast<CSeq_id*
>(m_Sequences.front()->GetId())));
1137 m_Aligner->SetInputMSAs(*m_Align1, *align, repr, repr,
m_Scope);
1147 BOOST_REQUIRE(m_Align1->CheckNumRows() > 3);
1148 BOOST_REQUIRE(m_Align2->CheckNumRows() > 3);
1154 m_Aligner->SetInputMSAs(*m_Align1, *m_Align2, repr, repr,
m_Scope);
1168 BOOST_CHECK_THROW(m_Aligner->SetInputMSAs(*m_Align1, *m_Align2, repr, repr,
1176 repr.
insert(m_Align1->CheckNumRows());
1177 BOOST_CHECK_THROW(m_Aligner->SetInputMSAs(*m_Align1, *m_Align2, repr, repr,
1185 vector< CRef<CSeq_id> > expected_queries;
1187 string id = (*it)->GetId()->AsFastaString();
1192 m_Options->SetUseQueryClusters(
false);
1193 m_Options->SetRpsEvalue(0.1);
1196 m_Aligner->SetQueries(m_Sequences,
m_Scope);
1204 BOOST_REQUIRE_EQUAL(m_Options->GetUseQueryClusters(),
false);
1205 BOOST_REQUIRE(
fabs(m_Options->GetRpsEvalue() - 0.1) < 0.01);
1213 BOOST_REQUIRE_EQUAL(expected_queries.size(),
1214 m_Aligner->GetQueries().size());
1216 for (
size_t i=0;
i < expected_queries.size();
i++) {
1217 BOOST_REQUIRE(expected_queries[
i]->CompareOrdered(
1218 *m_Aligner->GetQueries()[
i]->GetId()) == 0);
1222 vector<bool> expected_is_domain_searched(m_Sequences.size(),
false);
1223 expected_is_domain_searched[0] =
true;
1224 expected_is_domain_searched[2] =
true;
1226 BOOST_REQUIRE_EQUAL(expected_is_domain_searched.size(),
1229 for (
size_t i=0;
i < expected_is_domain_searched.size();
i++) {
1230 BOOST_REQUIRE_EQUAL(expected_is_domain_searched[
i],
1237 const size_t kNumExpectedPreHits = 7;
1238 vector<SHit> expected_hits(kNumExpectedPreHits);
1241 expected_hits[0].query =
"lcl|1buc_A";
1242 expected_hits[0].subject = 1;
1243 expected_hits[0].query_range =
TRange(6, 382);
1244 expected_hits[0].subject_range =
TRange(0, 372);
1245 expected_hits[0].score = 1414;
1248 expected_hits[1].query =
"lcl|1buc_A";
1249 expected_hits[1].subject = 0;
1250 expected_hits[1].query_range =
TRange(95, 377);
1251 expected_hits[1].subject_range =
TRange(42, 325);
1252 expected_hits[1].score = 885;
1255 expected_hits[2].query =
"lcl|1buc_A";
1256 expected_hits[2].subject = 2;
1257 expected_hits[2].query_range =
TRange(1, 382);
1258 expected_hits[2].subject_range =
TRange(19, 405);
1259 expected_hits[2].score = 718;
1262 expected_hits[3].query =
"lcl|Q8jzn5";
1263 expected_hits[3].subject = 2;
1264 expected_hits[3].query_range =
TRange(41, 448);
1265 expected_hits[3].subject_range =
TRange(0, 408);
1266 expected_hits[3].score = 1779;
1269 expected_hits[4].query =
"lcl|Q8jzn5";
1270 expected_hits[4].subject = 1;
1271 expected_hits[4].query_range =
TRange(88, 440);
1272 expected_hits[4].subject_range =
TRange(22, 367);
1273 expected_hits[4].score = 981;
1276 expected_hits[5].query =
"lcl|Q8jzn5";
1277 expected_hits[5].subject = 0;
1278 expected_hits[5].query_range =
TRange(151, 440);
1279 expected_hits[5].subject_range =
TRange(42, 325);
1280 expected_hits[5].score = 872;
1283 expected_hits[6].query =
"lcl|Q8jzn5";
1284 expected_hits[6].subject = 0;
1285 expected_hits[6].query_range =
TRange(511, 581);
1286 expected_hits[6].subject_range =
TRange(208, 280);
1287 expected_hits[6].score = 75;
1294 BOOST_REQUIRE_MESSAGE(hits_match, errors);
1301 m_Aligner->SetInterruptCallback(
1305 const size_t kNumExpectedHits = 10;
1306 BOOST_REQUIRE(kNumExpectedHits > kNumExpectedPreHits);
1307 expected_hits.resize(kNumExpectedHits);
1311 expected_hits[7].query =
"lcl|Q10535";
1312 expected_hits[7].subject = 2;
1313 expected_hits[7].query_range =
TRange(27, 432);
1314 expected_hits[7].subject_range =
TRange(0, 400);
1315 expected_hits[7].score = 768;
1318 expected_hits[8].query =
"lcl|Q10535";
1319 expected_hits[8].subject = 0;
1320 expected_hits[8].query_range =
TRange(138, 433);
1321 expected_hits[8].subject_range =
TRange(42, 326);
1322 expected_hits[8].score = 738;
1325 expected_hits[9].query =
"lcl|Q10535";
1326 expected_hits[9].subject = 1;
1327 expected_hits[9].query_range =
TRange(75, 434);
1328 expected_hits[9].subject_range =
TRange(24, 369);
1329 expected_hits[9].score = 704;
1335 BOOST_REQUIRE_MESSAGE(hits_match, errors);
1341 vector< CRef<CSeq_id> > expected_queries;
1343 string id = (*it)->GetId()->AsFastaString();
1348 m_Options->SetUseQueryClusters(
false);
1349 m_Options->SetRpsEvalue(0.1);
1352 m_Aligner->SetQueries(m_Sequences,
m_Scope);
1355 BOOST_REQUIRE_EQUAL(m_Options->GetUseQueryClusters(),
false);
1356 BOOST_REQUIRE(
fabs(m_Options->GetRpsEvalue() - 0.1) < 0.01);
1364 BOOST_REQUIRE_EQUAL(expected_queries.size(),
1365 m_Aligner->GetQueries().size());
1367 for (
size_t i=0;
i < expected_queries.size();
i++) {
1368 BOOST_REQUIRE(expected_queries[
i]->CompareOrdered(
1369 *m_Aligner->GetQueries()[
i]->GetId()) == 0);
1373 vector<bool> expected_is_domain_searched(m_Sequences.size(),
false);
1374 expected_is_domain_searched[0] =
true;
1375 expected_is_domain_searched[2] =
true;
1377 BOOST_REQUIRE_EQUAL(expected_is_domain_searched.size(),
1380 for (
size_t i=0;
i < expected_is_domain_searched.size();
i++) {
1381 BOOST_REQUIRE_EQUAL(expected_is_domain_searched[
i],
1388 const size_t kNumExpectedPreHits = 7;
1389 vector<SHit> expected_hits(kNumExpectedPreHits);
1392 expected_hits[0].query =
"lcl|1buc_A";
1393 expected_hits[0].subject = 1;
1394 expected_hits[0].query_range =
TRange(6, 382);
1395 expected_hits[0].subject_range =
TRange(0, 372);
1396 expected_hits[0].score = 1414;
1399 expected_hits[1].query =
"lcl|1buc_A";
1400 expected_hits[1].subject = 0;
1401 expected_hits[1].query_range =
TRange(95, 377);
1402 expected_hits[1].subject_range =
TRange(42, 325);
1403 expected_hits[1].score = 885;
1406 expected_hits[2].query =
"lcl|1buc_A";
1407 expected_hits[2].subject = 2;
1408 expected_hits[2].query_range =
TRange(1, 382);
1409 expected_hits[2].subject_range =
TRange(19, 405);
1410 expected_hits[2].score = 718;
1413 expected_hits[3].query =
"lcl|Q8jzn5";
1414 expected_hits[3].subject = 2;
1415 expected_hits[3].query_range =
TRange(41, 448);
1416 expected_hits[3].subject_range =
TRange(0, 408);
1417 expected_hits[3].score = 1779;
1420 expected_hits[4].query =
"lcl|Q8jzn5";
1421 expected_hits[4].subject = 1;
1422 expected_hits[4].query_range =
TRange(88, 440);
1423 expected_hits[4].subject_range =
TRange(22, 367);
1424 expected_hits[4].score = 981;
1427 expected_hits[5].query =
"lcl|Q8jzn5";
1428 expected_hits[5].subject = 0;
1429 expected_hits[5].query_range =
TRange(151, 440);
1430 expected_hits[5].subject_range =
TRange(42, 325);
1431 expected_hits[5].score = 872;
1434 expected_hits[6].query =
"lcl|Q8jzn5";
1435 expected_hits[6].subject = 0;
1436 expected_hits[6].query_range =
TRange(511, 581);
1437 expected_hits[6].subject_range =
TRange(208, 280);
1438 expected_hits[6].score = 75;
1445 BOOST_REQUIRE_MESSAGE(hits_match, errors);
1452 m_Aligner->SetInterruptCallback(
1456 const size_t kNumExpectedHits = 10;
1457 BOOST_REQUIRE(kNumExpectedHits > kNumExpectedPreHits);
1458 expected_hits.resize(kNumExpectedHits);
1462 expected_hits[7].query =
"lcl|Q10535";
1463 expected_hits[7].subject = 2;
1464 expected_hits[7].query_range =
TRange(27, 432);
1465 expected_hits[7].subject_range =
TRange(0, 400);
1466 expected_hits[7].score = 768;
1469 expected_hits[8].query =
"lcl|Q10535";
1470 expected_hits[8].subject = 0;
1471 expected_hits[8].query_range =
TRange(138, 433);
1472 expected_hits[8].subject_range =
TRange(42, 326);
1473 expected_hits[8].score = 738;
1476 expected_hits[9].query =
"lcl|Q10535";
1477 expected_hits[9].subject = 1;
1478 expected_hits[9].query_range =
TRange(75, 434);
1479 expected_hits[9].subject_range =
TRange(24, 369);
1480 expected_hits[9].score = 704;
1486 BOOST_REQUIRE_MESSAGE(hits_match, errors);
1492 m_Options->SetUseQueryClusters(
false);
1496 vector< CRef<CSeq_loc> > queries;
1497 queries.push_back(m_Sequences.back());
1498 queries.push_back(m_Sequences.back());
1501 m_Aligner->SetQueries(queries,
m_Scope);
1517 vector< CRef<CSeq_loc> > queries;
1520 queries.push_back(seq);
1521 queries.push_back(seq);
1524 m_Options->SetRpsEvalue(10);
1525 m_Options->SetUseQueryClusters(
false);
1532 m_Aligner->SetQueries(queries,
m_Scope);
1543 m_Options->SetRpsEvalue(0.00001);
1547 m_Aligner->SetQueries(queries,
m_Scope);
1554 BOOST_REQUIRE_EQUAL(
1564 m_Options->SetUseQueryClusters(
false);
1565 m_Options->SetDomainHits(m_RpsArchive);
1566 BOOST_REQUIRE(m_Options->Validate());
1568 m_Aligner->SetQueries(m_Sequences,
m_Scope);
1575 m_Options->SetUseQueryClusters(
true);
1576 BOOST_REQUIRE(m_Options->CanGetDomainHits());
1578 m_Aligner->SetQueries(m_Sequences,
m_Scope);
1587 m_Options->SetRpsEvalue(10);
1588 m_Options->SetDomainHits(m_RpsArchive);
1589 BOOST_REQUIRE(m_Options->Validate());
1595 m_Aligner->SetInputMSAs(*m_Align1, *m_Align2, repr, repr,
m_Scope);
1605 BOOST_REQUIRE(m_Options->CanGetDomainHits());
1608 m_Aligner->SetInputMSAs(*m_Align1, *m_Align2, repr, repr,
m_Scope);
1618 m_Options->SetUseQueryClusters(
false);
1620 m_Aligner->SetQueries(m_Sequences,
m_Scope);
1625 BOOST_REQUIRE(istr);
1634 (
string)
"lcl|1buc_A");
1636 (
string)
"gnl|CDD|273847");
1646 m_Options->SetUseQueryClusters(
false);
1648 m_Aligner->SetQueries(m_Sequences,
m_Scope);
1651 CPssm& pssm = m_RpsArchive->SetRequest().SetBody().SetQueue_search()
1652 .SetQueries().SetPssm().SetPssm();
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CLocalRange< TOffset > TRange
define for the fundamental building block of sequence ranges
vector< TSingleCluster > TClusters
void RemovePureGapSegs()
Remove any segments in which every row has a gap (these can arise when ExtractRows is used)
void Compact()
Join adjacent mergeable segments to create a more compact alignment.
An ordered collection of CHit objects.
int Size() const
Retrieve number of hits in list.
CHit * GetHit(int index)
Retrieve a hit from the hitlist.
A generalized representation of a pairwise alignment.
int m_Score
Score of alignment.
int m_SeqIndex1
Numerical identifier for first sequence in alignment.
int m_SeqIndex2
Numerical identifier for second sequence in alignment.
TRange m_SeqRange1
The range of offsets on the first sequence.
TRange m_SeqRange2
The range of offsets on the second sequence.
Fixture class initialized for each multialigner test.
static CRef< CBlast4_archive > m_RpsArchive
static CRef< CObjectManager > m_Objmgr
static void Initialize(void)
Initialize static attributes.
static CRef< CScope > m_Scope
CRef< CMultiAlignerOptions > m_Options
static vector< CRef< CSeq_loc > > m_Sequences
static void x_ReadSequences(void)
Read test sequences in FASTA format from file.
static void x_InitScope(void)
Initialize scope.
static void x_ReadAlignments(void)
Read test MSAs from files.
static void x_ReadRpsArchive(void)
Read test RPS-BLAST output in the archive format from file.
CMultiAlignerFixture(void)
static CRef< CSeq_align > m_Align2
CRef< CMultiAligner > m_Aligner
static void Finalize(void)
Release static attributes.
static CRef< CSeq_align > m_Align1
Options and parameters for multiple alignment.
void SetRpsDb(const string &dbname)
Use RPS Blast with given database.
const TConstraints & GetUserConstraints(void) const
Get user constraints.
@ fNoQueryClusters
No query clustering.
@ fNoRpsBlast
Do not use RPS Blast.
vector< SConstraint > TConstraints
Test class for accessing CMultiAligner private attributes and methods.
static void SetDomainHits(CMultiAligner &aligner, CConstRef< CBlast4_archive > archive)
Set pre-computed domain hits without invoking CMultiAligner::Run()
static bool InterruptAfterRpsBlastSearch(CMultiAligner::SProgress *progress)
Quit after doing RPS-BLAST search.
static void SetQuerySeqlocs(CMultiAligner &aligner, const vector< CRef< CSeq_loc > > &queries)
Set queries in the aligner only as Seq-locs; do not retrieve sequences.
static const vector< bool > & GetIsDomainSearched(const CMultiAligner &aligner)
static const CHitList & GetDomainHits(const CMultiAligner &aligner)
static bool CompareDomainHits(const vector< SHit > &expected_hits, const CMultiAligner &aligner, string &err)
Compare domain hits in CMultiAligner with reference alignments.
Simultaneously align multiple protein sequences.
CConstRef< CMultiAlignerOptions > GetOptions(void) const
Get multi-aligner parameters.
const vector< CSequence > & GetSeqResults(void) const
Retrieve the current aligned results in CSequence format.
const vector< CRef< objects::CSeq_loc > > & GetQueries(void) const
Get query sequences.
CRef< objects::CSeq_align > GetResults(void) const
Retrieve the current aligned results in Seq-align format.
vector< CRef< objects::CSeq_loc > > m_tQueries
TStatus Run(void)
Align the current set of input sequences (reset any existing alignment information).
const TPhyTreeNode * GetTree(void) const
Get tree used as guide in progressive alignment.
@ eOutOfMemory
Out of memory error.
@ eSuccess
Alignment successfully completed.
@ eInterrupt
Alignment interrupted through callback function.
const CClusterer::TClusters & GetQueryClusters(void) const
Get clusters of query sequences.
vector< bool > m_IsDomainSearched
Marks sequences with pre-computed domain hits.
void SetQueries(const vector< CRef< objects::CSeq_loc > > &queries, CRef< objects::CScope > scope)
Set query sequences.
CRef< objects::CScope > GetScope(void)
Get scope.
void x_SetDomainHits(const blast::TSeqLocVector &queruies, const vector< int > &indices, const objects::CBlast4_archive &archive)
Set pre-computed domain hits using BLAST archive format.
CRef< objects::CBioTreeContainer > GetTreeContainer(void) const
Get serializable tree used as guide in progressive alignment.
void x_CreateBlastQueries(blast::TSeqLocVector &queries, vector< int > &indices)
Create query set for RPS Blast and Blastp searches along with indices in multiple alignment queries a...
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
static const unsigned char kGapChar
The ncbistdaa code for a gap.
definition of a Culling tree
iterator_bool insert(const value_type &val)
Interface for CMultiAligner.
int ReadFastaQueries(const string &filename, vector< CRef< objects::CSeq_loc > > &seqs, CRef< objects::CScope > &scope, bool parse_deflines, objects::CSeqIdGenerator *id_generator)
int ReadMsa(const string &filename, CRef< CSeq_align > &align, CRef< CScope > scope, bool parse_deflines, objects::CSeqIdGenerator *id_generator)
static void s_TestResultAlignment(const vector< CRef< CSeq_loc > > &queries, const CRef< CSeq_align > &seqalign, const vector< CSequence > &seqs, CRef< CScope > scope, const string &aln_ref="")
static void s_TestResultTreeContainer(int num_queries, const CBioTreeContainer &btc)
static void s_TestResults(CMultiAligner &aligner, const string &ref_aln="")
BOOST_AUTO_TEST_CASE(TestSetQueries)
static bool s_Interrupt(CMultiAligner::SProgress *progress)
static void s_MakeBioseqs(const vector< CRef< CSeq_loc > > &seqlocs, CRef< CScope > scope, vector< CRef< CBioseq > > &bioseqs)
void s_TestAlignmentFromMSAs(CRef< CSeq_align > result, CRef< CSeq_align > in_first, CRef< CSeq_align > in_second)
static void s_TestResultTree(int num_queries, const TPhyTreeNode *tree)
static void s_TestQueriesAsBioseqs(const vector< CRef< CBioseq > > &bioseqs)
static void s_TestQueriesAsSeq_locs(const vector< CRef< CSeq_loc > > &seqlocs, CRef< CScope > scope)
static void s_TestResultClusters(int num_queries, const CClusterer::TClusters &clusters, const CMultiAlignerOptions::TConstraints &constraints)
static void s_TestTree(vector< bool > &queries, const TPhyTreeNode *node)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
#define MSerial_AsnText
I/O stream manipulators –.
const string AsFastaString(void) const
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle. Throws an exception if none is available.
TObjectType * GetNonNullPointer(void)
Get pointer value and throw a null pointer exception if pointer is null.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
void Reset(void)
Reset reference object.
TObjectType * GetNonNullPointer(void) const
Get pointer value and throw a null pointer exception if pointer is null.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
TNodeList_CI SubNodeBegin(void) const
Return first const iterator on subnode list.
TNodeList::const_iterator TNodeList_CI
bool IsLeaf() const
Report whether this is a leaf node.
TNodeList_CI SubNodeEnd(void) const
Return last const iterator on subnode list.
const TValue & GetValue(void) const
Return node's value.
const TTreeType * GetParent(void) const
Get node's parent.
static const char label[]
list< CRef< CFeatureDescr > > Tdata
list< CRef< CNodeFeature > > Tdata
list< CRef< CNode > > Tdata
const Tdata & Get(void) const
Get the member data.
const Tdata & Get(void) const
Get the member data.
const TFdict & GetFdict(void) const
Get the Fdict member data.
const TNodes & GetNodes(void) const
Get the Nodes member data.
const TAlignments & GetAlignments(void) const
Get the Alignments member data.
const TResults & GetResults(void) const
Get the Results member data.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetNumColumns(TNumColumns value)
Assign a value to NumColumns data member.
void SetNumRows(TNumRows value)
Assign a value to NumRows data member.
const TDenseg & GetDenseg(void) const
Get the variant data.
TLens & SetLens(void)
Assign a value to Lens data member.
const TStarts & GetStarts(void) const
Get the Starts member data.
TDim GetDim(void) const
Get the Dim member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
const TLens & GetLens(void) const
Get the Lens member data.
void SetDim(TDim value)
Assign a value to Dim data member.
vector< TSignedSeqPos > TStarts
void SetDim(TDim value)
Assign a value to Dim data member.
void SetType(TType value)
Assign a value to Type data member.
TDim GetDim(void) const
Get the Dim member data.
TStarts & SetStarts(void)
Assign a value to Starts data member.
TType GetType(void) const
Get the Type member data.
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
const TIds & GetIds(void) const
Get the Ids member data.
TIds & SetIds(void)
Assign a value to Ids data member.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
bool IsDenseg(void) const
Check if variant Denseg is selected.
unsigned int
A callback function used to compare two keys in a database.
const struct ncbi::grid::netcache::search::fields::SIZE size
double f(double x_, const double &y_)
Options for CMultiAligner.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Structure for reporting alignment progress.
Representation of a hit for computing constraints.
int subject
subject ordinal id in the database
TRange query_range
alignment extents
Utility stuff for more convenient using of Boost.Test library.