108 const unsigned int kQuerySize = 10;
109 const unsigned int kNumSeqs = 2;
110 const unsigned char kQuery[] = { 3, 9, 14, 20, 6, 23, 1, 7, 16, 5 };
112 m_query =
new unsigned char[kQuerySize];
113 memcpy((
void*) m_query, (
void*)
kQuery, kQuerySize*
sizeof(*
kQuery));
115 m_dim.query_length = kQuerySize;
116 m_dim.num_seqs = kNumSeqs;
120 for (
unsigned int i = 0;
i < m_dim.query_length;
i++) {
121 for (
unsigned int j = 0; j < m_dim.num_seqs+1; j++) {
122 m_msa->data[j][
i].letter =
kQuery[
i];
123 m_msa->data[j][
i].is_aligned =
true;
128 m_msa->data[1][0].letter =
129 m_msa->data[2][0].letter =
130 m_msa->data[2][m_dim.query_length-1].letter =
137 memset((
void*) &m_diag_request, 0,
sizeof(m_diag_request));
152 return &m_diag_request;
171 for (
unsigned int i = 0;
i < m_dim.query_length;
i++) {
172 for (
unsigned int j = 0; j < m_dim.num_seqs+1; j++) {
173 m_msa->data[j][
i].letter = m_query[
i];
174 m_msa->data[j][
i].is_aligned =
true;
180 int gap_position =
r.GetRand(0, GetQueryLength() - 1);
182 m_msa->data[0][gap_position].letter = m_query[gap_position];
272 case eNearIdenticalHits:
273 SetupNearIdenticalHits();
276 case eMsaHasUnalignedRegion:
277 SetupMsaHasUnalignedRegion();
280 case eQueryAlignedWithInternalGaps:
281 SetupQueryAlignedWithInternalGaps();
284 case eHenikoffsPaper:
285 SetupHenikoffsPositionBasedSequenceWeights();
289 throw std::logic_error(
"Unsupported alignment test data");
303 static const size_t kQueryLength = 232;
307 const Uint4 kNumAlignedSeqs = 1;
309 m_dim.query_length = kQueryLength;
310 m_dim.num_seqs = kNumAlignedSeqs;
312 m_query =
new unsigned char[kQueryLength];
315 for (
unsigned int i = 0;
i < kQueryLength;
i++) {
316 for (
unsigned int seq_idx = 0; seq_idx < kNumAlignedSeqs + 1;
318 m_msa->data[seq_idx][
i].letter = m_query[
i] =
kQuery[
i];
319 m_msa->data[seq_idx][
i].is_aligned =
true;
327 BOOST_REQUIRE(score_matrix);
338 if (score > max_score) {
348 const Uint4 kNumAlignedSeqs = 2;
350 m_dim.query_length = kQueryLength;
351 m_dim.num_seqs = kNumAlignedSeqs;
353 m_query =
new unsigned char[kQueryLength];
356 for (
unsigned int i = 0;
i < kQueryLength;
i++) {
357 m_msa->data[0][
i].letter = m_query[
i] =
kQuery[
i];
358 m_msa->data[0][
i].is_aligned =
true;
368 for (
unsigned int i = kFirstAlignment.first;
369 i < kFirstAlignment.second;
i++) {
370 m_msa->data[1][
i].letter =
371 FindNonIdenticalHighScoringResidue(
kQuery[
i], score_matrix);
372 m_msa->data[1][
i].is_aligned =
true;
380 for (
unsigned int i = kSecondAlignment.first;
381 i < kSecondAlignment.second;
i++) {
382 m_msa->data[2][
i].letter =
383 FindNonIdenticalHighScoringResidue(
kQuery[
i], score_matrix);
384 m_msa->data[2][
i].is_aligned =
true;
393 const Uint4 kNumAlignedSeqs = 1;
394 const size_t kLocalQueryLength = 87;
396 m_dim.query_length = kLocalQueryLength;
397 m_dim.num_seqs = kNumAlignedSeqs;
399 m_query =
new unsigned char[kLocalQueryLength];
401 string query_seq(
"MFKVYGYDSNIHKCGPCDNAKRLLTVKKQPFEFINIM");
402 query_seq +=
string(
"PEKGVFDDEKIAELLTKLGRDTQIGLTMPQVFAPDGSHIGGFD");
403 query_seq +=
string(
"QLREYFK");
405 typedef pair<TAlignedSegment, string> TAlignedSequence;
406 vector<TAlignedSequence> aligned_sequence;
408 TAlignedSequence region(make_pair(make_pair(0
U, 8U),
409 string(
"KVVVFIKP")));
410 aligned_sequence.push_back(region);
412 region = make_pair(make_pair(12U, 39U),
413 string(
"TCPFCRKTQELLSQLPFLLEFVDITAT"));
414 aligned_sequence.push_back(region);
416 region = make_pair(make_pair(41U, 57U),
string(
"SDTNEIQDYLQQLTGA"));
417 aligned_sequence.push_back(region);
419 region = make_pair(make_pair(62U, 71U),
string(
"RTVPRVFIG"));
420 aligned_sequence.push_back(region);
422 region = make_pair(make_pair(72U, 87U),
string(
"KECIGGCTDLESMHK"));
423 aligned_sequence.push_back(region);
427 for (
Uint4 i = 0;
i < kLocalQueryLength;
i++) {
429 query_seq.substr(
i, 1));
430 m_msa->data[0][
i].letter = m_query[
i];
431 m_msa->data[0][
i].is_aligned =
true;
434 m_msa->data[1][
i].letter = kGapResidue;
435 m_msa->data[1][
i].is_aligned =
true;
439 ITERATE(vector<TAlignedSequence>, itr, aligned_sequence) {
441 string sequence_data = itr->second;
443 for (
Uint4 i = loc.first, j = 0;
i < loc.second;
i++, j++) {
444 m_msa->data[1][
i].letter =
446 sequence_data.substr(j, 1));
452 const Uint4 kNumAlignedSeqs = 3;
453 const Uint1 kQuerySequence[5] = { 7, 22, 19, 7, 17 };
454 const Uint1 kSeq1[5] = { 7, 6, 4, 7, 6 };
455 const Uint1 kSeq2[5] = { 7, 22, 4, 7, 6 };
456 const Uint1 kSeq3[5] = { 7, 22, 15, 7, 7 };
458 m_dim.query_length =
sizeof(
kQuery);
459 m_dim.num_seqs = kNumAlignedSeqs;
461 m_query =
new unsigned char[
sizeof(kQuerySequence)];
464 for (
Uint4 s = 0; s < kNumAlignedSeqs; s++) {
468 case 0: sequence = kSeq1;
break;
469 case 1: sequence = kSeq2;
break;
470 case 2: sequence = kSeq3;
break;
474 for (
Uint4 i = 0;
i <
sizeof(kQuerySequence);
i++) {
475 m_query[
i] = kQuerySequence[
i];
476 m_msa->data[s][
i].letter = sequence[
i];
477 m_msa->data[s][
i].is_aligned =
true;
483 const Uint4 kNumAlignedSeqs = 2;
487 const Uint1 kGi_129296_[388] = {
488 12, 4, 17, 9, 17, 19, 18, 13, 1, 10, 6, 3, 6, 4, 19,
489 6, 13, 5, 12, 10, 19, 8, 8, 19, 13, 5, 13, 9, 11, 22,
490 3, 14, 11, 17, 9, 11, 18, 1, 11, 1, 12, 19, 22, 11, 7,
491 1, 16, 7, 13, 18, 5, 17, 15, 12, 10, 10, 19, 11, 8, 6,
492 4, 17, 9, 18, 7, 1, 7, 17, 18, 18, 4, 17, 15, 3, 7,
493 17, 17, 5, 22, 19, 8, 13, 11, 6, 10, 5, 11, 11, 17, 5,
494 9, 18, 16, 14, 13, 1, 18, 22, 17, 11, 5, 9, 1, 4, 10,
495 11, 22, 19, 4, 10, 18, 6, 17, 19, 11, 14, 5, 22, 11, 17,
496 3, 1, 16, 10, 6, 22, 18, 7, 7, 19, 5, 5, 19, 13, 6,
497 10, 18, 1, 1, 5, 5, 1, 16, 15, 11, 9, 13, 17, 20, 19,
498 5, 10, 5, 18, 13, 7, 15, 9, 10, 4, 11, 11, 19, 17, 17,
499 17, 9, 4, 6, 7, 18, 18, 12, 19, 6, 9, 13, 18, 9, 22,
500 6, 10, 7, 9, 20, 10, 9, 1, 6, 13, 18, 5, 4, 18, 16,
501 5, 12, 14, 6, 17, 12, 18, 10, 5, 5, 17, 10, 14, 19, 15,
502 12, 12, 3, 12, 13, 13, 17, 6, 13, 19, 1, 18, 11, 14, 1,
503 5, 10, 12, 10, 9, 11, 5, 11, 14, 22, 1, 17, 7, 4, 11,
504 17, 12, 11, 19, 11, 11, 14, 4, 5, 19, 17, 7, 11, 5, 16,
505 9, 5, 10, 18, 9, 13, 6, 4, 10, 11, 16, 5, 20, 18, 17,
506 18, 13, 1, 12, 1, 10, 10, 17, 12, 10, 19, 22, 11, 14, 16,
507 12, 10, 9, 5, 5, 10, 22, 13, 11, 18, 17, 9, 11, 12, 1,
508 11, 7, 12, 18, 4, 11, 6, 17, 16, 17, 1, 13, 11, 18, 7,
509 9, 17, 17, 19, 4, 13, 11, 12, 9, 17, 4, 1, 19, 8, 7,
510 19, 6, 12, 5, 19, 13, 5, 5, 7, 18, 5, 1, 18, 7, 17,
511 18, 7, 1, 9, 7, 13, 9, 10, 8, 17, 11, 5, 11, 5, 5,
512 6, 16, 1, 4, 8, 14, 6, 11, 6, 6, 9, 16, 22, 13, 14,
513 18, 13, 1, 9, 11, 6, 6, 7, 16, 22, 20, 17, 14};
515 m_dim.query_length = kQueryLength;
516 m_dim.num_seqs = kNumAlignedSeqs;
518 m_query =
new unsigned char[kQueryLength];
520 for (
unsigned int i = 0;
i < kQueryLength;
i++) {
525 for (
unsigned int i = 1;
i < kNumAlignedSeqs + 1;
i++) {
526 for (
unsigned int j = 0; j < kQueryLength; j++) {
527 m_msa->data[
i][j].letter = kGi_129296_[j];
528 m_msa->data[
i][j].is_aligned =
true;
536 const Uint4 kHitIndex = 2;
537 const Uint4 kNumIdenticalResidues = (
Uint4) (GetQueryLength() *
540 for (
Uint4 i = kNumIdenticalResidues;
i < GetQueryLength();
i++) {
541 Uint1& residue = m_msa->data[kHitIndex][
i].letter;
550 15, 9, 10, 4, 11, 11, 19, 17, 17, 17, 18, 4, 11, 4, 18,
551 18, 11, 19, 11, 19, 13, 1, 9, 22, 6, 10, 7, 12, 20, 10,
552 18, 1, 6, 13, 1, 5, 4, 18, 16, 5, 12, 14, 6, 8, 19,
553 18, 10, 15, 5, 17, 10, 14, 19, 15, 12, 12, 3, 12, 13, 13,
554 17, 6, 13, 19, 1, 18, 11, 14, 1, 5, 10, 12, 10, 9, 11,
555 5, 11, 14, 6, 1, 17, 7, 4, 11, 17, 12, 11, 19, 11, 11,
556 14, 4, 5, 19, 17, 4, 11, 5, 16, 9, 5, 10, 18, 9, 13,
557 6, 5, 10, 11, 18, 5, 20, 18, 13, 14, 13, 18, 12, 5, 10,
558 16, 16, 19, 10, 19, 22, 11, 14, 15, 12, 10, 9, 5, 5, 10,
559 22, 13, 11, 18, 17, 19, 11, 12, 1, 11, 7, 12, 18, 4, 11,
560 6, 9, 14, 17, 1, 13, 11, 18, 7, 9, 17, 17, 1, 5, 17,
561 11, 10, 9, 17, 15, 1, 19, 8, 7, 1, 6, 12, 5, 11, 17,
562 5, 4, 7, 9, 5, 12, 1, 7, 17, 18, 7, 19, 9, 5, 4,
563 9, 10, 8, 17, 14, 5, 17, 5, 15, 6, 16, 1, 4, 8, 14,
564 6, 11, 6, 11, 9, 10, 8, 13, 14, 18, 13, 18, 9, 19, 22,
565 6, 7, 16, 22, 20, 17, 14};
573 const string seqalign(
"data/nr-129295.new.asn.short");
574 unique_ptr<CObjectIStream>
in
580 CSeq_id qid(
"gi|129295"), sid(
"gi|6");
588 memset((
void*) &request, 0,
sizeof(request));
594 request.
sigma =
true;
598 const string kTitle(
"Test defline");
620 if((*iter)->IsTitle()) {
621 query_descr += (*iter)->GetTitle();
625 BOOST_REQUIRE_EQUAL(query_descr,
kTitle);
628 const size_t kNumElements =
633 BOOST_REQUIRE_EQUAL(kNumElements, res_freqs.size());
637 BOOST_REQUIRE_EQUAL(kNumElements, wres_freqs.size());
641 BOOST_REQUIRE_EQUAL(kNumElements, freq_ratios.size());
651 CSeq_id qid(
"gi|129295"), sid(
"gi|6");
656 BOOST_REQUIRE(sasv.size() != 0);
668 unique_ptr<CPsiBlastInputData> pssm_input(
671 sasv[0], q->scope, *opts));
674 pssm_input->Process();
692 vector<PSIMsaCell> aligned_pos(pssm_input->GetQueryLength());
693 fill(aligned_pos.begin(), aligned_pos.end(), kNullPSIMsaCell);
699 const CDense_seg& ds = (*hsp)->GetSegs().GetDenseg();
703 const vector<TSignedSeqPos>& starts = ds.
GetStarts();
704 const vector<TSeqPos>& lengths = ds.
GetLens();
710 #define GAP_IN_ALIGNMENT -1
712 for (
TSeqPos pos = 0; pos < lengths[
i]; pos++) {
718 s_index += lengths[
i];
721 s_index = (
i == 0) ? 0 : (s_index - starts[1]);
722 for (
TSeqPos pos = 0; pos < lengths[
i]; pos++) {
724 pd.
letter = subj[s_index++];
733 for (
TSeqPos i = 0;
i < pssm_input->GetQueryLength();
i++) {
734 BOOST_REQUIRE(seq_index < nseqs);
736 pssm_input->GetData()->data[seq_index][
i];
738 ss <<
"Sequence " << seq_index <<
", position " <<
i
740 BOOST_REQUIRE_MESSAGE(aligned_pos[
i].
letter == pos_desc.
letter &&
741 aligned_pos[
i].is_aligned == pos_desc.
is_aligned, ss.str());
745 }
catch (
const exception& e) {
746 cerr << e.what() << endl;
747 BOOST_REQUIRE(
false);
749 cerr <<
"Unknown exception" << endl;
750 BOOST_REQUIRE(
false);
763 unique_ptr<IPssmInputData> pssm_input
765 pssm_input->Process();
769 const Uint4 kSelfHitIndex = 1;
770 BOOST_REQUIRE_EQUAL(
true, !!msa->use_sequence[
kQueryIndex]);
771 BOOST_REQUIRE_EQUAL(
false, !!msa->use_sequence[kSelfHitIndex]);
775 unique_ptr<IPssmInputData> pssm_input
777 pssm_input->Process();
781 const Uint4 kDuplicateHitIndex = 2;
782 BOOST_REQUIRE_EQUAL(
false, !!msa->use_sequence[kDuplicateHitIndex]);
783 BOOST_REQUIRE_EQUAL(
true, !!msa->use_sequence[
kQueryIndex]);
784 BOOST_REQUIRE_EQUAL(
true, !!msa->use_sequence[
kQueryIndex + 1]);
788 unique_ptr<IPssmInputData> pssm_input
790 pssm_input->Process();
794 const Uint4 kRemovedHitIndex = 2;
795 BOOST_REQUIRE_EQUAL(
false,
796 !! msa->use_sequence[kRemovedHitIndex]);
797 BOOST_REQUIRE_EQUAL(
true, !!msa->use_sequence[
kQueryIndex]);
798 BOOST_REQUIRE_EQUAL(
true, !! msa->use_sequence[
kQueryIndex + 1]);
802 unique_ptr<IPssmInputData> pssm_input
805 BOOST_REQUIRE_EQUAL(
string(
"BLOSUM62"),
806 string(pssm_input->GetMatrixName()));
810 unique_ptr< CNcbiMatrix<int> > pssm
826 (
size_t)pssm->GetCols());
828 (
size_t)pssm->GetRows());
829 for (
int i = 0;
i < pssm_asn->
GetPssm().GetNumColumns();
i++) {
830 for (
int j = 0; j < pssm_asn->
GetPssm().GetNumRows(); j++) {
836 if (pssm_input->GetData()->data[1][
i].is_aligned
837 && pssm_input->GetData()->data[1][
i].letter != kGapResidue) {
842 if (j == kGapResidue || j == kBResidue || j == kZResidue
843 || j == kUResidue || j >= kOResidue) {
845 ss <<
"Position " <<
i <<
" residue "
851 pssm_input->GetQuery()[
i], j);
854 ss <<
"Position " <<
i <<
" residue "
861 BOOST_REQUIRE_MESSAGE (score - (*pssm)(j,
i) <= 3, ss.str());
868 unique_ptr<IPssmInputData> pssm_input
871 pssm_input->Process();
872 BOOST_REQUIRE_EQUAL(
string(
"BLOSUM62"),
873 string(pssm_input->GetMatrixName()));
881 BOOST_REQUIRE_EQUAL(
true,
883 BOOST_REQUIRE_EQUAL(
true, !! packed_msa->use_sequence[1]);
884 BOOST_REQUIRE_EQUAL(
true, !! packed_msa->use_sequence[2]);
894 ss <<
"_PSIComputeAlignmentBlocks failed: "
896 BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS == rv, ss.str());
899 vector<CPssmInputTestData::TAlignedSegment> aligned_regions;
900 aligned_regions.push_back(make_pair(0
U, 99U));
901 aligned_regions.push_back(make_pair(200U,
902 pssm_input->GetQueryLength()-1));
904 for (vector<CPssmInputTestData::TAlignedSegment>::const_iterator
i =
905 aligned_regions.begin();
906 i != aligned_regions.end(); ++
i) {
907 for (
TSeqPos pos =
i->first; pos < i->second; pos++) {
909 ss <<
"Alignment extents differ at position "
911 BOOST_REQUIRE_MESSAGE((
int)
i->first == (
int)aligned_blocks->pos_extnt[pos].left, ss.str());
912 BOOST_REQUIRE_MESSAGE((
int)
i->second == (
int)aligned_blocks->pos_extnt[pos].right, ss.str());
913 BOOST_REQUIRE_MESSAGE( (
int)(
i->second -
i->first + 1) == (
int)aligned_blocks->size[pos], ss.str());
919 for (
size_t i = kUnalignedRange.first;
920 i < kUnalignedRange.second;
i++) {
922 ss <<
"Alignment extents differ at position "
924 BOOST_REQUIRE_MESSAGE((
int)-1 == (
int)aligned_blocks->pos_extnt[
i].left, ss.str());
925 BOOST_REQUIRE_MESSAGE( (
int)pssm_input->GetQueryLength() == (
int)aligned_blocks->pos_extnt[
i].right, ss.str());
926 BOOST_REQUIRE_MESSAGE(
927 (
int)(aligned_blocks->pos_extnt[
i].right - aligned_blocks->pos_extnt[
i].left + 1) == (
int)aligned_blocks->size[
i],
934 pssm_input->GetQueryLength()));;
938 (query_with_sentinels.get(), pssm_input->GetQueryLength()));
946 ss <<
"_PSIComputeSequenceWeights failed: "
948 BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS == rv, ss.str());
954 for (vector<CPssmInputTestData::TAlignedSegment>::const_iterator
i =
955 aligned_regions.begin();
956 i != aligned_regions.end(); ++
i) {
957 for (
TSeqPos pos =
i->first; pos < i->second; pos++) {
958 double total_sequence_weights_for_column = 0.0;
959 for (
size_t res = 0; res < msa->alphabet_size; res++) {
960 if (res == kXResidue)
continue;
961 total_sequence_weights_for_column +=
962 seq_weights->match_weights[pos][res];
964 BOOST_REQUIRE(total_sequence_weights_for_column > 0.99 &&
965 total_sequence_weights_for_column < 1.01);
969 for (
size_t pos = kUnalignedRange.first;
970 pos < kUnalignedRange.second; pos++) {
971 double total_sequence_weights_for_column = 0.0;
972 for (
size_t res = 0; res < msa->alphabet_size; res++) {
973 if (res == kXResidue)
continue;
974 total_sequence_weights_for_column +=
975 seq_weights->match_weights[pos][res];
977 BOOST_REQUIRE(total_sequence_weights_for_column == 0.0);
987 internal_pssm.
get());
989 ss <<
"_PSIComputeResidueFrequencies failed: "
991 BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS == rv, ss.str());
997 seq_weights->std_prob);
999 ss <<
"_PSIConvertResidueFreqsToPSSM failed: "
1001 BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS == rv, ss.str());
1005 seq_weights->std_prob,
1006 internal_pssm.
get(),
1009 ss <<
"_PSIScaleMatrix failed: "
1011 BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS == rv, ss.str());
1013 BOOST_REQUIRE_EQUAL(msa->dimensions->num_seqs, 3u);
1025 for (
Uint4 i = 0;
i < pssm_input->GetQueryLength();
i++) {
1029 if (msa->cell[1][
i].is_aligned || msa->cell[2][
i].is_aligned
1030 || msa->cell[3][
i].is_aligned) {
1036 if (j == kBResidue || j == kZResidue || j == kUResidue
1037 || j >= kOResidue) {
1042 if (j == kGapResidue) {
1044 ss <<
"Position " <<
i <<
" residue "
1046 BOOST_REQUIRE_MESSAGE(
BLAST_SCORE_MIN == internal_pssm->pssm[
i][j], ss.str());
1052 ss <<
"Position " <<
i <<
" residue "
1057 BOOST_REQUIRE_MESSAGE(score-1 <= internal_pssm->pssm[
i][j] && internal_pssm->pssm[
i][j] <= score+1, ss.str());
1067 unique_ptr<IPssmInputData> pssm_input
1069 pssm_input->Process();
1070 BOOST_REQUIRE_EQUAL(
string(
"BLOSUM62"),
1071 string(pssm_input->GetMatrixName()));
1079 const Uint4 kSelfHitIndex = 1;
1080 BOOST_REQUIRE_EQUAL(
true,
1082 BOOST_REQUIRE_EQUAL(
false,
1083 !! packed_msa->use_sequence[kSelfHitIndex]);
1093 ss <<
"_PSIComputeAlignmentBlocks failed: "
1095 BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS == rv, ss.str());
1097 for (
size_t i = 0;
i < pssm_input->GetQueryLength();
i++) {
1098 BOOST_REQUIRE_EQUAL((
int)-1,
1099 (
int)aligned_blocks->pos_extnt[
i].left);
1100 BOOST_REQUIRE_EQUAL((
int)pssm_input->GetQueryLength(),
1101 (
int)aligned_blocks->pos_extnt[
i].right);
1102 BOOST_REQUIRE_EQUAL((
int)pssm_input->GetQueryLength() + 2,
1103 (
int)aligned_blocks->size[
i]);
1109 pssm_input->GetQueryLength()));;
1113 (query_with_sentinels.get(), pssm_input->GetQueryLength()));
1123 ss <<
"_PSIComputeSequenceWeights failed: "
1125 BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS == rv, ss.str());
1134 internal_pssm.
get());
1136 ss <<
"_PSIComputeResidueFrequencies failed: "
1138 BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS == rv, ss.str());
1144 seq_weights->std_prob);
1146 ss <<
"_PSIConvertResidueFreqsToPSSM failed: "
1148 BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS == rv, ss.str());
1152 seq_weights->std_prob,
1153 internal_pssm.
get(),
1156 ss <<
"_PSIScaleMatrix failed: "
1158 BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS == rv, ss.str());
1170 for (
Uint4 i = 0;
i < pssm_input->GetQueryLength();
i++) {
1174 if (j == kGapResidue || j == kBResidue || j == kZResidue
1175 || j == kUResidue || j >= kOResidue) {
1177 ss <<
"Position " <<
i <<
" residue "
1179 BOOST_REQUIRE_MESSAGE(
BLAST_SCORE_MIN == internal_pssm->pssm[
i][j], ss.str());
1185 ss <<
"Position " <<
i <<
" residue "
1190 BOOST_REQUIRE_MESSAGE(score-1 <= internal_pssm->pssm[
i][j] && internal_pssm->pssm[
i][j] <= score+1, ss.str());
1224 unique_ptr<IPssmInputData> bad_pssm_data(
new
1235 BOOST_REQUIRE(msa ==
NULL);
1242 BOOST_REQUIRE(freq_ratios !=
NULL);
1244 BOOST_REQUIRE(freq_ratios ==
NULL);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Declares the CBl2Seq (BLAST 2 Sequences) class.
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
Declares the BLAST exception class.
Definitions which are dependent on the NCBI C++ Object Manager.
PSIBlastOptions * PSIBlastOptionsFree(PSIBlastOptions *psi_options)
Deallocate PSI BLAST options.
#define BLAST_EXPECT_VALUE
Default parameters for saving hits.
Int2 PSIBlastOptionsNew(PSIBlastOptions **psi_options)
Initialize default options for PSI BLAST.
Port of posit.h structures and impalaScaling for implementing composition based statistics for PSI-BL...
PSIMsa * PSIMsaFree(PSIMsa *msa)
Deallocates the PSIMsa structure.
PSIMsa * PSIMsaNew(const PSIMsaDimensions *dimensions)
Allocates and initializes the multiple sequence alignment data structure for use as input to the PSSM...
int _PSIComputeAlignmentBlocks(const _PSIMsa *msa, _PSIAlignedBlock *aligned_blocks)
Main function to compute aligned blocks' properties for each position within multiple alignment (stag...
int _PSIConvertFreqRatiosToPSSM(_PSIInternalPssmData *internal_pssm, const Uint1 *query, const BlastScoreBlk *sbp, const double *std_probs)
Converts the PSSM's frequency ratios obtained in the previous stage to a PSSM of scores.
int _PSIComputeFreqRatios(const _PSIMsa *msa, const _PSISequenceWeights *seq_weights, const BlastScoreBlk *sbp, const _PSIAlignedBlock *aligned_blocks, Int4 pseudo_count, Boolean nsg_compatibility_mode, _PSIInternalPssmData *internal_pssm)
Main function to compute the PSSM's frequency ratios (stage 5).
_PSISequenceWeights * _PSISequenceWeightsNew(const PSIMsaDimensions *dimensions, const BlastScoreBlk *sbp)
Allocates and initializes the _PSISequenceWeights structure.
_PSIInternalPssmData * _PSIInternalPssmDataNew(Uint4 query_length, Uint4 alphabet_size)
Allocates a new _PSIInternalPssmData structure.
_PSIAlignedBlock * _PSIAlignedBlockNew(Uint4 query_length)
Allocates and initializes the _PSIAlignedBlock structure.
int _PSIComputeSequenceWeights(const _PSIMsa *msa, const _PSIAlignedBlock *aligned_blocks, Boolean nsg_compatibility_mode, _PSISequenceWeights *seq_weights)
Main function to calculate the sequence weights.
int _PSIPurgeBiasedSegments(_PSIPackedMsa *msa)
Main function for keeping only those selected sequences for PSSM construction (stage 2).
_PSIMsa * _PSIMsaNew(const _PSIPackedMsa *msa, Uint4 alphabet_size)
Allocates and initializes the internal version of the PSIMsa structure (makes a deep copy) for intern...
const double kPSINearIdentical
Percent identity threshold for discarding near-identical matches.
const unsigned int kQueryIndex
Index into multiple sequence alignment structure for the query sequence.
int _PSIScaleMatrix(const Uint1 *query, const double *std_probs, _PSIInternalPssmData *internal_pssm, BlastScoreBlk *sbp)
Scales the PSSM (stage 7)
_PSIPackedMsa * _PSIPackedMsaNew(const PSIMsa *msa)
Allocates and initializes the compact version of the PSIMsa structure (makes a deep copy) for interna...
Private interface for Position Iterated BLAST API, contains the PSSM generation engine.
#define PSIERR_BADPARAM
Bad parameter used in function.
#define PSI_SUCCESS
Successful operation.
Utilities to initialize/set up BLAST.
#define BLAST_SCORE_MIN
minimum allowed score (for one letter comparison).
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
@ eBlastp
Protein-Protein.
TSeqPos GetLength(void) const
Runs the BLAST algorithm between 2 sequences.
Defines BLAST error codes (user errors included)
Wrapper class for BlastScoreBlk .
Wrapper class for PSIBlastOptions .
This class exists merely to call private methods in CPsiBlastInputData and CPssmEngine.
static unsigned int GetNumAlignedSequences(const CPsiBlastInputData &input)
Accesses CPsiBlastInputData private method.
static string x_ErrorCodeToString(int error_code)
Gets error strings from a CPssmEngine private method.
static unsigned char * x_GuardProteinQuery(const unsigned char *query, unsigned int query_length)
Accesses CPssmEngine private method.
static void x_GetSubjectSequence(const objects::CDense_seg &ds, objects::CScope &scope, string &sequence_data)
Gets Subject sequence from a CPsiBlastInputData private method.
Exception class for the CPssmEngine class.
Computes a PSSM as specified in PSI-BLAST.
static TIndex GetIndex(CSeq_data::E_Choice code_type, const string &code)
static CTestObjMgr & Instance()
TSeqPos length
Length of the buffer above (not necessarily sequence length!)
CRef< objects::CPssmWithParameters > Run()
Runs the PSSM engine to compute the PSSM.
static CNcbiMatrix< int > * GetScores(const objects::CPssmWithParameters &pssm)
Returns matrix of BLASTAA_SIZE by query size (dimensions are opposite of what is stored in the BlastS...
#define BLASTAA_SIZE
Size of aminoacid alphabet.
TAutoUint1Ptr data
Sequence data.
virtual TSeqAlignVector Run()
Perform BLAST search Assuming N queries and M subjects, the structure of the returned vector is as fo...
const Uint1 AMINOACID_TO_NCBISTDAA[]
Translates between ncbieaa and ncbistdaa.
void Reset(BlastScoreBlk *p=NULL)
AutoPtr< Uint1, CDeleter< Uint1 > > TAutoUint1Ptr
Declares TAutoUint1Ptr (for Uint1 arrays allocated with malloc/calloc)
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
@ eBlastEncodingProtein
NCBIstdaa.
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
element_type * get(void) const
Get pointer.
@ eSerial_AsnText
ASN.1 text.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
Uint4 TValue
Type of the generated integer value and/or the seed value.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
const TFreqRatios & GetFreqRatios(void) const
Get the FreqRatios member data.
TNumRows GetNumRows(void) const
Get the NumRows member data.
const TWeightedResFreqsPerPos & GetWeightedResFreqsPerPos(void) const
Get the WeightedResFreqsPerPos member data.
list< int > TResFreqsPerPos
TNumColumns GetNumColumns(void) const
Get the NumColumns member data.
list< double > TWeightedResFreqsPerPos
const TIntermediateData & GetIntermediateData(void) const
Get the IntermediateData member data.
list< double > TFreqRatios
const TResFreqsPerPos & GetResFreqsPerPos(void) const
Get the ResFreqsPerPos member data.
const TPssm & GetPssm(void) const
Get the Pssm member data.
const TStarts & GetStarts(void) const
Get the Starts member data.
const TLens & GetLens(void) const
Get the Lens member data.
TDim GetDim(void) const
Get the Dim member data.
TNumseg GetNumseg(void) const
Get the Numseg member data.
list< CRef< CSeq_align > > Tdata
list< CRef< CSeqdesc > > Tdata
const Tdata & Get(void) const
Get the member data.
bool IsSetDescr(void) const
descriptors Check if a value has been assigned to Descr data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
@ e_Ncbistdaa
consecutive codes for std aas
unsigned int
A callback function used to compare two keys in a database.
SFreqRatios * _PSIMatrixFrequencyRatiosFree(SFreqRatios *freq_ratios)
Deallocate the frequency ratios structure.
SFreqRatios * _PSIMatrixFrequencyRatiosNew(const char *matrix_name)
Retrieve the matrix's frequency ratios.
const TYPE & Get(const CNamedParameterList *param)
char GetResidue(unsigned int res)
Returns character representation of a residue from ncbistdaa.
Magic spell ;-) needed for some weird compilers... very empiric.
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
std::istream & in(std::istream &in_, double &x_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
Declarations of auxiliary functions/classes for PSI-BLAST.
C++ API for the PSI-BLAST PSSM engine.
BlastScoreBlk * InitializeBlastScoreBlk(const unsigned char *query, Uint4 query_size)
Utilities to develop and debug unit tests that deal with PSSM computation.
BOOST_AUTO_TEST_CASE(testFullPssmEngineRunWithDiagnosticsRequest)
const SNCBIPackedScoreMatrix NCBISM_Blosum62
TNCBIScore NCBISM_GetScore(const SNCBIPackedScoreMatrix *sm, int aa1, int aa2)
Look up an entry in a packed score matrix.
Int2 alphabet_size
size of alphabet.
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...
Boolean nsg_compatibility_mode
Compatibility option for the NCBI's structure group (note nsg_ prefix, stands for NCBI's structure gr...
double inclusion_ethresh
Minimum evalue for inclusion in PSSM calculation.
Int4 pseudo_count
Pseudocount constant.
Boolean use_best_alignment
If set to TRUE, use the best alignment when multiple HSPs are found in a query-subject alignment (i....
Structure to allow requesting various diagnostics data to be collected by PSSM engine.
Boolean information_content
request information content
Boolean frequency_ratios
request frequency ratios
Boolean weighted_residue_frequencies
request observed weighted residue frequencies
Boolean gapless_column_weights
request gapless column weights
Boolean num_matching_seqs
request number of matching sequences
Boolean sigma
request sigma
Boolean residue_frequencies
request observed residue frequencies
Boolean interval_sizes
request interval sizes
Structure to describe the characteristics of a position in the multiple sequence alignment data struc...
Boolean is_aligned
Is this letter part of the alignment?
Uint1 letter
Preferred letter at this position, in ncbistdaa encoding.
Structure representing the dimensions of the multiple sequence alignment data structure.
Multiple sequence alignment (msa) data structure containing the raw data needed by the PSSM engine to...
Structure to store sequence data and its length for use in the CORE of BLAST (it's a malloc'ed array ...
Stores the frequency ratios along with their bit scale factor.
Utility stuff for more convenient using of Boost.Test library.
static const string kTitle
CTraceGlyph inline method implementation.
static Uint4 letter(char c)