77 size_t RestoreFivePrime(
size_t beg)
const;
78 size_t RestoreThreePrime(
size_t end)
const;
110 const string&
nuc = alignment_text.
GetDNA();
111 const string& outp = alignment_text.
GetProtein();
112 const string& orig_match = alignment_text.
GetMatch();
113 list<CNPiece> m_AliPiece;
116 string::size_type n1 = outp.find_first_not_of(
GAP_CHAR);
117 string::size_type n2 = outp.find_last_not_of(
GAP_CHAR);
119 m_AliPiece.push_back(
CNPiece(n1, n2+1, 0, 0));
123 string match = orig_match;
124 for (
size_t i = 1;
i <
match.size()-1; ++
i) {
135 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
137 m_AliPiece.splice(it,
tmp);
138 it = m_AliPiece.erase(it);
140 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
142 m_AliPiece.splice(it,
tmp);
143 it = m_AliPiece.erase(it);
148 if( !m_AliPiece.empty() ) {
149 m_AliPiece.front().beg = trim.
CutFromLeft(m_AliPiece.front(), m_options);
150 m_AliPiece.back().end = trim.
CutFromRight(m_AliPiece.back(), m_options);
155 string::size_type beg = m_AliPiece.front().beg;
156 string::size_type end = m_AliPiece.back().end;
158 m_AliPiece.push_back(
CNPiece(beg, end, 0, 0));
163 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
164 list<CNPiece>::iterator sit = it;
166 if(sit == m_AliPiece.end())
break;
170 int nuc_cnt = 0, prot_cnt = 0;
171 for(
int pos = hbeg; pos < hend; ++pos) {
177 it = m_AliPiece.erase(it);
185 if( !m_AliPiece.empty() ) {
186 bool keep_trimming =
true;
187 while( keep_trimming ) {
188 CNPiece& pc = *m_AliPiece.rbegin();
192 for(;
n >= pc.
beg; --
n) {
198 m_AliPiece.pop_back();
204 if( !m_AliPiece.empty() && m_options.
GetCutNs() ) {
205 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
206 int pos = it->end - 1;
207 for(; pos >= it->beg &&
nuc[pos] ==
'N' ; --pos);
209 it = m_AliPiece.erase(it);
219 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
220 int pos = it->end - 1;
228 it = m_AliPiece.erase(it);
237 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
239 int pos = it->end - 1;
242 it = m_AliPiece.erase(it);
248 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
253 it = m_AliPiece.erase(it);
264 if( !m_AliPiece.empty() ) {
275 list<CNPiece> m_AliPiece;
276 const string&
match = match_all_pos;
279 if(
n == string::npos ||
n >= (
unsigned)pc.
end)
return m_AliPiece;
282 string::size_type beg =
n;
286 for(;
n<(unsigned)pc.
end; ++
n) {
290 m_AliPiece.push_back(
CNPiece(beg,
n, 0, efflen));
297 m_AliPiece.push_back(
CNPiece(beg,
n, efflen, efflen));
314 m_AliPiece.push_back(
CNPiece(beg,
n, efflen, efflen));
317 list<CNPiece>::iterator itb, ite, itc;
318 list<CNPiece>::size_type pnum = m_AliPiece.size() + 1;
319 while(pnum > m_AliPiece.size()) {
320 pnum = m_AliPiece.size();
322 for(itb = m_AliPiece.begin(); ; ) {
324 int slen = 0, spos = 0;
327 while(itc != m_AliPiece.end()) {
328 if(m_options.
Bad(itc))
break;
331 if(m_options.
Dropof(slen, spos, itb))
break;
333 if(m_options.
Perc(itc, slen, spos, itb)) {
334 if(m_options.
BackCheck(itb, itc)) ite = itc;
341 m_options.
Join(itb, ite);
342 m_AliPiece.erase(itb, ite);
346 if(itb == m_AliPiece.end())
break;
350 itb = m_AliPiece.end();
352 while(itb != m_AliPiece.begin()) {
354 int slen = 0, spos = 0;
356 while(itc != m_AliPiece.begin()) {
358 if(m_options.
Bad(itc))
break;
361 if(m_options.
Dropof(slen, spos, itb))
break;
363 if(m_options.
Perc(itc, slen, spos, itb)) {
364 if(m_options.
ForwCheck(itc, itb)) ite = itc;
370 m_options.
Join(ite, itb);
371 m_AliPiece.erase(ite, itb);
373 if(itb == m_AliPiece.begin())
break;
379 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
380 if(it->posit == 0) it = m_AliPiece.erase(it);
381 else if (it->efflen < m_options.
GetMinGoodLen()) it = m_AliPiece.erase(it);
390 const string&
match = match_all_pos;
392 vector<pair<int, int> > exons;
394 bool in_exon =
false;
398 exons.push_back(make_pair(
n, 0));
403 exons.back().second =
n;
407 int cur_beg = pc.
beg;
408 for(vector<pair<int, int> >::iterator eit = exons.begin(); eit != exons.end(); ++eit) {
411 int len = eit->second - eit->first;
412 for(
int i = eit->first; i < eit->second; ++
i) {
422 for(
n = eit->first - 1;
n > cur_beg; --
n) {
426 if(
n > cur_beg) alip.push_back(
CNPiece(cur_beg,
n, 0, 0));
428 for(
n = eit->second;
n < pc.
end; ++
n) {
434 if(cur_beg < pc.
end) alip.push_back(
CNPiece(cur_beg, pc.
end, 0, 0));
442 const string&
nuc = alignment_text.
GetDNA();
445 int cnuc = 0, cprot = 0, cmax = 18;
451 score -= scoring.
sm_Ig;
487 if(cnuc >= cmax && cprot >= cmax)
break;
490 score -= scoring.
sm_Ig;
525 if(
Dropof(efflen, posit, add))
return false;
526 if(
GetTotalPositives()*(efflen+cur->efflen+add->efflen) > 100*(posit+cur->posit+add->posit))
return false;
532 int posit =
last->posit;
533 int efflen =
last->efflen;
534 for(list<prosplign::CNPiece>::iterator it1 = it; it1 !=
last; ++it1) {
536 efflen += it1->efflen;
539 last->efflen = efflen;
545 int efflen = it1->efflen;
546 int pos = it1->posit;
549 if(
Dropof(efflen, pos, it1))
return false;
550 efflen += it1->efflen;
553 efflen += it1->efflen;
561 int efflen = it2->efflen;
562 int pos = it2->posit;
565 if(
Dropof(efflen, pos, it2))
return false;
566 efflen += it2->efflen;
569 efflen += it2->efflen;
585 if ((*field)->CanGetLabel() && (*field)->GetLabel().IsStr() && (*field)->GetLabel().GetStr()==
"CompartmentId") {
586 return (*field)->GetData().GetInt();
598 int compartment_id = GetCompNum(seqalign);
602 int nuc_from =
bounds.GetFrom()+1;
603 int nuc_to =
bounds.GetTo()+1;
606 out<<endl<<
"************************************************************************"<<endl;
607 out<<
"************************************************************************"<<endl;
608 out<<
"************************************************************************"<<endl;
609 out<<compartment_id<<
"\t"<<contig_name<<
"\t"<<prot_id<<
"\t"<<nuc_from<<
"\t"<<nuc_to<<
"\t";
610 out<<(is_plus_strand?
'+':
'-')<<endl;
613 const string& dna = align_text.GetDNA();
614 const string& translation = align_text.GetTranslation();
615 string match = align_text.GetMatch();
616 const string& protein = align_text.GetProtein();
618 good_parts.append(
match.size(),
'*');
619 for (
size_t i = 0;
i <
match.size(); ++
i) {
621 match[
i] = good_parts[
i] =
' ';
624 int npos1 = is_plus_strand?nuc_from:nuc_to;
626 int prot_beg_pos =
static_cast<int>(protein.find_first_not_of(
GAP_CHAR));
627 int prot_end_pos =
static_cast<int>(protein.find_last_not_of(
GAP_CHAR));
629 for(
int i=0;
i<prot_end_pos;
i+=width) {
630 int apos =
i+width-1;
631 if (apos >= (
int)dna.length()) {
632 apos = (
int)dna.length() - 1;
636 #ifdef NCBI_COMPILER_WORKSHOP
639 int real_bases = width-gaps;
641 int real_bases =
static_cast<int>(width-
count(dna.begin()+
i, dna.begin()+(
i+width),
GAP_CHAR));
644 int npos2 = is_plus_strand?npos1+real_bases-1:npos1-(real_bases-1);
647 if (apos > prot_beg_pos) {
648 out.setf(IOS_BASE::left, IOS_BASE::adjustfield);
650 out<<setw(12)<<npos1<<dna.substr(
i, width)<<
" "<<npos2<<endl;
652 out<<setw(12)<<
"-"<<dna.substr(
i, width)<<
" "<<
"-"<<endl;
654 out<<setw(12)<<
" "<<translation.substr(
i, width)<<endl;
655 out<<setw(12)<<
" "<<
match.substr(
i, width)<<endl;
656 out<<setw(12)<<
" "<<protein.substr(
i, width)<<endl;
657 out<<setw(12)<<
" "<<good_parts.substr(
i, width)<<endl;
660 npos1 = is_plus_strand?npos2+1:npos2-1;
669 CAliChunk(
TSeqPos ali_pos,
TSeqPos nuc_pos,
TSeqPos prot_pos, CSpliced_seg::TExons::iterator exon_iter, CSpliced_exon::TParts::iterator chunk_iter) :
670 m_nuc_pos(nuc_pos), m_prot_pos(prot_pos), m_exon_iter(exon_iter), m_chunk_iter(chunk_iter), m_bad(
false)
694 m_ali_range =
TSeqRange(ali_pos, ali_pos +
max(m_nuc_len,m_prot_len)-1);
702 CSpliced_seg::TExons::iterator m_exon_iter;
703 CSpliced_exon::TParts::iterator m_chunk_iter;
707 typedef list<CAliChunk> TAliChunkCollection;
708 typedef TAliChunkCollection::iterator TAliChunkIterator;
716 int nuc_from =
bounds.GetFrom();
717 int nuc_to =
bounds.GetTo();
720 int alignment_pos = 0;
722 TAliChunkCollection chunks;
731 alignment_pos +=
max(prot_cur_start-prot_from, nuc_cur_start-nuc_from);
732 nuc_from = nuc_cur_start;
734 alignment_pos +=
max(prot_cur_start-prot_from, nuc_to-nuc_cur_end);
735 nuc_to = nuc_cur_end;
737 prot_from = prot_cur_start;
740 CAliChunk chunk(alignment_pos, strand ==
eNa_strand_plus?nuc_from:nuc_to, prot_from, e_it, p_it);
741 alignment_pos = chunk.m_ali_range.GetTo()+1;
742 prot_from += chunk.m_prot_len;
744 nuc_from += chunk.m_nuc_len;
746 nuc_to -= chunk.m_nuc_len;
749 chunks.push_back(chunk);
758 list<TSeqRange> InvertPartList(
const list<CNPiece>& good_parts,
TSeqRange total_range)
760 list<TSeqRange> bad_parts;
762 int tail_beg = total_range.
GetFrom();
763 int tail_end = total_range.
GetTo();
764 ITERATE(list<CNPiece>,
i, good_parts) {
765 if (tail_beg < i->beg)
766 bad_parts.push_back(
TSeqRange(tail_beg,
i->beg-1));
769 if (tail_beg <= tail_end)
770 bad_parts.push_back(
TSeqRange(tail_beg,tail_end));
806 _ASSERT( nuc_cur_end-nuc_cur_start+1 == nuc_len );
807 _ASSERT( prot_cur_end-prot_cur_start+1 == prot_len );
812 void SplitChunk(TAliChunkCollection& chunks, TAliChunkIterator iter,
TSeqPos start_of_second_chunk,
bool genomic_plus)
814 _DEBUG_CODE( TestExonLength(**iter->m_exon_iter); );
815 _ASSERT( iter->m_ali_range.GetFrom() < start_of_second_chunk );
816 _ASSERT( start_of_second_chunk <= iter->m_ali_range.GetTo());
817 _ASSERT( iter->m_nuc_len == iter->m_prot_len );
820 new_chunk->Assign(**iter->m_chunk_iter);
821 int first_len = start_of_second_chunk - iter->m_ali_range.GetFrom();
822 int second_len = iter->m_ali_range.GetTo() - start_of_second_chunk+1;
824 TAliChunkIterator first_iter = chunks.insert(iter, *iter);
827 iter->m_nuc_pos += first_len;
829 iter->m_nuc_pos -= first_len;
831 iter->m_prot_pos += first_len;
833 if (new_chunk->IsDiag()) {
834 new_chunk->SetDiag(first_len);
835 (*iter->m_chunk_iter)->SetDiag(second_len);
836 }
else if (new_chunk->IsMatch()) {
837 new_chunk->SetMatch(first_len);
838 (*iter->m_chunk_iter)->SetMatch(second_len);
839 }
else if (new_chunk->IsMismatch()) {
840 new_chunk->SetMismatch(first_len);
841 (*iter->m_chunk_iter)->SetMismatch(second_len);
844 first_iter->m_ali_range.SetTo(start_of_second_chunk-1);
845 iter->m_ali_range.SetFrom(start_of_second_chunk);
847 first_iter->m_nuc_len = first_iter->m_prot_len = first_len;
848 iter->m_nuc_len = iter->m_prot_len = second_len;
850 first_iter->m_chunk_iter = (*iter->m_exon_iter)->SetParts().insert(iter->m_chunk_iter, new_chunk);
852 _DEBUG_CODE( TestExonLength(**iter->m_exon_iter); );
857 exons.erase(exon_iter);
858 exon_iter = exons.end();
861 void DropExonHead(TAliChunkIterator chunk_iter,
bool genomic_plus)
867 size_t chunks_count = cur_exon->
GetParts().size();
870 cur_exon->
SetParts().erase(cur_exon->
SetParts().begin(), chunk_iter->m_chunk_iter);
894 size_t chunks_count = cur_exon->
GetParts().size();
898 new_exon->Assign(*cur_exon);
900 CSpliced_exon::TParts::iterator new_exon_chunk = new_exon->SetParts().begin();
902 if (old_exon_chunk==chunk_iter->m_chunk_iter)
907 new_exon->SetParts().erase(new_exon_chunk, new_exon->SetParts().end());
910 new_exon->SetGenomic_end(chunk_iter->m_nuc_pos-1);
912 new_exon->SetGenomic_start(chunk_iter->m_nuc_pos+1);
915 new_exon->SetPartial(
true);
916 if (new_exon->IsSetDonor_after_exon())
917 new_exon->ResetDonor_after_exon();
919 _ASSERT( new_exon->GetGenomic_start() <= new_exon->GetGenomic_end() );
920 _ASSERT( new_exon->GetProduct_start().AsSeqPos() <= new_exon->GetProduct_end().AsSeqPos() );
922 exons.insert(chunk_iter->m_exon_iter, new_exon);
924 DropExonHead(chunk_iter, genomic_plus);
926 _ASSERT( 0 < new_exon->GetParts().size() && 0 < cur_exon->
GetParts().size() );
927 _ASSERT( new_exon->GetParts().size()+cur_exon->
GetParts().size() == chunks_count );
935 void prosplign::SetScores(objects::CSeq_align& seq_align, objects::CScope& scope,
const string& matrix_name) {
937 const string&
prot = pro_text.GetProtein();
938 const string& dna = pro_text.GetDNA();
939 const string&
match = pro_text.GetMatch();
941 int pos = 0, ident = 0,
len = 0, neg = 0, pgap = 0, ngap = 0;
942 for(string::size_type
i=0;
i<
match.size(); ++
i) {
943 if( (prot[
i] !=
'.') && (
match[
i] !=
'X') ) {
947 }
else if(dna[
i] ==
'-') {
953 if(triple) ident +=3;
967 seq_align.SetNamedScore(
"num_ident", ident);
968 seq_align.SetNamedScore(
"num_positives", pos);
969 seq_align.SetNamedScore(
"num_negatives", neg);
970 seq_align.SetNamedScore(
"product_gap_length", pgap);
971 seq_align.SetNamedScore(
"genomic_gap_length", ngap);
972 seq_align.SetNamedScore(
"align_length",
len);
976 for(ibeg = 0; ibeg<(
int)(
prot.size()) && ( (
prot[ibeg] ==
'.') || (
match[ibeg] ==
'X') || (
prot[ibeg] ==
'-' ) ); ++ibeg) {}
977 for(iend =
prot.size() - 1; iend >=0 && ( (
prot[iend] ==
'.') || (
match[iend] ==
'X') || (
prot[iend] ==
'-' ) ); --iend) {}
978 for(
int i=ibeg;
i<=iend; ++
i) {
979 if( (prot[
i] !=
'.') && (
match[
i] !=
'X') ) {
985 seq_align.SetNamedScore(
"product_internal_gap_length", ipgap);
991 TAliChunkCollection chunks = ExtractChunks(scope, seq_align);
998 list<TSeqRange> bad_parts = InvertPartList(good_parts,
TSeqRange(chunks.front().m_ali_range.GetFrom(),chunks.back().m_ali_range.GetTo()));
1000 TAliChunkIterator chunk_iter = chunks.begin();
1002 ITERATE(list<TSeqRange>, bad_part, bad_parts) {
1003 while (chunk_iter != chunks.end() && chunk_iter->m_ali_range.GetTo() < bad_part->GetFrom()) {
1007 if (chunk_iter == chunks.end())
1009 if (bad_part->GetTo() < chunk_iter->m_ali_range.GetFrom())
1012 if (chunk_iter->m_ali_range.GetFrom() < bad_part->GetFrom())
1013 SplitChunk(chunks, chunk_iter, bad_part->GetFrom(), genomic_plus);
1015 while (chunk_iter != chunks.end() && chunk_iter->m_ali_range.GetTo() <= bad_part->GetTo())
1016 chunk_iter++->m_bad =
true;
1018 if (chunk_iter != chunks.end() && chunk_iter->m_ali_range.GetFrom() <= bad_part->GetTo()) {
1019 chunk_iter->m_bad =
true;
1020 SplitChunk(chunks, chunk_iter, bad_part->GetTo()+1, genomic_plus);
1021 chunk_iter->m_bad =
false;
1025 CSpliced_seg::TExons::iterator prev_exon_iter = sps.
SetExons().end();
1028 while(chunk_it != chunks.end() && !chunk_it->m_bad) {
1029 prev_exon_iter = chunk_it->m_exon_iter;
1032 if (chunk_it == chunks.end())
1034 if (prev_exon_iter != chunk_it->m_exon_iter) {
1035 if ((*chunk_it->m_exon_iter)->IsSetAcceptor_before_exon())
1036 (*chunk_it->m_exon_iter)->ResetAcceptor_before_exon();
1038 SplitExon(sps.
SetExons(),chunk_it, genomic_plus);
1041 prev_exon_iter = chunk_it->m_exon_iter;
1042 TAliChunkIterator next_chunk_iter = chunk_it;
1044 while (next_chunk_iter != chunks.end() && next_chunk_iter->m_bad && next_chunk_iter->m_exon_iter==prev_exon_iter) {
1045 chunk_it = next_chunk_iter++;
1048 if (next_chunk_iter == chunks.end() || next_chunk_iter->m_exon_iter!=prev_exon_iter) {
1049 DropExon(sps.
SetExons(), prev_exon_iter);
1051 DropExonHead(next_chunk_iter, genomic_plus);
1057 TestExonLength(**e_it);
1067 : m_alignment_text(alignment_text) {
1068 const string& outp = alignment_text.
GetProtein();
1071 for (
size_t i = 1;
i <
match.size()-1; ++
i) {
1092 if(pbeg == string::npos)
return beg;
1093 if( pbeg >= beg )
return beg;
1094 int ali_len = (
int)(beg - pbeg);
1096 if( ali_len > 36 )
return beg;
1099 int mismatch_cnt = 0;
1101 for(
size_t i = pbeg;
i < beg; ++
i) {
1105 if( in_gap != -1 ) {
1123 if( gap_cnt == 0 && mismatch_cnt < 10)
return pbeg;
1124 if( gap_cnt < 3 && 100 * posit_cnt >= 60 * ali_len )
return pbeg;
1125 if( gap_cnt < 2 && 100 * posit_cnt >= 50 * ali_len )
return pbeg;
1137 if(pend == string::npos)
return end;
1140 if( end >= pend )
return end;
1141 int ali_len = (
int)(pend-end);
1142 if( ali_len > 36 )
return end;
1145 int mismatch_cnt = 0;
1147 for(
size_t i = end;
i<pend; ++
i) {
1150 if( tran_row[
i] ==
'*' )
return end;
1152 if( in_gap != -1 ) {
1170 if( gap_cnt == 0 && mismatch_cnt < 10)
return pend;
1171 if( gap_cnt < 3 && 100 * posit_cnt >= 60 * ali_len )
return pend;
1172 if( gap_cnt < 2 && 100 * posit_cnt >= 50 * ali_len )
return pend;
1192 bool keep_trimming =
true;
1196 while ( keep_trimming ) {
1198 int begpos = pc.
beg;
1199 int endpos = pc.
end;
1201 double cur_max_drop = 0;
1202 int cur_cut = begpos;
1204 int cur_pos = begpos;
1215 int ps_dna_gap_len = 0;
1216 int ps_prot_gap_len = 0;
1222 if( cur_end >= endpos )
return pc.
beg;
1224 for(
int pos = cur_pos; pos < cur_end; ++pos ) {
1240 if( max_cut_len < cur_pos - begpos + 1 ) {
1257 ps_len += ps_len_increment;
1258 ps_pos += ps_len_increment;
1259 ps_prot_gap_len = 0;
1261 }
else if( dna[cur_pos] ==
GAP_CHAR ) {
1263 if( ps_dna_gap_len < 3 ) {
1264 ps_len += ps_len_increment;
1269 ps_prot_gap_len = 0;
1271 if( ps_prot_gap_len < 3 ) {
1272 ps_len += ps_len_increment;
1279 ps_len += ps_len_increment;
1280 ps_prot_gap_len = 0;
1287 double posit_drop = rposit/(double)
window_size - ps_pos/(
double)ps_len;
1288 if( posit_drop >= dropoff && ( posit_drop > cur_max_drop || cur_cut == begpos ) ) {
1289 cur_max_drop = posit_drop;
1292 }
while( cur_end < endpos );
1294 if( cur_cut == begpos ) {
1295 keep_trimming =
false;
1302 for( ; cur_cut < endpos; ++cur_cut ) {
1307 if( cur_cut >= endpos )
return pc.
beg;
1314 for( ; cur_cut >= begpos; --cur_cut) {
1320 if( cur_cut <= begpos )
return pc.
beg;
1344 bool keep_trimming =
true;
1346 while ( keep_trimming ) {
1348 int begpos = pc.
beg;
1349 int endpos = pc.
end;
1351 double cur_max_drop = 0;
1352 int cur_cut = endpos;
1354 int win_end = endpos;
1366 int ps_dna_gap_len = 0;
1367 int ps_prot_gap_len = 0;
1373 for(
int pos = win_beg; pos < win_end; ++pos ) {
1383 while( win_beg > begpos ) {
1392 if( max_cut_len < endpos - win_end ) {
1409 int cur_pos = win_end;
1413 ps_len += ps_len_increment;
1414 ps_pos += ps_len_increment;
1415 ps_prot_gap_len = 0;
1417 }
else if( dna[cur_pos] ==
GAP_CHAR ) {
1419 if( ps_dna_gap_len < 3 ) {
1420 ps_len += ps_len_increment;
1425 ps_prot_gap_len = 0;
1427 if( ps_prot_gap_len < 3 ) {
1428 ps_len += ps_len_increment;
1435 ps_len += ps_len_increment;
1436 ps_prot_gap_len = 0;
1441 double posit_drop = wposit/(double)
window_size - ps_pos/(
double)ps_len;
1442 if( posit_drop >= dropoff && ( posit_drop > cur_max_drop || cur_cut == endpos ) ) {
1443 cur_max_drop = posit_drop;
1448 if( cur_cut == endpos ) {
1449 keep_trimming =
false;
1455 for( --cur_cut; cur_cut >= begpos; --cur_cut ) {
1461 if( cur_cut <= begpos )
return pc.
end;
1468 for( ; cur_cut < endpos; ++cur_cut ) {
1473 if(cur_cut >= endpos)
return pc.
end;
CRef< CProduct_pos > NultriposToProduct_pos(int nultripos)
Convert linear coordinate into (amin,frame)
list< CNPiece > ExcludeBadExons(const CNPiece pc, const string &match_all_pos, const string &protein, CProSplignOutputOptionsExt m_options)
const char BAD_OR_MISMATCH[]
list< CNPiece > FindGoodParts(const CProteinAlignText &alignment_text, CProSplignOutputOptionsExt m_options, const CProSplignScaledScoring &scoring, const CSubstMatrix &matrix)
const char BAD_PIECE_CHAR
USING_SCOPE(ncbi::objects)
bool TrimNegativeTail(CNPiece &pc, const CProteinAlignText &alignment_text, const CProSplignScaledScoring &scoring, const CSubstMatrix &matrix)
const char INTRON_OR_GAP[]
void RefineAlignment(objects::CScope &scope, objects::CSeq_align &seq_align, const list< CNPiece > &good_parts)
void SetScores(objects::CSeq_align &seq_align, objects::CScope &scope, const string &matrix_name="BLOSUM62")
CNPiece(string::size_type obeg, string::size_type oend, int oposit, int oefflen)
Extended output filtering parameters deprecated, used in older programs.
CProSplignOutputOptionsExt(const CProSplignOutputOptions &options)
bool Perc(list< prosplign::CNPiece >::iterator it, int efflen, int posit, list< prosplign::CNPiece >::iterator last)
bool ForwCheck(list< prosplign::CNPiece >::iterator it1, list< prosplign::CNPiece >::iterator it2)
void Join(list< prosplign::CNPiece >::iterator it, list< prosplign::CNPiece >::iterator last)
bool Bad(list< prosplign::CNPiece >::iterator it)
bool BackCheck(list< prosplign::CNPiece >::iterator it1, list< prosplign::CNPiece >::iterator it2)
bool Dropof(int efflen, int posit, list< prosplign::CNPiece >::iterator it)
Output filtering parameters.
bool GetCutFlankPartialCodons() const
int GetTotalPositives() const
int GetCutFlanksWithPositGapRatio() const
int GetCutFlanksWithPositWindow() const
int GetCutFlanksWithPositMaxLen() const
bool GetFillHoles() const
bool GetCutFlanksWithPositDrop() const
int GetMinHoleLen() const
int GetMinFlankingExonLen() const
int GetMinGoodLen() const
int GetFlankPositives() const
int GetCutFlanksWithPositDropoff() const
int GetMinExonPos() const
int GetStartBonus() const
bool IsPassThrough() const
static void Output(const objects::CSeq_align &seqalign, objects::CScope &scope, ostream &out, int width, const string &matrix_name="BLOSUM62")
Outputs formatted text.
size_t RestoreThreePrime(size_t end) const
CProSplignTrimmer(const CProteinAlignText &alignment_text)
CProSplignTrimmer implementation.
size_t RestoreFivePrime(size_t beg) const
checks if alignment ends should be restored beyond 'beg' or 'end' returns new flanking coord or 'beg'...
int CutFromRight(CNPiece pc, const CProSplignOutputOptionsExt &options) const
trim right flank with positives dropoff over a cutoff, iterative 'pc' should not be dropped completel...
int CutFromLeft(CNPiece pc, const CProSplignOutputOptionsExt &options) const
trim flanks with positives dropoff over a cutoff, iterative flank 'good pieces' should not be dropped...
const CProteinAlignText & m_alignment_text
Text representation of ProSplign alignment.
const string & GetDNA() const
const string & GetMatch() const
const string & GetProtein() const
static CRef< objects::CSeq_loc > GetGenomicBounds(objects::CScope &scope, const objects::CSeq_align &seqalign)
const string & GetTranslation() const
Substitution Matrix for Scoring Amino-Acid Alignments.
int ScaledScore(char amin1, char amin2) const
static const char * bounds[]
Include a standard set of the NCBI C++ Toolkit most basic headers.
std::ofstream out("events_result.xml")
main entry point for tests
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define _DEBUG_CODE(code)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
bool CanGetData(void) const
Check if it is safe to call GetData method.
const TData & GetData(void) const
Get the Data member data.
vector< CRef< CUser_field > > TData
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
void SetProduct_start(TProduct_start &value)
Assign a value to Product_start data member.
TMatch GetMatch(void) const
Get the variant data.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
bool IsMismatch(void) const
Check if variant Mismatch is selected.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
bool IsSetAcceptor_before_exon(void) const
splice sites Check if a value has been assigned to Acceptor_before_exon data member.
TExons & SetExons(void)
Assign a value to Exons data member.
TDiag GetDiag(void) const
Get the variant data.
TMismatch GetMismatch(void) const
Get the variant data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
list< CRef< CUser_object > > TExt
void SetGenomic_start(TGenomic_start value)
Assign a value to Genomic_start data member.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
bool IsGenomic_ins(void) const
Check if variant Genomic_ins is selected.
bool IsMatch(void) const
Check if variant Match is selected.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
void SetPartial(TPartial value)
Assign a value to Partial data member.
bool CanGetExt(void) const
Check if it is safe to call GetExt method.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
void ResetAcceptor_before_exon(void)
Reset Acceptor_before_exon data member.
TParts & SetParts(void)
Assign a value to Parts data member.
bool IsDiag(void) const
Check if variant Diag is selected.
void SetGenomic_end(TGenomic_end value)
Assign a value to Genomic_end data member.
const TExt & GetExt(void) const
Get the Ext member data.
list< CRef< CSpliced_exon_chunk > > TParts
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsProduct_ins(void) const
Check if variant Product_ins is selected.
TProduct_ins GetProduct_ins(void) const
Get the variant data.
const TSegs & GetSegs(void) const
Get the Segs member data.
ENa_strand
strand of nucleic acid
unsigned int
A callback function used to compare two keys in a database.
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)