43 int from = ext_rna->exons[0].from;
44 int to = ext_rna->exons[ext_rna->exons.size()-1].to;
46 for(;seq!=seqs.end(); seq++, nseq++)
48 int from2 = seq->exons[0].from;
49 int to2 = seq->exons[seq->exons.size()-1].to;
50 bool over_origin = seq->exons.size()>1 && to2-from2 >
m_length/2;
53 <<
"[" << ext_rna_range <<
"]"
54 <<
"[" << ext_rna_range2 <<
"]" <<
", trying..." <<
NcbiEndl;
56 if(to2>=from || over_origin)
60 if(from2<=to || over_origin)
64 TSimpleSeqs::iterator seq2 = seq;
65 for(;seq2!=seqs.end(); seq2++)
67 int from2 = seq2->exons[0].from;
71 <<
"[" << ext_rna_range <<
"]"
72 <<
"[" << ext_rna_range2 <<
"]" <<
", trying 2..." <<
NcbiEndl;
75 overlaps(ext_rna, seq2, this_overlap);
77 <<
"[" << ext_rna_range <<
"]"
78 <<
"[" << ext_rna_range2 <<
"]" <<
", overlap = " << this_overlap <<
NcbiEndl;
82 best_seq.push_back(*seq2);
96 int from = ext_rna->exons[0].from;
97 int to = ext_rna->exons[ext_rna->exons.size()-1].to;
98 CNcbiStrstream ext_rna_range_stream; ext_rna_range_stream << from <<
"..." << to <<
'\0';
99 string ext_rna_range = ext_rna_range_stream.str();
100 TSimpleSeqs::iterator& best_seq = seq;
101 for(;seq!=seqs.end(); seq++, nseq++)
103 int from2 = seq->exons[0].from;
104 int to2 = seq->exons[seq->exons.size()-1].to;
105 CNcbiStrstream ext_rna_range_stream2; ext_rna_range_stream2 << from2 <<
"..." << to2 <<
'\0';
106 string ext_rna_range2 = ext_rna_range_stream2.str();
108 <<
"[" << ext_rna_range <<
"]"
109 <<
"[" << ext_rna_range2 <<
"]" <<
", trying..." <<
NcbiEndl;
117 TSimpleSeqs::iterator seq2 = seq;
118 for(;seq2!=seqs.end(); seq2++)
120 int from2 = seq2->exons[0].from;
121 int to2 = seq2->exons[seq2->exons.size()-1].to; // last exon of seq2 itself: index by seq2's own exon count, not seq's
122 CNcbiStrstream ext_rna_range_stream2; ext_rna_range_stream2 << from2 <<
"..." << to2 <<
'\0';
123 string ext_rna_range2 = ext_rna_range_stream2.str();
125 <<
"[" << ext_rna_range <<
"]"
126 <<
"[" << ext_rna_range2 <<
"]" <<
", trying 2..." <<
NcbiEndl;
129 overlaps(ext_rna, seq2, this_overlap);
131 <<
"[" << ext_rna_range <<
"]"
132 <<
"[" << ext_rna_range2 <<
"]" <<
", overlap = " << this_overlap <<
NcbiEndl;
133 if(this_overlap>overlap)
135 overlap=this_overlap;
149 for(TSimplePairs::const_iterator e1=seq1->exons.begin(); e1!=seq1->exons.end(); e1++)
151 for(TSimplePairs::const_iterator e2=seq2->exons.begin(); e2!=seq2->exons.end(); e2++)
153 int o =
min(e2->to, e1->to)-
max(e1->from, e2->from)+1;
171 if ( !(*gen_feature)->GetData().IsFtable() )
continue;
191 if ( !(*gen_feature)->GetData().IsFtable() )
continue;
194 lres =
overlaps_na((*gen_feature)->GetData().GetFtable());
225 getFromTo(seq_interval, from2, to2, strand2);
230 if( !(*f1)->GetData().IsRna() )
continue;
232 bool lres=
overlaps(seq_interval, (*f1)->GetLocation(), overlap);
239 if(trna_type.size()>0) name1 = trna_type;
246 getFromTo((*f1)->GetLocation(), from1, to1, strand1);
247 int min1, min2, max1, max2;
248 min1 =
min(from1, to1);
249 min2 =
min(from2, to2);
250 max1 =
max(from1, to1);
251 max2 =
max(from2, to2);
257 int left_frame = (from1-1)%3+1;
258 int right_frame = (from2-1)%3+1;
270 report->
space = overlap;
273 report->
loc1 = &((*f1)->GetLocation());
274 report->
loc2 = &seq_interval;
276 char bufferchar[20480]; memset(bufferchar, 0, 20480);
277 strstream
buffer(bufferchar, 20480);
282 <<
"potential RNA location ("
283 << name1 <<
") that overlaps protein (" <<
get_title(seq) <<
")" <<
'\0';
286 buff_misc_feat_protein
287 <<
"potential protein location ("
288 <<
get_title(seq) <<
") that overlaps RNA (" << name1 <<
")" <<
'\0';
292 misc_feat_rna << buff_misc_feat_rna.str() <<
'\0';
294 misc_feat_protein << buff_misc_feat_protein.str() <<
'\0';
296 if(
PrintDetails())
NcbiCerr <<
"overlaps_prot_na[seq,feats]: created RNA buffer: " << buff_misc_feat_rna.str() <<
"\n";
297 if(
PrintDetails())
NcbiCerr <<
"overlaps_prot_na[seq,feats]: created protein buffer: " << buff_misc_feat_protein.str() <<
"\n";
299 m_diag[n1].problems.push_back(problem);
305 NcbiCerr <<
"overlaps_prot_na[seq,feats]: WARNING: RNA location "
306 << n1 <<
" marked for deletion (pseudo)" <<
"\n";
312 NcbiCerr <<
"overlaps_prot_na[seq,feats]: WARNING: RNA location "
313 << n1 <<
" marked for deletion (atypical)" <<
"\n";
328 NcbiCerr <<
"overlaps_prot_na[seq,feats]: WARNING: CDS and gene "
329 << n2 <<
" marked for deletion (hypothetical)" <<
"\n";
343 m_diag[removen].problems.push_back(problemCOH);
348 m_diag[removen].problems.push_back(problemCOH);
351 <<
"[" << removen <<
"]"
352 <<
" is marked for removal"
361 NcbiCerr <<
"overlaps_prot_na[seq,feats]: WARNING: get_parent_seqset threw when trying to append misc_feature for " << removen <<
NcbiEndl;
387 if ( !(*f1)->GetData().IsRna() )
continue;
393 if ( !(*f1)->GetData().GetRna().CanGetExt() )
continue;
394 try { type1 =
Get3type((*f1)->GetData().GetRna());}
397 NcbiCerr <<
"overlaps_na[feats]: FATAL: cannot get aminoacid type for one trna feats" <<
NcbiEndl;
405 if(type1.size()==0)
continue;
425 string n2=
"not gene";
466 template <
typename t1,
typename t2>
bool
482 TSeqPos from1, to1, from2, to2;
484 int min1, min2, max1, max2;
486 min1 =
min(from1, to1);
487 max1 =
max(from1, to1);
491 min2 =
min(from2, to2);
492 max2 =
max(from2, to2);
493 int overlap_start, overlap_end;
494 overlap_end =
min(max1, max2);
495 overlap_start =
max(min1, min2);
497 bool result2 = overlap_end >= overlap_start;
498 if(!result2)
continue;
499 overlap+=overlap_end - overlap_start + 1;
516 int min1, min2, max1, max2;
517 min2 =
min(from2, to2);
518 max2 =
max(from2, to2);
523 min1 =
min(from1, to1);
524 max1 =
max(from1, to1);
525 int overlap_start, overlap_end;
526 overlap_end =
min(max1, max2);
527 overlap_start =
max(min1, min2);
529 bool result2 = overlap_end >= overlap_start;
531 overlap+=overlap_end - overlap_start + 1;
545 TSeqPos from1, to1, from2, to2;
547 int min1, min2, max1, max2;
548 getFromTo( *i1, from1, to1, strand1);
549 min1 =
min(from1, to1);
550 max1 =
max(from1, to1);
554 getFromTo( *i2, from2, to2, strand2);
556 min2 =
min(from2, to2);
557 max2 =
max(from2, to2);
558 if(min2<=min1 && max2>=max1)
560 if(PrintDetails())
NcbiCerr <<
"complete_overlap: "
561 << from1 <<
" ... " << to1 <<
" "
562 << from2 <<
" ... " << to2 <<
" "
599 if(!hasGenomicLocation(left))
return result;
600 if(!hasGenomicLocation(right))
return result;
601 const CSeq_loc& left_genomic_int = getGenomicLocation(left);
602 const CSeq_loc& right_genomic_int = getGenomicLocation(right);
605 TSeqPos from1, to1, from2, to2;
608 getFromTo(left_genomic_int, from1, to1, left_strand);
609 getFromTo(right_genomic_int, from2, to2, right_strand);
612 int left_frame=-0xFF, right_frame=-0xFF;
613 if(left_genomic_int.
IsInt())
615 left_frame = (from1-1)%3+1;
617 if(right_genomic_int.
IsInt())
619 right_frame = (from2-1)%3+1;
631 (
min((
int)to1, (
int)to2)-
632 max((
int)from2, (
int)from1)
636 bool complete_overlaps =
false;
638 result = overlaps(left_genomic_int, right_genomic_int, scratch_overlap);
639 bool left_covered_by_right=
false;
640 bool right_covered_by_left=
false;
641 if(
result) complete_overlaps = (left_covered_by_right=complete_overlap(left_genomic_int, right_genomic_int))
642 || (right_covered_by_left=complete_overlap(right_genomic_int, left_genomic_int));
643 if(PrintDetails())
NcbiCerr <<
"space = " << space
644 <<
", complete_overlap = " << complete_overlaps
645 <<
", result = " <<
result
647 if(
result && scratch_overlap >= m_cds_overlapThreshold)
660 report->
space = space;
664 char bufferchar[20480]; memset(bufferchar, 0, 20480);
665 strstream
buffer(bufferchar, 20480);
666 printOverlapReport(report,
buffer);
677 <<
"potential protein location (" << GetProtName(left)
678 <<
") that overlaps protein (" << GetProtName(right) <<
")" <<
NcbiEndl <<
'\0';
682 <<
"potential protein location (" << GetProtName(right)
683 <<
") that overlaps protein (" << GetProtName(left) <<
")" <<
NcbiEndl <<
'\0';
691 if(complete_overlaps)
694 if(report->
q_name_left.find(
"hypothetical")!=string::npos && left_covered_by_right && !right_covered_by_left)
696 NcbiCerr <<
"CReadBlastApp::overlaps: WARNING: sequence of a hypothetical protein "
697 <<
"[" << qname <<
"]"
698 <<
" is marked for removal because of a complete overlap"
702 m_diag[qname].problems.push_back(problemCOH);
703 m_diag[qname].problems.push_back(problemCO);
711 NcbiCerr <<
"overlaps_prot_na[seq,feats]: WARNING: get_parent_seqset threw when trying to append misc_feature for "
715 if(report->
q_name_right.find(
"hypothetical")!=string::npos && right_covered_by_left)
717 NcbiCerr <<
"CReadBlastApp::overlaps: WARNING: sequence of a hypothetical protein "
718 <<
"[" << qrname <<
"]"
719 <<
" is marked for removal because of a complete overlap"
723 m_diag[qrname].problems.push_back(problemCOH);
724 m_diag[qrname].problems.push_back(problemCO);
732 NcbiCerr <<
"overlaps_prot_na[seq,feats]: WARNING: get_parent_seqset threw when trying to append misc_feature for "
739 m_diag[qname].problems.push_back(problemO_l);
740 m_diag[qrname].problems.push_back(problemO_r);
747 m_diag[qname].problems.push_back(problemO_l);
748 m_diag[qrname].problems.push_back(problemO_r);
void GetLabel(string *label) const
static bool PrintDetails(int current_verbosity=m_current_verbosity)
static string GetProtName(const CBioseq &seq)
CConstBeginInfo ConstBegin(void)
void GetLocMap(LocMap &loc_map, const CSeq_annot::C_Data::TFtable &feats)
static void IncreaseVerbosity(void)
bool overlaps_na(const CBioseq::TAnnot &annots)
static void printOverlapReport(distanceReportStr *report, ostream &out=NcbiCout)
int find_overlap(TSimpleSeqs::iterator &seq, const TSimpleSeqs::iterator &ext_rna, TSimpleSeqs &seqs, int &overlap)
bool complete_overlap(const CSeq_loc &l1, const CSeq_loc &l2)
static int m_rna_overlapThreshold
static void DecreaseVerbosity(void)
int overlaps(const TSimpleSeqs::iterator &seq1, const TSimpleSeqs::iterator &seq2, int &overlap)
static const CSeq_loc & getGenomicLocation(const CBioseq &seq)
bool overlaps_prot_na(CBioseq &seq, const CBioseq::TAnnot &annots)
static void getFromTo(const CSeq_loc &loc, TSeqPos &from, TSeqPos &to, ENa_strand &strand)
bool match_na(const CSeq_feat &f1, const string &type1)
void append_misc_feature(CBioseq_set::TSeq_set &seqs, const string &name, EProblem problem_type)
namespace ncbi::objects::
Template class for iteration on objects of class C (non-modifiable version)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
@ eOverlap
CSeq_locs overlap.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
const TGene & GetGene(void) const
Get the variant data.
ENa_strand
strand of nucleic acid
bool IsInt(void) const
Check if variant Int is selected.
list< CRef< CSeq_entry > > TSeq_set
list< CRef< CSeq_feat > > TFtable
list< CRef< CSeq_annot > > TAnnot
unsigned int
A callback function used to compare two keys in a database.
@ eMyFeatureType_normal_tRNA
@ eMyFeatureType_atypical_tRNA
@ eMyFeatureType_pseudo_tRNA
@ eMyFeatureType_hypo_CDS
string get_trna_string(const CSeq_feat &feat)
string GetStringDescr(const CBioseq &bioseq)
string Get3type(const CRNA_ref &rna)
EMyFeatureType get_my_feat_type(const CSeq_feat &feat, const LocMap &loc_map)
string get_title(const CBioseq &seq)
EMyFeatureType get_my_seq_type(const CBioseq &seq)
string GetLocusTag(const CSeq_feat &f, const LocMap &loc_map)
CBioseq_set::TSeq_set * get_parent_seqset(const CBioseq &seq)
string GetRNAname(const CSeq_feat &feat)
list< TSimpleSeq > TSimpleSeqs
string GetRRNAtype(const CRNA_ref &rna)
string printed_range(const TSeqPos from2, const TSeqPos to2)
CRef< const CSeq_loc > loc1
CRef< const CSeq_loc > loc2