49 if ( !(*gen_feature)->GetData().IsFtable() )
continue;
73 if( !(*f1)->GetData().IsRna() )
continue;
77 if( !(*f1)->GetData().GetRna().CanGetExt() )
79 NcbiCerr <<
"CReadBlastApp::CheckMissingRibosomalRNA[feats]: FATAL: no ext feature in rRNA" <<
NcbiEndl;
83 if(
type ==
"5S") {has5S =
true; }
84 if(
type ==
"16S") {has16S =
true; }
85 if(
type ==
"23S") {has23S =
true; }
89 NcbiCerr <<
"CReadBlastApp::CheckMissingRibosomalRNA[feats]: ERROR: 5S ribosomal RNA is missing" <<
NcbiEndl;
91 NcbiCerr <<
"CReadBlastApp::CheckMissingRibosomalRNA[feats]: ERROR: 16S ribosomal RNA is missing" <<
NcbiEndl;
93 NcbiCerr <<
"CReadBlastApp::CheckMissingRibosomalRNA[feats]: ERROR: 23S ribosomal RNA is missing" <<
NcbiEndl;
100 TSimpleSeqs::iterator& ext_rna,
101 TSimpleSeqs::iterator& first_user_in_range, TSimpleSeqs::iterator& first_user_non_in_range,
103 TSimpleSeqs::iterator& first_ext_in_range, TSimpleSeqs::iterator& first_ext_non_in_range,
108 if(first_user_in_range==seqs.end())
110 NcbiCerr <<
"ugly_simple_overlaps_call: first_user_in_range is already at the end" <<
NcbiEndl;
124 if(first_user_in_range==seqs.end())
126 NcbiCerr <<
"ugly_simple_overlaps_call: after call: first_user_in_range is already at the end" <<
NcbiEndl;
136 for(TSimpleSeqs::iterator entry = first_ext_in_range; entry!= first_ext_non_in_range; entry++)
138 if(entry==ext_rna)
continue;
141 for(TSimpleSeqs::iterator entry = first_user_in_range; entry!=first_user_non_in_range; entry++)
157 TSimpleSeqs::iterator first_user_in_range = seqs.begin();
158 TSimpleSeqs::iterator first_user_non_in_range = seqs.begin();
159 TSimpleSeqs::iterator first_ext_in_range =
m_extRNAtable2.begin();
160 TSimpleSeqs::iterator first_ext_non_in_range =
m_extRNAtable2.begin();
161 TSimpleSeqs::iterator seq = seqs.begin();
165 from = ext_rna->exons[0].from;
166 to = ext_rna->exons[ext_rna->exons.size()-1].to;
168 int range_scale = to - from;
170 string type2 = ext_rna->name;
177 string diag_name = ext_rna->name;
179 int n_user_neighbors=0;
int n_ext_neighbors = 0;
string bufferstr=
"";
188 <<
"[" << ext_rna_range <<
"]"
189 <<
"[" << seq2_range <<
"]"
192 <<
"Overlap = " << overlap
198 absent = absent && (!overlap || ext_rna->type != seq2->type);
199 bool bad_strand = (overlap>0 && ext_rna->type == seq2->type && strand != seq2->exons[0].strand);
200 if(!bad_strand)
continue;
201 string diag_name2 = seq2->name;
203 from2 = seq2->exons[0].from;
204 to2 = seq2->exons[seq2->exons.size()-1].to;
206 if(!bufferstr.size())
210 if(first_user_in_range==seqs.end())
212 NcbiCerr <<
"simple_overlaps: first_user_in_range is already at the end" <<
NcbiEndl;
220 ext_rna, first_user_in_range, first_user_non_in_range, seqs, max_distance,
221 first_ext_in_range, first_ext_non_in_range, bufferstr);
226 misc_feat <<
"RNA does not match strand for feature located at " << seq_range <<
NcbiEndl;
229 problemStr problem = {trnaStrandProblem, bufferstr, misc_feat.str(),
"",
"", from2, to2, strand};
230 m_diag[diag_name2].problems.push_back(problem);
233 <<
"eTRNABadStrand" <<
"\t"
237 problemStr problem2 = {trnaStrandProblem, bufferstr,
"",
"",
"", from, to, strand};
238 m_diag[diag_name].problems.push_back(problem2);
244 if(!bufferstr.size())
247 ext_rna, first_user_in_range, first_user_non_in_range, seqs, max_distance,
248 first_ext_in_range, first_ext_non_in_range, bufferstr);
251 misc_feat <<
"no RNA in the input this type: " <<type2 <<
"[" << ext_rna_range <<
"]" <<
NcbiEndl;
254 m_diag[diag_name].problems.push_back(problem);
257 <<
"eTRNAAbsent" <<
"\t"
261 m_diag[diag_name].problems.push_back(problem2);
265 if(first_user_in_range==seqs.end()) first_user_in_range=seq;
276 const TSimpleSeqs::iterator& ext_rna,
277 TSimpleSeqs::iterator& first_user_in_range, TSimpleSeqs::iterator& first_user_non_in_range,
282 int from = ext_rna->exons[0].from;
283 int to = ext_rna->exons[ext_rna->exons.size()-1].to;
285 bool first_in_range_set =
false;
289 if(first_user_in_range==seqs.end())
291 NcbiCerr <<
"get_neighboring_sequences: first_user_in_range is already at the end" <<
NcbiEndl;
295 NcbiCerr <<
"get_neighboring_sequences: first_user_in_range = "
300 TSimpleSeqs::iterator seq = first_user_in_range;
301 for(; seq !=seqs.end(); seq++)
304 int from2 = seq->exons[0].from;
305 int to2 = seq->exons[seq->exons.size()-1].to;
308 NcbiCerr <<
"get_neighboring_sequences: WARNING: span of annotation "
309 << seq->locus_tag <<
""
310 <<
"[" << seq->name<<
"],"
311 <<
"[" << seq->description<<
"]"
312 <<
" is > 50000, probably a break in a circular molecule cutting across the annotation. This annotation will be ignored." <<
NcbiEndl;
318 NcbiCerr <<
"get_neighboring_sequences: first_in_range_set = " << first_in_range_set <<
NcbiEndl;
321 if(proximity<0)
continue;
324 if(!first_in_range_set) { first_user_in_range = seq; first_in_range_set=
true; }
334 first_user_non_in_range = seq;
335 if(!first_in_range_set) {first_user_in_range = first_user_non_in_range = seqs.end();}
336 if(first_user_non_in_range==seqs.end())
340 if(first_user_in_range==seqs.end())
349 const int from,
const int to,
const int key)
352 int range_scale = target_to - target_from;
359 int neighbor_factor = 10;
361 int max_range = 5000;
362 int max_distance = range_scale * neighbor_factor;
363 if(max_distance < min_range) max_distance = min_range;
364 if(max_distance > max_range) max_distance = max_range;
369 const int from,
const int to,
const int key,
const int max_distance)
371 if(to < target_from - max_distance )
return -1;
372 if(from > target_to + max_distance )
return +1;
377 const int max_distance)
383 << max_distance <<
"\t"
384 << ext_rna->type <<
"\t"
385 << ext_rna->name <<
"(" << ext_rna->locus_tag <<
")" <<
"\t"
386 << ext_rna->exons[0].from <<
"\t"
387 << ext_rna->description <<
"\t"
static int m_verbosity_threshold
static bool PrintDetails(int current_verbosity=m_current_verbosity)
static int sequence_proximity(const int target_from, const int target_to, const int from, const int to, const int key)
static void IncreaseVerbosity(void)
static int get_neighboring_sequences(const TSimpleSeqs::iterator &ext_rna, TSimpleSeqs::iterator &first_user_in_range, TSimpleSeqs::iterator &first_user_non_in_range, TSimpleSeqs &seqs, const int max_distance)
TSimpleSeqs m_simple_seqs
int find_overlap(TSimpleSeqs::iterator &seq, const TSimpleSeqs::iterator &ext_rna, TSimpleSeqs &seqs, int &overlap)
static void addSimpleTab(CNcbiStrstream &buffer, const string tag, const TSimpleSeqs::iterator &ext_rna, const int max_distance)
int simple_overlaps(void)
static void DecreaseVerbosity(void)
int overlaps(const TSimpleSeqs::iterator &seq1, const TSimpleSeqs::iterator &seq2, int &overlap)
TSimpleSeqs m_extRNAtable2
bool CheckMissingRibosomalRNA(const CBioseq::TAnnot &annots)
void ugly_simple_overlaps_call(int &n_user_neighbors, int &n_ext_neighbors, TSimpleSeqs::iterator &ext_rna, TSimpleSeqs::iterator &first_user_in_range, TSimpleSeqs::iterator &first_user_non_in_range, TSimpleSeqs &seqs, int max_distance, TSimpleSeqs::iterator &first_ext_in_range, TSimpleSeqs::iterator &first_ext_non_in_range, string &bufferstr)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
ENa_strand
strand of nucleic acid
list< CRef< CSeq_feat > > TFtable
list< CRef< CSeq_annot > > TAnnot
unsigned int
A callback function used to compare two keys in a database.
static int get_max_distance(const int range_scale)
const struct ncbi::grid::netcache::search::fields::KEY key
static pcre_uint8 * buffer
vector< TSimplePair > TSimplePairs
list< TSimpleSeq > TSimpleSeqs
string GetRRNAtype(const CRNA_ref &rna)
string printed_range(const TSeqPos from2, const TSeqPos to2)