41 if ( !(*gen_feature)->GetData().IsFtable() )
continue;
51 if( !(*f)->GetData().IsGene() )
continue;
52 string qname; (*f)->GetData().GetGene().GetLabel(&qname);
53 if( diag.
find(qname) == diag.
end() )
continue;
69 if( !(problem->type &
type) )
continue;
71 if(!problem->message.size())
continue;
82 for(list<problemStr>::iterator problem = diag[qname].problems.
begin(); problem!=diag[qname].problems.
end();)
85 if( !(problem->type &
type) )
91 problem=diag[qname].problems.
erase(problem);
115 qnames[0]=qname1; qnames[1]=qname2;
116 for(
int i=0;
i<2;
i++)
118 string& qname = qnames[
i];
119 if( diag.
find(qname) != diag.
end() )
131 string qname = problem->first;
132 if(
done.find(qname)!=
done.end())
continue;
155 ITERATE(list<problemStr>, problem, diag[qname].problems)
161 if (problem->type &
type)
180 strres <<
"unknown_problem_type=" <<
type <<
'\0';
181 string result=strres.str();
183 result =
"Potential overlap found";
185 result =
"Potential RNA overlap found";
187 result =
"Complete overlap found";
189 result =
"overlap marked for removal";
191 result =
"Something relevant to frame shift found";
193 result =
"Potential frame shift evidence found";
195 result =
"Evidence absolving from the frame shift accusation found";
197 result =
"Potential partial protein annotation found";
199 result =
"Short annotation found";
201 result =
"tRNA is missing in the list of independently annotated tRNAs";
203 result =
"RNA is missing in the list of annotated RNAs in the input";
205 result =
"RNA is present at the wrong strand";
207 result =
"RNA is present with undefined strand";
209 result =
"tRNA is a complete mismatch";
211 result =
"tRNA has mismatched ends";
243 out << name.c_str() ;
261 if (!feat->GetData().IsRna() && !feat->GetData().IsGene())
continue;
262 const CSeq_loc& loc = feat->GetLocation();
269 if(problem_locs.
find(
range)==problem_locs.
end())
continue;
271 problem_locs[
range].count++;
272 if(feat->GetData().IsRna()) problem_locs[
range].rnacount++;
273 if(feat->GetData().IsGene()) problem_locs[
range].genecount++;
278 if (!feat->GetData().IsRna() && !feat->GetData().IsGene())
continue;
280 CSeq_loc& loc = feat->SetLocation();
285 if(problem_locs.
find(
range)==problem_locs.
end())
continue;
287 if( problem_locs[
range].count!=2
288 || problem_locs[
range].rnacount!=1
289 || problem_locs[
range].genecount!=1
294 NcbiCerr <<
"CReadBlastApp::FixStrands: "
296 <<
"location found, but the number of features with that location is confusing, "
298 <<
"[" << problem_locs[
range].name <<
"]"
299 <<
"(" <<
range <<
")"
307 inter->SetStrand(problem_locs[
range].strand);
308 NcbiCerr <<
"CReadBlastApp::FixStrands: "
309 <<
"[" << problem_locs[
range].name <<
"] "
328 for(CSeq_submit::C_Data::TEntrys::iterator entry =
m_Submit.
SetData().SetEntrys().begin();
335 <<
"RemoveProblems(void): doing entry: removeme = "
340 NcbiCerr <<
"RemoveProblems(): WARNING: "
341 <<
"CSeq_entry deleted, loss of annotation might occur"
357 <<
"RemoveProblems(void): case is single entry "
376 <<
"RemoveProblems(CSeq_entry)(seq case): removeme = "
390 <<
"RemoveProblems(CSeq_entry)(set case): removeme = "
392 <<
", entries.size = "
405 if(!entry.
IsSet())
return;
414 for(CSeq_descr::Tdata::iterator desc = descs.begin(); desc!=descs.end(); )
416 seq.
SetDescr().Set().push_back(*desc);
417 desc=descs.erase(desc);
423 NcbiCerr <<
"NormalizeSeqentry(CSeq_entry...): "
425 <<
"converted sequence set to sequence"
439 if(all_entries_removed > 0) {; noseqs=
true;}
444 if(all_annot_removed > 0) {setseq.
ResetAnnot(); noannot=
true;}
446 if(noseqs ) removeme = 1;
449 <<
"RemoveProblems(CBioseq_set): noseqs = "
453 <<
", removeme (return) = "
477 string origName = thisName;
478 string::size_type ipos = thisName.rfind(
'|');
if(ipos!=string::npos) thisName.erase(0, ipos+1);
479 ipos = thisName.rfind(
'_');
if(ipos!=string::npos) ipos= thisName.rfind(
'_', ipos-1);
482 <<
"RemoveProblems(CBioseq): remove? sequence "
483 <<
"[" << origName <<
"]"
485 <<
"[" << thisName <<
"]"
488 if(problem_seqs.
find(thisName) != problem_seqs.
end())
491 <<
"RemoveProblems(CBioseq): sequence "
492 <<
"[" << origName <<
"]"
493 <<
" is marked for removal, because of a match to "
494 <<
"[" << thisName <<
"]"
501 <<
"RemoveProblems(CBioseq): remove = "
514 for(CBioseq_set::TSeq_set::iterator entries_end =
entries.end(), entry=
entries.begin(); entry != entries_end; )
519 <<
"RemoveProblems(CBioseq_set::TSeq_set): removeseq = "
523 if(removeseq) entry=
entries.erase(entry);
529 <<
"RemoveProblems(CBioseq_set::TSeq_set): nentries = "
541 for(CBioseq::TAnnot::iterator annot=annots.begin(); annot!=annots.end(); )
544 if( (*annot)->GetData().IsFtable()) removeme=
RemoveProblems((*annot)->SetData().SetFtable(), problem_seqs, loc_map);
547 NcbiCerr <<
"RemoveProblems(annots, problem_seqs): "
549 <<
"annotation has empty feature table and it will be removed"
551 annot=annots.erase(annot);
555 if(annots.size()==0)
remove=1;
567 for(CSeq_annot::C_Data::TFtable::iterator feat_end =
table.end(), feat =
table.begin(); feat != feat_end;)
571 gene = (*feat)->GetData().IsGene();
572 cdregion = (*feat)->GetData().IsCdregion();
573 bool del_feature=
false;
579 if(
PrintDetails())
NcbiCerr <<
"RemoveProblems(CSeq_annot::C_Data::TFtable): feat: (" << real_loc_string <<
")(" << loc_string <<
")" <<
NcbiEndl;
583 if(problem_seqs.
find(loc_string) != problem_seqs.
end())
585 if((*feat)->GetData().IsImp() &&
586 (*feat)->GetData().GetImp().CanGetKey())
588 NcbiCerr <<
"RemoveProblems: INFO: feature " << loc_string <<
": imp, key = " << (*feat)->GetData().GetImp().GetKey() <<
NcbiEndl;
590 if((*feat)->GetData().IsImp() &&
591 (*feat)->CanGetComment() )
593 NcbiCerr <<
"RemoveProblems: INFO: feature " << loc_string <<
": imp, comment = " << (*feat)->GetComment() <<
NcbiEndl;
603 if((*feat)->GetData().IsImp() &&
604 (*feat)->GetData().GetImp().CanGetKey() &&
605 (*feat)->GetData().GetImp().GetKey() ==
"misc_feature"
606 ) del_feature =
false;
607 else del_feature=
true;
612 NcbiCerr <<
"RemoveProblems: feature " << loc_string <<
": ";
613 if(del_feature)
NcbiCerr <<
"WILL BE REMOVED";
614 else NcbiCerr <<
"stays until further analysis for it";
619 NcbiCerr <<
"RemoveProblems: WARNING: feature "
620 <<
"{" << (*feat)->GetData().SelectionName((*feat)->GetData().Which()) <<
"} "
621 << loc_string <<
": ";
622 NcbiCerr <<
"will be removed because of a problem: ";
626 if(!del_feature && gene && (*feat)->GetData().GetGene().CanGetLocus_tag() )
631 string locus_tag = (*feat)->GetData().GetGene().GetLocus_tag();
632 if(problem_seqs.
find(locus_tag) != problem_seqs.
end()) del_feature=
true;
635 NcbiCerr <<
"RemoveProblems: gene " << locus_tag <<
": ";
644 NcbiCerr <<
"RemoveProblems: WARNING: gene " << locus_tag <<
": ";
645 NcbiCerr <<
"will be removed because of a problem: ";
650 if(!del_feature && cdregion && (*feat)->CanGetProduct() )
656 if( (*feat)->CanGetProduct() &&
657 (*feat)->GetProduct().IsWhole() &&
658 (*feat)->GetProduct().GetWhole().IsGeneral() &&
659 (*feat)->GetProduct().GetWhole().GetGeneral().CanGetTag() &&
660 (*feat)->GetProduct().GetWhole().GetGeneral().GetTag().IsStr() )
662 productName = (*feat)->GetProduct().GetWhole().GetGeneral().GetTag().GetStr();
665 (*feat)->CanGetProduct() &&
666 (*feat)->GetProduct().IsWhole())
668 productName = (*feat)->GetProduct().GetWhole().AsFastaString();
671 string::size_type ipos=productName.rfind(
'_', productName.size());
672 if(ipos != string::npos)
674 string::size_type ipos2;
675 ipos2=productName.rfind(
'_', ipos-1);
676 if(ipos2 != string::npos) productName.erase(0, ipos2+1);
680 ipos2=productName.rfind(
'|', ipos-1);
681 if(ipos2 != string::npos) productName.erase(0, ipos2+1);
685 if(productName.length() && problem_seqs.
find(productName) != problem_seqs.
end()) del_feature=
true;
688 NcbiCerr <<
"RemoveProblems: cdregion " << productName <<
": ";
698 if(problem_seqs.
find(real_loc_string) == problem_seqs.
end())
700 problem_seqs[real_loc_string]=problem_seqs[loc_string];
703 if(del_feature) feat=
table.erase(feat);
706 if(
table.size()==0) removeme=1;
722 if(seq->IsSetAnnot() && seq->IsAa()) nremoved+=
RemoveInterim(seq->SetAnnot());
723 if(seq->IsSetAnnot() && seq->IsNa()) nremoved+=
RemoveInterim2(seq->SetAnnot());
736 for(CBioseq::TAnnot::iterator annot=annots.begin(), annot_end = annots.end(); annot != annot_end; )
739 if((*annot)->GetData().IsAlign())
741 nremoved++; erased =
true;
743 if ( (*annot)->GetData().IsFtable())
747 for(CSeq_annot::C_Data::TFtable::iterator feat=
table.begin(), feat_end=
table.end(); feat != feat_end; )
749 string test =
"Genomic Location:";
750 if ((*feat)->IsSetData() && (*feat)->GetData().IsProt() &&
751 (*feat)->IsSetComment() && (*feat)->GetComment().substr(0,
test.size()) ==
test )
753 table.erase(feat++); dremoved++;
775 <<
", left=" << (*annot)->GetData().GetFtable().size()
777 if((*annot)->SetData().SetFtable().size() == 0)
783 if(erased) annot=annots.erase(annot);
800 if ( !(*gen_feature)->GetData().IsFtable() )
continue;
804 for(CSeq_annot::C_Data::TFtable::iterator feat_end =
table.end(), feat =
table.begin(); feat != feat_end;)
813 if(feat_defined.
find(buff.str()) != feat_defined.
end())
816 feat=
table.erase(feat);
821 feat_defined[buff.str()]=
true;
844 int addProblems(list<problemStr>& dest,
const list<problemStr>& src)
847 ITERATE(list<problemStr>, src_p, src)
849 dest.push_back(*src_p);
859 ITERATE(list<problemStr>, problem, feat->second.problems)
862 string name = feat->first;
863 string::size_type ipos = name.rfind(
'|');
if(ipos!=string::npos) name.erase(0, ipos+1);
864 ipos = name.rfind(
'_');
if(ipos!=string::npos) ipos= name.rfind(
'_', ipos-1);
865 if(ipos!=string::npos) name.erase(0, ipos+1);
869 && !problem->misc_feat_message.empty()
872 problem_locs[
range].strand = problem->strand;
873 problem_locs[
range].name = name;
874 problem_locs[
range].count =
875 problem_locs[
range].rnacount =
876 problem_locs[
range].genecount = 0;
880 NcbiCerr <<
"CReadBlastApp::CollectRNAFeatures: " << feat->first
881 <<
"[" <<
range <<
"]: "
882 <<
"(" << name <<
")"
883 << (added ?
"added" :
"skipped") <<
NcbiEndl;
886 return problem_locs.
size();
893 bool keep_frameshifted = args[
"kfs"].HasValue();
896 ITERATE(list<problemStr>, problem, feat->second.problems)
899 string name = feat->first;
900 string::size_type ipos = name.rfind(
'|');
if(ipos!=string::npos) name.erase(0, ipos+1);
901 ipos = name.rfind(
'_');
if(ipos!=string::npos) ipos= name.rfind(
'_', ipos-1);
902 if(ipos!=string::npos) name.erase(0, ipos+1);
904 (problem->type ==
eFrameShift && !keep_frameshifted)
910 { problem_names[name]=
ProblemType(problem->type); added=
true; }
912 NcbiCerr <<
"CollectFrameshiftedSeqs: " << feat->first
914 <<
"(" << name <<
")"
915 << (added ?
"added" :
"skipped") <<
NcbiEndl;
918 return problem_names.
size();
926 NcbiCerr <<
"append_misc_feature: FATAL: do not have problems for " << name <<
NcbiEndl;
934 list<CRef<CSeq_id> >& na_id = (*na)->SetSeq().SetId();
937 if ( !(*gen_feature)->GetData().IsFtable() )
continue;
940 Tproblem_misced problem_misced;
943 if ( !(problem->type & problem_type) )
continue;
956 strand = problem->strand;
957 message = problem->misc_feat_message;
958 if(message.size()==0)
continue;
959 if(problem_misced.find(problem->type) != problem_misced.end() &&
960 problem_misced[problem->type].find(message) != problem_misced[problem->type].end()
962 else problem_misced[problem->type][message] =
true;
964 while((pos=message.find_first_of(
"\n\r"))!=string::npos)
971 feat->
SetData().SetImp().SetKey(
"misc_feature");
974 feat->
SetLocation().SetInt().SetId(**na_id.begin());
976 (*gen_feature)->SetData().SetFtable().push_back(feat);
int CollectFrameshiftedSeqs(map< string, string > &problem_names)
int CollectRNAFeatures(TProblem_locs &problem_locs)
static bool hasProblems(const CBioseq &seq, diagMap &diag, const EProblem type)
static bool PrintDetails(int current_verbosity=m_current_verbosity)
CConstBeginInfo ConstBegin(void)
void erase_problems(const string &qname, diagMap &diag, const EProblem type)
void GetLocMap(LocMap &loc_map, const CSeq_annot::C_Data::TFtable &feats)
int RemoveInterim2(CBioseq::TAnnot &annots)
static void IncreaseVerbosity(void)
void reportProblemMessage(const string &message, ostream &out=NcbiCout)
void reportProblemType(const EProblem type, ostream &out=NcbiCout)
static string ProblemType(const EProblem type)
void reportProblemSequenceName(const string &name, ostream &out=NcbiCout)
int RemoveProblems(map< string, string > &problem_seqs, LocMap &loc_map)
static bool is_prot_entry(const CBioseq &seq)
static void DecreaseVerbosity(void)
static void PopVerbosity(void)
static void getFromTo(const CSeq_loc &loc, TSeqPos &from, TSeqPos &to, ENa_strand &strand)
void reportProblems(const bool report_and_forget, diagMap &diag, ostream &out, const CBioseq::TAnnot &annots, const EProblem type)
void NormalizeSeqentry(CSeq_entry &entry)
void append_misc_feature(CBioseq_set::TSeq_set &seqs, const string &name, EProblem problem_type)
static void PushVerbosity(void)
namespace ncbi::objects::
Template class for iteration on objects of class C (non-medifiable version)
Template class for iteration on objects of class C.
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
static void DLIST_NAME() remove(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
std::ofstream out("events_result.xml")
main entry point for tests
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define MSerial_AsnText
I/O stream manipulators –.
static string GetStringDescr(const CBioseq &bioseq, EStringFormat fmt)
@ eOverlap
CSeq_locs overlap.
NCBI_NS_STD::string::size_type SIZE_TYPE
void SetLocation(TLocation &value)
Assign a value to Location data member.
void SetComment(const TComment &value)
Assign a value to Comment data member.
void SetData(TData &value)
Assign a value to Data data member.
ENa_strand
strand of nucleic acid
TSet & SetSet(void)
Select the variant.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
void ResetAnnot(void)
Reset Annot data member.
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
bool IsSetDescr(void) const
Check if a value has been assigned to Descr data member.
bool IsSet(void) const
Check if variant Set is selected.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
list< CRef< CSeqdesc > > Tdata
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
void ResetAnnot(void)
Reset Annot data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
list< CRef< CSeq_feat > > TFtable
list< CRef< CSeq_annot > > TAnnot
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
bool IsEntrys(void) const
Check if variant Entrys is selected.
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
string GetLocationString(const CSeq_feat &f)
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::SIZE size
double f(double x_, const double &y_)
int addProblems(list< problemStr > &dest, const list< problemStr > &src)
string diagName(const string &type, const string &value)
string GetStringDescr(const CBioseq &bioseq)
string GetLocusTag(const CSeq_feat &f, const LocMap &loc_map)
string printed_range(const TSeqPos from2, const TSeqPos to2)
int test(int srctype, const void *srcdata, int srclen, int dsttype, int dstlen)
static wxAcceleratorEntry entries[3]