57 }
else if (name ==
"asnb") {
59 }
else if (name ==
"xml") {
63 THROW1_TRACE(runtime_error,
"Bad serial format name " + name);
74 string name = (*score)->GetId().GetStr();
75 if(name!=
"use_this_gi")
continue;
76 result.push_back((*score)->GetValue().GetInt());
88 ITERATE(vector<long>, gi1, left)
91 ITERATE(vector<long>, gi2, right)
117 string fn =
GetArgs()[
"in"].AsString();
118 string::size_type ipos=fn.rfind(
".");
119 if(ipos!=string::npos) fn.erase(ipos);
122 string fncdd = fn +
"_cdd_html.fsa";
128 unique_ptr<CNcbiOfstream> out2 (
new CNcbiOfstream (fn.c_str()));
131 unique_ptr<CNcbiOfstream> out2_cdd (
new CNcbiOfstream (fncdd.c_str()));
169 <<
"[" << range1 <<
"] " << report->
left_frame <<
"\t\t"
190 out <<
"General info extracted from the header of the submission:" <<
NcbiEndl;
200 out <<
"unspecified";
209 out <<
"unspecified";
216 out <<
"unspecified";
223 out <<
"unspecified";
252 <<
"\t" << report->
space
258 <<
"\t" <<
"..." <<
"\t" << report->
s_left_right <<
"\t" <<
"..."
261 <<
"s\t" <<
"..." <<
"\t" << report->
s_right_left <<
"\t" <<
"..."
269 out <<
"Potential deletetion of a nucleotide sequence equivalent to " << report->
diff_edge_left <<
" occurred." <<
NcbiEndl;
274 out <<
"Potential insertion of a nucleotide sequence equivalent to " << -report->
diff_edge_left <<
" occurred." <<
NcbiEndl;
280 out <<
"Potential sequencing error or replacement mutation without insertion or deletion of a nucleotide sequence occurred." <<
NcbiEndl;
298 if( !(*f)->GetData().IsRna() )
continue;
303 <<
"] feature location" <<
NcbiEndl;
305 if(aname == loc_map.
end())
308 NcbiCerr <<
"CReadBlastApp::GetRNAfeats(): ERROR: cannot find gene for location "
313 rna_feats.push_back(aname->second);
334 if(aname == loc_map.
end())
342 const CGene_ref& gene=aname->second->GetData().GetGene();
362 const CSeq_id& nu_id = inter->GetId();
366 if(!(*seq_nu)->IsSeq())
continue;
367 if(!(*seq_nu)->GetSeq().IsNa())
continue;
368 const CBioseq::TId& nu_ids = (*seq_nu)->GetSeq().GetId();
371 if((*cnu_id)->Compare(nu_id) ==
CSeq_id::e_YES)
return (*seq_nu)->GetSeq();
374 NcbiCerr <<
"CReadBlastApp::get_nucleotide_seq: INTERNAL FATAL ERROR: could not find nucleotide sequence" <<
NcbiEndl;
385 NcbiCerr <<
"get_parent_seqset: WARNING: " << name <<
": no parent\n";
394 NcbiCerr <<
"get_parent_seqset: WARNING: " << name <<
": no set ancestor\n";
402 NcbiCerr <<
"get_parent_seqset: WARNING: " << name <<
": (grand)parent set does not have Seq_set\n";
413 case 'A':
return "Ala";
break;
414 case 'Q':
return "Gln";
break;
415 case 'W':
return "Trp";
break;
416 case 'E':
return "Glu";
break;
417 case 'R':
return "Arg";
break;
418 case 'T':
return "Thr";
break;
419 case 'Y':
return "Tyr";
break;
420 case 'U':
return "Sec";
break;
421 case 'I':
return "Ile";
break;
422 case 'J':
return "Xeu";
break;
423 case 'P':
return "Pro";
break;
424 case 'S':
return "Ser";
break;
425 case 'D':
return "Asp";
break;
426 case 'F':
return "Phe";
break;
427 case 'G':
return "Gly";
break;
428 case 'H':
return "His";
break;
429 case 'K':
return "Lys";
break;
430 case 'L':
return "Leu";
break;
431 case 'Z':
return "Glx";
break;
432 case 'X':
return "Ukn";
break;
433 case 'C':
return "Cys";
break;
434 case 'V':
return "Val";
break;
435 case 'B':
return "Asx";
break;
436 case 'N':
return "Asn";
break;
437 case 'M':
return "Met";
break;
438 case 'O':
return "Pyl";
break;
441 NcbiCerr <<
"let1_2_let3: ERROR: char " << let1 <<
"(" << (
int)let1 <<
") is not handled" <<
NcbiEndl;
450 if(!(*annot)->GetData().IsAlign())
continue;
453 return perfect.size();
458 CBioseq::TAnnot::const_iterator& annot,
469 int qFrom, qTo, sFrom, sTo;
470 getBounds(annot, &qFrom, &qTo, &sFrom, &sTo);
474 int qtails = qLen - qTo + qFrom - 1;
475 int stails = sLen - sTo + sFrom - 1;
478 (double)qtails/qLen <
thr &&
479 (
double)stails/sLen <
thr &&
480 s_name.find(
"hypothetical") == string::npos;
562 const pair<int,int>& second)
564 if(
first.first != second.first)
return first.first < second.first;
565 return first.second < second.second;
581 if(
first->GetSeq().IsAa())
582 NcbiCerr <<
"less_seq first does not have genomic location or is nucleotide seq" <<
NcbiEndl;
588 NcbiCerr <<
"less_seq second does not have genomic location or is nucleotide seq" <<
NcbiEndl;
594 TSeqPos from1, to1, from2, to2;
599 getFromTo(left_genomic_int, from1, to1, strand1);
600 getFromTo(right_genomic_int, from2, to2, strand2);
607 return from1 < from2;
613 <<
": " << seqs.size()
618 if ((*seq)->IsSet())
SortSeqs((*seq)->SetSet().SetSeq_set());
640 if(
result.find(
"set") != string::npos)
698 for(nums=0 ;seq.
IsValid(); ++seq, nums++)
701 if(!inst.
IsAa())
continue;
710 for(nums=0 ;seq; ++seq, nums++)
713 if(!inst.
IsAa())
continue;
724 while( seq != seqs.end() &&
738 return (seq != seqs.end()) ;
748 while( seq != seqs.end() &&
761 return (seq != seqs.end()) ;
767 CBioseq_set::TSeq_set::const_iterator& seq,
771 while( seq != seqs.end() &&
785 return (seq != seqs.end()) ;
789 CBioseq_set::TSeq_set::iterator& seq,
793 while( seq != seqs.end() &&
807 return (seq != seqs.end()) ;
813 const unsigned int max_acc_len=0xF;
814 ifstream
in(
file.c_str());
815 if(!
in.is_open())
return false;
816 list<long> scratch_acc;
819 char buffer[max_acc_len+2];
828 string::size_type ipos=string::npos;
829 while((ipos=
test.find_first_of(
"\t\n\r ")) != string::npos)
832 if(
test.size()>max_acc_len)
return false;
834 if(
test.find_first_not_of(
"0123456789") != string::npos)
return false;
836 scratch_acc.push_back(atol(
test.c_str()));
838 if(!scratch_acc.size())
return false;
839 input_acc = scratch_acc;
847 int main(
int argc,
const char* argv[])
Class holding information about root of non-modifiable object hierarchy. Do not use it directly.
CSeq_entry * GetParentEntry(void) const
Class holding information about root of non-modifiable object hierarchy. Do not use it directly.
void GetDate(string *label, bool year_only=false) const
Append a standardized string representation of the date to the label.
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
void printGeneralInfo(ostream &out=NcbiCerr)
static bool hasProblems(const CBioseq &seq, diagMap &diag, const EProblem type)
static int m_verbosity_threshold
static bool PrintDetails(int current_verbosity=m_current_verbosity)
static string GetProtName(const CBioseq &seq)
static bool less_pair(const pair< int, int > &first, const pair< int, int > &second)
static bool giMatch(const vector< long > &left, const vector< long > &right)
static int getQueryLen(const CBioseq &seq)
static vector< long > getGIs(CBioseq::TAnnot::const_iterator &annot)
CConstBeginInfo ConstBegin(void)
static stack< int > m_saved_verbosity
static bool has_blast_hits(const CBioseq &seq)
static void IncreaseVerbosity(void)
static char * next_w(char *w)
static int collectPerfectHits(vector< perfectHitStr > &perfect, const CBioseq &seq)
static void printOverlapReport(distanceReportStr *report, ostream &out=NcbiCout)
static string getAnnotName(CBioseq::TAnnot::const_iterator &annot)
static double m_trnascan_scoreThreshold
static bool hasGenomicLocation(const CBioseq &seq)
static int m_current_verbosity
bool ReadPreviousAcc(const string &file, list< long > &input_acc)
static int skip_toprot(CTypeIterator< CBioseq > &seq)
static void dumpAlignment(const string &alignment, const string &file)
static int m_cds_overlapThreshold
static double m_entireThreshold
static int m_rna_overlapThreshold
void GetRNAfeats(const LocMap &loc_map, CSeq_annot::C_Data::TFtable &rna_feats, const CSeq_annot::C_Data::TFtable &feats)
int SetParents(CSeq_entry *parent, CBioseq_set::TSeq_set &where)
static void check_alignment(CBioseq::TAnnot::const_iterator &annot, const CBioseq &seq, vector< perfectHitStr > &results)
static ECoreDataType getCoreDataType(istream &in)
static bool is_prot_entry(const CBioseq &seq)
const CBioseq & get_nucleotide_seq(const CBioseq &seq)
static char * skip_space(char *w)
static void DecreaseVerbosity(void)
static bool less_simple_seq(const TSimpleSeq &first, const TSimpleSeq &second)
static void getBounds(CBioseq::TAnnot::const_iterator &annot, int *qFrom, int *qTo, int *sFrom, int *sTo)
static void printPerfectHit(const perfectHitStr &hit, ostream &out=NcbiCout)
static int m_shortProteinThreshold
static void PopVerbosity(void)
ECoreDataType m_coreDataType
static const CSeq_loc & getGenomicLocation(const CBioseq &seq)
static void printReport(distanceReportStr *report, ostream &out=NcbiCout)
static int getLenScore(CBioseq::TAnnot::const_iterator &annot)
static bool less_seq(const CRef< CSeq_entry > &first, const CRef< CSeq_entry > &second)
static void getFromTo(const CSeq_loc &loc, TSeqPos &from, TSeqPos &to, ENa_strand &strand)
static double m_small_tails_threshold
static double m_partThreshold
static double m_eThreshold
static bool skip_to_valid_seq_cand(CBioseq_set::TSeq_set::const_iterator &seq, const CBioseq_set::TSeq_set &seqs)
void dump_fasta_for_pretty_blast(diagMap &diag)
static void PushVerbosity(void)
CSeq_entry * GetParentEntry(void) const
namespace ncbi::objects::
static bool IsAa(EMol mol)
Template class for iteration on objects of class C (non-modifiable version)
container_type::const_iterator const_iterator
const_iterator end() const
const_iterator find(const key_type &key) const
std::ofstream out("events_result.xml")
main entry point for tests
#define test(a, b, c, d, e)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
#define THROW1_TRACE(exception_class, exception_arg)
Throw trace.
ESerialDataFormat
Data file format.
@ eSerial_AsnText
ASN.1 text.
@ eSerial_AsnBinary
ASN.1 binary.
@ e_YES
SeqIds compared, are equivalent.
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
vector< CRef< CScore > > TScore
ENa_strand
strand of nucleic acid
const TSeq & GetSeq(void) const
Get the variant data.
TSet & SetSet(void)
Select the variant.
bool CanGetSeq_set(void) const
Check if it is safe to call GetSeq_set method.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSet(void) const
Check if variant Set is selected.
list< CRef< CSeq_entry > > TSeq_set
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
const TInst & GetInst(void) const
Get the Inst member data.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
list< CRef< CSeq_id > > TId
list< CRef< CSeq_feat > > TFtable
list< CRef< CSeq_annot > > TAnnot
const TTool & GetTool(void) const
Get the Tool member data.
const TReldate & GetReldate(void) const
Get the Reldate member data.
const TComment & GetComment(void) const
Get the Comment member data.
bool CanGetTool(void) const
Check if it is safe to call GetTool method.
bool CanGetReldate(void) const
Check if it is safe to call GetReldate method.
bool CanGetComment(void) const
Check if it is safe to call GetComment method.
const TEntrys & GetEntrys(void) const
Get the variant data.
const TUser_tag & GetUser_tag(void) const
Get the User_tag member data.
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
const TSub & GetSub(void) const
Get the Sub member data.
bool IsEntrys(void) const
Check if variant Entrys is selected.
bool CanGetUser_tag(void) const
Check if it is safe to call GetUser_tag method.
unsigned int
A callback function used to compare two keys in a database.
string GetLocationString(const CSeq_feat &f)
std::istream & in(std::istream &in_, double &x_)
string GetStringDescr(const CBioseq &bioseq)
string printed_ranges(const CSeq_loc &seq_interval)
string printed_range(const TSeqPos from2, const TSeqPos to2)
ESerialDataFormat s_GetFormat(const string &name)
string let1_2_let3(char let1)
int main(int argc, const char *argv[])
string GetLocusTag(const CSeq_feat &f, const LocMap &loc_map)
CBioseq_set::TSeq_set * get_parent_seqset(const CBioseq &seq)
CRef< const CSeq_loc > loc1
CRef< const CSeq_loc > loc2
CRef< CTestThread > thr[k_NumThreadsMax]