46 #define NCBI_USE_ERRCODE_X Objtools_Rd_Glimmer
76 if (line.empty() || line[0] ==
'#' ||
77 (line.size() >= 2 && line[0] ==
'/' && line[1] ==
'/')) {
81 if (defline.empty()) {
85 string::size_type pos = s.find_first_of(
" ");
86 if (pos != string::npos) {
100 "Failed to find sequence: " + s);
105 ostr <<
"CGlimmerReader::ReadAnnot(): line "
106 << count <<
": failed to identify defline: " << line;
114 if (toks.size() != 5) {
116 ostr <<
"CGlimmerReader::ReadAnnot(): line "
117 << count <<
": invalid number of tokens: "
118 <<
"found " << toks.size() <<
", expected 5: " << line;
127 list<string>::iterator it = toks.begin();
130 string orf_name = *it++;
140 ostr <<
"CGlimmerReader::ReadAnnot(): line "
141 << count <<
": failed to identify start pos: " << line;
161 ostr <<
"CGlimmerReader::ReadAnnot(): line "
162 << count <<
": failed to identify stop pos: " << line;
180 if (frame > 3 || frame < -3) {
190 ostr <<
"CGlimmerReader::ReadAnnot(): line "
191 << count <<
": failed to identify frame: " << line;
210 ostr <<
"CGlimmerReader::ReadAnnot(): line "
211 << count <<
": failed to identify score: " << line;
235 ival->
SetTo (seq_length - 1);
236 cds_feat->
SetLocation().SetPacked_int().Set().push_back(ival);
240 ival->
SetTo (stop_pos);
241 cds_feat->
SetLocation().SetPacked_int().Set().push_back(ival);
249 ival->
SetTo (start_pos);
250 cds_feat->
SetLocation().SetPacked_int().Set().push_back(ival);
254 ival->
SetTo (seq_length - 1);
255 cds_feat->
SetLocation().SetPacked_int().Set().push_back(ival);
258 cds_feat->
SetLocation().SetInt().SetFrom(
min(start_pos, stop_pos));
259 cds_feat->
SetLocation().SetInt().SetTo (
max(start_pos, stop_pos));
265 if (genetic_code_idx) {
267 d->SetId(genetic_code_idx);
268 cdr.
SetCode().Set().push_back(d);
272 gene_feat->
SetData().SetGene().SetLocus(orf_name);
275 annot->
SetData().SetFtable().push_back(gene_feat);
276 annot->
SetData().SetFtable().push_back(cds_feat);
279 LOG_POST_X(7,
Info <<
"CGlimmerReader::Read(): parsed " << count <<
" lines, " << errs <<
" errors");
281 string prefix(
"lcl|prot");
303 ostr <<
prefix << setw(7) << setfill(
'0') << ++count;
307 bioseq.
SetId().push_back(
id);
User-defined methods of the data storage class.
CRef< objects::CSeq_entry > Read(CNcbiIstream &istr, objects::CScope &scope, int genetic_code_idx=11)
Read in and create a seq-annot for the glimmer input; we also optionally create proteins for the CDSs ...
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
ESubtype GetSubtype(void) const
namespace ncbi::objects::
unsigned int TSeqPos
Type for sequence locations and lengths.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define LOG_POST_X(err_subcode, message)
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Info(CExceptionArgs_Base &args)
CConstRef< CSeq_id > GetSeqId(void) const
static SIZE_TYPE ParseFastaIds(CBioseq::TId &ids, const CTempString &s, bool allow_partial_failure=false)
Parse an entire set of |-delimited FASTA-style IDs, appending the results to IDS.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
TSeqPos GetBioseqLength(void) const
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
void SetLocation(TLocation &value)
Assign a value to Location data member.
void SetProduct(TProduct &value)
Assign a value to Product data member.
void SetCode(TCode &value)
Assign a value to Code data member.
const TLocation & GetLocation(void) const
Get the Location member data.
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
void SetFrom(TFrom value)
Assign a value to From data member.
TSet & SetSet(void)
Select the variant.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
void SetData(TData &value)
Assign a value to Data data member.
TId & SetId(void)
Assign a value to Id data member.
list< CRef< CSeq_id > > TId
void SetInst(TInst &value)
Assign a value to Inst data member.
void SetRepr(TRepr value)
Assign a value to Repr data member.
list< CRef< CSeq_feat > > TFtable
void SetLength(TLength value)
Assign a value to Length data member.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
void SetMol(TMol value)
Assign a value to Mol data member.
@ eRepr_raw
continuous sequence
Useful/utility classes and methods.
static const char * prefix[]