71 const string& strGtfType,
72 const string& strRawAttributes )
78 for (
size_t u=0; u <
attributes.size(); ++u ) {
83 if (strGtfType ==
"gene") {
88 if (strGtfType ==
"transcript") {
103 if (
key.empty() &&
value.empty() ) {
122 const string& strAnnotName,
123 const string& strAnnotTitle,
127 CGff2Reader( uFlags, strAnnotName, strAnnotTitle, resolver, pRL)
166 for (
const auto& lineData: readerData) {
167 const auto& line = lineData.mData;
196 HANDLERMAP typeHandlers = {
212 HANDLERMAP::iterator it = typeHandlers.find(recType);
213 if (it != typeHandlers.end()) {
214 TYPEHANDLER
handler = it->second;
215 return (this->*handler)(gff, annot);
223 if (recType ==
"gene") {
226 if (recType ==
"mrna" || recType ==
"transcript") {
238 auto featId =
mpLocations->GetFeatureIdFor(gff,
"cds");
253 auto geneFeatId =
mpLocations->GetFeatureIdFor(gff,
"gene");
272 auto transcriptFeatId =
mpLocations->GetFeatureIdFor(gff,
"transcript");
281 mpLocations->AddRecordForId(transcriptFeatId, gff);
287 mpLocations->AddRecordForId(transcriptFeatId, gff);
302 static int seqNum(1);
304 string strFeatureId =
prefix;
305 if (strFeatureId.empty()) {
310 feature.
SetId().SetLocal().SetStr(strFeatureId);
320 auto featId =
mpLocations->GetFeatureIdFor(gff,
"gene");
337 (gff.
Type() ==
"gene") ?
351 list<string> ignoredAttrs = {
352 "locus_tag",
"transcript_id"
359 auto it = attrs.
begin();
360 for (; it != attrs.end(); ++it) {
361 auto cit = std::find(ignoredAttrs.begin(), ignoredAttrs.end(), it->first);
362 if (cit != ignoredAttrs.end()) {
382 list<string> ignoredAttrs = {
387 auto it = attrs.
begin();
388 for (; it != attrs.end(); ++it) {
389 auto cit = std::find(ignoredAttrs.begin(), ignoredAttrs.end(), it->first);
390 if (cit != ignoredAttrs.end()) {
410 list<string> ignoredAttrs = {
415 auto it = attrs.
begin();
416 for (; it != attrs.end(); ++it) {
417 auto cit = std::find(ignoredAttrs.begin(), ignoredAttrs.end(), it->first);
418 if (cit != ignoredAttrs.end()) {
438 auto featId =
mpLocations->GetFeatureIdFor(gff,
"cds");
464 auto featId =
mpLocations->GetFeatureIdFor(gff,
"transcript");
489 const string& featId)
496 return featIt->second;
508 string geneSynonym =
attributes.ValueOf(
"gene_synonym");
509 if (!geneSynonym.empty()) {
510 gene.
SetSyn().push_back(geneSynonym);
512 string locusTag =
attributes.ValueOf(
"locus_tag");
513 if (!locusTag.empty()) {
531 if (!product.empty()) {
532 rna.SetExt().SetName(product);
568 string proteinId =
attributes.ValueOf(
"protein_id");
569 if (!proteinId.empty()) {
575 string ribosomalSlippage =
attributes.ValueOf(
"ribosomal_slippage");
576 if (!ribosomalSlippage.empty()) {
580 string transTable =
attributes.ValueOf(
"transl_table");
581 if (!transTable.empty()) {
584 cdr.
SetCode().Set().push_back(pGc);
601 TQual& quals = feature.
SetQual();
602 for (TQual::iterator it = quals.begin(); it != quals.end(); ) {
603 const string& qualKey = (*it)->GetQual();
608 if (qualKey ==
"locus_tag") {
612 if (qualKey ==
"old_locus_tag") {
616 if (qualKey ==
"product") {
620 if (qualKey ==
"protein_id") {
624 const string& qualVal = (*it)->GetVal();
627 it = quals.erase(it);
657 for (
auto value: values) {
658 vector< string > tags;
660 for (
auto it = tags.begin(); it != tags.end(); ++it ) {
687 for (
auto value: values) {
710 for (
auto itLocation:
mpLocations->LocationMap()) {
711 auto id = itLocation.first;
719 featSubType, itLocation.second);
724 for (
auto itLocation:
mpLocations->LocationMap()) {
725 auto id = itLocation.first;
732 switch(featSubType) {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static const struct attribute attributes[]
void xSetXrefFromTo(CSeq_feat &, CSeq_feat &)
virtual bool xAddFeatureToAnnot(CRef< CSeq_feat >, CSeq_annot &)
virtual bool xParseStructuredComment(const string &)
virtual bool xParseFeature(const string &, CSeq_annot &, ILineErrorListener *)
void xPostProcessAnnot(CSeq_annot &) override
bool x_GetFeatureById(const string &, CRef< CSeq_feat > &)
IdToFeatureMap m_MapIdToFeature
static CRef< CDbtag > x_ParseDbtag(const string &)
unsigned int mCurrentFeatureCount
static string xNormalizedAttributeValue(const CTempString &)
bool xSplitGffAttributes(const string &, vector< string > &) const
static string xNormalizedAttributeKey(const CTempString &)
const string & Type() const
const string & NormalizedType() const
void AddValue(const string &key, const string &value)
string ValueOf(const string &key) const
const MultiAttributes & Get() const
vector< string > MultiValue
bool HasValue(const string &key, const string &value="") const
CGtfAttributes mAttributes
bool xAssignAttributesFromGff(const string &, const string &)
const CGtfAttributes & GtfAttributes() const
bool xFeatureSetDataGene(const CGtfReadRecord &, CSeq_feat &)
void xFeatureAddQualifiers(const string &key, const CGtfAttributes::MultiValue &, CSeq_feat &)
bool xFeatureSetQualifiersCds(const CGtfReadRecord &record, CSeq_feat &)
bool xCreateParentCds(const CGtfReadRecord &, CSeq_annot &)
bool xUpdateAnnotFeature(const CGff2Record &, CSeq_annot &, ILineErrorListener *=nullptr) override
bool xCreateParentGene(const CGtfReadRecord &, CSeq_annot &)
unique_ptr< CGtfLocationMerger > mpLocations
bool xFeatureSetQualifiersGene(const CGtfReadRecord &record, CSeq_feat &)
bool xCreateParentMrna(const CGtfReadRecord &, CSeq_annot &)
virtual bool xUpdateAnnotCds(const CGtfReadRecord &, CSeq_annot &)
void xPostProcessAnnot(CSeq_annot &) override
bool xFeatureSetDataMrna(const CGtfReadRecord &, CSeq_feat &)
CRef< CSeq_feat > xFindFeatById(const string &)
bool xFeatureSetQualifiersRna(const CGtfReadRecord &record, CSeq_feat &)
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors=nullptr) override
Read an object from a given line reader, render it as a single Seq-annot, if possible.
virtual bool xUpdateAnnotTranscript(const CGtfReadRecord &, CSeq_annot &)
CGtfReader(unsigned int=0, const string &="", const string &="", SeqIdResolver=CReadUtil::AsSeqId, CReaderListener *=nullptr)
bool xFeatureTrimQualifiers(const CGtfReadRecord &, CSeq_feat &)
virtual bool xFeatureSetDataRna(const CGtfReadRecord &, CSeq_feat &, CSeqFeatData::ESubtype)
bool xProcessQualifierSpecialCase(const string &, const CGtfAttributes::MultiValue &, CSeq_feat &)
bool xCreateFeatureId(const CGtfReadRecord &, const string &, CSeq_feat &)
void xSetAncestorXrefs(CSeq_feat &, CSeq_feat &) override
void xProcessData(const TReaderData &, CSeq_annot &) override
bool xFeatureSetDataCds(const CGtfReadRecord &, CSeq_feat &)
@RNA_ref.hpp User-defined methods of the data storage class.
Common file reader utility functions.
SeqIdResolver mSeqIdResolve
virtual bool xParseBrowserLine(const string &, CSeq_annot &)
vector< TReaderLine > TReaderData
virtual CRef< CSeq_annot > ReadSeqAnnot(CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read an object from a given input stream, render it as a single Seq-annot.
virtual bool xIsTrackTerminator(const CTempString &)
ESubtype GetSubtype(void) const
namespace ncbi::objects::
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
void AddQualifier(const string &qual_name, const string &qual_val)
Add a qualifier to this feature.
Abstract base class for lightweight line-by-line reading.
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
void(*)(CSeq_entry_Handle seh, IWorkbench *wb, const CSerialObject &obj) handler
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
TSyn & SetSyn(void)
Assign a value to Syn data member.
void SetLocus_tag(const TLocus_tag &value)
Assign a value to Locus_tag data member.
void SetType(TType value)
Assign a value to Type data member.
TDbxref & SetDbxref(void)
Assign a value to Dbxref data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
void SetComment(const TComment &value)
Assign a value to Comment data member.
void SetProduct(TProduct &value)
Assign a value to Product data member.
void SetCode(TCode &value)
Assign a value to Code data member.
void SetExcept(TExcept value)
Assign a value to Except data member.
const TData & GetData(void) const
Get the Data member data.
void SetId(TId &value)
Assign a value to Id data member.
void SetData(TData &value)
Assign a value to Data data member.
void SetPseudo(TPseudo value)
Assign a value to Pseudo data member.
void SetExcept_text(const TExcept_text &value)
Assign a value to Except_text data member.
vector< CRef< CGb_qual > > TQual
TQual & SetQual(void)
Assign a value to Qual data member.
bool IsGenbank(void) const
Check if variant Genbank is selected.
Lightweight interface for getting lines of data with minimal memory copying.
const struct ncbi::grid::netcache::search::fields::KEY key
static const char * prefix[]