71 const string& strGtfType,
72 const string& strRawAttributes )
78 for (
size_t u=0; u <
attributes.size(); ++u ) {
83 if (strGtfType ==
"gene") {
88 if (strGtfType ==
"transcript") {
103 if (
key.empty() &&
value.empty() ) {
122 const string& strAnnotName,
123 const string& strAnnotTitle,
127 CGff2Reader( uFlags, strAnnotName, strAnnotTitle, resolver, pRL)
166 for (
const auto& lineData: readerData) {
167 const auto& line = lineData.mData;
186 return (recType ==
"exon" || recType ==
"5utr" || recType ==
"3utr");
191 return (recType ==
"cds" || recType ==
"start_codon" || recType ==
"stop_codon");
220 if (recType ==
"gene") {
223 if (recType ==
"mrna" || recType ==
"transcript") {
233 const auto& xAttributes = x.
Get();
234 const auto& yAttributes = y.
Get();
236 auto xit = xAttributes.
begin();
237 auto yit = yAttributes.begin();
238 while (xit != xAttributes.end() && yit != yAttributes.end()) {
239 if (xit->first < yit->first) {
241 }
else if (yit->first < xit->first) {
247 result.AddValue(xit->first,
"");
252 set_intersection(begin(xVals), end(xVals),
253 begin(yVals), end(yVals),
254 inserter(commonVals, commonVals.
begin()));
255 if (!commonVals.
empty()) {
256 for (
const auto&
val : commonVals) {
278 auto featId =
mpLocations->GetFeatureIdFor(gff,
"cds");
287 const string& qualName,
293 if (!values.
empty()) {
302 const string& parentType,
312 auto parentFeatId =
mpLocations->GetFeatureIdFor(record, parentType);
315 if (parentType ==
"gene") {
334 if (
auto childIt = parentIt->second.find(recType);
335 childIt != parentIt->second.end()) {
337 auto& childAttributes = childIt->second;
342 childAttributes = accumulatedAttributes;
346 if (parentType ==
"gene") {
381 const string& prefix,
385 static int seqNum(1);
387 string strFeatureId = prefix;
388 if (strFeatureId.empty()) {
393 feature.
SetId().SetLocal().SetStr(strFeatureId);
403 auto featId =
mpLocations->GetFeatureIdFor(gff,
"gene");
422 (gff.
Type() ==
"gene") ?
438 "locus_tag",
"transcript_id",
"gene"
483 if (ignoredAttrs.
find(name) != ignoredAttrs.
end()) {
517 auto featId =
mpLocations->GetFeatureIdFor(gff,
"cds");
530 if (!transcriptId.empty()) {
531 if (
auto geneId = gff.
GeneKey(); !geneId.empty()) {
533 if (it->second != geneId) {
534 string msg =
"Gene id '" + geneId +
"' for transcript '" + transcriptId +
535 "' conflicts with previously-assigned '" + it->second +
"'";
557 auto featId =
mpLocations->GetFeatureIdFor(gff,
"transcript");
586 const string& featId)
593 return featIt->second;
605 string geneSynonym =
attributes.ValueOf(
"gene_synonym");
606 if (!geneSynonym.empty()) {
607 gene.
SetSyn().push_back(geneSynonym);
609 string locusTag =
attributes.ValueOf(
"locus_tag");
610 if (!locusTag.empty()) {
615 if (!locus.empty()) {
633 if (!product.empty()) {
634 rna.SetExt().SetName(product);
670 string proteinId =
attributes.ValueOf(
"protein_id");
671 if (!proteinId.empty()) {
677 string ribosomalSlippage =
attributes.ValueOf(
"ribosomal_slippage");
678 if (!ribosomalSlippage.empty()) {
682 string transTable =
attributes.ValueOf(
"transl_table");
683 if (!transTable.empty()) {
686 cdr.
SetCode().Set().push_back(pGc);
711 auto& quals = feature.
SetQual();
712 for (
auto it = quals.begin(); it != quals.end(); ) {
713 const string& qualKey = (*it)->GetQual();
716 qualKey ==
"locus_tag" ||
717 qualKey ==
"old_locus_tag" ||
718 qualKey ==
"product" ||
719 qualKey ==
"protein_id") {
724 const string& qualVal = (*it)->GetVal();
727 it = quals.erase(it);
747 auto& quals = feature.
SetQual();
748 for (
auto it = quals.begin(); it != quals.end(); ) {
749 const string& qualKey = (*it)->GetQual();
752 qualKey ==
"locus_tag" ||
753 qualKey ==
"old_locus_tag" ||
754 qualKey ==
"product" ||
755 qualKey ==
"protein_id") {
760 const string& qualVal = (*it)->GetVal();
761 if (!prevAttributes.
HasValue(qualKey, qualVal)) {
768 it = quals.erase(it);
800 for (
auto value: values) {
801 vector< string > tags;
803 for (
auto it = tags.begin(); it != tags.end(); ++it ) {
831 for (
const auto& pQual : feature.
GetQual()) {
832 if (pQual->GetQual() ==
key) {
833 existingVals.
insert(pQual->GetVal());
837 for (
auto value: values) {
862 for (
auto itLocation:
mpLocations->LocationMap()) {
863 auto id = itLocation.first;
871 featSubType, itLocation.second);
876 for (
auto itLocation:
mpLocations->LocationMap()) {
877 auto id = itLocation.first;
884 switch(featSubType) {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void xSetXrefFromTo(CSeq_feat &, CSeq_feat &)
virtual bool xAddFeatureToAnnot(CRef< CSeq_feat >, CSeq_annot &)
virtual bool xParseStructuredComment(const string &)
virtual bool xParseFeature(const string &, CSeq_annot &, ILineErrorListener *)
void xPostProcessAnnot(CSeq_annot &) override
bool x_GetFeatureById(const string &, CRef< CSeq_feat > &)
IdToFeatureMap m_MapIdToFeature
static CRef< CDbtag > x_ParseDbtag(const string &)
unsigned int mCurrentFeatureCount
static string xNormalizedAttributeValue(const CTempString &)
bool xSplitGffAttributes(const string &, vector< string > &) const
static string xNormalizedAttributeKey(const CTempString &)
const string & Type() const
const string & NormalizedType() const
void AddValue(const string &key, const string &value)
string ValueOf(const string &key) const
void GetValues(const string &key, MultiValue &values) const
const MultiAttributes & Get() const
bool HasValue(const string &key, const string &value="") const
string TranscriptId() const
CGtfAttributes mAttributes
bool xAssignAttributesFromGff(const string &, const string &)
const CGtfAttributes & GtfAttributes() const
bool xFeatureSetDataGene(const CGtfReadRecord &, CSeq_feat &)
void xFeatureAddQualifiers(const string &key, const CGtfAttributes::MultiValue &, CSeq_feat &)
bool xFeatureSetQualifiers(const CGtfReadRecord &record, const set< string > &ignoredAttrs, CSeq_feat &)
bool xFeatureSetQualifiersCds(const CGtfReadRecord &record, CSeq_feat &)
bool xCreateParentCds(const CGtfReadRecord &, CSeq_annot &)
void xCheckForGeneIdConflict(const CGtfReadRecord &record)
bool xUpdateAnnotFeature(const CGff2Record &, CSeq_annot &, ILineErrorListener *=nullptr) override
bool xCreateParentGene(const CGtfReadRecord &, CSeq_annot &)
unique_ptr< CGtfLocationMerger > mpLocations
bool xFeatureSetQualifiersGene(const CGtfReadRecord &record, CSeq_feat &)
bool xCreateParentMrna(const CGtfReadRecord &, CSeq_annot &)
virtual bool xUpdateAnnotCds(const CGtfReadRecord &, CSeq_annot &)
void xPostProcessAnnot(CSeq_annot &) override
bool xFeatureSetDataMrna(const CGtfReadRecord &, CSeq_feat &)
CRef< CSeq_feat > xFindFeatById(const string &)
bool xFeatureSetQualifiersRna(const CGtfReadRecord &record, CSeq_feat &)
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors=nullptr) override
Read an object from a given line reader, render it as a single Seq-annot, if possible.
virtual bool xUpdateAnnotTranscript(const CGtfReadRecord &, CSeq_annot &)
CGtfReader(unsigned int=0, const string &="", const string &="", SeqIdResolver=CReadUtil::AsSeqId, CReaderListener *=nullptr)
bool xFeatureTrimQualifiers(const CGtfReadRecord &, CSeq_feat &)
map< string, string > m_TranscriptToGeneMap
bool xUpdateAnnotParent(const CGtfReadRecord &record, const string &parentType, CSeq_annot &annot)
TParentChildQualMap m_ParentChildQualMap
void xPropagateQualToParent(const CGtfReadRecord &record, const string &qualName, CSeq_feat &parent)
virtual bool xFeatureSetDataRna(const CGtfReadRecord &, CSeq_feat &, CSeqFeatData::ESubtype)
bool xProcessQualifierSpecialCase(const string &, const CGtfAttributes::MultiValue &, CSeq_feat &)
bool xCreateFeatureId(const CGtfReadRecord &, const string &, CSeq_feat &)
void xSetAncestorXrefs(CSeq_feat &, CSeq_feat &) override
void xProcessData(const TReaderData &, CSeq_annot &) override
bool xFeatureSetDataCds(const CGtfReadRecord &, CSeq_feat &)
@RNA_ref.hpp User-defined methods of the data storage class.
Common file reader utility functions.
unique_ptr< CReaderMessageHandler > m_pMessageHandler
SeqIdResolver mSeqIdResolve
unsigned int m_uLineNumber
virtual bool xParseBrowserLine(const string &, CSeq_annot &)
vector< TReaderLine > TReaderData
virtual CRef< CSeq_annot > ReadSeqAnnot(CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read an object from a given input stream, render it as a single Seq-annot.
virtual bool xIsTrackTerminator(const CTempString &)
ESubtype GetSubtype(void) const
namespace ncbi::objects::
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
void AddQualifier(const string &qual_name, const string &qual_val)
Add a qualifier to this feature.
Abstract base class for lightweight line-by-line reading.
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator find(const key_type &key) const
const_iterator end() const
Include a standard set of the NCBI C++ Toolkit most basic headers.
static const struct attribute attributes[]
@ eDiag_Error
Error message.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
TSyn & SetSyn(void)
Assign a value to Syn data member.
void SetLocus(const TLocus &value)
Assign a value to Locus data member.
void SetLocus_tag(const TLocus_tag &value)
Assign a value to Locus_tag data member.
void SetType(TType value)
Assign a value to Type data member.
TDbxref & SetDbxref(void)
Assign a value to Dbxref data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
void SetComment(const TComment &value)
Assign a value to Comment data member.
void SetProduct(TProduct &value)
Assign a value to Product data member.
const TQual & GetQual(void) const
Get the Qual member data.
void SetCode(TCode &value)
Assign a value to Code data member.
void SetExcept(TExcept value)
Assign a value to Except data member.
const TData & GetData(void) const
Get the Data member data.
void SetId(TId &value)
Assign a value to Id data member.
void SetData(TData &value)
Assign a value to Data data member.
void SetPseudo(TPseudo value)
Assign a value to Pseudo data member.
void SetExcept_text(const TExcept_text &value)
Assign a value to Except_text data member.
TQual & SetQual(void)
Assign a value to Qual data member.
bool IsGenbank(void) const
Check if variant Genbank is selected.
static bool s_IsCDSType(const string &recType)
static bool s_IsTranscriptType(const string &recType)
CGtfAttributes g_GetIntersection(const CGtfAttributes &x, const CGtfAttributes &y)
Lightweight interface for getting lines of data with minimal memory copying.
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)