81 if (recordType ==
"cds") {
82 string cdsId = parentId;
106 const string& strRawInput )
120 if (
m_strType ==
"pseudogenic_transcript") {
153 const string& strRawKey )
176 return "Derives_from";
186 return "Ontology_term";
245 for (
const auto& lineData: readerData) {
246 const auto& line = lineData.mData;
279 annot.
SetData().SetAlign().push_back(pAlign);
299 if (!pRecord->AssignFromGff(line)) {
317 if (pRecord->IsMultiParent()) {
322 "This GFF3 reader does not support multiparented features"));
339 const string& strLine)
350 if ( !pRecord->AssignFromGff(strLine) ) {
355 if ( !pRecord->GetAttribute(
"ID",
id) ) {
359 if (alignments.find(
id) == alignments.end()) {
368 alignments[id].push_back(alignment);
393 if (recType ==
"exon" || recType ==
"five_prime_utr" || recType ==
"three_prime_utr") {
396 if (recType ==
"cds") {
399 if (recType ==
"gene") {
409 if (recType ==
"region") {
421 const string& mrnaId,
427 string message =
"Bad data line: ";
428 message += exon.
Type();
429 message +=
" referring to non-existent parent feature.";
436 const CSeq_interval& containingInt = cit->second.GetObject();
440 containedInt.
GetTo() > containingInt.
GetTo()) {
441 string message =
"Bad data line: ";
442 message += exon.
Type();
443 message +=
" extends beyond parent feature.";
500 list<string> parents;
550 if (!parentId.empty()) {
563 const string& parent,
572 const string &grandParentsStr = pParent->
GetNamedQual(
"Parent");
573 list<string> grandParents;
574 NStr::Split(grandParentsStr,
",", grandParents, 0);
575 for (list<string>::const_iterator gpcit = grandParents.begin();
576 gpcit != grandParents.end(); ++gpcit) {
587 pGrandParentXref->SetId(*pGrandParentId);
588 pFeature->
SetXref().push_back(pGrandParentXref);
594 pGrandChildXref->SetId(*pGrandChildId);
595 pGrandParent->
SetXref().push_back(pGrandChildXref);
602 const string& parent,
616 pParentXref->SetId(*pParentId);
617 pChild->
SetXref().push_back(pParentXref);
623 pChildXref->SetId(*pChildId);
624 pParent->
SetXref().push_back(pChildXref);
646 "Bad data line: Duplicate feature ID \"" +
id +
"\".");
654 underConstruction = it->second;
671 string featType = record.
Type();
672 if (featType ==
"stop_codon_read_through" || featType ==
"selenocysteine") {
678 "Bad data line: Unassigned code break.");
686 "Bad data line: Code break assigned to missing feature.");
699 pCodeBreak->
SetAa().SetNcbieaa(
700 (featType ==
"selenocysteine") ?
'U' :
'X');
703 list< CRef< CCode_break > >& codeBreaks = cdRegion.
SetCode_break();
704 codeBreaks.push_back(pCodeBreak);
744 list<string> parents;
746 for (list<string>::const_iterator cit = parents.begin();
747 cit != parents.end();
753 "Bad data line: mRNA record with bad parent assignment.");
769 "Internal error: Unexpected location type.");
775 list<CGff2Record> pendingExons;
777 for (
auto exonRecord: pendingExons) {
815 list<CGff2Record> pendingExons;
849 annot.
SetData().SetFtable().push_back( pFeature ) ;
869 if (it->second == parentId) {
875 "Bad data line: CDS record with bad parent assignments.");
892 const string& featureType)
899 static const char*
const ignoredTypesAlways_[] = {
905 STRINGARRAY::const_iterator cit = ignoredTypesAlways.find(ftype);
906 if (cit != ignoredTypesAlways.end()) {
914 static const char*
const specialTypesGenbank_[] = {
916 "autocatalytically_spliced_intron",
918 "hammerhead_ribozyme",
932 "stop_codon_read_through",
939 static const char*
const ignoredTypesGenbank_[] = {
940 "apicoplast_chromosome",
943 "chloroplast_chromosome",
944 "chromoplast_chromosome",
947 "cyanelle_chromosome",
950 "expressed_sequence_match",
952 "leucoplast_chromosome",
953 "macronuclear_chromosome",
956 "micronuclear_chromosome",
957 "mitochondrial_chromosome",
958 "nuclear_chromosome",
959 "nucleomorphic_chromosome",
961 "nucleotide_to_protein_match",
962 "partial_genomic_sequence_assembly",
968 "translated_nucleotide_match",
973 cit = specialTypesGenbank.find(ftype);
974 if (cit != specialTypesGenbank.end()) {
978 cit = ignoredTypesGenbank.find(ftype);
979 if (cit != ignoredTypesGenbank.end()) {
997 const auto it = attrs.
find(
"ID");
998 if (it != attrs.end()) {
1007 const string& rnaId,
1021 const string& rnaId,
1022 list<CGff2Record>& pendingExons)
1050 "Bad data line: Record references non-existent Parent=" + it.first);
1055 for (
auto itLocation:
mpLocations->LocationMap()) {
1056 auto id = itLocation.first;
1063 mpLocations->MergeLocation(pNewLoc, frame, itLocation.second);
1067 auto& cdrData = pFeature->
SetData().SetCdregion();
1078 const string& pragma)
1082 vector<string> tokens;
1084 if (tokens.size() < 2) {
1088 "Bad sequence-region pragma - ignored.");
1091 if (tokens.size() >= 4) {
1099 "Bad sequence-region pragma - ignored.");
1103 mpLocations->SetSequenceSize(tokens[1], sequenceSize);
1105 mpLocations->SetSequenceSize(resolvedId, sequenceSize);
1118 const string& seqId)
const
1121 return mpLocations->GetSequenceSize(seqId);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static void fatal(const char *msg,...)
virtual bool x_CreateAlignment(const CGff2Record &gff, CRef< CSeq_align > &pAlign)
virtual bool xParseStructuredComment(const string &)
static bool IsAlignmentData(const string &)
CRef< CAnnotdesc > m_CurrentBrowserInfo
void xPostProcessAnnot(CSeq_annot &) override
bool x_GetFeatureById(const string &, CRef< CSeq_feat > &)
IdToFeatureMap m_MapIdToFeature
bool xFeatureSetQualifier(const string &, const string &, CRef< CSeq_feat >)
bool x_MergeAlignments(const list< CRef< CSeq_align >> &alignment_list, CRef< CSeq_align > &processed)
virtual bool xIsIgnoredFeatureId(const string &)
unsigned int mCurrentFeatureCount
bool IsInGenbankMode() const
bool GetAttribute(const string &, string &) const
virtual bool UpdateFeature(TReaderFlags, CRef< CSeq_feat >, SeqIdResolver=nullptr) const
virtual bool AssignFromGff(const string &)
virtual bool InitializeFeature(TReaderFlags, CRef< CSeq_feat >, SeqIdResolver=nullptr) const
const TAttributes & Attributes() const
static string xNormalizedAttributeKey(const CTempString &)
string x_NormalizedAttributeKey(const string &)
bool AssignFromGff(const string &) override
static string xNextGenericId()
void xPostProcessAnnot(CSeq_annot &) override
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors=nullptr) override
Read an object from a given line reader, render it as a single Seq-annot, if possible.
virtual bool xFindFeatureUnderConstruction(const CGff2Record &, CRef< CSeq_feat > &)
bool xParseFeature(const string &, CSeq_annot &, ILineErrorListener *) override
virtual bool xUpdateAnnotCds(const CGff2Record &, CRef< CSeq_feat >, CSeq_annot &, ILineErrorListener *)
bool xUpdateAnnotFeature(const CGff2Record &, CSeq_annot &, ILineErrorListener *) override
virtual bool xUpdateAnnotGeneric(const CGff2Record &, CRef< CSeq_feat >, CSeq_annot &, ILineErrorListener *)
virtual bool xUpdateAnnotRegion(const CGff2Record &, CRef< CSeq_feat >, CSeq_annot &, ILineErrorListener *)
map< string, string > mIdToSeqIdMap
CGff3ReadRecord * x_CreateRecord() override
virtual bool xUpdateAnnotExon(const CGff2Record &, CRef< CSeq_feat >, CSeq_annot &, ILineErrorListener *)
void xVerifyExonLocation(const string &, const CGff2Record &)
virtual void xAddPendingExon(const string &, const CGff2Record &)
virtual bool xJoinLocationIntoRna(const CGff2Record &, ILineErrorListener *)
virtual bool xFeatureSetXrefParent(const string &, CRef< CSeq_feat >)
void xProcessAlignmentData(CSeq_annot &pAnnot)
TSeqPos SequenceSize() const
virtual bool xUpdateAnnotRna(const CGff2Record &, CRef< CSeq_feat >, CSeq_annot &, ILineErrorListener *)
map< string, string > mCdsParentMap
virtual bool xInitializeFeature(const CGff2Record &, CRef< CSeq_feat >)
bool xReadInit() override
CGff3Reader(TReaderFlags uFlags, const string &name="", const string &title="", SeqIdResolver resolver=CReadUtil::AsSeqId, CReaderListener *=nullptr)
SAlignmentData mAlignmentData
shared_ptr< CGff3LocationMerger > mpLocations
PENDING_EXONS mPendingExons
TSeqPos GetSequenceSize(const string &) const
bool xIsIgnoredFeatureType(const string &) override
virtual void xValidateAnnot(const CSeq_annot &) override
string xMakeRecordId(const CGff2Record &record)
map< string, CRef< CSeq_interval > > mMrnaLocs
void xProcessSequenceRegionPragma(const string &pragma) override
static unsigned int msGenericIdCounter
void xVerifyCdsParents(const CGff2Record &)
virtual bool xParseAlignment(const string &strLine)
virtual bool xUpdateAnnotGene(const CGff2Record &, CRef< CSeq_feat >, CSeq_annot &, ILineErrorListener *)
virtual void xGetPendingExons(const string &, list< CGff2Record > &)
virtual bool xFeatureSetXrefGrandParent(const string &, CRef< CSeq_feat >)
void xProcessData(const TReaderData &, CSeq_annot &) override
bool xAddFeatureToAnnot(CRef< CSeq_feat >, CSeq_annot &) override
CRef< CSeq_loc > GetSeqLoc(TReaderFlags, SeqIdResolver seqidresolve=nullptr) const
const string & Type() const
ENa_strand Strand() const
const string & Id() const
virtual void SetType(const string &recType)
const string & NormalizedType() const
static CObjReaderLineException * Create(EDiagSev eSeverity, unsigned int uLine, const std::string &strMessage, EProblem eProblem=eProblem_GeneralParsingError, const std::string &strSeqId=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), CObjReaderLineException::EErrCode eErrCode=eFormat, const TVecOfLines &vecOfOtherLines=TVecOfLines())
Please use this instead of the constructor because the ctor is protected.
Common file reader utility functions.
unique_ptr< CReaderMessageHandler > m_pMessageHandler
SeqIdResolver mSeqIdResolve
unsigned int m_uLineNumber
virtual bool xParseBrowserLine(const string &, CSeq_annot &)
void ProcessError(CObjReaderLineException &, ILineErrorListener *)
vector< TReaderLine > TReaderData
virtual CRef< CSeq_annot > ReadSeqAnnot(CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read an object from a given input stream, render it as a single Seq-annot.
ESubtype GetSubtype(void) const
void SetNameDesc(const string &name)
void SetTitleDesc(const string &title)
namespace ncbi::objects::
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
static bool SoTypeToFeature(const string &, CSeq_feat &, bool=false)
static string ResolveSoAlias(const string &)
Abstract base class for lightweight line-by-line reading.
container_type::iterator iterator
const_iterator end() const
const_iterator find(const key_type &key) const
Include a standard set of the NCBI C++ Toolkit most basic headers.
unsigned int TSeqPos
Type for sequence locations and lengths.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Fatal
Fatal error – guarantees exit(or abort)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int StringToNonNegativeInt(const CTempString str, TStringToNumFlags flags=0)
Convert string to non-negative integer value.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
void SetAa(TAa &value)
Assign a value to Aa data member.
TXref & SetXref(void)
Assign a value to Xref data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
const TId & GetId(void) const
Get the Id member data.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
TCode_break & SetCode_break(void)
Assign a value to Code_break data member.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
bool IsRna(void) const
Check if variant Rna is selected.
@ eFrame_not_set
not set, code uses one
void SetTo(TTo value)
Assign a value to To data member.
void SetId(TId &value)
Assign a value to Id data member.
TFrom GetFrom(void) const
Get the From member data.
E_Choice Which(void) const
Which variant is currently selected.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
const TInt & GetInt(void) const
Get the variant data.
void SetStrand(TStrand value)
Assign a value to Strand data member.
void SetData(TData &value)
Assign a value to Data data member.
void SetDesc(TDesc &value)
Assign a value to Desc data member.
@ e_not_set
No variant selected.
Lightweight interface for getting lines of data with minimal memory copying.
constexpr bool empty(list< Ts... >) noexcept
#define DEFINE_STATIC_ARRAY_MAP(Type, Var, Array)
MAP_ID_TO_ALIGN mAlignments