88 mCurrentFeatureCount(0),
89 mParsingAlignment(
false),
90 mAtSequenceData(
false)
124 annots.push_back(pNext);
145 for (TAnnots::iterator it = annots.begin();
146 it != annots.end(); ++it) {
241 const string& givenId)
248 string annotId(givenId);
252 if (annotId.empty()) {
256 pAnnotId->SetLocal().SetStr(annotId);
257 annot.
SetId().push_back(pAnnotId);
263 const string& strLine)
290 if (!pRecord->AssignFromGff(line)) {
324 score_values.clear();
326 if (!alignment.IsSetScore()) {
330 for (
const CRef<CScore>& score : alignment.GetScore()) {
332 if (!score->IsSetId() ||
333 !score->GetId().IsStr() ||
334 !score->IsSetValue()) {
337 const string name = score->GetId().GetStr();
340 score_values[name]->Assign(
value);
351 if (score_val1.
IsInt() &&
352 score_val2.
IsInt() &&
357 if (score_val1.
IsReal() &&
373 matching_scores.clear();
375 for (
const auto& score1 : scores_1) {
376 const string& name = score1.first;
379 const auto& it = scores_2.find(name);
380 if (it != scores_2.end() &&
382 matching_scores.insert(name);
398 for (
const string&
id : id_list) {
412 pAnnot->
SetData().SetAlign().push_back(pAlign);
420 const string& strLine,
421 list<string>& id_list,
427 if ( !pRecord->AssignFromGff(strLine) ) {
432 if ( !pRecord->GetAttribute(
"ID",
id) ) {
436 if (alignments.find(
id) == alignments.end()) {
437 id_list.push_back(
id);
445 alignments[id].push_back(alignment);
459 const list<string> score_names {
"num_ident",
"num_mismatch"};
461 for (
const string& score_name : score_names) {
462 if (score_values.find(score_name) != score_values.end()) {
463 summed_scores[score_name] = score_values.at(score_name)->GetInt();
475 const list<string> summed_score_names {
"num_ident",
"num_mismatch"};
478 x_GetAlignmentScores(alignment, new_scores);
480 for (
const string& score_name : summed_score_names) {
481 if (new_scores.
find(score_name) == new_scores.
end()) {
482 summed_scores.erase(score_name);
483 }
else if (summed_scores.find(score_name) != summed_scores.end()) {
484 summed_scores[score_name] += new_scores[score_name]->GetInt();
485 new_scores.
erase(score_name);
490 x_FindMatchingScores(common_scores,
492 matching_score_names);
494 common_scores.clear();
495 for (
string score_name : matching_score_names) {
497 common_scores[score_name]->Assign(*new_scores[score_name]);
508 if (alignment_list.empty()) {
512 if (alignment_list.size() == 1) {
513 processed = alignment_list.front();
518 const list<string> summed_score_names {
"num_ident",
"num_mismatch"};
521 list<CRef<CSeq_align>>::const_iterator align_it = alignment_list.
begin();
529 while (align_it != alignment_list.end() &&
530 !score_values.
empty()) {
538 const auto first_alignment = alignment_list.front();
539 if (first_alignment->IsSetSegs() &&
540 first_alignment->GetSegs().IsSpliced()) {
544 if (first_alignment->IsSetDim()) {
545 processed->
SetDim(first_alignment->GetDim());
548 for (
auto& kv : summed_scores) {
550 score->SetId().SetStr(kv.first);
551 score->SetValue().SetInt(kv.second);
552 processed->
SetScore().push_back(score);
555 for (
auto& kv : score_values) {
557 score->SetId().SetStr(kv.first);
558 score->SetValue().Assign(*(kv.second));
559 processed->
SetScore().push_back(score);
563 spliced->
Assign(first_alignment->GetSegs().GetSpliced());
564 processed->
SetSegs().SetSpliced(*spliced);
566 auto align_it = alignment_list.cbegin();
569 while(align_it != alignment_list.end()) {
570 const auto& spliced_seg = (*align_it)->GetSegs().GetSpliced();
571 if (spliced_seg.IsSetExons()) {
572 for (
auto exon : spliced_seg.GetExons()) {
573 processed->
SetSegs().SetSpliced().SetExons().push_back(exon);
584 for (
auto& kv : summed_scores) {
586 score->SetId().SetStr(kv.first);
587 score->SetValue().SetInt(kv.second);
588 processed->
SetScore().push_back(score);
591 for (
auto& kv : score_values) {
593 score->SetId().SetStr(kv.first);
594 score->SetValue().Assign(*(kv.second));
595 processed->
SetScore().push_back(score);
598 for (
auto current : alignment_list) {
600 new_align->Assign(*current);
601 new_align->ResetScore();
604 const string& score_name = score->GetId().GetStr();
605 if (score_values.
find(score_name) == score_values.
end()) {
606 new_align->SetScore().push_back(score);
609 processed->
SetSegs().SetDisc().Set().push_back(new_align);
680 annot.
SetData().SetAlign().push_back( pAlign ) ;
716 segment.
SetExons().push_back(pExon);
729 vector<string> targetParts;
730 if (!xGetTargetParts(gff, targetParts)) {
734 pExon->SetGenomic_start(
static_cast<TSeqPos>(gff.SeqStart()-1));
735 pExon->SetGenomic_end(
static_cast<TSeqPos>(gff.SeqStop()-1));
736 if (gff.IsSetStrand()) {
737 pExon->SetGenomic_strand(gff.Strand());
746 pExon->SetProduct_start().SetNucpos(product_start);
747 pExon->SetProduct_end().SetNucpos(product_end);
750 if (targetParts[3] ==
"-") {
753 pExon->SetProduct_strand(targetStrand);
764 if (!gff.GetAttribute(
"Target", targetInfo)) {
769 if (targetParts.size() != 4) {
779 const vector<string>& gapParts,
781 vector<int>& starts)
const
785 const size_t gapCount = gapParts.size();
787 for (
size_t i=0;
i<gapCount; ++
i) {
788 char changeType = gapParts[
i][0];
790 switch (changeType) {
795 starts.push_back(
offset+1-changeSize);
801 starts.push_back(
offset+1-changeSize);
804 starts.push_back(-1);
810 starts.push_back(-1);
812 starts.push_back(
offset+1-changeSize);
824 const vector<string>& gapParts,
826 vector<int>& starts)
const
830 const auto gapCount = gapParts.size();
832 for (
auto i=0;
i<gapCount; ++
i) {
833 char changeType = gapParts[
i][0];
835 switch (changeType) {
849 starts.push_back(-1);
855 starts.push_back(-1);
877 const size_t gapCount = gapParts.size();
879 const bool isTarget =
true;
880 vector<int> targetStarts;
898 vector<int> identStarts;
899 const bool isIdent = !isTarget;
921 for (
auto i=0;
i<gapCount; ++
i) {
922 denseg.SetStarts().push_back(targetStarts[
i]);
923 denseg.SetStarts().push_back(identStarts[
i]);
937 if (
type ==
"cDNA_match" ||
938 type ==
"EST_match" ||
939 type ==
"translated_nucleotide_match") {
953 vector<string> targetParts;
963 if (
type ==
"translated_nucleotide_match") {
970 spliced_seg.SetProduct_id(*product_id);
973 spliced_seg.SetGenomic_id(*genomic_id);
975 if (targetParts[3] ==
"+") {
979 if (targetParts[3] ==
"-") {
985 spliced_seg.SetGenomic_strand(ident_strand);
996 vector<string> gapParts;
1004 const auto gapCount = gapParts.size();
1006 for (
auto i=0;
i<gapCount; ++
i) {
1008 char changeType = gapParts[
i][0];
1010 switch (changeType) {
1015 chunk->SetMatch(changeSize);
1019 chunk->SetProduct_ins(changeSize);
1023 chunk->SetGenomic_ins(changeSize);
1027 exon->SetParts().push_back(chunk);
1030 spliced_seg.SetExons().push_back(exon);
1042 vector<string> targetParts;
1049 if (targetParts[3] ==
"-") {
1054 identStrand = gff.
Strand();
1059 vector<string> gapParts;
1067 int gapCount =
static_cast<int>(gapParts.size());
1073 denseg.SetNumseg(gapCount);
1076 denseg.SetIds().push_back(
1078 denseg.SetIds().push_back(
1095 for (
int i=0;
i < gapCount; ++
i) {
1099 for (
int i=0;
i < gapCount; ++
i) {
1100 denseg.SetStrands().push_back(targetStrand);
1101 denseg.SetStrands().push_back(identStrand);
1122 const string intScores[] = {
1143 const size_t intCount(
sizeof(intScores)/
sizeof(
string));
1144 for (
size_t i=0;
i < intCount; ++
i) {
1151 const string realScores[] = {
1156 "pct_identity_ungap",
1157 "pct_identity_gapopen_only",
1160 "comp_adjustment_method",
1161 "pct_coverage_hiqual",
1164 "inversion_merge_alignmer",
1168 const size_t realCount(
sizeof(realScores)/
sizeof(
string));
1169 for (
size_t i=0;
i < realCount; ++
i) {
1191 const string&
value,
1195 if (!pTargetFeature) {
1204 const string & strId,
1205 ncbi::CRef<CSeq_feat>& pFeature )
1211 pFeature = it->second;
1223 annot.
SetData().SetFtable().push_back(pFeature);
1234 static const char* digits =
"0123456789";
1235 string strDb, strTag;
1240 if ( strDb ==
"NCBI_gi" ) {
1246 if ( ! strTag.empty() ) {
1247 pDbtag->
SetDb( strDb );
1248 if (strTag.find_first_not_of(digits, 0) == string::npos)
1251 pDbtag->
SetTag().SetStr( strTag );
1255 pDbtag->
SetDb(
"unknown" );
1277 typedef list<CRef<CSeq_feat> > FTABLE;
1278 typedef list<string> PARENTS;
1284 for (
auto featIt =
ftable.begin(); featIt !=
ftable.end(); ++featIt) {
1289 for (
auto parentIt = parents.begin(); parentIt != parents.end(); ++parentIt) {
1290 const string& parent = *parentIt;
1300 const string& directParentStr)
1303 typedef list<string> PARENTS;
1305 string ancestorStr(directParentStr);
1307 while (!ancestorStr.empty()) {
1315 for (PARENTS::iterator it = ancestors.begin(); it != ancestors.end(); ++it) {
1316 const string& ancestorStr = *it;
1328 typedef vector<CRef<CSeqFeatXref> > XREFS;
1337 auto xrefId =
local.GetId();
1338 const XREFS& xrefs = feat.
GetXref();
1339 for (XREFS::const_iterator cit = xrefs.begin(); cit != xrefs.end(); ++cit) {
1345 if (contentId == xrefId) {
1351 if (
local.IsStr()) {
1352 auto xrefId =
local.GetStr();
1353 const XREFS& xrefs = feat.
GetXref();
1354 for (XREFS::const_iterator cit = xrefs.begin(); cit != xrefs.end(); ++cit) {
1360 if (contentId == xrefId) {
1379 pToXref->SetId(*pToId);
1380 from.
SetXref().push_back(pToXref);
1399 vector<CTempStringEx> columns;
1401 if (columns.size() < 9) {
1434 vector<string> columns;
1436 string seqId = columns[0];
1462 string lineLowerCase(line);
1472 string lineLowerCase(line);
1484 for (
const auto& lineData: readerData) {
1485 const auto& line = lineData.mData;
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void xProcessData(const TReaderData &, CSeq_annot &) override
virtual bool xIsCurrentDataType(const string &)
void xSetAncestryLine(CSeq_feat &, const string &)
void xSetXrefFromTo(CSeq_feat &, CSeq_feat &)
bool xSetDensegStarts(const vector< string > &gapParts, ENa_strand identStrand, ENa_strand targetStrand, const TSeqPos targetStart, const TSeqPos targetEnd, const CGff2Record &gff, CSeq_align::C_Segs::TDenseg &denseg)
void xGetData(ILineReader &, TReaderData &) override
bool xUpdateSplicedSegment(const CGff2Record &gff, CSpliced_seg &segment) const
virtual bool x_ProcessQualifierSpecialCase(CGff2Record::TAttrCit, CRef< CSeq_feat >)
bool xAlignmentSetSpliced_seg(const CGff2Record &, CRef< CSeq_align >)
CRef< CSerialObject > ReadObject(ILineReader &, ILineErrorListener *=nullptr) override
Read an object from a given line reader, render it as the most appropriate Genbank object.
virtual bool x_UpdateAnnotAlignment(const CGff2Record &, CSeq_annot &, ILineErrorListener *=0)
virtual bool x_CreateAlignment(const CGff2Record &gff, CRef< CSeq_align > &pAlign)
bool xGetTargetParts(const CGff2Record &gff, vector< string > &targetParts) const
virtual bool xAddFeatureToAnnot(CRef< CSeq_feat >, CSeq_annot &)
bool xNeedsNewSeqAnnot(const string &)
virtual bool xParseStructuredComment(const string &)
static bool xIsSequenceRegion(const string &line)
void x_ProcessAlignmentsGff(const list< string > &id_list, const map< string, list< CRef< CSeq_align >>> &alignments, CRef< CSeq_annot > pAnnot)
bool xAlignmentSetDenseg(const CGff2Record &, CRef< CSeq_align >)
bool xGetStartsOnMinusStrand(TSeqPos offset, const vector< string > &gapParts, bool isTarget, vector< int > &starts) const
virtual bool xParseFeature(const string &, CSeq_annot &, ILineErrorListener *)
static bool IsAlignmentData(const string &)
CRef< CAnnotdesc > m_CurrentBrowserInfo
void xPostProcessAnnot(CSeq_annot &) override
virtual CGff2Record * x_CreateRecord()
CGff2Reader(TReaderFlags iFlags, const string &name="", const string &title="", SeqIdResolver resolver=CReadUtil::AsSeqId, CReaderListener *pListener=nullptr)
bool xAlignmentSetScore(const CGff2Record &, CRef< CSeq_align >)
bool xGetStartsOnPlusStrand(TSeqPos offset, const vector< string > &gapParts, bool isTarget, vector< int > &starts) const
virtual bool xIsIgnoredFeatureType(const string &)
bool x_GetFeatureById(const string &, CRef< CSeq_feat > &)
IdToFeatureMap m_MapIdToFeature
bool xFeatureSetQualifier(const string &, const string &, CRef< CSeq_feat >)
void x_FindMatchingScores(const TScoreValueMap &scores_1, const TScoreValueMap &scores_2, set< string > &matching_scores) const
bool xUpdateSplicedAlignment(const CGff2Record &gff, CRef< CSeq_align > pAlign) const
virtual void xAssignAnnotId(CSeq_annot &, const string &="")
virtual void xSetAncestorXrefs(CSeq_feat &, CSeq_feat &)
void ReadSeqAnnots(TAnnotList &, CNcbiIstream &, ILineErrorListener *=nullptr) override
Read all objects from the given input stream, returning them as a vector of Seq-annots.
bool xAlignmentSetSegment(const CGff2Record &, CRef< CSeq_align >)
static bool xIsFastaMarker(const string &line)
static CRef< CDbtag > x_ParseDbtag(const string &)
void x_GetAlignmentScores(const CSeq_align &alignment, TScoreValueMap &score_values) const
void x_InitializeScoreSums(const TScoreValueMap score_values, map< string, TSeqPos > &summed_scores) const
virtual bool xUpdateAnnotFeature(const CGff2Record &, CSeq_annot &, ILineErrorListener *=0)
bool x_MergeAlignments(const list< CRef< CSeq_align >> &alignment_list, CRef< CSeq_align > &processed)
virtual bool x_ParseAlignmentGff(const string &strLine, list< string > &id_list, map< string, list< CRef< CSeq_align >>> &alignments)
bool xSetSplicedExon(const CGff2Record &gff, CRef< CSpliced_exon > pExon) const
virtual bool xIsIgnoredFeatureId(const string &)
virtual bool xGenerateParentChildXrefs(CSeq_annot &)
unsigned int mCurrentFeatureCount
virtual bool xAnnotPostProcess(CSeq_annot &)
bool IsInGenbankMode() const
void x_ProcessAlignmentScores(const CSeq_align &alignment, map< string, TSeqPos > &summed_scores, TScoreValueMap &common_scores) const
CRef< CSeq_entry > ReadSeqEntry(ILineReader &, ILineErrorListener *=nullptr) override
Read an object from a given line reader, render it as a single Seq-entry, if possible.
virtual void xProcessSequenceRegionPragma(const string &)
TAttributes::const_iterator TAttrCit
bool GetAttribute(const string &, string &) const
virtual bool InitializeFeature(TReaderFlags, CRef< CSeq_feat >, SeqIdResolver=nullptr) const
static void TokenizeGFF(vector< CTempStringEx > &columns, const CTempStringEx &line)
const string & Type() const
ENa_strand Strand() const
const string & Id() const
Defines and provides stubs for a general interface to a variety of file readers.
SeqIdResolver mSeqIdResolve
virtual bool xUngetLine(ILineReader &)
unsigned int m_uDataCount
unsigned int m_uLineNumber
unique_ptr< CTrackData > m_pTrackDefaults
virtual bool xParseBrowserLine(const string &, CSeq_annot &)
virtual bool xProgressInit(ILineReader &istr)
void ProcessError(CObjReaderLineException &, ILineErrorListener *)
vector< TReaderLine > TReaderData
virtual bool xGetLine(ILineReader &, string &)
virtual void xAddConversionInfo(CSeq_annot &, ILineErrorListener *)
virtual bool xParseTrackLine(const string &)
virtual CRef< CSeq_annot > ReadSeqAnnot(CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read an object from a given input stream, render it as a single Seq-annot.
struct SReaderLine { SReaderLine(unsigned int line, string data):mLine(line), mData(data) {} TReaderLine
virtual bool xIsTrackTerminator(const CTempString &)
virtual bool xIsTrackLine(const CTempString &)
void SetNamedScore(const string &id, int score)
void SetNameDesc(const string &name)
void SetTitleDesc(const string &title)
bool IsFtable(void) const
namespace ncbi::objects::
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
void AddOrReplaceQualifier(const string &qual_name, const string &qual_val)
Add a qualifier to this feature, or replace the value for the first one if it already exists.
Simple implementation of ILineReader for i(o)streams.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Abstract base class for lightweight line-by-line reading.
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
Include a standard set of the NCBI C++ Toolkit most basic headers.
bool s_CompareValues(const CScore::TValue &score_val1, const CScore::TValue &score_val2)
bool sFeatureHasXref(const CSeq_feat &feat, const CFeat_id &featId)
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string & ToLower(string &str)
Convert string to lower case – string& version.
bool IsStr(void) const
Check if variant Str is selected.
void SetTag(TTag &value)
Assign a value to Tag data member.
bool IsId(void) const
Check if variant Id is selected.
const TStr & GetStr(void) const
Get the variant data.
void SetDb(const TDb &value)
Assign a value to Db data member.
TId GetId(void) const
Get the variant data.
bool IsSetProduct_type(void) const
Check if a value has been assigned to Product_type data member.
TScore & SetScore(void)
Assign a value to Score data member.
TDenseg & SetDenseg(void)
Select the variant.
bool IsReal(void) const
Check if variant Real is selected.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
TExons & SetExons(void)
Assign a value to Exons data member.
void SetDim(TDim value)
Assign a value to Dim data member.
void SetDim(TDim value)
Assign a value to Dim data member.
void SetType(TType value)
Assign a value to Type data member.
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
void SetProduct_type(TProduct_type value)
Assign a value to Product_type data member.
TInt GetInt(void) const
Get the variant data.
bool IsInt(void) const
Check if variant Int is selected.
TSpliced & SetSpliced(void)
Select the variant.
TReal GetReal(void) const
Get the variant data.
@ eType_partial
mapping pieces together
@ eType_disc
discontinuous alignment
@ eProduct_type_transcript
TXref & SetXref(void)
Assign a value to Xref data member.
const TId & GetId(void) const
Get the Id member data.
const TLocal & GetLocal(void) const
Get the variant data.
bool IsSetXref(void) const
Cite other relevant features. Check if a value has been assigned to Xref data member.
bool IsLocal(void) const
Check if variant Local is selected.
const TId & GetId(void) const
Get the Id member data.
const TXref & GetXref(void) const
Get the Xref member data.
ENa_strand
strand of nucleic acid
TSet & SetSet(void)
Select the variant.
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
bool IsAlign(void) const
Check if variant Align is selected.
void SetData(TData &value)
Assign a value to Data data member.
TId & SetId(void)
Assign a value to Id data member.
void SetDesc(TDesc &value)
Assign a value to Desc data member.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
void SetInst(TInst &value)
Assign a value to Inst data member.
const TData & GetData(void) const
Get the Data member data.
TId & SetId(void)
Assign a value to Id data member.
@ eMol_not_set
> cdna = rna
Lightweight interface for getting lines of data with minimal memory copying.
const struct ncbi::grid::netcache::search::fields::KEY key
static const char * str(char *buf, int n)