77 const string cdstr_start =
"(pos:";
78 const string cdstr_div =
",aa:";
79 const string cdstr_end =
")";
85 size_t pos_start = cdstr_start.length();
86 size_t pos_stop =
str.find(cdstr_div);
87 string posstr =
str.substr(pos_start, pos_stop-pos_start);
88 string aaa =
str.substr(pos_stop+cdstr_div.length());
89 aaa = aaa.substr(0, aaa.length()-cdstr_end.length());
91 const string posstr_compl =
"complement(";
94 posstr = posstr.substr(posstr_compl.length());
95 posstr = posstr.substr(0, posstr.length()-1);
98 const string posstr_div =
"..";
99 size_t pos_div = posstr.find(posstr_div);
100 if (pos_div == string::npos) {
116 pCodeBreak->
SetLoc().SetInt().SetId(
id);
117 pCodeBreak->
SetLoc().SetInt().SetFrom(from);
118 pCodeBreak->
SetLoc().SetInt().SetTo(to);
119 pCodeBreak->
SetLoc().SetInt().SetStrand(strand);
120 pCodeBreak->
SetAa().SetNcbieaa(aacode);
126 const string& genome,
132 GENOME_MAP& sGenomeMap = *s_GenomeMap;
133 if (sGenomeMap.empty()) {
157 GENOME_MAP::const_iterator cit = sGenomeMap.find(genome);
158 if (cit != sGenomeMap.end()) {
166 vector<CTempStringEx>& columns,
175 if (columns.size() == 9)
188 if (columns.size() == 5) {
196 if (!isNumericCol3 && isNumericCol4 && isNumericNext) {
199 size_t sizeof_col1 = (columns[2].begin() + columns[2].size() - columns[1].begin());
200 size_t startof_col1 = columns[1].begin() - in_line.
begin();
201 columns[1] = in_line.
substr(startof_col1, sizeof_col1);
202 columns[2] = columns[3];
203 columns[3] = columns[4];
208 columns.push_back(
next);
211 columns.push_back(in_line.
substr(current));
215 const string& strRawInput )
218 vector< CTempStringEx > columns;
221 if ( columns.size() < 9 ) {
226 "Bad data line: not enough columns",
243 string message =
"Bad data line: Both \"start\" and \"stop\" must be positive integers.";
251 string message =
"Bad data line: location start is greater than location stop (start="
252 +
string(columns[3]) +
", stop=" +
string(columns[4]) +
").";
260 if ( columns[5] !=
"." && columns[5] !=
"NA" ) {
265 switch (columns[6][0]) {
284 if ( columns[7] ==
"0" ) {
288 if ( columns[7] ==
"1" ) {
292 if ( columns[7] ==
"2" ) {
306 const string& strKey,
307 string& strValue )
const
310 TAttrCit it = m_Attributes.find( strKey );
311 if ( it == m_Attributes.end() ) {
315 strValue = it->second;
321 const string& strKey,
322 list<string>& values )
const
326 TAttrCit it = m_Attributes.find(strKey);
327 if (it == m_Attributes.end()) {
331 return !values.empty();
349 strValue = strValue.
substr( 1, string::npos );
362 bool inQuotes =
false;
364 for (;
i <
input.length();
i++)
401 bool inQuotes =
false;
439 semicolon =
input.length();
442 equal =
min(space, semicolon);
453 const string& strType,
454 const string& strRawAttributes )
472 const string& strRawAttributes,
476 string strCurrAttrib;
477 bool inQuotes =
false;
479 ITERATE (
string, iterChar, strRawAttributes) {
481 if (*iterChar ==
'\"') {
484 strCurrAttrib += *iterChar;
486 if (*iterChar ==
';') {
488 if(!strCurrAttrib.empty())
490 strCurrAttrib.clear();
492 if(*iterChar ==
'\"') {
495 strCurrAttrib += *iterChar;
501 if (!strCurrAttrib.empty())
517 return xMigrateAttributes(
flags, pFeature);
527 auto subtype = pFeature->GetData().GetSubtype();
528 auto recType = NormalizedType();
531 pFeature->SetLocation().SetMix().AddSeqLoc(*pAddLoc);
532 if (!xUpdateFeatureData(
flags, pFeature)) {
537 GetAttribute(
"ID", cdsId);
538 if (!cdsId.empty()) {
539 pFeature->AddOrReplaceQualifier(
"ID", cdsId);
552 const CSeq_loc& target = pFeature->GetLocation();
570 if (curStart == newStart) {
571 pFeature->SetData().SetCdregion().SetFrame(Phase());
578 if (curStop == newStop) {
579 pFeature->SetData().SetCdregion().SetFrame(Phase());
594 TAttributes attrs_left(m_Attributes.begin(), m_Attributes.end());
597 it = attrs_left.
find(
"Note");
598 if (it != attrs_left.
end()) {
599 pFeature->SetComment(xNormalizedAttributeValue(it->second));
600 attrs_left.
erase(it);
603 it = attrs_left.
find(
"Dbxref");
604 if (it != attrs_left.
end()) {
605 vector<string> dbxrefs;
607 for (vector<string>::iterator it1 = dbxrefs.begin(); it1 != dbxrefs.end();
609 string dbtag = xNormalizedAttributeValue(*it1);
612 attrs_left.
erase(it);
615 it = attrs_left.
find(
"Is_circular");
616 if (it != attrs_left.
end()) {
617 if (pFeature->GetData().IsBiosrc()) {
620 pSubSource->
SetName(
"is_circular");
621 pFeature->SetData().SetBiosrc().SetSubtype().push_back(pSubSource);
625 it = attrs_left.
find(
"Parent");
626 if (it != attrs_left.
end()) {
627 if (
Type() !=
"CDS") {
628 xMigrateAttributeSingle(
629 attrs_left,
"Parent", pFeature,
"Parent",
flags);
631 else attrs_left.
erase(it);
634 it = attrs_left.
find(
"Name");
635 if (it != attrs_left.
end()) {
636 auto soType = NormalizedType();
638 GetAttribute(
"gbkey", gbKey);
639 if (soType ==
"cds" || soType ==
"mrna" || soType ==
"biological_region" ||
640 (soType ==
"region" && gbKey !=
"Src")) {
641 attrs_left.
erase(it);
645 it = attrs_left.
find(
"codon_start");
646 if (it != attrs_left.
end()) {
649 switch(codon_start) {
662 attrs_left.
erase(it);
666 it = attrs_left.
find(
"description");
667 if (it != attrs_left.
end()) {
668 if (pFeature->GetData().IsGene()) {
669 string description = xNormalizedAttributeValue(it->second);
670 pFeature->SetData().SetGene().SetDesc(description);
671 attrs_left.
erase(it);
675 it = attrs_left.
find(
"exception");
676 if (it != attrs_left.
end()) {
677 pFeature->SetExcept(
true);
678 pFeature->SetExcept_text(xNormalizedAttributeValue(it->second));
679 attrs_left.
erase(it);
682 it = attrs_left.
find(
"exon_number");
683 if (it != attrs_left.
end()) {
686 pQual->
SetVal(it->second);
687 pFeature->SetQual().push_back(pQual);
688 attrs_left.
erase(it);
691 it = attrs_left.
find(
"experiment");
692 if (it != attrs_left.
end()) {
693 const string strExperimentDefault(
694 "experimental evidence, no additional details recorded" );
695 string value = xNormalizedAttributeValue(it->second);
696 if (
value == strExperimentDefault) {
703 pFeature->SetQual().push_back(pQual);
705 attrs_left.
erase(it);
708 it = attrs_left.
find(
"gbkey");
709 if (it != attrs_left.
end()) {
710 attrs_left.
erase(it);
713 it = attrs_left.
find(
"gene");
714 if (it != attrs_left.
end()) {
715 if (pFeature->GetData().IsGene()) {
716 list<string> geneValues;
719 list<string>::const_iterator cit = geneValues.begin();
720 if (cit != geneValues.end()) {
721 value = xNormalizedAttributeValue(*cit);
722 pFeature->SetData().SetGene().SetLocus(
value);
724 while (cit != geneValues.end()) {
725 value = xNormalizedAttributeValue(*cit);
726 pFeature->SetData().SetGene().SetSyn().push_back(
value);
730 attrs_left.
erase(it);
734 it = attrs_left.
find(
"genome");
735 if (it != attrs_left.
end()) {
736 if (pFeature->GetData().IsBiosrc()) {
737 pFeature->SetData().SetBiosrc().SetGenome(
739 attrs_left.
erase(it);
743 it = attrs_left.
find(
"gene_synonym");
744 if (it != attrs_left.
end()) {
745 if (pFeature->GetData().IsGene()) {
746 vector<string> synonyms;
748 for (vector<string>::iterator it1 = synonyms.begin(); it1 != synonyms.end();
750 string synonym = xNormalizedAttributeValue(*it1);
751 pFeature->SetData().SetGene().SetSyn().push_back(synonym);
754 attrs_left.
erase(it);
757 it = attrs_left.
find(
"inference");
758 if (it != attrs_left.
end()) {
759 auto inferenceVal = it->second;
760 const string strInferenceDefault(
761 "non-experimental evidence, no additional details recorded" );
762 auto value = xNormalizedAttributeValue(inferenceVal);
763 if (
value == strInferenceDefault) {
767 vector<string> inferenceVals;
769 for (
auto val: inferenceVals) {
772 pQual->
SetVal(xNormalizedAttributeValue(
val));
773 pFeature->SetQual().push_back(pQual);
776 attrs_left.
erase(it);
779 it = attrs_left.
find(
"locus_tag");
780 if (it != attrs_left.
end()) {
781 if (pFeature->GetData().IsGene()) {
782 string tag = xNormalizedAttributeValue(it->second);
783 pFeature->SetData().SetGene().SetLocus_tag(
tag);
785 attrs_left.
erase(it);
788 it = attrs_left.
find(
"map");
789 if (it != attrs_left.
end()) {
790 if (pFeature->GetData().IsGene()) {
791 pFeature->SetData().SetGene().SetMaploc(
792 xNormalizedAttributeValue(it->second));
796 it = attrs_left.
find(
"ncrna_class");
797 if (it != attrs_left.
end()) {
799 pFeature->SetData().SetRna().SetExt().SetGen().SetClass(
800 xNormalizedAttributeValue(it->second));
802 attrs_left.
erase(it);
805 it = attrs_left.
find(
"partial");
806 if (it != attrs_left.
end()) {
808 pFeature->AddQualifier(
"partial", it->second);
810 attrs_left.
erase(it);
813 it = attrs_left.
find(
"pseudo");
814 if (it != attrs_left.
end()) {
815 pFeature->SetPseudo(
true);
816 attrs_left.
erase(it);
819 it = attrs_left.
find(
"regulatory_class");
820 if (it != attrs_left.
end()) {
821 if (pFeature->GetData().IsImp() && (pFeature->GetData().GetImp().GetKey() ==
"regulatory")) {
823 pFeature->RemoveQualifier(
"regulatory_class");
824 pFeature->AddQualifier(
"regulatory_class", it->second);
825 attrs_left.
erase(it);
830 it = attrs_left.
find(
"rpt_type");
831 if (it != attrs_left.
end()) {
833 {
"microsatellite",
"microsatellite"},
834 {
"minisatellite",
"minisatellite"},
835 {
"satellite_DNA",
"satellite"},
836 {
"satellite",
"satellite"},
838 pFeature->RemoveQualifier(
"rpt_type");
839 pFeature->RemoveQualifier(
"satellite");
840 auto rpt_type = it->second;
841 auto satelliteIt = satellites.
find(rpt_type);
842 if (satelliteIt != satellites.
end()) {
843 pFeature->AddQualifier(
"satellite", satelliteIt->second);
846 pFeature->AddQualifier(
"rpt_type", rpt_type);
848 attrs_left.
erase(it);
851 it = attrs_left.
find(
"satellite");
852 if (it != attrs_left.
end()) {
853 if (pFeature->GetData().IsImp() && pFeature->GetData().GetImp().GetKey() ==
"repeat_region") {
854 attrs_left.
erase(it);
858 it = attrs_left.
find(
"transl_except");
859 if (it != attrs_left.
end()) {
860 if (pFeature->GetData().IsCdregion()) {
861 vector<string> codebreaks;
863 for (vector<string>::iterator it1 = codebreaks.begin();
864 it1 != codebreaks.end(); ++it1 ) {
865 string breakData = xNormalizedAttributeValue(*it1);
868 breakData, *pBreakId,
flags);
870 pFeature->SetData().SetCdregion().SetCode_break().push_back(
874 attrs_left.
erase(it);
878 it = attrs_left.
find(
"transl_table");
879 if (it != attrs_left.
end()) {
880 if (pFeature->GetData().IsCdregion()) {
883 pFeature->SetData().SetCdregion().SetCode().Set().push_back(pCe);
884 attrs_left.
erase(it);
888 if (!xMigrateAttributesGo(
flags, pFeature, attrs_left)) {
892 if (pFeature->GetData().IsBiosrc()) {
893 if (!xMigrateAttributesSubSource(
flags, pFeature, attrs_left)) {
896 if (!xMigrateAttributesOrgName(
flags, pFeature, attrs_left)) {
904 while (!attrs_left.
empty()) {
905 const string&
key = attrs_left.
begin()->first;
906 if (!xMigrateAttributeDefault(attrs_left,
key, pFeature,
key,
flags)) {
916 const string& attrKey,
918 const string& qualKey,
938 const string& attrKey,
940 const string& qualKey,
951 list<CTempStringEx> values;
953 for (list<CTempStringEx>::const_iterator cit = values.begin(); cit != values.end();
974 ORGMOD_MAP& sOrgModMap = *s_OrgModMap;
975 if (sOrgModMap.empty()) {
1015 list<CRef<COrgMod> >& orgMod =
1016 pFeature->SetData().SetBiosrc().SetOrg().SetOrgname().SetMod();
1017 for ( ORGMOD_MAP::const_iterator sit = sOrgModMap.begin();
1018 sit != sOrgModMap.end(); ++sit) {
1020 if (ait == attrs_left.end()) {
1026 orgMod.push_back(pOrgMod);
1027 attrs_left.erase(ait);
1039 for (
auto it = attrs.begin(); it != attrs.end(); ) {
1046 it = attrs.erase(it);
1064 SUBSOURCE_MAP& sSubSourceMap = *s_SubSourceMap;
1065 if (sSubSourceMap.empty()) {
1112 list<CRef<CSubSource> >& subType =
1113 pFeature->SetData().SetBiosrc().SetSubtype();
1114 for ( SUBSOURCE_MAP::const_iterator sit = sSubSourceMap.begin();
1115 sit != sSubSourceMap.end(); ++sit) {
1117 if (ait == attrs_left.end()) {
1122 pSubSource->
SetName(xNormalizedAttributeValue(ait->second));
1123 subType.push_back(pSubSource);
1124 attrs_left.erase(ait);
1135 auto recognizedType = NormalizedType();
1137 if (recognizedType ==
"region" || recognizedType ==
"biological_region") {
1139 if (GetAttribute(
"gbkey", gbkey)) {
1140 if (gbkey ==
"Src") {
1141 pFeature->SetData().SetBiosrc();
1147 GetAttribute(
"Name", name);
1148 pFeature->SetData().SetRegion(name);
1152 if (recognizedType ==
"start_codon" || recognizedType ==
"stop_codon") {
1153 recognizedType =
"cds";
1158 Type(), *pFeature, invalidFeaturesToRegion)) {
1159 string message =
"Bad data line: Invalid feature type \"" +
Type() +
"\"";
1174 list<string> parentIds;
1178 return (parentIds.size() > 1);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static const struct attribute attributes[]
@Gb_qual.hpp User-defined methods of the data storage class.
static CRef< CDbtag > x_ParseDbtag(const string &)
virtual bool xUpdateFeatureData(TReaderFlags, CRef< CSeq_feat >, SeqIdResolver=nullptr) const
bool IsMultiParent() const
TAttributes::const_iterator TAttrCit
static string xNormalizedAttributeValue(const CTempString &)
virtual bool xMigrateAttributes(TReaderFlags, CRef< CSeq_feat >) const
TAttributes::iterator TAttrIt
bool GetAttribute(const string &, string &) const
virtual bool UpdateFeature(TReaderFlags, CRef< CSeq_feat >, SeqIdResolver=nullptr) const
virtual bool AssignFromGff(const string &)
virtual bool InitializeFeature(TReaderFlags, CRef< CSeq_feat >, SeqIdResolver=nullptr) const
static void TokenizeGFF(vector< CTempStringEx > &columns, const CTempStringEx &line)
virtual bool xInitFeatureData(TReaderFlags, CRef< CSeq_feat >) const
static bool xMigrateAttributeDefault(TAttributes &, const string &, CRef< CSeq_feat >, const string &, TReaderFlags)
static bool xMigrateAttributeSingle(TAttributes &, const string &, CRef< CSeq_feat >, const string &, TReaderFlags)
virtual bool xMigrateAttributesGo(TReaderFlags, CRef< CSeq_feat >, TAttributes &) const
virtual bool xMigrateAttributesOrgName(TReaderFlags, CRef< CSeq_feat >, TAttributes &) const
virtual bool xMigrateAttributesSubSource(TReaderFlags, CRef< CSeq_feat >, TAttributes &) const
bool xSplitGffAttributes(const string &, vector< string > &) const
static string xNormalizedAttributeKey(const CTempString &)
virtual bool xAssignAttributesFromGff(const string &, const string &)
virtual bool xInitFeatureData(TReaderFlags, CRef< CSeq_feat >) const
virtual bool InitializeFeature(TReaderFlags, CRef< CSeq_feat >, SeqIdResolver=nullptr) const
string m_strNormalizedType
static CObjReaderLineException * Create(EDiagSev eSeverity, unsigned int uLine, const std::string &strMessage, EProblem eProblem=eProblem_GeneralParsingError, const std::string &strSeqId=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), CObjReaderLineException::EErrCode eErrCode=eFormat, const TVecOfLines &vecOfOtherLines=TVecOfLines())
Please use this instead of the constructor because the ctor is protected.
@OrgMod.hpp User-defined methods of the data storage class.
static void AddGeneOntologyTerm(CSeq_feat &feature, const CTempString &qual, const CTempString &val)
void AddQualifier(const string &qual_name, const string &qual_val)
Add a qualifier to this feature.
static bool SoTypeToFeature(const string &, CSeq_feat &, bool=false)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
@ eProblem_FeatureBadStartAndOrStop
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
Include a standard set of the NCBI C++ Toolkit most basic headers.
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
CBioSource::EGenome s_StringToGenome(const string &genome, CGff2Record::TReaderFlags flags)
CTempString x_GetNextAttribute(CTempString &input)
CRef< CCode_break > s_StringToCodeBreak(const string &str, CSeq_id &id, CGff2Record::TReaderFlags flags)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
@ eDiag_Error
Error message.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
TSeqPos GetStop(ESeqLocExtremes ext) const
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
CTempStringEx substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
static string URLDecode(const CTempString str, EUrlDecode flag=eUrlDec_All)
URL-decode string.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
size_type length(void) const
Return the length of the represented array.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
size_type find_first_not_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character not in the matching string within the current string,...
size_type find_first_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character in the matching string within the current string,...
static string & ToLower(string &str)
Convert string to lower case – string& version.
static const size_type npos
const_iterator begin() const
Return an iterator to the string's starting position.
@ fAllowLeadingSpaces
Ignore leading spaces in converted string.
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
@ eUrlDec_Percent
Decode only XX.
void SetSubtype(TSubtype value)
Assign a value to Subtype data member.
void SetName(const TName &value)
Assign a value to Name data member.
EGenome
biological context
@ eSubtype_collection_date
DD-MMM-YYYY format.
@ eSubtype_insertion_seq_name
@ eSubtype_transposon_name
@ eSubtype_fwd_primer_seq
sequence (possibly more than one; semicolon-separated)
@ eSubtype_lat_lon
+/- decimal degrees
@ eSubtype_rev_primer_name
@ eSubtype_collected_by
name of person who collected the sample
@ eSubtype_fwd_primer_name
@ eSubtype_rev_primer_seq
sequence (possibly more than one; semicolon-separated)
@ eSubtype_isolation_source
@ eSubtype_environmental_sample
@ eSubtype_endogenous_virus_name
@ eSubtype_identified_by
name of person who identified the sample
@ eSubtype_whole_replicon
@ eGenome_endogenous_virus
void SetSubtype(TSubtype value)
Assign a value to Subtype data member.
void SetSubname(const TSubname &value)
Assign a value to Subname data member.
@ eSubtype_gb_acronym
used by taxonomy database
@ eSubtype_gb_synonym
used by taxonomy database
@ eSubtype_other
ASN5: old-name (254) will be added to next spec.
@ eSubtype_dosage
chromosome dosage of hybrid
@ eSubtype_nat_host
natural host of this specimen
@ eSubtype_specimen_voucher
@ eSubtype_gb_anamorph
used by taxonomy database
@ eSubtype_culture_collection
@ eSubtype_forma_specialis
void SetAa(TAa &value)
Assign a value to Aa data member.
void SetQual(const TQual &value)
Assign a value to Qual data member.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
void SetVal(const TVal &value)
Assign a value to Val data member.
@ eExp_ev_experimental
any reasonable experimental check
@ eExp_ev_not_experimental
similarity, pattern, etc
@ eFrame_not_set
not set, code uses one
@ eFrame_three
reading frame
ENa_strand
strand of nucleic acid
bool CanGetStrand(void) const
Check if it is safe to call GetStrand method.
TStrand GetStrand(void) const
Get the Strand member data.
const TInt & GetInt(void) const
Get the variant data.
@ eNa_strand_both
in forward orientation
const struct ncbi::grid::netcache::search::fields::KEY key
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
static const char * str(char *buf, int n)