74 if (ID1.Empty() || ID2.Empty()) {
77 return ID1->Match(*ID2);
85 if (
id.
Empty())
return false;
89 CBioseq::TId::const_iterator cit = bioseqIds.begin(), cend = bioseqIds.end();
90 for (; cit != cend && !
result; ++cit) {
104 pssmId =
id->GetGeneral().GetTag().GetId();
113 list< CRef< CSeq_annot > >::const_iterator j;
114 list< CRef< CSeq_id > >::const_iterator k;
121 if ((*j)->GetData().IsIds()) {
122 for (k = (*j)->GetData().GetIds().begin(); k != (*j)->GetData().GetIds().end(); k++) {
123 if ((*k)->IsGeneral()) {
124 if ((*k)->GetGeneral().GetDb() ==
"mmdb") {
125 if ((*k)->GetGeneral().GetTag().IsId()) {
126 id = (*k)->GetGeneral().GetTag().GetId();
141 bool isTaxIdFound =
false;
143 list< CRef< CSeqdesc > >::const_iterator j, jend;
150 for (j=bioseq.
GetDescr().
Get().begin(); j!=jend; j++)
154 org = &((*j)->GetOrg());
155 else if ((*j)->IsSource())
156 org = &((*j)->GetSource().GetOrg());
159 vector < CRef< CDbtag > >::const_iterator k, kend = org->
GetDb().end();
160 for (k=org->
GetDb().begin(); k != kend; ++k) {
161 if ((*k)->GetDb() ==
"taxon") {
162 if ((*k)->GetTag().IsId()) {
168 if (isTaxIdFound && taxid != thisTaxid && taxid != -thisTaxid) {
171 taxid = (thisTaxid >
ZERO_TAX_ID) ? thisTaxid : -thisTaxid;
195 list< CRef< CSeqdesc > >::const_iterator j;
201 if ((*j)->IsSource()) {
203 if ((*j)->GetSource().GetOrg().IsSetTaxname()) {
204 return((*j)->GetSource().GetOrg().GetTaxname());
206 if ((*j)->GetSource().GetOrg().IsSetCommon()) {
207 return((*j)->GetSource().GetOrg().GetCommon());
245 if (Seq.
Empty() || Seq->
IsSet())
return false;
262 str->resize(vec.size());
265 }
catch (exception& e) {
276 if (
str.size() > 0) {
277 vec.reserve(
str.size());
340 seqData.assign(vec.begin(), vec.end());
353 static const char MY_NCBISTDAA_TO_AMINOACID[28] = {
354 '-',
'A',
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'K',
'L',
'M',
355 'N',
'P',
'Q',
'R',
'S',
'T',
'V',
'W',
'X',
'Y',
'Z',
'U',
'*',
368 s.resize(vec.size());
369 for (
unsigned int i=0;
i<vec.size();
i++) {
371 s.at(
i) = MY_NCBISTDAA_TO_AMINOACID[(
int)vec[
i]];
395 if (zeroBased && pos < (
int)
str.size()) {
397 }
else if (!zeroBased && pos <= (
int)
str.size() && pos != 0) {
398 residue =
str[pos-1];
423 const list< CRef< CSeq_id > >& seqIds = bioseq->
GetId();
425 cit != seqIds.end(); cit++)
427 const CTextseq_id* textId = (*cit)->GetTextseq_Id();
442 return acc.size() != 0;
448 if (seqEntry->
IsSeq())
451 list< CRef< CSeqdesc > >::const_iterator cit = descrList.begin();
452 for (; cit != descrList.end(); cit++)
465 list< CRef< CSeqdesc > >::const_iterator cit = descrList.begin();
466 for (; cit != descrList.end(); cit++)
475 list< CRef< CSeq_entry > >::const_iterator lsei;
476 const list< CRef< CSeq_entry > >& seqEntryList = seqEntry->
GetSet().
GetSeq_set();
477 for (lsei = seqEntryList.begin(); lsei != seqEntryList.end(); ++lsei)
490 unsigned int ctr = 0;
491 CBioseq::TId::const_iterator idCit, idEnd;
493 idEnd = bioseq->
GetId().end();
494 for (idCit = bioseq->
GetId().begin(); idCit != idEnd && ctr < nth; ++idCit) {
495 if ((*idCit).NotEmpty() && (*idCit)->IsGi()) {
499 if (ctr != nth)
continue;
515 gi = giSeqId->
GetGi();
525 unsigned int ctr = 0;
526 CBioseq::TId::const_iterator idCit, idEnd;
528 idEnd = bioseq->
GetId().end();
529 for (idCit = bioseq->
GetId().begin(); idCit != idEnd && ctr < nth; ++idCit) {
530 if ((*idCit).NotEmpty() && (*idCit)->IsPdb()) {
534 if (ctr != nth)
continue;
536 pdbSeqId->
Assign(**idCit);
562 CBioseq::TId::const_iterator idCit = bioseq.
GetId().begin(), idEnd = bioseq.
GetId().end();
564 for (; idCit != idEnd && !
result; ++idCit) {
565 if ((*idCit)->Which() == choice) {
575 CBioseq_set::TSeq_set::const_iterator bssCit, bssEnd;
578 if (seqEntry->
IsSet()) {
581 for (; bssCit != bssEnd && !
result; ++bssCit) {
582 if ((*bssCit)->IsSeq()) {
584 }
else if ((*bssCit)->IsSet()) {
588 }
else if (seqEntry->
IsSeq()) {
597 CBioseq::TId::const_iterator idCit = bioseq.
GetId().begin(), idEnd = bioseq.
GetId().end();
600 for (; idCit != idEnd; ++idCit) {
601 if ((*idCit)->Which() == choice) {
604 idsOfType.push_back(
id);
607 return idsOfType.size();
612 list< CRef< CSeq_id > > tmpList;
613 CBioseq_set::TSeq_set::const_iterator bssCit, bssEnd;
617 if (seqEntry->
IsSet()) {
620 for (; bssCit != bssEnd; ++bssCit) {
622 if ((*bssCit)->IsSeq()) {
624 idsOfType.insert(idsOfType.end(), tmpList.begin(), tmpList.end());
626 }
else if ((*bssCit)->IsSet()) {
628 idsOfType.insert(idsOfType.end(), tmpList.begin(), tmpList.end());
632 }
else if (seqEntry->
IsSeq()) {
636 return idsOfType.size();
643 list< CRef< CSeq_id > > tmpList;
644 CBioseq_set::TSeq_set::const_iterator bssCit, bssEnd;
647 if (seqEntry->
IsSet()) {
651 for (; bssCit != bssEnd && !
result; ++bssCit) {
652 if ((*bssCit)->IsSeq()) {
655 seqEntryBioseq->
Assign((*bssCit)->GetSeq());
661 }
else if (seqEntry->
IsSeq()) {
675 list< CRef< CSeq_id > > tmpList;
676 CBioseq_set::TSeq_set::iterator bssIt, bssEnd;
679 if (seqEntry->
IsSet()) {
683 for (; bssIt != bssEnd && !
result; ++bssIt) {
684 if ((*bssIt)->IsSeq()) {
687 seqEntryBioseq = &(*bssIt)->SetSeq();
693 }
else if (seqEntry->
IsSeq()) {
695 seqEntryBioseq = &(seqEntry->
SetSeq());
712 seqDescr.
Set().push_back(seqdescComment);
720 bool hasSource =
false;
721 bool hasTitle =
false;
725 if (seqDescr.
IsSet()) {
726 list< CRef< CSeqdesc > >& descrList = seqDescr.
Set();
727 list< CRef< CSeqdesc > >::iterator it = descrList.begin();
730 while (!hasTitle && it != descrList.end()) {
731 hasTitle = ((*it)->IsTitle());
736 it = descrList.begin();
737 while (it != descrList.end()) {
739 if ((*it)->IsSource() && (!hasSource)) {
742 }
else if ((*it)->IsTitle()) {
744 }
else if ((*it)->IsComment() && find(keptComments.begin(), keptComments.end(), (*it)->GetComment()) != keptComments.end()) {
746 }
else if ((*it)->IsPdb()) {
752 if (newTitle.length() > 0) {
755 descrList.push_back(addedTitle);
762 it = descrList.erase(it);
765 it = descrList.erase(it);
780 if (seqEntry.
Empty())
return;
782 if (seqEntry->
IsSeq()) {
784 }
else if (seqEntry->
IsSet()) {
786 for (; bssIt != bssEnd; ++bssIt) {
798 string acc, dbSource;
805 string acc, dbSource;
814 accession =
"unknown";
825 else if (seqID->
IsPdb()) {
834 else if (pLocal.
IsStr()) {
835 accession = pLocal.
GetStr();
840 if (pGeneral.
IsSetDb() && !getGenericSource) {
841 dbSource = dbSource +
": " + pGeneral.
GetDb();
859 else if (seqID->
IsGiim()) {
872 if (!textseqId)
return;
884 if (!getGenericSource && !seqID->
IsGeneral()) {
891 info.acession.erase();
892 const list< CRef< CSeq_id > >& seqIds = bioseq->
GetId();
894 cit != seqIds.end(); cit++)
896 const CTextseq_id* textId = (*cit)->GetTextseq_Id();
901 if (
info.acession.size() > 0)
905 info.dbsource = (*cit)->Which();
912 list< CRef< CSeqdesc > >::const_iterator dit;
918 if ((*dit)->IsTitle())
919 info.defline = ((*dit)->GetTitle());
922 return !
info.acession.empty();
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static string SeqIdTypeToSource(unsigned int seqIdType, string accession=kEmptyStr)
static string GetSourceName(EDbSource priority)
string GetEffectiveChain_id(EBothUnsetPriority bothUnsetPriority=eBothUnset_ChainId) const
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
@Seq_descr.hpp User-defined methods of the data storage class.
bool NcbieaaToNcbistdaaString(const std::string &str, vector< char > &vec)
bool ExtractGi(const CRef< CBioseq > &bioseq, TGi &gi, unsigned int nth)
bool SeqIdHasMatchInBioseq(const CRef< CSeq_id > &id, const CBioseq &bioseq)
void NcbistdaaToNcbieaaString(const std::vector< char > &vec, std::string *str)
TTaxId GetTaxIdInBioseq(const CBioseq &bioseq)
bool GetNcbistdSeq(const CBioseq &bioseq, vector< char > &seqData)
bool CopyBioseqWithType(const CRef< CSeq_entry > &seqEntry, CSeq_id::E_Choice choice, CRef< CBioseq > &seqEntryBioseq)
bool GetNcbieaaString(const CRef< CSeq_entry > &Seq, string &Str)
int GetSeqLength(const CBioseq &bioseq)
bool GetBioseqWithType(CRef< CSeq_entry > &seqEntry, CSeq_id::E_Choice choice, CRef< CBioseq > &seqEntryBioseq)
void SimplifySeqEntryForCD(CRef< CSeq_entry > &seqEntry, const vector< string > &keptComments, bool keepPDBBlock)
bool AddCommentToBioseq(CBioseq &bioseq, const string &comment)
string GetSpeciesFromBioseq(const CBioseq &bioseq)
bool CopyGiSeqId(const CRef< CBioseq > &bioseq, CRef< CSeq_id > &giSeqId, unsigned int nth)
void SimplifyBioseqForCD(CBioseq &bioseq, const vector< string > &keptComments, bool keepPDBBlock)
char GetResidueAtPosition(const CRef< CSeq_entry > &seqEntry, int pos, bool zeroBased)
bool GetPDBBlockFromSeqEntry(CRef< CSeq_entry > seqEntry, CRef< CPDB_block > &pdbBlock)
bool extractBioseqInfo(const CRef< CBioseq > bioseq, BioseqInfo &info)
string GetAccessionForSeqId(const CRef< CSeq_id > &seqID)
string GetDbSourceForSeqId(const CRef< CSeq_id > &seqID)
int GetCDDPssmIdFromSeqId(const CRef< CSeq_id > &id)
bool IsEnvironmentalSeq(const CBioseq &bioseq)
string GetRawSequenceString(const CBioseq &bioseq)
bool CopyPdbSeqId(const CRef< CBioseq > &bioseq, CRef< CSeq_id > &pdbSeqId, unsigned int nth)
bool ExtractPdbMolChain(const CRef< CBioseq > &bioseq, string &pdbMol, string &pdbChain, unsigned int nth)
void GetAccessionAndDatabaseSource(const CRef< CSeq_id > &seqID, string &accession, string &dbSource, bool getGenericSource)
unsigned int CopySeqIdsOfType(const CBioseq &bioseq, CSeq_id::E_Choice choice, list< CRef< CSeq_id > > &idsOfType)
bool HasSeqIdOfType(const CBioseq &bioseq, CSeq_id::E_Choice choice)
int GetMMDBId(const CBioseq &bioseq)
bool SeqIdsMatch(const CRef< CSeq_id > &ID1, const CRef< CSeq_id > &ID2)
bool IsConsensus(const CRef< CSeq_id > &seqId)
bool GetAccAndVersion(const CRef< CBioseq > bioseq, string &acc, int &version, CRef< CSeq_id > &seqId)
const TTaxId ENVIRONMENTAL_SEQUENCE_TAX_ID
bool Empty(const CNcbiOstrstream &src)
static const char * str(char *buf, int n)
SStrictId_Tax::TId TTaxId
Taxon id type.
#define TAX_ID_FROM(T, value)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetDb(void) const
name of database or system Check if a value has been assigned to Db data member.
const TTag & GetTag(void) const
Get the Tag member data.
bool IsId(void) const
Check if variant Id is selected.
bool IsSetTag(void) const
appropriate tag Check if a value has been assigned to Tag data member.
const TDb & GetDb(void) const
Get the Db member data.
const TStr & GetStr(void) const
Get the variant data.
TId GetId(void) const
Get the variant data.
const TDb & GetDb(void) const
Get the Db member data.
bool IsPatent(void) const
Check if variant Patent is selected.
const TPdb & GetPdb(void) const
Get the variant data.
TGibbsq GetGibbsq(void) const
Get the variant data.
TId GetId(void) const
Get the Id member data.
const TName & GetName(void) const
Get the Name member data.
bool IsGibbmt(void) const
Check if variant Gibbmt is selected.
bool IsGeneral(void) const
Check if variant General is selected.
bool CanGetName(void) const
Check if it is safe to call GetName method.
E_Choice Which(void) const
Which variant is currently selected.
bool IsPdb(void) const
Check if variant Pdb is selected.
TGi GetGi(void) const
Get the variant data.
bool CanGetDb(void) const
Check if it is safe to call GetDb method.
TVersion GetVersion(void) const
Get the Version member data.
bool CanGetVersion(void) const
Check if it is safe to call GetVersion method.
const TMol & GetMol(void) const
Get the Mol member data.
TSeqid GetSeqid(void) const
Get the Seqid member data.
const TGiim & GetGiim(void) const
Get the variant data.
const TLocal & GetLocal(void) const
Get the variant data.
bool IsGiim(void) const
Check if variant Giim is selected.
bool IsLocal(void) const
Check if variant Local is selected.
bool CanGetAccession(void) const
Check if it is safe to call GetAccession method.
const TGeneral & GetGeneral(void) const
Get the variant data.
bool IsGi(void) const
Check if variant Gi is selected.
const TPatent & GetPatent(void) const
Get the variant data.
TGibbmt GetGibbmt(void) const
Get the variant data.
bool IsGibbsq(void) const
Check if variant Gibbsq is selected.
const TDb & GetDb(void) const
Get the Db member data.
const TAccession & GetAccession(void) const
Get the Accession member data.
const TSeq & GetSeq(void) const
Get the variant data.
const TDescr & GetDescr(void) const
Get the Descr member data.
TSet & SetSet(void)
Select the variant.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSet(void) const
Check if variant Set is selected.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
const TIupacaa & GetIupacaa(void) const
Get the variant data.
void ResetDescr(void)
Reset Descr data member.
bool IsSetSeq_data(void) const
the sequence Check if a value has been assigned to Seq_data data member.
bool IsNcbieaa(void) const
Check if variant Ncbieaa is selected.
const TInst & GetInst(void) const
Get the Inst member data.
TTitle & SetTitle(void)
Select the variant.
bool IsIupacaa(void) const
Check if variant Iupacaa is selected.
bool IsNcbistdaa(void) const
Check if variant Ncbistdaa is selected.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
TPdb & SetPdb(void)
Select the variant.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
const TId & GetId(void) const
Get the Id member data.
void ResetAnnot(void)
Reset Annot data member.
const Tdata & Get(void) const
Get the member data.
TLength GetLength(void) const
Get the Length member data.
list< CRef< CSeq_id > > TId
const TNcbieaa & GetNcbieaa(void) const
Get the variant data.
TComment & SetComment(void)
Select the variant.
const TNcbistdaa & GetNcbistdaa(void) const
Get the variant data.
bool IsSetLength(void) const
length of sequence in residues Check if a value has been assigned to Length data member.
bool IsSetDescr(void) const
descriptors Check if a value has been assigned to Descr data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Tdata & Set(void)
Assign a value to data member.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
const TDescr & GetDescr(void) const
Get the Descr member data.
bool CanGetCompound(void) const
Check if it is safe to call GetCompound method.
const TCompound & GetCompound(void) const
Get the Compound member data.
unsigned int
A callback function used to compare two keys in a database.
use only n Cassandra database for the lookups</td > n</tr > n< tr > n< td > yes</td > n< td > do not use tables BIOSEQ_INFO and BLOB_PROP in the Cassandra database
const string version
version string