37 #ifndef OBJECTS_SEQLOC_SEQ_ID_HPP
38 #define OBJECTS_SEQLOC_SEQ_ID_HPP
80 fParse_PartialOK = 0x01,
81 fParse_RawText = 0x02,
83 fParse_AnyRaw = fParse_RawText | fParse_RawGI,
87 fParse_ValidLocal = 0x08,
90 fParse_AnyLocal = 0x18,
91 fParse_NoFASTA = 0x20,
96 fParse_FallbackOK = 0x40,
102 fParse_Default = fParse_RawText | fParse_ValidLocal
110 fInvalidChar = 1 << 1,
111 fExceedsMaxLength = 1 << 2
117 eFasta_AsTypeAndContent
136 TParseFlags
flags = fParse_AnyRaw);
153 #ifdef NCBI_STRICT_GI
188 CSeq_id(EFastaAsTypeAndContent, E_Choice the_type,
198 CSeq_id& Set(E_Choice the_type,
201 CSeq_id& Set(E_Choice the_type,
207 CSeq_id& Set(EFastaAsTypeAndContent, E_Choice the_type,
/// Map a Seq-id type code, given as text, to the corresponding E_Choice.
/// NOTE(review): the result for an unrecognized code is not visible in this
/// excerpt -- confirm against the implementation.
214 static E_Choice WhichInverseSeqId(
const CTempString& SeqIdCode);
/// Return a C string for the given E_Choice value.
/// NOTE(review): presumably the FASTA-style type tag for that choice (going
/// by the name); ownership/lifetime of the returned pointer is not visible
/// here -- confirm.
217 static const char* WhichFastaTag(E_Choice choice);
234 eSeqId_refseq = eSeqId_other,
247 eAcc_type_mask = 0xff,
251 fAcc_nuc = 0x80000000,
252 fAcc_prot = 0x40000000,
254 fAcc_predicted = 0x20000000,
255 fAcc_specials = 0x10000000,
256 fAcc_master = 0x08000000,
257 fAcc_ncbo = 0x04000000,
258 fAcc_fallback = 0x02000000,
259 fAcc_vdb_only = 0x01000000,
260 eAcc_flag_mask = 0xff000000,
266 eAcc_dirsub = 2 << 8,
267 eAcc_div_patent = 3 << 8,
270 eAcc_gsdb_ds = 6 << 8,
272 eAcc_backbone = 8 << 8,
274 eAcc_segset = 10 << 8,
276 eAcc_unique = 12 << 8,
277 eAcc_ambig = 112 << 8,
278 eAcc_ambig_g = 113 << 8,
279 eAcc_ambig_e = 114 << 8,
280 eAcc_ambig_ge = 115 << 8,
281 eAcc_ambig_d = 116 << 8,
282 eAcc_ambig_gd = 117 << 8,
283 eAcc_ambig_ed = 118 << 8,
284 eAcc_ambig_ged = 119 << 8,
285 eAcc_unreserved = 127 << 8,
286 fAcc_genomic = 128 << 8,
287 eAcc_genome = 128 << 8,
288 eAcc_htgs = 129 << 8,
292 eAcc_chromosome = 133 << 8,
293 eAcc_genomic_rgn = 134 << 8,
294 eAcc_wgs_intermed = 135 << 8,
297 eAcc_optical_map = 138 << 8,
298 eAcc_targeted = 139 << 8,
299 eAcc_division_mask = 0xff00,
302 eAcc_wgs_master = eAcc_wgs | fAcc_master,
303 eAcc_wgs_intermed_master = eAcc_wgs_intermed | fAcc_master,
304 eAcc_tsa_master = eAcc_tsa | fAcc_master,
305 eAcc_targeted_master = eAcc_targeted | fAcc_master,
306 eAcc_wgs_vdb_only = eAcc_wgs | fAcc_vdb_only,
307 eAcc_wgs_intermed_vdb_only = eAcc_wgs_intermed | fAcc_vdb_only,
308 eAcc_tsa_vdb_only = eAcc_tsa | fAcc_vdb_only,
309 eAcc_targeted_vdb_only = eAcc_targeted | fAcc_vdb_only,
310 eAcc_wgs_vdb_master = eAcc_wgs | fAcc_master | fAcc_vdb_only,
311 eAcc_wgs_intermed_vdb_master
312 = eAcc_wgs_intermed | fAcc_master | fAcc_vdb_only,
313 eAcc_tsa_vdb_master = eAcc_tsa | fAcc_master | fAcc_vdb_only,
314 eAcc_targeted_vdb_master = eAcc_targeted | fAcc_master | fAcc_vdb_only,
/// Compose an accession-info value from three name suffixes by token
/// pasting: eSeqId_<type> | eAcc_<div> | fAcc_<mol>.
/// The expansion is parenthesized so that the macro acts as a single operand
/// when combined with tighter-binding operators such as `&` or `==`.
#define NCBI_ACC(type, div, mol) (eSeqId_##type | eAcc_##div | fAcc_##mol)
349 eAcc_gb_patent =
NCBI_ACC(genbank, div_patent, seq),
418 eAcc_refseq_unreserved =
NCBI_ACC(refseq, unreserved, seq),
432 eAcc_refseq_wgsm_intermed =
NCBI_ACC(refseq, wgs_intermed_master,
434 eAcc_refseq_wgsv_intermed =
NCBI_ACC(refseq, wgs_intermed_vdb_only,
436 eAcc_refseq_wgsvm_intermed =
NCBI_ACC(refseq, wgs_intermed_vdb_master,
438 eAcc_refseq_prot_predicted = eAcc_refseq_prot | fAcc_predicted,
439 eAcc_refseq_mrna_predicted = eAcc_refseq_mrna | fAcc_predicted,
440 eAcc_refseq_ncrna_predicted = eAcc_refseq_ncrna | fAcc_predicted,
441 eAcc_refseq_chromosome_ncbo = eAcc_refseq_chromosome | fAcc_ncbo,
442 eAcc_refseq_contig_ncbo = eAcc_refseq_contig | fAcc_ncbo,
481 eAcc_ddbj_targetedvm_nuc =
NCBI_ACC(ddbj, targeted_vdb_master,
nuc),
543 eAcc_ddbj_tpa_targetedm_nuc =
NCBI_ACC(tpd, targeted_master,
nuc),
544 eAcc_ddbj_tpa_targetedv_nuc =
NCBI_ACC(tpd, targeted_vdb_only,
nuc),
545 eAcc_ddbj_tpa_targetedvm_nuc =
NCBI_ACC(tpd, targeted_vdb_master,
nuc),
555 eAcc_gpipe_unreserved =
NCBI_ACC(gpipe, unreserved, seq),
558 eAcc_named_annot_track =
NCBI_ACC(named_annot_track, other, seq)
563 {
return static_cast<E_Choice>(
info & eAcc_type_mask); }
/// Classify an accession supplied as text.
/// @param accession  Text to classify.
/// @param flags      Parsing flags; defaults to fParse_AnyRaw.
/// @return           An EAccessionInfo value describing the accession.
567 static EAccessionInfo IdentifyAccession(
const CTempString& accession,
568 TParseFlags
flags = fParse_AnyRaw);
/// Member overload of IdentifyAccession(): takes only parsing flags and does
/// not modify this object (const).
/// @param flags  Parsing flags; defaults to fParse_AnyRaw | fParse_AnyLocal.
/// @return       An EAccessionInfo value.
/// NOTE(review): presumably classifies the identifier held by this object --
/// confirm against the implementation.
569 EAccessionInfo IdentifyAccession(TParseFlags
flags
570 = fParse_AnyRaw | fParse_AnyLocal)
const;
/// Static; takes the name of a file and returns nothing.
/// NOTE(review): presumably loads the accession classification rules that
/// IdentifyAccession() consults; error reporting for a missing or malformed
/// file is not visible here -- confirm.
572 static void LoadAccessionGuide(
const string& filename);
/// Return true if this Seq-id and sid2 are equivalent.
/// Does not modify this object (const).
576 bool Match(
const CSeq_id& sid2)
const;
/// Ordered comparison of this Seq-id against sid2, returned as an int.
/// A negative result is what the `CompareOrdered(sid2) < 0` test elsewhere in
/// this header treats as "less than".
/// NOTE(review): presumably zero means equal and a positive value means
/// greater -- confirm against the implementation.
588 int CompareOrdered(
const CSeq_id& sid2)
const;
591 return CompareOrdered(sid2) < 0;
/// Return a string for this Seq-id, by value; does not modify this object.
/// NOTE(review): presumably the FASTA-style representation (going by the
/// name) -- confirm.
/// NOTE(review): the `const` on a by-value return stops callers from moving
/// out of the result; it can only be dropped together with the definition.
600 const string AsFastaString(
void)
const;
615 fLabel_Version = 0x10,
618 fLabel_GeneralDbIsContent = 0x20,
619 fLabel_Trimmed = 0x40,
620 fLabel_UpperCase = 0x80,
623 fLabel_Default = fLabel_Version
665 eFormat_BestWithVersion
/// Check the text of a local ID and report the outcome as TErrorFlags.
/// NOTE(review): presumably a combination of fInvalidChar and
/// fExceedsMaxLength, with no bits set meaning "no problem" -- confirm.
687 static TErrorFlags CheckLocalID(
const CTempString& s);
701 TParseFlags
flags = fParse_Default);
720 bool allow_partial_failure =
false);
737 fRequireAccessions = 1 << 0
741 int AdjustScore (
int base_score,
742 TAdjustScoreFlags
flags = TAdjustScoreFlags())
745 {
return AdjustScore(base_score, fRequireAccessions); }
/// Unadjusted ("base") scores, one per ranking scheme. None of them takes
/// arguments or modifies this object (const).
/// Inline wrappers in this header pass these values through AdjustScore()
/// (e.g. TextScore(), BlastScore()) or through StrictAdjustScore() (the
/// Strict* variants) before returning them.
746 int BaseTextScore (
void)
const;
747 int BaseBestRankScore (
void)
const;
749 int BaseFastaAAScore (
void)
const;
750 int BaseFastaNAScore (
void)
const;
751 int BaseBlastScore (
void)
const;
/// Text score: BaseTextScore() passed through AdjustScore() with
/// AdjustScore()'s default flags.
753 int TextScore(void) const
{
    const int base_score = BaseTextScore();
    return AdjustScore(base_score);
}
/// FASTA-AA score: BaseFastaAAScore() passed through AdjustScore() with
/// AdjustScore()'s default flags.
756 int FastaAAScore(void) const
{
    const int base_score = BaseFastaAAScore();
    return AdjustScore(base_score);
}
/// FASTA-NA score: BaseFastaNAScore() passed through AdjustScore() with
/// AdjustScore()'s default flags.
757 int FastaNAScore(void) const
{
    const int base_score = BaseFastaNAScore();
    return AdjustScore(base_score);
}
/// BLAST score: BaseBlastScore() passed through AdjustScore() with
/// AdjustScore()'s default flags.
758 int BlastScore(void) const
{
    const int base_score = BaseBlastScore();
    return AdjustScore(base_score);
}
761 {
return StrictAdjustScore(BaseTextScore()); }
763 {
return StrictAdjustScore(BaseBestRankScore()); }
765 {
return StrictAdjustScore(BaseFastaAAScore()); }
767 {
return StrictAdjustScore(BaseFastaNAScore()); }
769 {
return StrictAdjustScore(BaseBlastScore()); }
773 {
return id ?
id->TextScore() :
kMax_Int; }
775 {
return id ?
id->BestRankScore() :
kMax_Int; }
777 {
return Score(
id); }
779 {
return id ?
id->FastaAAScore() :
kMax_Int; }
781 {
return id ?
id->FastaNAScore() :
kMax_Int; }
783 {
return id ?
id->BlastScore() :
kMax_Int; }
786 {
return id ?
id->StrictTextScore() :
kMax_Int; }
788 {
return id ?
id->StrictBestRankScore() :
kMax_Int; }
790 {
return id ?
id->StrictFastaAAScore() :
kMax_Int; }
792 {
return id ?
id->StrictFastaNAScore() :
kMax_Int; }
794 {
return id ?
id->StrictBlastScore() :
kMax_Int; }
/// Static, argument-free query returning bool.
/// NOTE(review): presumably reports a process-wide preference for accessions
/// over GIs; where the setting comes from is not visible here -- confirm.
811 static bool PreferAccessionOverGi(
void);
/// Static, argument-free query returning bool.
/// NOTE(review): presumably reports whether GIs should be avoided altogether;
/// where the setting comes from is not visible here -- confirm.
814 static bool AvoidGi(
void);
820 fAllowLocalId = (1 << 0),
821 fGpipeAddSecondary = (1 << 1)
837 string ComposeOSLT(list<string>* secondary_ids =
nullptr,
/// Length limits, as static size_t constants with in-class initializers.
/// NOTE(review): presumably maximum character counts for, respectively, local
/// IDs, general-ID database names, general-ID tags, and accessions -- confirm
/// the units and where the limits are enforced.
841 const static size_t kMaxLocalIDLength = 50;
842 const static size_t kMaxGeneralDBLength = 20;
843 const static size_t kMaxGeneralTagLength = 50;
844 const static size_t kMaxAccessionLength = 30;
852 eSNPScaleLimit_Chromosome
/// Return a C string naming the given SNP annotation scale limit.
/// NOTE(review): the result for an out-of-range value is not visible here --
/// confirm.
855 static const char* GetSNPScaleLimit_Name(ESNPScaleLimit
value);
/// Return the SNP annotation scale limit corresponding to a name.
/// NOTE(review): presumably the inverse of GetSNPScaleLimit_Name(); the
/// result for an unrecognized name is not visible here -- confirm.
856 static ESNPScaleLimit GetSNPScaleLimit_Value(
const string& name);
/// Report, as a bool, whether the given SNP annotation scale limit is allowed
/// for this Seq-id; does not modify this object (const).
/// NOTE(review): the criteria that make a limit "allowed" are not visible
/// here -- confirm against the implementation.
857 bool IsAllowedSNPScaleLimit(ESNPScaleLimit scale_limit)
const;
866 static ETypeVariant x_IdentifyTypeVariant(E_Choice
type,
870 E_Choice x_Init(list<CTempString>& fasta_pieces, E_Choice
type,
/// Const member that writes to the supplied output stream and returns
/// nothing.
/// NOTE(review): presumably emits this Seq-id's content in FASTA style
/// without the leading type tag (going by the name) -- confirm.
881 void x_WriteContentAsFasta(ostream&
out)
const;
897 fAllowUnderscores = 0x1
922 : m_Range(it.m_Range), m_Number(it.m_Number)
929 {
return m_Accession.empty() ? x_SetAccession() : m_Accession; }
931 {
return m_Accession.empty() ? &x_SetAccession() : &m_Accession; }
933 {
return *(*
this +
n); }
938 { m_Accession.erase(); ++m_Number;
return *
this; }
942 { m_Accession.erase(); --m_Number;
return *
this; }
950 { m_Accession.erase(); m_Number +=
n;
return *
this; }
952 { m_Accession.erase(); m_Number -=
n;
return *
this; }
/// Const member returning a reference to a string.
/// The inline accessors in this header call it only when m_Accession is
/// empty; otherwise they use m_Accession directly.
/// NOTE(review): presumably fills m_Accession (which would then need to be
/// mutable) for the current m_Number and returns it -- confirm.
970 const string& x_SetAccession(
void)
const;
984 {
return stop - start + 1; }
/// Override of the base class's virtual GetErrCodeString(); returns a C
/// string and does not modify this object (const).
/// NOTE(review): presumably the textual name of this exception's error code
/// -- confirm.
1010 virtual const char* GetErrCodeString(
void)
const override;
1027 template<
class container>
1031 ITERATE (
typename container, iter, ids) {
1040 template<
class container>
1044 return id ?
id->GetGi() :
ZERO_GI;
1049 template<
class container>
1052 ITERATE (
typename container, iter, ids) {
@ eBoth
Both preliminary and traceback stages.
*** Sequence identifiers ******************************** *
Base class for all serializable objects.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Abstract base class for lightweight line-by-line reading.
string GetSeqIdString(const CSeq_id &id)
bool operator<(const CEquivRange &A, const CEquivRange &B)
std::ofstream out("events_result.xml")
main entry point for tests
static const char * str(char *buf, int n)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
EErrCode
Error types that an application can generate.
CVect2< NCBI_PROMOTE(int,U) > operator*(int v1, const CVect2< U > &v2)
ESerialRecursionMode
How to assign and compare child sub-objects of serial objects.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual void WriteAsFasta(CNcbiOstream &out) const
const_iterator operator--(int)
TGi FindGi(const container &ids)
Return gi from id list if exists, return 0 otherwise.
int WorstRankScore(void) const
int StrictFastaAAScore(void) const
static int StrictBestRank(const CRef< CSeq_id > &id)
const_iterator(const SSeqIdRange &range)
CProxy DumpAsFasta(void) const
const_iterator & operator++(void)
CConstRef< CSeq_id > GetSeq_idByType(const container &ids, CSeq_id::E_Choice choice)
Search the container of CRef<CSeq_id> for the id of given type.
static int StrictScore(const CRef< CSeq_id > &id)
EAccessionInfo
For IdentifyAccession (below)
const_iterator & operator+=(int n)
CConstRef< CSeq_id > Get_ConstRef_Seq_id(TId &id)
Dummy convertor for container search functions.
int BestRankScore(void) const
const SSeqIdRange * m_Range
static E_Choice GetAccType(EAccessionInfo info)
static int StrictBlastRank(const CRef< CSeq_id > &id)
E_SIC
Compare return values.
const_iterator(const SSeqIdRange &range, int number)
EFastaAsTypeAndContent
Tag for method variants that would otherwise be ambiguous.
int BaseWorstRankScore(void) const
int TextScore(void) const
bool operator<=(const const_iterator &it) const
EComposeOSLTFlags
Flags specifying special treatment for certain types of Seq-ids in ComposeOSLT().
const_iterator end(void) const
int BlastScore(void) const
const string * operator->(void) const
int TFlags
binary OR of EFlags
const_iterator operator+(int n) const
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
int operator-(const const_iterator &it) const
static int WorstRank(const CRef< CSeq_id > &id)
const_iterator operator++(int)
static int StrictFastaNARank(const CRef< CSeq_id > &id)
const_iterator(const const_iterator &it)
const_iterator begin(void) const
int StrictBestRankScore(void) const
static int StrictFastaAARank(const CRef< CSeq_id > &id)
EMaxScore
Numerical quality ranking; lower is better.
NCBI_EXCEPTION_DEFAULT(CSeqIdException, CException)
int StrictTextScore(void) const
int StrictBlastScore(void) const
bool operator==(const const_iterator &it) const
const_iterator operator-(int n) const
EStringFormat
Get a string representation of the sequence IDs of a given bioseq.
CSeq_id::EAccessionInfo acc_info
ptrdiff_t difference_type
#define NCBI_ACC(type, div, mol)
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
const_iterator & operator-=(int n)
bool operator>=(const const_iterator &it) const
static int BestRank(const CRef< CSeq_id > &id)
bool operator<(const CSeq_id &sid2) const
random_access_iterator_tag iterator_category
string operator[](int n) const
int StrictAdjustScore(int base_score) const
static int BlastRank(const CRef< CSeq_id > &id)
ELabelType
return the label for a given string
const_iterator & operator--(void)
DECLARE_SAFE_FLAGS_TYPE(EAdjustScoreFlags, TAdjustScoreFlags)
string GetLabel(const CSeq_id &id)
static int FastaNARank(const CRef< CSeq_id > &id)
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
int FastaAAScore(void) const
const_iterator & operator=(const const_iterator &it)
set< CSeq_id_Handle > TSeqIdHandles
int StrictFastaNAScore(void) const
bool operator!=(const const_iterator &it) const
CConstRef< CSeq_id > FindTextseq_id(const container &ids)
Return text seq-id from id list if exists, return 0 otherwise.
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
ESNPScaleLimit
SNP annotation scale limits.
int FastaNAScore(void) const
static int FastaAARank(const CRef< CSeq_id > &id)
@ eUnknownType
Unrecognized Seq-id type.
@ e_NO
SeqIds compared, but are different (contrast e_DIFF: different SeqId types-can't compare)
@ e_YES
SeqIds compared, and are equivalent.
@ eFormat_BestWithoutVersion
@ eFastaContent
Like eFasta, but without any tag.
@ eContent
Untagged human-readable accession or the like.
@ eBoth
Type and content, delimited by a vertical bar.
@ eFasta
Tagged ID in NCBI's traditional FASTA style.
@ eType
FASTA-style type, or database in GeneralDbIsContent mode.
@ eSNPScaleLimit_Supercontig
bool IsValid(const CSeq_point &pt, CScope *scope)
Checks that point >= 0 and point < length of Bioseq.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
NCBI_NS_STD::string::size_type SIZE_TYPE
#define NCBI_SEQLOC_EXPORT
static const char label[]
CSeq_id_Base & operator=(const CSeq_id_Base &)
E_Choice Which(void) const
Which variant is currently selected.
@ e_Gibbmt
Geninfo backbone moltype.
@ e_Giim
Geninfo import id.
@ e_Other
for historical reasons, 'other' = 'refseq'
@ e_Gpipe
Internal NCBI genome pipeline processing ID.
@ e_Tpe
Third Party Annot/Seq EMBL.
@ e_Tpd
Third Party Annot/Seq DDBJ.
@ e_Gibbsq
Geninfo backbone seqid.
@ e_General
for other databases
@ e_Gi
GenInfo Integrated Database.
@ e_Named_annot_track
Internal named annotation tracking ID.
@ e_not_set
No variant selected.
@ e_Tpg
Third Party Annot/Seq Genbank.
list< CRef< CSeq_id > > TId
unsigned int
A callback function used to compare two keys in a database.
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
range(_Ty, _Ty) -> range< _Ty >
const GenericPointer< typename T::ValueType > T2 value
const CharType(& source)[N]
std::istream & in(std::istream &in_, double &x_)
string GetStringDescr(const CBioseq &bioseq)
static const string kMaxScore
void Dump(CSplitCacheApp *app, const C &obj, ESerialDataFormat format, const string &key, const string &suffix=kEmptyStr)
bool operator>(const typename tree< T, tree_node_allocator >::iterator_base &one, const typename tree< T, tree_node_allocator >::iterator_base &two)
static bool ambig(char c)