// Convert a strand value into an index: yields 0 when is_set is false,
// otherwise the strand value plus one, cast to size_t.
157 #define STRAND_TO_INDEX(is_set, strand) \
158 ((is_set) ? size_t((strand) + 1) : 0)
160 #define INDEX_TO_STRAND(idx) \
257 if ( !top_level_id ) {
260 if ( !syns->empty() ) {
261 top_level_id = syns->GetSeq_id_Handle(syns->begin()).GetSeqId();
303 if ( !syns->empty() ) {
304 top_id = syns->GetSeq_id_Handle(syns->begin()).GetSeqId();
505 TSeqPos src_from, src_len, dst_from, dst_len;
524 src_from, src_len, src_strand,
526 dst_from, dst_len, dst_strand);
539 dst_from = top_ref_start + shift;
542 src_from, src_len, src_strand,
544 dst_from, dst_len, dst_strand);
555 TSeqPos src_from, src_len, dst_from, dst_len;
560 list<CSeqMap_CI> refs;
561 refs.push_back(seg_it);
566 if (refs.empty() || refs.back().GetDepth() < seg_it.
GetDepth()) {
567 refs.push_back(seg_it);
572 if ( !refs.empty() ) {
579 src_from =
leaf.GetPosition();
580 src_len =
leaf.GetLength();
581 dst_from =
leaf.GetRefPosition();
586 *
leaf.GetRefSeqid().GetSeqId(),
587 dst_from, dst_len, dst_strand);
590 ITERATE(list<CSeqMap_CI>, it, refs) {
591 TSeqPos shift =
leaf.GetPosition() - it->GetPosition();
593 src_from = it->GetRefPosition() + shift;
594 src_len =
leaf.GetLength();
595 dst_from =
leaf.GetRefPosition();
598 *it->GetRefSeqid().GetSeqId(),
599 src_from, src_len, src_strand,
600 *
leaf.GetRefSeqid().GetSeqId(),
601 dst_from, dst_len, dst_strand);
603 while ( !refs.empty() && refs.back().GetDepth() >= seg_it.
GetDepth()) {
608 refs.push_back(seg_it);
618 TSeqPos seg_from, seg_len, ref_from, ref_len;
622 list<CSeqMap_CI> refs;
623 refs.push_back(seg_it);
628 if (refs.empty() || refs.back().GetDepth() < seg_it.
GetDepth()) {
629 refs.push_back(seg_it);
634 if ( !refs.empty() ) {
650 while ( !refs.empty() ) {
663 seg_from, seg_len, seg_strand,
665 ref_from, ref_len, ref_strand);
670 ref_from, ref_len, ref_strand,
672 seg_from, seg_len, seg_strand);
684 if ( refs.empty() ) {
700 ref_from, ref_len, ref_strand);
705 ref_from, ref_len, ref_strand,
707 seg_from, seg_len, seg_strand);
725 refs.push_back(seg_it);
750 syn_id->
Assign(*syn->GetSeqId());
751 bioseq->
SetId().push_back(syn_id);
768 if ( gc_assembly.
IsUnit() ) {
805 return id.IsLocal() &&
id.GetLocal().IsStr() &&
806 id.GetLocal().GetStr().find(
"_random") != string::npos;
825 switch ( gc_id.
Which() ) {
871 switch (
id.Which() ) {
874 return id.GetGenbank().IsSetGi() ?
875 &
id.GetGenbank().GetGi() : &
id.GetGenbank().GetPublic();
878 return &
id.GetGenbank().GetPublic();
883 return id.GetRefseq().IsSetGi() ?
884 &
id.GetRefseq().GetGi() : &
id.GetRefseq().GetPublic();
887 return &
id.GetRefseq().GetPublic();
892 id.GetExternal().GetExternal() ==
"UCSC") {
893 return &
id.GetExternal().GetId();
898 return &
id.GetPrivate();
925 switch (
id.Which() ) {
927 if (
id.GetGenbank().IsSetGi() && dst_id != &
id.GetGenbank().GetGi()) {
930 if (dst_id != &
id.GetGenbank().GetPublic()) {
933 if (
id.GetGenbank().IsSetGpipe() ) {
938 if (
id.GetRefseq().IsSetGi() && dst_id != &
id.GetRefseq().GetGi()) {
941 if (dst_id != &
id.GetRefseq().GetPublic()) {
944 if (
id.GetRefseq().IsSetGpipe() ) {
950 if (
id.GetPrivate().IsLocal())
continue;
951 if (dst_id != &
id.GetPrivate()) {
956 if (dst_id != &
id.GetExternal().
GetId()) {
962 "Unsupported alias type in GC-Sequence synonyms");
974 false, 0, hlen, hlen);
991 if ( (*it)->IsLiteral() ) {
992 chr_pos += (*it)->GetLiteral().GetLength();
994 if ( !(*it)->IsLoc() ) {
998 for (; loc_it; ++loc_it) {
999 if ( loc_it.
IsEmpty() )
continue;
1004 switch ( to_alias ) {
1008 seg_pos, seg_len, seg_str,
1009 *chr_id, chr_pos, chr_len,
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Seq-loc and seq-align mapper exceptions.
the sequence(s) representing this molecule in the case of 2L and 2R - the molecule is represented by ...
bool CanGetLength() const
Is the length statistic available?
TSeqPos GetLength() const
return the length of this sequence.
CScope * GetScopeOrNull(void) const
CScope & GetScope(void) const
Storage for multiple mapping ranges.
CScope_Mapper_Sequence_Info(CScope *scope)
virtual TSeqPos GetSequenceLength(const CSeq_id_Handle &idh)
Get sequence length or kInvalidSeqPos.
virtual void CollectSynonyms(const CSeq_id_Handle &id, TSynonyms &synonyms)
Collect all synonyms for the id including the id itself.
virtual TSeqType GetSequenceType(const CSeq_id_Handle &idh)
Get information about sequence type (nuc or prot).
Class used to map seq-alignments.
namespace ncbi::objects::
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
CSeq_loc_Mapper_Base –.
CSeq_loc_Mapper_Options –.
static CSeq_id_Handle GetSeq_id_Handle(const const_iterator &iter)
iterator_bool insert(const value_type &val)
static unsigned char depth[2 *(256+1+29)+1]
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
bool IsSetStrand(void) const
Get strand.
bool IsEmpty(void) const
True if the current location is empty.
TRange GetRange(void) const
Get the range.
ENa_strand GetStrand(void) const
const CSeq_id & GetSeq_id(void) const
Get seq_id of the current location.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
void x_InitializeAlign(const CSeq_align &map_align, const CSeq_id &to_id, const CSeq_id *from_id=nullptr)
TDstStrandMap m_DstRanges
CSeq_loc_Mapper_Options & SetMapperSequenceInfo(IMapper_Sequence_Info *seq_info)
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
IMapper_Sequence_Info * GetMapperSequenceInfo(void) const
Sequence type, length etc.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders. Add object to the scope with p...
EGCAssemblyAlias
Destination of seq-id mapping through a GC-Assembly.
EScopeFlag
Using CScope for virtual bioseqs created from GC-Assemblies.
void x_InitializeFeat(const CSeq_feat &map_feat, EFeatMapDirection dir)
EFeatMapDirection
Mapping direction used when initializing the mapper with a feature.
CSeq_loc_Mapper_Options m_MapOptions
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void x_NextMappingRange(const CSeq_id &src_id, TSeqPos &src_start, TSeqPos &src_len, ENa_strand src_strand, const CSeq_id &dst_id, TSeqPos &dst_start, TSeqPos &dst_len, ENa_strand dst_strand, const CInt_fuzz *fuzz_from=0, const CInt_fuzz *fuzz_to=0, int frame=0, TSeqPos src_bioseq_len=kInvalidSeqPos)
virtual CSeq_align_Mapper_Base * InitAlignMapper(const CSeq_align &src_align)
void x_InitGCAssembly(const CGC_Assembly &gc_assembly, EGCAssemblyAlias to_alias)
CRef< CSeq_entry > GetTSE(void) const
Create seq-entry with all parsed sequences, annotations etc.
CSeq_inst::TMol GetSequenceType(const CSeq_id &id, TGetFlags flags=0)
Get molecular type of sequence (protein/dna/rna). Return CSeq_inst::eMol_not_set if sequence is not fo...
const TSeqIds & GetTopLevelSequences(void) const
Get seq-ids for all top-level sequences in the assembly.
void x_InitializeSeqMapUp(CSeqMap_CI seg_it, const CSeq_id *top_id)
CSeq_loc_Mapper(CMappingRanges *mapping_ranges, CScope *scope=0, CSeq_loc_Mapper_Options options=CSeq_loc_Mapper_Options())
Mapping through a pre-filled CMappingRanges.
void x_InitializeLocs(const CSeq_loc &source, const CSeq_loc &target, int src_frame=0, int dst_frame=0)
CConstRef< CSynonymsSet > GetSynonyms(const CSeq_id &id)
Get bioseq synonyms, resolving to the bioseq in this scope.
bool x_IsUCSCRandomChr(const CGC_Sequence &gc_seq, CConstRef< CSeq_id > &chr_id, TSynonyms &synonyms) const
void x_PreserveDestinationLocs(void)
CBioseq_Handle x_AddVirtualBioseq(const TSynonyms &synonyms, const CGC_Sequence &gc_seq)
void AddScope(CScope &scope, TPriority pri=kPriority_Default)
Add the scope's datasources as a single group with the given priority. All data sources (data loaders ...
void x_InitializeSeqMap(const CSeqMap &seq_map, const CSeq_id *top_id, ESeqMapDirection direction)
void x_InitializeSeqMapDown(CSeqMap_CI seg_it, const CSeq_id *top_id)
void x_AddConversion(const CSeq_id &src_id, TSeqPos src_start, ENa_strand src_strand, const CSeq_id &dst_id, TSeqPos dst_start, ENa_strand dst_strand, TSeqPos length, bool ext_right, int frame, TSeqPos src_bioseq_len, TSeqPos dst_length)
void x_InitializeSeqMapSingleLevel(CSeqMap_CI seg_it, const CSeq_id *top_id, ESeqMapDirection direction)
bool GetMapSingleLevel(void) const
Mapping depth when using a seq-map, a bioseq or a GC-assembly.
void x_InitGCSequence(const CGC_Sequence &gc_seq, EGCAssemblyAlias to_alias)
@ eGCA_Genbank
Map to GenBank alias, prefer GI.
@ eGCA_UCSC
Map to UCSC alias.
@ eGCA_Refseq
Map to RefSeq alias, prefer GI.
@ eGCA_Other
Map to 'private' seq-id.
@ eGCA_GenbankAcc
Map to GenBank alias, prefer acc.ver.
@ eGCA_RefseqAcc
Map to RefSeq alias, prefer acc.ver.
@ eCopyScope
Create a new scope for virtual bioseqs.
@ eSeqMap_Down
map from a segmented bioseq to segments
@ eSeqMap_Up
map from segments to the top level bioseq
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
TSeqPos GetBioseqLength(void) const
bool CanGetInst_Length(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
TInst_Length GetInst_Length(void) const
const CSeqMap & GetSeqMap(void) const
Get sequence map.
CConstRef< CSynonymsSet > GetSynonyms(void) const
Get the bioseq's synonyms.
SSeqMapSelector & SetLinkUsedTSE(bool link=true)
TSeqPos GetRefPosition(void) const
SSeqMapSelector & SetFlags(TFlags flags)
Select segment type(s)
bool GetRefMinusStrand(void) const
CSeq_id_Handle GetRefSeqid(void) const
The following function makes sense only when the segment is a reference to another seq.
TSeqPos GetPosition(void) const
return position of current segment in sequence
TSeqPos GetLength(void) const
return length of current segment
size_t GetDepth(void) const
return the depth of current segment
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
void Reset(void)
Reset reference object.
TObjectType * GetPointerOrNull(void) const THROWS_NONE
Get pointer value.
position_type GetLength(void) const
static position_type GetWholeLength(void)
static TThisType GetWhole(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
TFrom GetFrom(void) const
Get the From member data.
const TStr & GetStr(void) const
Get the variant data.
TStr & SetStr(void)
Select the variant.
const TGenbank & GetGenbank(void) const
Get the variant data.
list< CRef< CGC_Sequence > > TSeqs
const TId & GetId(void) const
Get the Id member data.
const TUnit & GetUnit(void) const
Get the variant data.
const TStructure & GetStructure(void) const
Get the Structure member data.
bool IsSingle(void) const
Check if variant Single is selected.
const TGi & GetGi(void) const
Get the Gi member data.
const TRefseq & GetRefseq(void) const
Get the variant data.
bool IsSetMols(void) const
Collections of molecules for this assembly. Check if a value has been assigned to Mols data member.
const TPrivate & GetPrivate(void) const
Get the variant data.
list< CRef< CGC_TypedSeqId > > TSeq_id_synonyms
bool IsAssembly_set(void) const
Check if variant Assembly_set is selected.
bool IsSetMore_assemblies(void) const
Check if a value has been assigned to More_assemblies data member.
bool IsSetOther_sequences(void) const
On primary assembly-unit: here will be the unplaced sequences On alt-loci: list of sequences aligned/...
const TOther_sequences & GetOther_sequences(void) const
Get the Other_sequences member data.
const TPrimary_assembly & GetPrimary_assembly(void) const
Get the Primary_assembly member data.
bool IsSetStructure(void) const
Locations of ordered scaffolds/components. Check if a value has been assigned to Structure data member...
E_Choice Which(void) const
Which variant is currently selected.
const TExternal & GetExternal(void) const
Get the variant data.
const TSeq_id_synonyms & GetSeq_id_synonyms(void) const
Get the Seq_id_synonyms member data.
bool IsSetGi(void) const
Optional since not all sequences have GIs. Similarity: relationship between this synonym to main seqid...
list< CRef< CGC_TaggedSequences > > TSequences
const TAssembly_set & GetAssembly_set(void) const
Get the variant data.
const TMols & GetMols(void) const
Get the Mols member data.
list< CRef< CGC_Replicon > > TMols
bool IsSetSequences(void) const
placed: populated both on chromosome and scaffold levels unlocalized: populated on chromosome level C...
const TSequences & GetSequences(void) const
Get the Sequences member data.
bool IsUnit(void) const
Check if variant Unit is selected.
list< CRef< CGC_Assembly > > TMore_assemblies
list< CRef< CGC_Sequence > > TSet
const TSingle & GetSingle(void) const
Get the variant data.
const TSet & GetSet(void) const
Get the variant data.
const TMore_assemblies & GetMore_assemblies(void) const
Get the More_assemblies member data.
bool IsSetSeq_id_synonyms(void) const
Other known identifiers: Local / gpipe-satellite / genbank / refseq. Check if a value has been assigne...
const TSeq_id & GetSeq_id(void) const
Get the Seq_id member data.
ENa_strand
strand of nucleic acid
TLocal & SetLocal(void)
Select the variant.
const TLocal & GetLocal(void) const
Get the variant data.
TId & SetId(void)
Assign a value to Id data member.
void SetInst(TInst &value)
Assign a value to Inst data member.
const Tdata & Get(void) const
Get the member data.
list< CRef< CDelta_seq > > Tdata
@ eRepr_virtual
no seq data
@ eMol_na
just a nucleic acid
const CharType(& source)[N]
CSeq_loc_Mapper_Options & SetOptionsScope(CSeq_loc_Mapper_Options &options, CScope *scope)
ENa_strand s_IndexToStrand(size_t idx)
bool s_IsLocalRandomChrId(const CSeq_id &id)
const CSeq_id * s_GetSeqIdAlias(const CGC_TypedSeqId &id, CSeq_loc_Mapper::EGCAssemblyAlias alias)
Uint4 GetSequenceType(const CBioseq_Handle &bsh)
Return a (corrected) set of flags identifying the sequence type.
Selector used in CSeqMap methods returning iterators.