33 #ifndef VARIATION_UTIL2_HPP_
34 #define VARIATION_UTIL2_HPP_
339 const string& codon_from,
340 const string& prot_to,
341 vector<string>& codons_to);
407 return !this->cdregion_feat ?
false
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
namespace ncbi::objects::
Cache seq-data in the CDS regions and the cds features by location.
void Get(const CSeq_loc &loc, TCdregions &cdregions)
map< CSeq_id_Handle, SSeqData > TSeqDataMap
vector< SCdregion > TCdregions
TSeqDataMap m_seq_data_map
map< CSeq_id_Handle, TRangeMap > TIdRangeMap
CCdregionIndex(CScope &scope, TOptions options)
CRangeMap< TCdregions, TSeqPos > TRangeMap
void x_Index(const CSeq_id_Handle &idh)
CRef< CSeq_literal > GetCachedLiteralAtLoc(const CSeq_loc &loc)
void x_CacheSeqData(const CSeq_loc &loc, const CSeq_id_Handle &idh)
Given a seq-loc, compute CVariantProperties::TGene_location from annotation.
static TLocsPair s_GetIntronsAndSpliceSiteLocs(const CSeq_loc &rna_loc)
pair< CRef< CSeq_loc >, CRef< CSeq_loc > > TLocsPair
static int s_GetGeneIdForProduct(CBioseq_Handle bsh)
void GetLocationProperties(const CSeq_loc &loc, TGeneIDAndPropVector &v)
CRangeMap< TGeneIDAndPropVector, TSeqPos > TRangeMap
pair< int, CVariantProperties::TGene_location > TGeneIDAndProp
CVariantPropertiesIndex(CScope &scope)
static TLocsPair s_GetNeighborhoodLocs(const CSeq_loc &gene_loc, TSeqPos max_pos)
static TLocsPair s_GetUTRLocs(const CSeq_loc &cds_loc, const CSeq_loc &parent_loc)
static TLocsPair s_GetStartAndStopCodonsLocs(const CSeq_loc &cds_loc)
static int s_GetGeneID(const CMappedFeat &mf, feature::CFeatTree &ft)
void x_Add(const CSeq_loc &loc, int gene_id, CVariantProperties::TGene_location prop)
map< CSeq_id_Handle, TRangeMap > TIdRangeMap
void x_Index(const CSeq_id_Handle &idh)
vector< TGeneIDAndProp > TGeneIDAndPropVector
CRef< CVariantPlacement > RemapToAnnotatedTarget(const CVariation &v, const CSeq_id &target)
Remap variation from product coordinates onto a nucleotide sequence on which this product is annotate...
static void s_AddInstOffsetsFromPlacementOffsets(CVariation_inst &vi, const CVariantPlacement &p)
static const CVariation::TPlacements * s_GetPlacements(const CVariation &v)
@ fAA2NA_truncate_common_prefix_and_suffix
void FindLocationProperties(const CSeq_align &transcript_aln, const CSeq_loc &query_loc, TSOTerms &terms)
Find location properties based on alignment.
CRef< CSeq_literal > GetLiteralAtLoc(const CSeq_loc &loc)
CRef< CVariation > x_AsVariation(const CVariation_ref &vr)
SFlankLocs CreateFlankLocs(const CSeq_loc &loc, TSeqPos len)
vector< ESOTerm > TSOTerms
CRef< CVariation > x_CreateUnknownVariation(const CSeq_id &id, CVariantPlacement::TMol mol)
bool CheckAmbiguitiesInLiterals(CVariation &v)
if variation.data contains a seq-literal with non-ACGT residues, attach VariationException to the fir...
static size_t s_CountMatches(const string &a, const string &b)
void x_AdjustDelinsToInterval(CVariation &v, const CSeq_loc &loc)
ETestStatus CheckExonBoundary(const CVariantPlacement &p, const CSeq_align &aln)
static void s_FactorOutPlacements(CVariation &v)
If at any level in variation-set all variations have all same placements, move them to the parent lev...
static CConstRef< CVariation > s_FindConsequenceForPlacement(const CVariation &v, const CVariantPlacement &p)
Find attached consequence variation in v that corresponds to p (has same seq-id).
CRef< CVariation > TranslateNAtoAA(const CVariation_inst &nuc_inst, const CVariantPlacement &p, const CSeq_feat &cds_feat)
Evaluate protein effect of a single-inst @ single-placement.
static string AsString(ESOTerm term)
static CRef< CSeq_literal > s_SpliceLiterals(const CSeq_literal &payload, const CSeq_literal &ref, TSeqPos pos)
insert seq-literal payload into ref before pos (pos=0 -> prepend; pos=ref.len -> append)
void x_SetVariantPropertiesForIntronic(CVariantPlacement &p, int offset, const CSeq_loc &loc, CBioseq_Handle &bsh)
bool AttachSeq(CVariantPlacement &p, TSeqPos max_len=kMaxAttachSeqLen)
If have offsets (intronic) or too long, return false; else set seq field on the placement and return ...
CRef< CVariation > AsVariation(const CSeq_feat &variation_ref)
@ fOpt_cache_exon_sequence
Use when there will be many calls to calculate protein consequence per sequence.
void x_ChangeToDelins(CVariation &v)
void x_SetVariantProperties(CVariantProperties &p, const CVariation_inst &vi, const CSeq_loc &loc)
CVariationUtil(CScope &scope, TOptions options=fOpt_default)
bool CheckPlacement(CVariantPlacement &p)
if placement is invalid SeqLocCheck fails, or offsets out of order, attach VariationException and ret...
static void s_AddIntronicOffsets(CVariantPlacement &p, const CSpliced_seg &ss, CScope *scope)
CVariantPlacement::TMol GetMolType(const CSeq_id &id)
void x_InferNAfromAA(CVariation &v, TAA2NAFlags flags)
static string s_CollapseAmbiguities(const vector< string > &seqs)
static void s_FindLocationProperties(CConstRef< CSeq_loc > rna_loc, CConstRef< CSeq_loc > cds_loc, const CSeq_loc &query_loc, TSOTerms &terms)
void ChangeToDelins(CVariation &v)
static void s_ResolveIntronicOffsets(CVariantPlacement &p)
static void s_UntranslateProt(const string &prot_str, vector< string > &codons)
void AsSOTerms(const CVariantProperties &p, TSOTerms &terms)
static void s_AttachGeneIDdbxref(CVariantPlacement &p, int gene_id)
static const CConstRef< CSeq_literal > s_FindFirstLiteral(const CVariation &v)
CRef< CSeq_literal > x_GetLiteralAtLoc(const CSeq_loc &loc)
CRef< CVariation > InferNAfromAA(const CVariation &prot_variation, TAA2NAFlags flags=fAA2NA_default)
void FlipStrand(CVariation &v) const
Other utility methods:
void SetPlacementProperties(CVariantPlacement &placement)
Methods to compute properties.
CVariantPropertiesIndex m_variant_properties_index
CRef< CVariation_ref > x_AsVariation_ref(const CVariation &v, const CVariantPlacement &p)
CRef< CVariantPlacement > Remap(const CVariantPlacement &p, const CSeq_align &aln, bool check_placements=true)
Methods to remap a VariantPlacement.
static bool s_IsInstStrandFlippable(const CVariation &v, const CVariation_inst &inst)
CCdregionIndex m_cdregion_index
static TSeqPos s_GetLength(const CVariantPlacement &p, CScope *scope)
ESOTerm
Supported SO-terms.
@ eSO_splice_acceptor_variant
@ eSO_nc_transcript_variant
@ eSO_initiator_codon_variant
@ eSO_coding_sequence_variant
@ eSO_2KB_upstream_variant
@ eSO_splice_donor_variant
@ eSO_3_prime_UTR_variant
@ eSO_5_prime_UTR_variant
@ eSO_500B_downstream_variant
@ eSO_terminator_codon_variant
TSeqPos GetEffectiveTranscriptLength(const CBioseq_Handle &bsh)
Length up to last position of the last exon (i.e.
void AttachProteinConsequences(CVariation &nuc_variation, const CSeq_id *=NULL, bool ignore_genomic=false)
Find the CDSes for the first placement; Compute prot consequence using TranslateNAtoAA for each and a...
void x_TranslateNAtoAA(CVariation &prot_variation)
CConstRef< CSeq_literal > x_FindOrCreateLiteral(const CVariation &v)
static void s_ConvertInstOffsetsToPlacementOffsets(CVariation &v, CVariantPlacement &p)
static const CConstRef< CSeq_literal > s_FindAssertedLiteral(const CVariation &v)
void AsVariation_feats(const CVariation &v, CSeq_annot::TData::TFtable &feats)
void SetVariantProperties(CVariation &v)
static CRef< CSeq_literal > s_CatLiterals(const CSeq_literal &a, const CSeq_literal &b)
join two seq-literals
int TAA2NAFlags
Methods to convert between nucleotide and protein.
void s_CalcPrecursorVariationCodon(const string &codon_from, const string &prot_to, vector< string > &codons_to)
CRef< CVariantPlacement > x_Remap(const CVariantPlacement &p, CSeq_loc_Mapper &mapper)
Include a standard set of the NCBI C++ Toolkit most basic headers.
const TResidue codons[4][4]
unsigned int TSeqPos
Type for sequence locations and lengths.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
list< CRef< CSeq_feat > > TFtable
list< CRef< CVariantPlacement > > TPlacements
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
bool operator<(const SCdregion &other) const
CConstRef< CSeq_feat > cdregion_feat
CRef< CSeq_loc_Mapper > mapper
Calculate upstream (first) and downstream (second) flanks for loc.
CRef< CSeq_loc > upstream
CRef< CSeq_loc > downstream