45 #define NCBI_USE_ERRCODE_X Objtools_Aln_Sparse
53 objects::CScope& scope)
115 gaps.reserve(gaps.size() + ins_vec.size());
116 for (
auto i = ins_vec.begin();
i != ins_vec.end(); ++
i) {
118 gap.
from =
i->GetFirstFrom();
120 gap.
len =
i->GetLength();
131 stable_sort(gaps.begin(), gaps.end());
135 gap_it->shift = shift;
136 shift += gap_it->len;
148 CPairwiseAln::const_iterator seg_it = pw.
begin();
149 TGapRanges::const_iterator gap = gaps.begin();
154 bool first_direct =
true;
155 bool second_direct =
true;
156 while (seg_it != pw.
end()) {
162 while (gap != gaps.end() &&
164 if (gap->row ==
row) {
167 gap->second_from, gap->len, gap->direct);
188 shift = gap->shift + gap->len;
196 while (gap != gaps.end()) {
197 if (gap->row ==
row) {
199 last_to, gap->len, gap->direct);
204 shift = gap->shift + gap->len;
332 const int k_unasigned_base_width = 0;
333 int base_width = k_unasigned_base_width;
335 if (base_width == k_unasigned_base_width) {
343 if (base_width != 1) {
365 return T::eBackwards;
376 bool try_reverse_dir)
const
387 bool try_reverse_dir)
const
402 string errstr =
"Invalid bioseq handle. Seq id \"" +
403 GetSeqId(
row).AsFastaString() +
"\" not in scope?";
457 if ( ref )
return ref->GetGcode();
469 size_t na_remainder = na.size() % 3;
470 size_t na_size = na.size() - na_remainder;
473 aa.resize(na_size / 3 + (na_remainder ? 1 : 0));
476 if ( na.empty() )
return;
480 for (
size_t na_i = 0; na_i < na_size; ) {
481 for (
size_t i = 0;
i < 3; ++
i, ++na_i) {
499 bool force_translation)
const
507 const TRange &rq_seq_range,
508 bool force_translation)
const
512 TRange seq_range = rq_seq_range;
525 if (seq_range.
GetFrom() % 3 > 0) {
529 force_translation =
false;
531 if (tr_to > tr_from) {
544 if ( force_translation ) {
555 bool force_translation)
const
570 if (pairwise_aln.
empty()) {
581 bool translate = force_translation || pairwise_aln.
GetSecondId()->IsProtein();
601 bool is_first_seg =
true;
602 size_t trim_from = 0;
608 if ( row_r.
Empty() ) {
610 is_first_seg =
false;
615 if (base_width == 1) {
621 vec_size - row_r.
GetFrom(), s);
633 if (tr_from % 3 > 0) {
634 if (tr_from == split_codon_pos) {
637 if ( is_first_seg ) trim_from = tr_from % 3;
639 tr_from -= tr_from % 3;
643 off += 3 - (tr_from % 3);
644 tr_from += 3 - (tr_from % 3);
648 split_codon_pos = tr_to;
655 if (tr_to == split_codon_pos) {
658 if ( is_first_seg ) trim_from = 3 - (tr_to % 3);
659 off -= 3 - (tr_to % 3);
660 tr_to += 3 - (tr_to % 3);
667 if (tr_from % 3 > 0) {
668 split_codon_pos = tr_from;
669 trim_to = 3 - (tr_from % 3);
670 tr_from += 3 - (tr_from % 3);
677 prot_r.
SetOpen(tr_from / 3, tr_to / 3);
684 vec_size - prot_r.
GetFrom(), s);
688 size_t len =
min(buf_size - off, s.size());
700 is_first_seg =
false;
702 if (translate && (
size_t)aln_range.
GetLength() >= trim_from + trim_to) {
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
CRange< position_type > GetSecondRange() const
class CAlignRangeCollectionList<TAlignRange> represents a sorted collection of TAlignRange.
const_iterator begin() const
ESearchDirection
adding empty ranges is considered valid, they are simply ignored
@ fMixedDir
contains at least one reversed range
@ fInvalid
collection was modified and not validated
@ fReversed
contains at least one direct range
@ fUnsorted
one or more policies violated
const TInsertions & GetInsertions() const
Each insertion shows where the 'first' sequence has a gap while the 'second' sequence has the inserti...
vector< TAlignRange > TInsertions
const_iterator end() const
CAlignRange Represents an element of pairwise alignment of two sequences.
Query-anchored alignment can be 2 or multi-dimensional.
const TPairwiseAlnVector & GetPairwiseAlns(void) const
The vector of pairwise alns.
TDim GetDim(void) const
How many rows.
void SetAnchorRow(TDim anchor_row)
Modify anchor row (never do this unless you are creating a new alignment and know what you're doing).
TPairwiseAlnVector & SetPairwiseAlns(void)
Modify pairwise alns.
void SetScore(int score)
Set the total score.
TDim GetAnchorRow(void) const
Which is the anchor row?
int GetScore(void) const
What is the total score?
void SetDim(TDim dim)
Modify the number of rows.
int GetGenCode(int def=1) const
static const CTrans_table & GetTransTable(int id)
A pairwise aln is a collection of ranges for a pair of rows.
int GetSecondBaseWidth(void) const
Base width of the second row.
const TAlnSeqIdIRef & GetFirstId(void) const
Get first sequence id.
const TAlnSeqIdIRef & GetSecondId(void) const
Get second sequence id.
TSignedSeqPos GetSeqAlnStart(TNumrow row) const
TRng GetAlnRange(void) const
Get whole alignment range.
const TAlnRngColl & GetAlignCollection(TNumrow row)
Get pairwise alignment for the row.
TSignedSeqPos GetSeqAlnStop(TNumrow row) const
void x_Build(const CAnchoredAln &src_align)
void SetGapChar(TResidue gap_char)
Gap character modifier.
static void TranslateNAToAA(const string &na, string &aa, int gen_code=kDefaultGenCode)
TRange GetSeqRange(TNumrow row) const
Get sequence range in sequence coords.
CAnchoredAln::TDim TDim
Synonym of TNumrow.
CSparseAln(const CAnchoredAln &anchored_aln, objects::CScope &scope)
Constructor.
CRef< CAnchoredAln > m_Aln
TSeqPos GetSeqStop(TNumrow row) const
virtual IAlnSegmentIterator * CreateSegmentIterator(TNumrow row, const TSignedRange &range, IAlnSegmentIterator::EFlags flags) const
Create segment iterator.
string & GetAlnSeqString(TNumrow row, string &buffer, const TSignedRange &rq_aln_rng, bool force_translation=false) const
Fetch alignment sequence data.
TSignedSeqPos GetAlnPosFromSeqPos(TNumrow row, TSeqPos seq_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
Map sequence position to alignment coordinates.
int x_GetGenCode(TNumrow row) const
TSignedSeqPos GetSeqPosFromAlnPos(TNumrow for_row, TSeqPos aln_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
vector< objects::CBioseq_Handle > m_BioseqHandles
bool IsTranslated(void) const
Whether the alignment is translated (heterogeneous), e.g. nuc-prot.
vector< TRng > m_SecondRanges
bool IsPositiveStrand(TNumrow row) const
Check direction of the row.
TDim GetDim(void) const
Alignment dimension (number of sequence rows in the alignment)
const objects::CBioseq_Handle & GetBioseqHandle(TNumrow row) const
Get bioseq handle for the row.
virtual ~CSparseAln(void)
Destructor.
CRef< objects::CScope > m_Scope
TSeqPos GetSeqStart(TNumrow row) const
TSignedRange GetSeqAlnRange(TNumrow row) const
Get sequence range in alignment coords (strand ignored).
CRef< objects::CScope > GetScope(void) const
Scope accessor.
vector< CRef< CSeqVector > > m_SeqVectors
CSeqVector & x_GetSeqVector(TNumrow row) const
string & GetSeqString(TNumrow row, string &buffer, TSeqPos seq_from, TSeqPos seq_to, bool force_translation=false) const
Fetch sequence data for the given row and range.
bool IsNegativeStrand(TNumrow row) const
const objects::CSeq_id & GetSeqId(TNumrow row) const
Get seq-id for the row.
Implementation of IAlnSegmentIterator for CSparseAln.
char GetCodonResidue(int state) const
static int NextCodonState(int state, unsigned char ch)
CRange< TSignedSeqPos > TSignedRange
objects::CSeqVector::TResidue TResidue
ESearchDirection
Position search options.
@ eRight
Towards higher aln coord (always to the right)
@ eLeft
Towards lower aln coord (always to the left)
@ eBackwards
Towards lower seq coord (to the left if plus strand, right if minus)
@ eForward
Towards higher seq coord (to the right if plus strand, left if minus)
Alignment segment iterator interface.
@ eSkipInserts
Iterate segments where at least some rows are aligned (including gap segments)
virtual const TSignedRange & GetRange(void) const =0
Get the selected row range.
virtual const TSignedRange & GetAlnRange(void) const =0
Get alignment range for the segment.
unsigned int TSeqPos
Type for sequence locations and lengths.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const CBioSource * GetBioSource(const CBioseq &bioseq)
Retrieve the BioSource object for a given bioseq handle.
const COrg_ref * GetOrg_refOrNull(const CBioseq_Handle &handle)
Return the pointer to org-ref associated with a given sequence or null if there is no org-ref associa...
@ eStrand_Plus
Plus strand.
@ eStrand_Minus
Minus strand.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
bool IsProtein(void) const
void SetCoding(TCoding coding)
void SetIupacCoding(void)
Set coding to either Iupacaa or Iupacna depending on molecule type.
bool IsNucleotide(void) const
void Reset(void)
Reset reference object.
position_type GetLength(void) const
TThisType & CombineWith(const TThisType &r)
position_type GetToOpen(void) const
position_type GetFirstToOpen(void) const
TThisType & SetLength(position_type len)
bool IsFirstDirect() const
TThisType & SetSecondFrom(position_type second_from)
position_type GetSecondFrom(void) const
TThisType & SetOpen(position_type from, position_type toOpen)
void SetDirect(bool direct=true)
TThisType & SetFirstFrom(position_type from)
void SetFirstDirect(bool direct=true)
position_type GetFirstFrom(void) const
position_type GetSecondToOpen(void) const
position_type GetLength(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
TFrom GetFrom(void) const
Get the From member data.
@ e_not_set
No variant selected.
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::SIZE size
static pcre_uint8 * buffer
vector< SGapRange > TGapRanges
USING_SCOPE(ncbi::objects)
CSparseAln::TAlnRngColl::ESearchDirection GetCollectionSearchDirection(CSparseAln::ESearchDirection dir)
#define row(bind, expected)
TSignedSeqPos second_from
bool operator<(const SGapRange &rg) const