CPP_DOC/doxyhtml/alnvec_8cpp_source.html

 /*  $Id: alnvec.cpp 100369 2023-07-25 17:29:42Z grichenk $

 * ===========================================================================

 *

 *                            PUBLIC DOMAIN NOTICE

 *               National Center for Biotechnology Information

 *

 *  This software/database is a "United States Government Work" under the

 *  terms of the United States Copyright Act.  It was written as part of

 *  the author's official duties as a United States Government employee and

 *  thus cannot be copyrighted.  This software/database is freely available

 *  to the public for use. The National Library of Medicine and the U.S.

 *  Government have not placed any restriction on its use or reproduction.

 *

 *  Although all reasonable efforts have been taken to ensure the accuracy

 *  and reliability of the software and data, the NLM and the U.S.

 *  Government do not and cannot warrant the performance or results that

 *  may be obtained by using this software or data. The NLM and the U.S.

 *  Government disclaim all warranties, express or implied, including

 *  warranties of performance, merchantability or fitness for any particular

 *  purpose.

 *

 *  Please cite the author in any work or product based on this material.

 *

 * ===========================================================================

 *

 * Author:  Kamen Todorov, NCBI

 *

 * File Description:

 *   Access to the actual aligned residues

 *

 * ===========================================================================

 */


 #include <ncbi_pch.hpp>

 #include <objtools/alnmgr/alnvec.hpp>


 // Objects includes

 #include <objects/seq/Bioseq.hpp>

 #include <objects/seq/IUPACna.hpp>

 #include <objects/seq/Seq_descr.hpp>

 #include <objects/seq/Seqdesc.hpp>

 #include <objects/seq/Seq_inst.hpp>

 #include <objects/seqset/Seq_entry.hpp>

 #include <objects/seqloc/Seq_id.hpp>

 #include <objects/seqloc/Seq_interval.hpp>

 #include <objects/seqloc/Seq_loc.hpp>

 #include <objects/general/Object_id.hpp>

 #include <objects/seqfeat/Genetic_code_table.hpp>


 // Object Manager includes

 #include <objmgr/scope.hpp>

 #include <objmgr/seq_vector.hpp>


 #include <util/tables/raw_scoremat.h>


 BEGIN_NCBI_SCOPE

 BEGIN_objects_SCOPE // namespace ncbi::objects::


 // Construct over dense-seg 'ds' without specifying an anchor row.
 // A pointer to 'scope' is kept; the gap/end character flags start out
 // cleared and both residue codings start out as "not set".
 CAlnVec::CAlnVec(const CDense_seg& ds, CScope& scope)
     : CAlnMap(ds), m_Scope(&scope),
       m_set_GapChar(false), m_set_EndChar(false),
       m_NaCoding(CSeq_data::e_not_set), m_AaCoding(CSeq_data::e_not_set)
 {}


 // Construct over dense-seg 'ds', forwarding 'anchor' to the CAlnMap base.
 // A pointer to 'scope' is kept; the gap/end character flags start out
 // cleared and both residue codings start out as "not set".
 CAlnVec::CAlnVec(const CDense_seg& ds, TNumrow anchor, CScope& scope)
     : CAlnMap(ds, anchor), m_Scope(&scope),
       m_set_GapChar(false), m_set_EndChar(false),
       m_NaCoding(CSeq_data::e_not_set), m_AaCoding(CSeq_data::e_not_set)
 {}


 // Destructor: the body is intentionally empty.
 CAlnVec::~CAlnVec()
 {
 }


 // Return the bioseq handle for alignment row 'row'.
 //
 // Handles are looked up in m_BioseqHandlesCache first; on a miss the
 // row's Seq-id is resolved through the scope and the result is cached.
 // Throws CAlnException(eInvalidSeqId) when the scope yields an invalid
 // handle.
 const CBioseq_Handle& CAlnVec::GetBioseqHandle(TNumrow row) const
 {
     TBioseqHandleCache::iterator cached = m_BioseqHandlesCache.find(row);
     if (cached != m_BioseqHandlesCache.end()) {
         return cached->second;
     }

     // cache miss: ask the scope
     CBioseq_Handle resolved = GetScope().GetBioseqHandle(GetSeqId(row));
     if (resolved) {
         return m_BioseqHandlesCache[row] = resolved;
     }

     string errstr("CAlnVec::GetBioseqHandle(): ");
     errstr += "Seq-id cannot be resolved: ";
     errstr += GetSeqId(row).AsFastaString();

     NCBI_THROW(CAlnException, eInvalidSeqId, errstr);
 }


 // Return the (cached) sequence vector for alignment row 'row'.
 //
 // On a cache miss a vector is created from the row's bioseq handle in
 // IUPAC coding, on the plus or minus strand according to
 // IsPositiveStrand(row), and stored in m_SeqVectorCache.
 // On every call the coding is (re)applied: the configured m_NaCoding /
 // m_AaCoding when set, IUPAC otherwise.
 CSeqVector& CAlnVec::x_GetSeqVector(TNumrow row) const
 {
     CRef<CSeqVector> vec_ref;

     TSeqVectorCache::iterator cached = m_SeqVectorCache.find(row);
     if (cached == m_SeqVectorCache.end()) {
         CBioseq_Handle handle = GetBioseqHandle(row);
         vec_ref.Reset(new CSeqVector(handle.GetSeqVector
             (CBioseq_Handle::eCoding_Iupac,
              IsPositiveStrand(row) ?
              CBioseq_Handle::eStrand_Plus :
              CBioseq_Handle::eStrand_Minus)));
         m_SeqVectorCache[row] = vec_ref;
     } else {
         vec_ref = cached->second;
     }

     if ( vec_ref->IsNucleotide() ) {
         if (m_NaCoding == CSeq_data::e_not_set) {
             vec_ref->SetIupacCoding();
         } else {
             vec_ref->SetCoding(m_NaCoding);
         }
     } else if ( vec_ref->IsProtein() ) {
         if (m_AaCoding == CSeq_data::e_not_set) {
             vec_ref->SetIupacCoding();
         } else {
             vec_ref->SetCoding(m_AaCoding);
         }
     }
     return *vec_ref;
 }


 // Fill 'buffer' with the residues of row 'row' over the alignment range
 // 'aln_rng' and return a reference to it.
 //
 // The range is split into chunks (inserts and unaligned gaps skipped).
 // Sequence chunks contribute their residues, translated to amino acids
 // when the row width is 3.  All other chunks contribute one fill
 // character per alignment position: the end character when the chunk has
 // no sequence on its left or right, the row's gap character otherwise.
 //
 // The fill is appended straight into 'buffer' instead of through a
 // temporary new[] array, so nothing can leak if an accessor or the
 // append throws.
 // NOTE(review): unlike the former C-string append, a NUL fill character
 // would now be appended verbatim rather than dropped.
 string& CAlnVec::GetAlnSeqString(string& buffer,
                                  TNumrow row,
                                  const TSignedRange& aln_rng) const
 {
     string buff;
     buffer.erase();

     CSeqVector& seq_vec      = x_GetSeqVector(row);
     TSeqPos     seq_vec_size = seq_vec.size();

     // get the chunks which are aligned to seq on anchor
     CRef<CAlnMap::CAlnChunkVec> chunk_vec =
         GetAlnChunks(row, aln_rng, fSkipInserts | fSkipUnalignedGaps);

     // for each chunk
     for (int i=0; i<chunk_vec->size(); i++) {
         CConstRef<CAlnMap::CAlnChunk> chunk = (*chunk_vec)[i];

         if (chunk->GetType() & fSeq) {
             // add the sequence string
             if (IsPositiveStrand(row)) {
                 seq_vec.GetSeqData(chunk->GetRange().GetFrom(),
                                    chunk->GetRange().GetTo() + 1,
                                    buff);
             } else {
                 // the vector was created on the minus strand, so mirror
                 // the coordinates around the sequence length
                 seq_vec.GetSeqData(seq_vec_size - chunk->GetRange().GetTo() - 1,
                                    seq_vec_size - chunk->GetRange().GetFrom(),
                                    buff);
             }
             if (GetWidth(row) == 3) {
                 TranslateNAToAA(buff, buff, GetGenCode(row));
             }
             buffer += buff;
         } else {
             // add appropriate number of gap/end chars
             const int n = chunk->GetAlnRange().GetLength();
             char fill_ch;
             if (chunk->GetType() & fNoSeqOnLeft  ||
                 chunk->GetType() & fNoSeqOnRight) {
                 fill_ch = GetEndChar();
             } else {
                 fill_ch = GetGapChar(row);
             }
             if (n > 0) {
                 buffer.append(static_cast<string::size_type>(n), fill_ch);
             }
         }
     }
     return buffer;
 }


 // Build the full alignment string for row 'row' into 'buffer' and return
 // a reference to it.
 //
 // Segments where the row has sequence contribute residues (padded with a
 // fill character if fewer residues than the segment length came back);
 // segments where it has none contribute fill characters: the end
 // character outside [left_seg, right_seg], the gap character inside.
 // When an anchor is set, segments in which the anchor row is a gap are
 // not written to 'buffer'; if the row has sequence there, they are
 // reported as inserts instead.
 //
 // Optional outputs (any may be NULL):
 //   insert_starts / insert_lens  - both must be non-NULL for inserts to be
 //                                  recorded; adjacent inserts are merged.
 //   insert_aln_starts            - alignment position of each recorded
 //                                  insert; filled only when non-NULL.
 //   scrn_width, scrn_lefts, scrn_rights
 //                                - when all are non-zero, the sequence
 //                                  positions at the left and right edge of
 //                                  each scrn_width-wide screen are appended.
 //
 // Previously 'insert_aln_starts' was dereferenced whenever the other two
 // insert lists were supplied, so passing it as NULL crashed; it is now
 // checked on its own.
 string& CAlnVec::GetWholeAlnSeqString(TNumrow       row,
                                       string&       buffer,
                                       TSeqPosList * insert_aln_starts,
                                       TSeqPosList * insert_starts,
                                       TSeqPosList * insert_lens,
                                       unsigned int  scrn_width,
                                       TSeqPosList * scrn_lefts,
                                       TSeqPosList * scrn_rights) const
 {
     TSeqPos       aln_pos = 0,
         len = 0,
         curr_pos = 0,
         anchor_pos = 0,
         scrn_pos = 0,
         prev_len = 0,
         ttl_len = 0;
     TSignedSeqPos start = -1,
         stop = -1,
         scrn_lft_seq_pos = -1,
         scrn_rgt_seq_pos = -1,
         prev_aln_pos = -1,
         prev_start = -1;
     TNumseg       seg;
     int           pos, nscrns, delta;

     TSeqPos aln_len = GetAlnStop() + 1;

     bool anchored = IsSetAnchor();
     bool plus     = IsPositiveStrand(row);
     int  width    = GetWidth(row);

     // screen width is given in alignment columns; convert to row units
     scrn_width *= width;

     const bool record_inserts = insert_starts && insert_lens;
     const bool record_coords  = scrn_width && scrn_lefts && scrn_rights;

     // allocate space for the row
     buffer.clear();
     buffer.reserve(aln_len);
     string buff;

     const TNumseg& left_seg = x_GetSeqLeftSeg(row);
     const TNumseg& right_seg = x_GetSeqRightSeg(row);

     // loop through all segments
     for (seg = 0, pos = row, aln_pos = 0, anchor_pos = m_Anchor;
          seg < m_NumSegs;
          ++seg, pos += m_NumRows, anchor_pos += m_NumRows) {

         const TSeqPos& seg_len = m_Lens[seg];
         start = m_Starts[pos];
         len = seg_len * width;

         if (anchored  &&  m_Starts[anchor_pos] < 0) {
             if (start >= 0) {
                 // record the insert if requested
                 if (record_inserts) {
                     if (prev_aln_pos == (TSignedSeqPos)(aln_pos / width)  &&
                         start == (TSignedSeqPos)(plus ? prev_start + prev_len :
                                   prev_start - len)) {
                         // consolidate the adjacent inserts
                         ttl_len += len;
                         insert_lens->pop_back();
                         insert_lens->push_back(ttl_len);
                         if (!plus) {
                             insert_starts->pop_back();
                             insert_starts->push_back(start);
                         }
                     } else {
                         prev_aln_pos = aln_pos / width;
                         ttl_len = len;
                         insert_starts->push_back(start);
                         if (insert_aln_starts) {
                             insert_aln_starts->push_back(prev_aln_pos);
                         }
                         insert_lens->push_back(len);
                     }
                     prev_start = start;
                     prev_len = len;
                 }
             }
         } else {
             if (start >= 0) {
                 stop = start + len - 1;

                 // add regular sequence to buffer
                 GetSeqString(buff, row, start, stop);
                 TSeqPos buf_len = min<TSeqPos>(TSeqPos(buff.size()), seg_len);
                 buffer += buff;
                 if (buf_len < seg_len) {
                     // Not enough chars in the sequence, add gap
                     buf_len = seg_len - buf_len;
                     char fill_ch;

                     if (seg < left_seg  ||  seg > right_seg) {
                         fill_ch = GetEndChar();
                     } else {
                         fill_ch = GetGapChar(row);
                     }

                     buffer.append(buf_len, fill_ch);
                 }

                 // take care of coords if necessary
                 if (record_coords) {
                     if (scrn_lft_seq_pos < 0) {
                         scrn_lft_seq_pos = plus ? start : stop;
                         if (scrn_rgt_seq_pos < 0) {
                             scrn_rgt_seq_pos = scrn_lft_seq_pos;
                         }
                     }
                     // previous scrns
                     nscrns = (aln_pos - scrn_pos) / scrn_width;
                     for (int i = 0; i < nscrns; i++) {
                         scrn_lefts->push_back(scrn_lft_seq_pos);
                         scrn_rights->push_back(scrn_rgt_seq_pos);
                         if (i == 0) {
                             scrn_lft_seq_pos = plus ? start : stop;
                         }
                         scrn_pos += scrn_width;
                     }
                     if (nscrns > 0) {
                         scrn_lft_seq_pos = plus ? start : stop;
                     }
                     // current scrns
                     nscrns = (aln_pos + len - scrn_pos) / scrn_width;
                     curr_pos = aln_pos;
                     for (int i = 0; i < nscrns; i++) {
                         delta = (plus ?
                                  scrn_width - (curr_pos - scrn_pos) :
                                  curr_pos - scrn_pos - scrn_width);

                         scrn_lefts->push_back(scrn_lft_seq_pos);
                         if (plus ?
                             scrn_lft_seq_pos < start :
                             scrn_lft_seq_pos > stop) {
                             scrn_lft_seq_pos = (plus ? start : stop) +
                                 delta;
                             scrn_rgt_seq_pos = scrn_lft_seq_pos +
                                 (plus ? -1 : 1);
                         } else {
                             scrn_rgt_seq_pos = scrn_lft_seq_pos + (plus ? -1 : 1)
                                 + delta;
                             scrn_lft_seq_pos += delta;
                         }
                         if (seg == left_seg  &&
                             scrn_lft_seq_pos == scrn_rgt_seq_pos) {
                             if (plus) {
                                 scrn_rgt_seq_pos--;
                             } else {
                                 scrn_rgt_seq_pos++;
                             }
                         }
                         scrn_rights->push_back(scrn_rgt_seq_pos);
                         curr_pos = scrn_pos += scrn_width;
                     }
                     if (aln_pos + len <= scrn_pos) {
                         scrn_lft_seq_pos = -1; // reset
                     }
                     scrn_rgt_seq_pos = plus ? stop : start;
                 }
             } else {
                 // add appropriate number of gap/end chars

                 char fill_ch;

                 if (seg < left_seg  ||  seg > right_seg) {
                     fill_ch = GetEndChar();
                 } else {
                     fill_ch = GetGapChar(row);
                 }

                 buffer.append(seg_len, fill_ch);
             }
             aln_pos += len;
         }

     }

     // take care of the remaining coords if necessary
     if (record_coords) {
         // previous scrns
         TSeqPos pos_diff = aln_pos - scrn_pos;
         if (pos_diff > 0) {
             nscrns = pos_diff / scrn_width;
             if (pos_diff % scrn_width) {
                 nscrns++;
             }
             for (int i = 0; i < nscrns; i++) {
                 scrn_lefts->push_back(scrn_lft_seq_pos);
                 scrn_rights->push_back(scrn_rgt_seq_pos);
                 if (i == 0) {
                     scrn_lft_seq_pos = scrn_rgt_seq_pos;
                 }
                 scrn_pos += scrn_width;
             }
         }
     }
     return buffer;
 }


 //

 // CreateConsensus()

 //

 // compute a consensus sequence given a particular alignment

 // the rules for a consensus are:

 //   - a segment is consensus gap if > 50% of the sequences are gap at this

 //     segment.  50% exactly is counted as sequence

 //   - for a segment counted as sequence, for each position, the most

 //     frequently occurring base is counted as consensus.  in the case of

 //     a tie, the consensus is considered muddied, and the consensus is

 //     so marked

 //

 // Build a new dense-seg that is a copy of this alignment with one extra
 // row (the last one) holding a consensus sequence, and fill
 // 'consensus_seq' with a raw IUPAC bioseq carrying that consensus.
 //
 //   consensus_row  - receives the index of the added row
 //   consensus_seq  - reset, then populated (id, comment descriptor, inst)
 //   consensus_id   - Seq-id copied onto the consensus bioseq and new row
 //   consens        - per-segment consensus strings; computed here via
 //                    CreateConsensus(vector<string>&) when NULL
 //
 // Returns an empty CRef (leaving 'consensus_row' untouched) when there is
 // no dense-seg or no rows.  Molecule type is taken from row 0.
 CRef<CDense_seg>
 CAlnVec::CreateConsensus(int& consensus_row,
                          CBioseq& consensus_seq,
                          const CSeq_id& consensus_id,
                          vector<string>* consens) const
 {
     consensus_seq.Reset();
     if ( !m_DS || m_NumRows < 1) {
         return CRef<CDense_seg>();
     }

     const bool is_na = GetBioseqHandle(0).IsNucleotide();

     // Compute the per-segment consensus ourselves if none was supplied
     vector<string> computed;
     if (consens == NULL) {
         computed.resize(m_NumSegs);
         CreateConsensus(computed);
         consens = &computed;
     }

     // Skeleton of the widened dense-seg
     CRef<CDense_seg> new_ds(new CDense_seg());
     new_ds->SetDim(m_NumRows + 1);
     new_ds->SetNumseg(m_NumSegs);
     new_ds->SetLens() = m_Lens;
     new_ds->SetStarts().reserve(m_Starts.size() + m_NumSegs);
     const bool has_strands = !m_Strands.empty();
     if (has_strands) {
         new_ds->SetStrands().reserve(m_Strands.size() + m_NumSegs);
     }

     string        residues;           // concatenated consensus sequence
     TSignedSeqPos consensus_pos = 0;  // start of the next consensus segment
     const size_t  num_rows = (size_t)m_NumRows;

     for (size_t seg = 0;  seg < consens->size();  ++seg) {
         const string& seg_consensus = (*consens)[seg];
         const size_t  first = seg * num_rows;

         // existing rows are copied through unchanged
         for (size_t row = 0;  row < num_rows;  ++row) {
             new_ds->SetStarts().push_back(m_Starts[first + row]);
             if (has_strands) {
                 new_ds->SetStrands().push_back(m_Strands[first + row]);
             }
         }

         // The consensus goes last; putting it first would shift every
         // row number and invalidate the handle / seq-vector caches.
         if (seg_consensus.length() != 0) {
             new_ds->SetStarts().push_back(consensus_pos);
         } else {
             new_ds->SetStarts().push_back(-1);
         }
         if (has_strands) {
             new_ds->SetStrands().push_back(eNa_strand_unknown);
         }

         consensus_pos += TSignedSeqPos(seg_consensus.length());
         residues += seg_consensus;
     }

     // carry over the existing row ids
     for (size_t k = 0;  k < m_Ids.size();  ++k) {
         new_ds->SetIds().push_back(m_Ids[k]);
     }

     // Assemble the consensus bioseq
     {{
          CRef<CSeq_id> new_id(new CSeq_id());
          new_id->Assign(consensus_id);
          consensus_seq.SetId().push_back(new_id);
          new_ds->SetIds().push_back(new_id);

          // descriptive comment
          CSeq_descr& descr = consensus_seq.SetDescr();
          CRef<CSeqdesc> comment(new CSeqdesc);
          descr.Set().push_back(comment);
          comment->SetComment("This is a generated consensus sequence");

          // Seq-inst: raw sequence of the appropriate molecule type
          CSeq_inst& inst = consensus_seq.SetInst();
          inst.SetRepr(CSeq_inst::eRepr_raw);
          inst.SetMol(is_na ? CSeq_inst::eMol_na : CSeq_inst::eMol_aa);
          inst.SetLength(CSeq_inst::TLength(residues.length()));

          CSeq_data& seq_data = inst.SetSeq_data();
          if (is_na) {
              seq_data.SetIupacna() = CIUPACna(residues);
          } else {
              seq_data.SetIupacaa() = CIUPACaa(residues);
          }
     }}

     consensus_row = int(new_ds->GetIds().size()) - 1;
     return new_ds;
 }


 // Transpose a set of per-row sequence strings into per-column strings.
 //
 // On entry 'segs' holds one string per row (empty for gapped rows).
 // On return it holds one string per column; column c contains the c-th
 // character of every non-empty row, in row order.  The number of columns
 // is the length of the first non-empty row; with no non-empty rows the
 // result is empty.
 //
 // The previous implementation copied into a raw new[] buffer sized from
 // the first non-empty row: a longer later row wrote past the buffer and
 // a shorter one left uninitialised bytes that were then read.  Here a
 // longer row is truncated to the column count and a shorter row simply
 // contributes nothing to the columns it does not reach.
 void CAlnVec::TransposeSequences(vector<string>& segs)
 {
     // column count comes from the first non-empty row
     size_t cols = 0;
     size_t seq_rows = 0;
     for (size_t row = 0; row < segs.size(); ++row) {
         if (segs[row].empty()) {
             continue;
         }
         if (cols == 0) {
             cols = segs[row].size();
         }
         ++seq_rows;
     }

     vector<string> columns(cols);
     for (size_t col = 0; col < cols; ++col) {
         columns[col].reserve(seq_rows);
     }

     for (size_t row = 0; row < segs.size(); ++row) {
         const string& s = segs[row];
         const size_t n = min(s.size(), cols);
         for (size_t col = 0; col < n; ++col) {
             columns[col] += s[col];
         }
     }

     segs.swap(columns);
 }


 // Count how often each of the four bases occurs in alignment column
 // 'col'.
 //
 // base_count[0..numBases) is zeroed first; then every IUPACna letter in
 // 'col' increments the counters of all bases it stands for
 // (index 0 = A, 1 = C, 2 = G, 3 = T), so an ambiguity code such as 'N'
 // bumps all four.  Unrecognised characters are ignored.  Counters 0..3
 // are written regardless of 'numBases'.
 void CAlnVec::CollectNucleotideFrequences(const string& col, int base_count[], int numBases)
 {
     // kIupacByMask[m] is the IUPACna letter for the NCBI4na bit set m
     // (bit 0 = A, bit 1 = C, bit 2 = G, bit 3 = T); slot 0 is unused.
     static const char kIupacByMask[] = "-ACMGRSVTWYHKDBN";

     fill_n(base_count, numBases, 0);

     for (const char* p = col.c_str();  *p != '\0';  ++p) {
         unsigned int mask = 0;
         for (unsigned int m = 1;  m < 16;  ++m) {
             if (kIupacByMask[m] == *p) {
                 mask = m;
                 break;
             }
         }
         for (int bit = 0;  bit < 4;  ++bit) {
             if (mask & (1u << bit)) {
                 ++base_count[bit];
             }
         }
     }
 }


 // Count how often each residue letter occurs in alignment column 'col'.
 //
 // base_count[0..numBases) is zeroed first; a character c is then counted
 // in slot (c - 'A') when that slot lies inside [0, numBases).  Anything
 // else is ignored.
 void CAlnVec::CollectProteinFrequences(const string& col, int base_count[], int numBases)
 {
     fill_n(base_count, numBases, 0);

     for (const char* p = col.c_str();  *p != '\0';  ++p) {
         const int slot = *p - 'A';
         if (slot < 0  ||  slot >= numBases) {
             continue;
         }
         ++base_count[slot];
     }
 }


 // Compute a consensus string for every segment of the alignment.
 //
 // 'consens' is indexed by segment and must already hold at least
 // m_NumSegs entries.  Segments judged to be consensus gaps are left
 // untouched; every other entry is resized to the segment length and
 // filled one column at a time.  Whether nucleotide or protein rules
 // apply is decided from row 0 alone.
 void CAlnVec::CreateConsensus(vector<string>& consens) const
 {
     bool isNucleotide = GetBioseqHandle(0).IsNucleotide();

     // 4 base counters for nucleotides, one per letter 'A'..'Z' for proteins
     const int numBases = isNucleotide ? 4 : 26;

     int base_count[26]; // must be a compile-time constant for some compilers

     // determine what the number of segments required for a gapped consensus
     // segment is.  this must be rounded to be at least 50%.
     // NOTE(review): this evaluates to ceil(m_NumRows / 2), so with an odd
     // row count a segment gapped in more than half of the rows (e.g. 2 of
     // 3) is still treated as sequence - confirm against the "> 50%" rule
     // stated in the comment preceding the four-argument CreateConsensus().
     int gap_seg_thresh = m_NumRows - m_NumRows / 2;

     for (size_t j = 0;  j < (size_t)m_NumSegs;  ++j) {
         // evaluate for gap / no gap
         int gap_count = 0;
         for (size_t i = 0;  i < (size_t)m_NumRows;  ++i) {
             if (m_Starts[ j*m_NumRows + i ] == -1) {
                 ++gap_count;
             }
         }

         // check to make sure that this seg is not a consensus
         // gap seg
         if ( gap_count > gap_seg_thresh )
             continue;

         // the base threshold for being considered unique is at least
         // 70% of the available sequences
         // (integer arithmetic: 0.7 * non-gapped rows, rounded to nearest)
         int base_thresh =
             ((m_NumRows - gap_count) * 7 + 5) / 10;

         {
             // we will build a segment with enough bases to match
             consens[j].resize(m_Lens[j]);

             // retrieve all sequences for this segment
             // (after the transpose, segs[i] is column i of the segment)
             vector<string> segs(m_NumRows);
             RetrieveSegmentSequences(j, segs);
             TransposeSequences(segs);

             // count -> residue code, ordered by descending count
             typedef multimap<int, unsigned char, greater<int> > TRevMap;

             //
             // evaluate for a consensus
             //
             for (size_t i = 0;  i < m_Lens[j];  ++i) {
                 if (isNucleotide) {
                     CollectNucleotideFrequences(segs[i], base_count, numBases);
                 } else {
                     CollectProteinFrequences(segs[i], base_count, numBases);
                 }

                 // we create a sorted list (in descending order) of
                 // frequencies of appearance to base
                 // the frequency is "global" for this position: that is,
                 // if 40% of the sequences are gapped, the highest frequency
                 // any base can have is 0.6
                 TRevMap rev_map;

                 for (int k = 0;  k < numBases;  ++k) {
                     // this gets around a potentially tricky idiosyncrasy
                     // in some implementations of multimap.  depending on
                     // the library, the key may be const (or not)
                     // value: a one-bit mask for nucleotides, the letter
                     // offset from 'A' for proteins
                     TRevMap::value_type p(base_count[k], isNucleotide ? (1<<k) : k);
                     rev_map.insert(p);
                 }

                 // now, the first element here contains the best frequency
                 // we scan for the appropriate bases
                 // (a single, untied winner that reaches the threshold is
                 // emitted directly)
                 if (rev_map.count(rev_map.begin()->first) == 1 &&
                     rev_map.begin()->first >= base_thresh) {
                         consens[j][i] = isNucleotide ?
                             ToIupac(rev_map.begin()->second) :
                             (rev_map.begin()->second+'A');
                 } else {
                     // now we need to make some guesses based on IUPACna
                     // notation
                     // walk down the ranking, merging candidates until the
                     // accumulated count reaches the threshold; entries
                     // tied with the last one taken are taken as well
                     int               count;
                     unsigned char     c    = 0x00;
                     int               freq = 0;
                     TRevMap::iterator curr = rev_map.begin();
                     TRevMap::iterator prev = rev_map.begin();
                     for (count = 0;
                          curr != rev_map.end() &&
                          (freq < base_thresh || prev->first == curr->first);
                          ++curr, ++count) {
                         prev = curr;
                         freq += curr->first;
                         if (isNucleotide) {
                             // OR the one-bit masks into an ambiguity mask
                             c |= curr->second;
                         } else {
                             // proteins: only the pairs N/D, Q/E and I/L
                             // merge into B, Z and J; anything else is X
                             unsigned char cur_char = curr->second+'A';
                             switch (c) {
                                 case 0x00:
                                     c = cur_char;
                                     break;
                                 case 'N': case 'D':
                                     c = (cur_char == 'N' || cur_char == 'D') ? 'B' : 'X';
                                     break;
                                 case 'Q': case 'E':
                                     c = (cur_char == 'Q' || cur_char == 'E') ? 'Z' : 'X';
                                     break;
                                 case 'I': case 'L':
                                     c = (cur_char == 'I' || cur_char == 'L') ? 'J' : 'X';
                                     break;
                                 default:
                                     c = 'X';
                             }
                         }
                     }

                     //
                     // catchall
                     //
                     // more than two merged candidates: fully ambiguous
                     if (count > 2) {
                         consens[j][i] = isNucleotide ? 'N' : 'X';
                     } else {
                         consens[j][i] = isNucleotide ? ToIupac(c) : c;
                     }
                 }
             }
         }
     }
 }


 void CAlnVec::RetrieveSegmentSequences(size_t segment, vector<string>& segs) const

 {

     size_t segment_row_index = segment*m_NumRows;

     for (size_t i = 0;  i < (size_t)m_NumRows;  ++i, ++segment_row_index) {

         TSignedSeqPos start = m_Starts[ segment_row_index ];

         if (start != -1) {

             TSeqPos stop  = start + m_Lens[segment];


             string& s = segs[i];


             if (IsPositiveStrand(TNumrow(i))) {

                 x_GetSeqVector(TNumrow(i)).GetSeqData(start, stop, s);

             } else {

                 CSeqVector &  seq_vec = x_GetSeqVector(TNumrow(i));

                 TSeqPos size = seq_vec.size();

                 seq_vec.GetSeqData(size - stop, size - start, s);

             }

         }

         else {

             segs[i].clear();

         }

     }

 }


 // Convenience overload: create the consensus bioseq internally, register
 // it in the scope as a top-level Seq-entry, and return the widened
 // dense-seg.  'consensus_row' receives the index of the consensus row.
 CRef<CDense_seg> CAlnVec::CreateConsensus(int& consensus_row,
                                           const CSeq_id& consensus_id) const
 {
     CRef<CBioseq> consensus_seq(new CBioseq);
     CRef<CDense_seg> result =
         CreateConsensus(consensus_row, *consensus_seq, consensus_id);

     // wrap the bioseq in a Seq-entry and hand it to the scope
     CRef<CSeq_entry> wrapper(new CSeq_entry());
     wrapper->SetSeq(*consensus_seq);
     GetScope().AddTopLevelSeqEntry(*wrapper);

     return result;
 }


 // Convenience overload: same as CreateConsensus(consensus_row, id) with
 // the fixed local id "lcl|consensus".
 CRef<CDense_seg> CAlnVec::CreateConsensus(int& consensus_row) const
 {
     CSeq_id default_id("lcl|consensus");
     return CreateConsensus(consensus_row, default_id);
 }


 // File-local full score matrix; unpacked lazily from NCBISM_Blosum62
 // inside the string-based CAlnVec::CalculateScore().
 static SNCBIFullScoreMatrix s_FullScoreMatrix;


 int CAlnVec::CalculateScore(const string& s1, const string& s2,

                             bool s1_is_prot, bool s2_is_prot,

                             int gen_code1, int gen_code2)

 {

     // check the lengths

     if (s1_is_prot == s2_is_prot  &&  s1.length() != s2.length()) {

         NCBI_THROW(CAlnException, eInvalidRequest,

                    "CAlnVec::CalculateScore(): "

                    "Strings should have equal lenghts.");

     } else if (s1.length() * (s1_is_prot ? 1 : 3) !=

                s2.length() * (s2_is_prot ? 1 : 3)) {

         NCBI_THROW(CAlnException, eInvalidRequest,

                    "CAlnVec::CalculateScore(): "

                    "Strings lengths do not match.");

     }


     int score = 0;


     const unsigned char * res1 = (unsigned char *) s1.c_str();

     const unsigned char * res2 = (unsigned char *) s2.c_str();

     const unsigned char * end1 = res1 + s1.length();

     const unsigned char * end2 = res2 + s2.length();


     static bool s_FullScoreMatrixInitialized = false;

     if (s1_is_prot  &&  s2_is_prot) {

         if ( !s_FullScoreMatrixInitialized ) {

             s_FullScoreMatrixInitialized = true;

             NCBISM_Unpack(&NCBISM_Blosum62, &s_FullScoreMatrix);

         }


         // use BLOSUM62 matrix

         for ( ;  res1 != end1;  res1++, res2++) {

             _ASSERT(*res1 < NCBI_FSM_DIM);

             _ASSERT(*res2 < NCBI_FSM_DIM);

             score += s_FullScoreMatrix.s[*res1][*res2];

         }

     } else if ( !s1_is_prot  &&  !s2_is_prot ) {

         // use match score/mismatch penalty

         for ( ; res1 != end1;  res1++, res2++) {

             if (*res1 == *res2) {

                 score += 1;

             } else {

                 score -= 3;

             }

         }

     } else {

         string t;

         if (s1_is_prot) {

             TranslateNAToAA(s2, t, gen_code2);

             for ( ;  res1 != end1;  res1++, res2++) {

                 _ASSERT(*res1 < NCBI_FSM_DIM);

                 _ASSERT(*res2 < NCBI_FSM_DIM);

                 score += s_FullScoreMatrix.s[*res1][*res2];

             }

         } else {

             TranslateNAToAA(s1, t, gen_code1);

             for ( ;  res2 != end2;  res1++, res2++) {

                 _ASSERT(*res1 < NCBI_FSM_DIM);

                 _ASSERT(*res2 < NCBI_FSM_DIM);

                 score += s_FullScoreMatrix.s[*res1][*res2];

             }

         }

     }

     return score;

 }


 // Translate a nucleotide string into amino-acid residues, one residue per
 // three input characters, using the translation table selected by gencode.
 //
 //   na      - input nucleotides; its size must be a multiple of 3
 //   aa      - receives the residues; may be the very same object as na,
 //             in which case the result overwrites the input in place
 //   gencode - id handed to CGen_code_table::GetTransTable()
 //
 // Throws CAlnException (eTranslateFailure) when na.size() is not
 // divisible by 3.
 void CAlnVec::TranslateNAToAA(const string& na,
                               string& aa,
                               int gencode)
 {
     const size_t na_size = na.size();
     if (na_size % 3 != 0) {
         NCBI_THROW(CAlnException, eTranslateFailure,
                    "CAlnVec::TranslateNAToAA(): "
                    "NA size expected to be divisible by 3");
     }

     const CTrans_table& tbl = CGen_code_table::GetTransTable(gencode);

     // When both arguments are the same string the output is written over
     // the already consumed part of the input and trimmed afterwards.
     const bool in_place = (&aa == &na);
     if ( !in_place ) {
         aa.resize(na_size / 3);
     }

     // The codon state is threaded through every NextCodonState() call,
     // including across codon boundaries.
     int    state   = 0;
     size_t n_codon = 0;
     for (size_t src = 0;  src < na_size;  src += 3, ++n_codon) {
         state = tbl.NextCodonState(state, na[src]);
         state = tbl.NextCodonState(state, na[src + 1]);
         state = tbl.NextCodonState(state, na[src + 2]);
         aa[n_codon] = tbl.GetCodonResidue(state);
     }

     if (in_place) {
         aa.resize(n_codon);
     }
 }


 // Score the pairwise alignment of two rows.
 //
 // Every segment in which both rows have a non-negative start contributes
 // the result of the string-based CalculateScore() overload applied to the
 // residues of that segment; segments where either start is negative are
 // skipped.  Returns the sum over all segments (0 if none qualify).
 int CAlnVec::CalculateScore(TNumrow row1, TNumrow row2) const
 {
     const bool is_prot1 =
         GetBioseqHandle(row1).GetBioseqCore()->GetInst().GetMol()
         == CSeq_inst::eMol_aa;
     const bool is_prot2 =
         GetBioseqHandle(row2).GetBioseqCore()->GetInst().GetMol()
         == CSeq_inst::eMol_aa;

     CSeqVector&   vec1     = x_GetSeqVector(row1);
     const TSeqPos vec1_len = vec1.size();
     CSeqVector&   vec2     = x_GetSeqVector(row2);
     const TSeqPos vec2_len = vec2.size();

     string residues1, residues2;
     int    total = 0;

     // idx1 / idx2 walk m_Starts with a stride of m_NumRows per segment,
     // beginning at the row numbers themselves.
     TNumrow idx1 = row1;
     TNumrow idx2 = row2;
     for (TNumseg seg = 0;  seg < m_NumSegs;
          ++seg, idx1 += m_NumRows, idx2 += m_NumRows) {

         const TSignedSeqPos from1 = m_Starts[idx1];
         const TSignedSeqPos from2 = m_Starts[idx2];
         if (from1 < 0  ||  from2 < 0) {
             continue; // at least one of the rows has no sequence here
         }

         const TSeqPos seg_len = m_Lens[seg];

         // For a non-positive strand the requested interval is mirrored
         // against the size of the sequence vector.
         if (IsPositiveStrand(row1)) {
             vec1.GetSeqData(from1, from1 + seg_len, residues1);
         } else {
             vec1.GetSeqData(vec1_len - (from1 + seg_len),
                             vec1_len - from1,
                             residues1);
         }

         if (IsPositiveStrand(row2)) {
             vec2.GetSeqData(from2, from2 + seg_len, residues2);
         } else {
             vec2.GetSeqData(vec2_len - (from2 + seg_len),
                             vec2_len - from2,
                             residues2);
         }

         total += CalculateScore(residues1, residues2, is_prot1, is_prot2,
                                 GetGenCode(row1), GetGenCode(row2));
     }

     return total;
 }


 // Fill buffer with one character per alignment row for the column at
 // aln_pos and return a reference to it.
 //
 //   buffer        - resized to GetNumRows(); every row entry is overwritten
 //   aln_pos       - alignment position; values past GetAlnStop() are
 //                   clamped to GetAlnStop()
 //   residue_count - optional; when non-null, the entry indexed by
 //                   FromIupac(character) is incremented for each counted row
 //   gaps_in_count - when true, rows without sequence at this column are
 //                   counted as well
 //
 // Rows with sequence get the residue at the column (rows of width 3 get the
 // first residue of the translated 3-character window).  Other rows get the
 // row's gap character, or the end character if the segment is flagged as
 // having no sequence on its left or right.
 string& CAlnVec::GetColumnVector(string& buffer,
                                  TSeqPos aln_pos,
                                  TResidueCount * residue_count,
                                  bool gaps_in_count) const
 {
     buffer.resize(GetNumRows(), GetEndChar());

     // out-of-range adjustment
     if (aln_pos > GetAlnStop()) {
         aln_pos = GetAlnStop();
     }

     const TNumseg seg     = GetSeg(aln_pos);
     const TSeqPos offset  = aln_pos - GetAlnStart(seg);
     const TSeqPos seg_len = GetLen(seg);

     for (TNumrow row = 0;  row < m_NumRows;  ++row) {
         TSignedSeqPos seq_pos = GetStart(row, seg);
         const bool    has_seq = (seq_pos >= 0);

         if (has_seq) {
             // Sequence residue: step into the segment from its start on
             // the positive strand, from its far end otherwise.
             const bool forward = IsPositiveStrand(row);
             seq_pos += forward ? offset : seg_len - 1 - offset;

             CSeqVector& vec = x_GetSeqVector(row);
             if (GetWidth(row) == 3) {
                 // Fetch three characters and keep the translated residue.
                 string codon, residue;
                 if (forward) {
                     vec.GetSeqData(seq_pos, seq_pos + 3, codon);
                 } else {
                     const TSeqPos vec_len = vec.size();
                     vec.GetSeqData(vec_len - seq_pos - 3,
                                    vec_len - seq_pos,
                                    codon);
                 }
                 TranslateNAToAA(codon, residue, GetGenCode(row));
                 buffer[row] = residue[0];
             } else {
                 buffer[row] =
                     vec[forward ? seq_pos : vec.size() - seq_pos - 1];
             }
         } else {
             // Gap or end character.  The segment type is only looked up
             // when the two characters differ, since otherwise the answer
             // would not change the output.
             buffer[row] = GetGapChar(row);
             if (GetEndChar() != buffer[row]) {
                 const TSegTypeFlags flags = GetSegType(row, seg);
                 if ((flags & fNoSeqOnLeft)  ||  (flags & fNoSeqOnRight)) {
                     buffer[row] = GetEndChar();
                 }
             }
         }

         // Residues are always counted; gaps only on request.
         if (residue_count  &&  (has_seq  ||  gaps_in_count)) {
             ++(*residue_count)[FromIupac(buffer[row])];
         }
     } // for row

     return buffer;
 }


 // Percent identity of the alignment column at aln_pos.
 //
 // The column is collected with GetColumnVector() into a 16-slot residue
 // counter (gaps_in_count is left at its default).  The result is the
 // largest slot as an integer percentage of the sum of all slots, or 0 when
 // nothing was counted.
 int CAlnVec::CalculatePercentIdentity(TSeqPos aln_pos) const
 {
     string        column(m_NumRows, '\0');
     TResidueCount counts(16, 0);

     GetColumnVector(column, aln_pos, &counts);

     int best = 0;
     int sum  = 0;
     for (TResidueCount::const_iterator it = counts.begin();
          it != counts.end();  ++it) {
         sum += *it;
         if (best < *it) {
             best = *it;
         }
     }

     return sum ? 100 * best / sum : 0;
 }


 END_objects_SCOPE // namespace ncbi::objects::

 END_NCBI_SCOPE

m_Scope
static CRef< CScope > m_Scope
Definition: AltValidator.cpp:73

Bioseq.hpp

Genetic_code_table.hpp

IUPACna.hpp
User-defined methods of the data storage class.

Object_id.hpp

Seq_descr.hpp

Seq_entry.hpp

Seq_id.hpp

Seq_inst.hpp

Seq_interval.hpp

Seq_loc.hpp

Seqdesc.hpp

s_FullScoreMatrix
static SNCBIFullScoreMatrix s_FullScoreMatrix
Definition: alnvec.cpp:824

alnvec.hpp

CAlnException
Definition: alnexception.hpp:43

CAlnMap
Definition: alnmap.hpp:45

CAlnMap::TNumrow
TDim TNumrow
Definition: alnmap.hpp:69

CAlnMap::IsSetAnchor
bool IsSetAnchor(void) const
Definition: alnmap.hpp:524

CAlnMap::TSeqPosList
list< TSeqPos > TSeqPosList
Definition: alnmap.hpp:73

CAlnMap::GetWidth
int GetWidth(TNumrow row) const
Definition: alnmap.hpp:560

CAlnMap::x_GetSeqLeftSeg
const TNumseg & x_GetSeqLeftSeg(TNumrow row) const
Definition: alnmap.cpp:716

CAlnMap::m_Starts
const CDense_seg::TStarts & m_Starts
Definition: alnmap.hpp:373

CAlnMap::GetSegType
TSegTypeFlags GetSegType(TNumrow row, TNumseg seg, int offset=0) const
Definition: alnmap.hpp:503

CAlnMap::m_NumSegs
TNumseg m_NumSegs
Definition: alnmap.hpp:371

CAlnMap::GetStart
TSignedSeqPos GetStart(TNumrow row, TNumseg seg, int offset=0) const
Definition: alnmap.hpp:614

CAlnMap::GetSeqId
const CSeq_id & GetSeqId(TNumrow row) const
Definition: alnmap.hpp:645

CAlnMap::IsPositiveStrand
bool IsPositiveStrand(TNumrow row) const
Definition: alnmap.hpp:600

CAlnMap::m_Ids
const CDense_seg::TIds & m_Ids
Definition: alnmap.hpp:372

CAlnMap::GetSeg
TNumseg GetSeg(TSeqPos aln_pos) const
Definition: alnmap.cpp:373

CAlnMap::fNoSeqOnRight
@ fNoSeqOnRight
Definition: alnmap.hpp:57

CAlnMap::fNoSeqOnLeft
@ fNoSeqOnLeft
Definition: alnmap.hpp:58

CAlnMap::fSeq
@ fSeq
Definition: alnmap.hpp:52

CAlnMap::GetNumRows
TDim GetNumRows(void) const
Definition: alnmap.hpp:517

CAlnMap::m_Anchor
TNumrow m_Anchor
Definition: alnmap.hpp:378

CAlnMap::m_Strands
const CDense_seg::TStrands & m_Strands
Definition: alnmap.hpp:375

CAlnMap::m_DS
CConstRef< CDense_seg > m_DS
Definition: alnmap.hpp:369

CAlnMap::x_GetSeqRightSeg
const TNumseg & x_GetSeqRightSeg(TNumrow row) const
Definition: alnmap.cpp:736

CAlnMap::GetAlnChunks
CRef< CAlnChunkVec > GetAlnChunks(TNumrow row, const TSignedRange &range, TGetChunkFlags flags=fAlnSegsOnly) const
Definition: alnmap.cpp:1002

CAlnMap::TSegTypeFlags
unsigned int TSegTypeFlags
Definition: alnmap.hpp:50

CAlnMap::GetAlnStart
TSeqPos GetAlnStart(void) const
Definition: alnmap.hpp:177

CAlnMap::GetLen
TSeqPos GetLen(TNumseg seg, int offset=0) const
Definition: alnmap.hpp:621

CAlnMap::TNumseg
CDense_seg::TNumseg TNumseg
Definition: alnmap.hpp:72

CAlnMap::m_NumRows
TNumrow m_NumRows
Definition: alnmap.hpp:370

CAlnMap::GetAlnStop
TSeqPos GetAlnStop(void) const
Definition: alnmap.hpp:495

CAlnMap::fSkipInserts
@ fSkipInserts
Definition: alnmap.hpp:94

CAlnMap::fSkipUnalignedGaps
@ fSkipUnalignedGaps
Definition: alnmap.hpp:91

CAlnMap::m_Lens
const CDense_seg::TLens & m_Lens
Definition: alnmap.hpp:374

CAlnVec::CollectNucleotideFrequences
static void CollectNucleotideFrequences(const string &col, int base_count[], int numBases)
Definition: alnvec.cpp:558

CAlnVec::CAlnVec
CAlnVec(const CDense_seg &ds, CScope &scope)
Definition: alnvec.cpp:59

CAlnVec::GetBioseqHandle
const CBioseq_Handle & GetBioseqHandle(TNumrow row) const
Definition: alnvec.cpp:86

CAlnVec::GetGapChar
TResidue GetGapChar(TNumrow row) const
Definition: alnvec.hpp:358

CAlnVec::GetSeqString
string & GetSeqString(string &buffer, TNumrow row, TSeqPos seq_from, TSeqPos seq_to) const
Definition: alnvec.hpp:288

CAlnVec::GetEndChar
TResidue GetEndChar() const
Definition: alnvec.hpp:387

CAlnVec::TranslateNAToAA
static void TranslateNAToAA(const string &na, string &aa, int gen_code=kDefaultGenCode)
Definition: alnvec.cpp:893

CAlnVec::GetGenCode
int GetGenCode(TNumrow row) const
Definition: alnvec.hpp:425

CAlnVec::~CAlnVec
~CAlnVec(void)
Definition: alnvec.cpp:81

CAlnVec::GetColumnVector
string & GetColumnVector(string &buffer, TSeqPos aln_pos, TResidueCount *residue_count=0, bool gaps_in_count=false) const
Definition: alnvec.cpp:983

CAlnVec::GetWholeAlnSeqString
string & GetWholeAlnSeqString(TNumrow row, string &buffer, TSeqPosList *insert_aln_starts=0, TSeqPosList *insert_starts=0, TSeqPosList *insert_lens=0, unsigned int scrn_width=0, TSeqPosList *scrn_lefts=0, TSeqPosList *scrn_rights=0) const
Definition: alnvec.cpp:199

CAlnVec::m_AaCoding
TCoding m_AaCoding
Definition: alnvec.hpp:197

CAlnVec::x_GetSeqVector
CSeqVector & x_GetSeqVector(TNumrow row) const
Definition: alnvec.cpp:108

CAlnVec::GetScope
CScope & GetScope(void) const
Definition: alnvec.hpp:247

CAlnVec::CollectProteinFrequences
static void CollectProteinFrequences(const string &col, int base_count[], int numBases)
Definition: alnvec.cpp:636

CAlnVec::ToIupac
static unsigned char ToIupac(unsigned char c)
Definition: alnvec.hpp:468

CAlnVec::CreateConsensus
CRef< CDense_seg > CreateConsensus(int &consensus_row) const
Definition: alnvec.cpp:817

CAlnVec::GetAlnSeqString
string & GetAlnSeqString(string &buffer, TNumrow row, const CAlnMap::TSignedRange &aln_rng) const
Definition: alnvec.cpp:145

CAlnVec::CalculateScore
int CalculateScore(TNumrow row1, TNumrow row2) const
Definition: alnvec.cpp:926

CAlnVec::RetrieveSegmentSequences
void RetrieveSegmentSequences(size_t segment, vector< string > &segs) const
Definition: alnvec.cpp:777

CAlnVec::m_BioseqHandlesCache
TBioseqHandleCache m_BioseqHandlesCache
Definition: alnvec.hpp:183

CAlnVec::TResidueCount
vector< int > TResidueCount
Definition: alnvec.hpp:55

CAlnVec::FromIupac
static unsigned char FromIupac(unsigned char c)
Definition: alnvec.hpp:444

CAlnVec::m_SeqVectorCache
TSeqVectorCache m_SeqVectorCache
Definition: alnvec.hpp:184

CAlnVec::CalculatePercentIdentity
int CalculatePercentIdentity(TSeqPos aln_pos) const
Definition: alnvec.cpp:1051

CAlnVec::TransposeSequences
static void TransposeSequences(vector< string > &segs)
Definition: alnvec.cpp:527

CAlnVec::m_NaCoding
TCoding m_NaCoding
Definition: alnvec.hpp:196

CBioseq_Handle
CBioseq_Handle –.
Definition: bioseq_handle.hpp:91

CBioseq
Definition: Bioseq.hpp:63

CConstRef
CConstRef –.
Definition: ncbiobj.hpp:1266

CDense_seg
Definition: Dense_seg.hpp:61

CGen_code_table::GetTransTable
static const CTrans_table & GetTransTable(int id)
Definition: Genetic_code_table.cpp:296

CIUPACaa
CIUPACaa –.
Definition: IUPACaa.hpp:66

CIUPACna
CIUPACna –.
Definition: IUPACna.hpp:66

CRange< TSignedSeqPos >

CRef< CSeqVector >

CScope
CScope –.
Definition: scope.hpp:92

CSeqVector
CSeqVector –.
Definition: seq_vector.hpp:65

CSeq_data
Definition: Seq_data.hpp:53

CSeq_descr
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55

CSeq_entry
Definition: Seq_entry.hpp:56

CSeq_id
Definition: Seq_id.hpp:71

CSeq_inst
Definition: Seq_inst.hpp:51

CSeqdesc
Definition: Seqdesc.hpp:51

CTrans_table
Definition: Genetic_code_table.hpp:94

CTrans_table::GetCodonResidue
char GetCodonResidue(int state) const
Definition: Genetic_code_table.hpp:226

CTrans_table::NextCodonState
static int NextCodonState(int state, unsigned char ch)
Definition: Genetic_code_table.hpp:211

IAlnExplorer::TNumrow
int TNumrow
Definition: aln_explorer.hpp:52

map_checker< std::map< TNumrow, CBioseq_Handle, less< TNumrow > > >::iterator
container_type::iterator iterator
Definition: map.hpp:54

map_checker::end
const_iterator end() const
Definition: map.hpp:152

map_checker::find
const_iterator find(const key_type &key) const
Definition: map.hpp:153

multimap
Definition: map.hpp:344

state
Definition: sls_alp.hpp:60

false
#define false
Definition: bool.h:36

prev
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61

column
static const char * column
Definition: stats.c:23

data
char data[12]
Definition: iconv.c:80

TSeqPos
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875

ITERATE
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815

TSignedSeqPos
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887

string
string
Definition: cgiapp.hpp:690

NULL
#define NULL
Definition: ncbistd.hpp:225

NCBI_THROW
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704

CSeq_id::AsFastaString
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266

CScope::AddTopLevelSeqEntry
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522

CScope::GetBioseqHandle
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95

CBioseq_Handle::IsNucleotide
bool IsNucleotide(void) const
Definition: bioseq_handle.hpp:869

CBioseq_Handle::GetBioseqCore
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
Definition: bioseq_handle.cpp:144

CBioseq_Handle::GetSeqVector
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
Definition: bioseq_handle.cpp:487

CBioseq_Handle::eStrand_Plus
@ eStrand_Plus
Plus strand.
Definition: bioseq_handle.hpp:353

CBioseq_Handle::eStrand_Minus
@ eStrand_Minus
Minus strand.
Definition: bioseq_handle.hpp:354

CBioseq_Handle::eCoding_Iupac
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
Definition: bioseq_handle.hpp:350

CSeqVector::GetSeqData
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304

CSeqVector::size
TSeqPos size(void) const
Definition: seq_vector.hpp:291

CSeqVector::IsProtein
bool IsProtein(void) const
Definition: seq_vector.hpp:350

CSeqVector::SetCoding
void SetCoding(TCoding coding)
Definition: seq_vector.cpp:1272

CSeqVector::SetIupacCoding
void SetIupacCoding(void)
Set coding to either Iupacaa or Iupacna depending on molecule type.
Definition: seq_vector.cpp:1281

CSeqVector::IsNucleotide
bool IsNucleotide(void) const
Definition: seq_vector.hpp:357

CRef::Reset
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773

END_NCBI_SCOPE
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103

BEGIN_NCBI_SCOPE
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100

CDense_seg_Base::SetLens
TLens & SetLens(void)
Assign a value to Lens data member.
Definition: Dense_seg_.hpp:561

CDense_seg_Base::SetDim
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Dense_seg_.hpp:427

CDense_seg_Base::SetStarts
TStarts & SetStarts(void)
Assign a value to Starts data member.
Definition: Dense_seg_.hpp:536

CDense_seg_Base::SetStrands
TStrands & SetStrands(void)
Assign a value to Strands data member.
Definition: Dense_seg_.hpp:586

CDense_seg_Base::SetNumseg
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
Definition: Dense_seg_.hpp:474

CDense_seg_Base::GetIds
const TIds & GetIds(void) const
Get the Ids member data.
Definition: Dense_seg_.hpp:505

CDense_seg_Base::SetIds
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511

eNa_strand_unknown
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65

CSeq_entry_Base::SetSeq
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108

CBioseq_Base::SetId
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296

CBioseq_Base::GetInst
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336

CSeq_data_Base::SetIupacna
TIupacna & SetIupacna(void)
Select the variant.
Definition: Seq_data_.hpp:517

CSeq_inst_Base::GetMol
TMol GetMol(void) const
Get the Mol member data.
Definition: Seq_inst_.hpp:612

CSeqdesc_Base::SetComment
TComment & SetComment(void)
Select the variant.
Definition: Seqdesc_.hpp:1065

CBioseq_Base::SetInst
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86

CBioseq_Base::Reset
virtual void Reset(void)
Reset the whole object.
Definition: Bioseq_.cpp:97

CBioseq_Base::SetDescr
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65

CSeq_inst_Base::SetRepr
void SetRepr(TRepr value)
Assign a value to Repr data member.
Definition: Seq_inst_.hpp:574

CSeq_descr_Base::Set
Tdata & Set(void)
Assign a value to data member.
Definition: Seq_descr_.hpp:172

CSeq_inst_Base::SetLength
void SetLength(TLength value)
Assign a value to Length data member.
Definition: Seq_inst_.hpp:668

CSeq_inst_Base::SetSeq_data
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
Definition: Seq_inst_.cpp:130

CSeq_data_Base::SetIupacaa
TIupacaa & SetIupacaa(void)
Select the variant.
Definition: Seq_data_.hpp:537

CSeq_inst_Base::SetMol
void SetMol(TMol value)
Assign a value to Mol data member.
Definition: Seq_inst_.hpp:621

CSeq_inst_Base::TLength
TSeqPos TLength
Definition: Seq_inst_.hpp:147

CSeq_inst_Base::eRepr_raw
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94

CSeq_data_Base::e_not_set
@ e_not_set
No variant selected.
Definition: Seq_data_.hpp:103

CSeq_inst_Base::eMol_na
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113

CSeq_inst_Base::eMol_aa
@ eMol_aa
Definition: Seq_inst_.hpp:112

int
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210

e_not_set
@ e_not_set
Definition: keywords_item.cpp:89

buf
char * buf
Definition: lex.newick.cpp:1452

i
int i
Definition: lex.newick.cpp:1456

n
yy_size_t n
Definition: lex.newick.cpp:1455

len
int len
Definition: lex.newick.cpp:1450

mu::value_type
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228

ncbi::grid::netcache::search::fields::size
const struct ncbi::grid::netcache::search::fields::SIZE size

t
EIPRangeType t
Definition: ncbi_localip.c:101

ncbi_pch.hpp

max
T max(T x_, T y_)
Definition: njn_function.hpp:105

plus
T plus(T x_)

delta
Int4 delta(size_t dimension_, const Int4 *score_)

count
#define count
Definition: pcre2posix_test.c:54

buffer
static uint8_t * buffer
Definition: pcre2test.c:1016

raw_scoremat.h

NCBISM_Blosum62
const SNCBIPackedScoreMatrix NCBISM_Blosum62
Definition: sm_blosum62.c:92

NCBI_FSM_DIM
#define NCBI_FSM_DIM
Recommended approach: unpack and index directly.
Definition: raw_scoremat.h:85

NCBISM_Unpack
void NCBISM_Unpack(const SNCBIPackedScoreMatrix *psm, SNCBIFullScoreMatrix *fsm)
Expand a packed score matrix into an unpacked one, which callers can proceed to index directly by sta...
Definition: raw_scoremat.c:81

scope.hpp

seq_vector.hpp

row
#define row(bind, expected)
Definition: string_bind.c:73

SNCBIFullScoreMatrix
Definition: raw_scoremat.h:86

SNCBIFullScoreMatrix::s
TNCBIScore s[128][128]
Definition: raw_scoremat.h:87

type
Definition: type.c:6

_ASSERT
#define _ASSERT
Definition: test_assert_impl.h:173