46 :m_masterIn(
false), m_residueRowsMap(), m_residueTypeCount(0), m_indexByConsensus(-1)
107 unsigned int max = 0;
129 double w = 1.0/denom;
140 double w = 1.0/denom;
155 rowWeights[cit->second.first] += colResWeight;
156 total += colResWeight;
157 rowsUsed.
insert(cit->second.first);
159 if (countGap && ((
int)rowsUsed.
size() < numRows))
161 double gapWeight = (1.0 - total) / (numRows - rowsUsed.
size());
165 rowWeights[
row] += gapWeight;
173 double totalWeight = 0;
174 double maxResWeight = 0;
175 double resWeight = 0;
178 pair <ResidueRowsMap::const_iterator, ResidueRowsMap::const_iterator>
range =
183 int row = cit->second.first;
184 resWeight += rowWeights[
row];
186 if (resWeight > maxResWeight)
189 maxResWeight = resWeight;
191 totalWeight += resWeight;
205 residues[rit->second.first] = rit->first;
224 pair <ResidueRowsMap::const_iterator, ResidueRowsMap::const_iterator>
range =
228 if (cit->second.first ==
row)
229 return cit->second.second;
238 return (*cit)->second.second;
248 if (!(cit->second.second))
311 double freqThreshold = 0.0001f;
313 static const double ln2 =
log(2.0f);
322 if ( standardFreq > freqThreshold)
324 double freq = double(
count)/total;
325 double freqRatio = freq/standardFreq;
326 if (freqRatio > freqThreshold)
327 info += freq * (
log(freqRatio))/ln2;
337 : mPos(posOnMaster), gap(aGap)
362 : m_frequencyThreshold(0.5),
364 m_profiles(), m_rowWeights(),
365 m_consensus(), m_guideAlignment()
377 const vector<Block>& mBlocks =
bmp.getMaster().getBlocks();
378 const vector<Block>& sBlocks =
bmp.getSlave().getBlocks();
383 for (
unsigned int bn = 0; bn < mBlocks.size(); bn++)
385 for (
int i = 0;
i < mBlocks[bn].getLen();
i++)
387 int mPos = mBlocks[bn].getStart() +
i;
388 int sPos = sBlocks[bn].getStart() +
i;
390 m_profiles[col].addOccurence(mSeq[mPos], masterRow,
true);
394 if (bn != (mBlocks.size() -1))
396 int mPos = mBlocks[bn].getEnd();
397 int sPos = sBlocks[bn].getEnd();
399 int sGapLen =
bmp.getSlave().getGapToCTerminal(bn);
400 int mGapLen =
bmp.getMaster().getGapToCTerminal(bn);
404 for (
int gap =1; gap <= mGapLen; gap++)
407 m_profiles[col].addOccurence(mSeq[mPos+gap], masterRow,
false);
412 int midOnMaster = mGapLen/2 + mGapLen%2;
413 int mid = sGapLen/2 + sGapLen%2;
414 for (
int gap =1; gap <= sGapLen; gap++)
419 if (gap <= midOnMaster)
422 col.
mPos = mPos + gap;
426 col.
mPos = mPos + midOnMaster;
427 col.
gap = gap - midOnMaster;
432 int mPosNext = mBlocks[bn+1].getStart();
433 int sPosNext = sBlocks[bn+1].getStart();
434 int delta = sPosNext - (sPos + gap);
435 if (
delta <= (mGapLen/2))
442 col.
mPos = mPosNext - (mGapLen/2);
456 bool countGap =
false;
459 double weightsSum = 0;
464 int highestCount = 0;
467 int aCount = cit->second.getSumCount();
468 if (aCount > highestCount)
469 highestCount = aCount;
496 double defaultWeight = 0.0;
498 defaultWeight = 1.0f * double(colUsed)/double(
m_totalRows);
501 int noWeightRows = 0;
547 blocksOnMaster.clear();
548 blocksOnConsensus.clear();
552 bool inBlock =
false;
553 int startM = 0, endM = 0;
566 bool qualifiedForConsensus = (
weight >= threshold && res );
567 bool qualifiedForGuide = qualifiedForConsensus && ((cit->second).isAligned(0));
572 if (qualifiedForGuide)
583 if (qualifiedForGuide)
586 if (col.
mPos == (endM + 1))
593 blocksOnMaster.push_back(
Block(startM, endM - startM + 1, blockId));
594 blocksOnConsensus.push_back(
Block(startC, endM - startM + 1, blockId));
605 blocksOnMaster.push_back(
Block(startM, endM - startM + 1, blockId));
606 blocksOnConsensus.push_back(
Block(startC, endM - startM + 1, blockId));
610 if (qualifiedForConsensus)
612 cit->second.setIndexByConsensus(
m_consensus.size());
623 blocksOnMaster.push_back(
Block(startM, endM - startM + 1, blockId));
624 blocksOnConsensus.push_back(
Block(startC, endM - startM + 1, blockId));
683 cr.
read(pit->second);
692 if (pit->first.gap == 0)
694 int mPos = pit->first.mPos;
696 cr.
read(pit->second);
706 if (pit->second.getIndexByConsensus() >= 0)
707 cr.
read(pit->second);
716 if (pit->second.isAllRowsAligned())
717 cr.
read(pit->second);
736 useCol = (cit->first.gap == 0);
767 blocksOnMaster.clear();
768 blocksOnConsensus.clear();
775 bool inBlock =
false;
776 int startM = 0, endM = 0;
791 bool qualifiedForGuide = qualifiedForConsensus && ((cit->second).isAligned(0));
794 if (qualifiedForGuide)
805 if (qualifiedForGuide)
808 if (col.
mPos == (endM + 1))
815 blocksOnMaster.push_back(
Block(startM, endM - startM + 1, blockId));
816 blocksOnConsensus.push_back(
Block(startC, endM - startM + 1, blockId));
827 blocksOnMaster.push_back(
Block(startM, endM - startM + 1, blockId));
828 blocksOnConsensus.push_back(
Block(startC, endM - startM + 1, blockId));
833 if (qualifiedForConsensus)
836 if (curMap.
find(conIndex) != curMap.
end()) {
845 if (curMap.
find(conIndex) != curMap.
end()) {
856 blocksOnMaster.push_back(
Block(startM, endM - startM + 1, blockId));
857 blocksOnConsensus.push_back(
Block(startC, endM - startM + 1, blockId));
864 vector<UnalignedSegReader::Seg> segs;
866 if (segs.size() == 0)
881 for(
unsigned int i = 0;
i < segs.size();
i++)
883 for(
int k = segs[
i].
first; k <= segs[
i].second; k++)
889 : m_totalUnaligned(0), m_pos(0)
951 return seg.second - seg.first + 1 ;
956 string head =
in.substr(0, seg.first);
957 string tail =
in.substr(seg.second + 1,
in.size() - (seg.second + 1));
Various auxiliary BLAST utility functions.
double * BLAST_GetStandardAaProbabilities(void)
Get the standard amino acid probabilities.
vector< Block > & getBlocks()
bool operator<(const ColumnAddress &rhs) const
virtual void read(ColumnResidueProfile &crp)=0
double calculateColumnWeight(char residue, bool countGap, int numRows) const
double reweightColumnByRowWeights(const vector< double > &rowWeights, char &heaviestResidue) const
static const string m_residues
void addOccurence(char residue, int row, bool aligned)
unsigned char getResidueByRow(int row)
pair< int, bool > RowStatusPair
ResidueRowsMap::iterator * findRow(int row)
int getIndexByConsensus() const
bool isAllRowsAligned() const
double getBackgroundResFreq(char res)
void setIndexByConsensus(int col)
vector< ResidueRowsMap::iterator * > m_residuesByRow
double sumUpColumnWeightsByRow(vector< double > &rowWeights, bool countGap, int numRows) const
void getResiduesByRow(vector< char > &residues, bool byNcbiStd=true) const
static map< char, double > m_backgroundResFreq
ResidueRowsMap m_residueRowsMap
double calcInformationContent()
char getMostFrequentResidue(int &count) const
static unsigned char getNcbiStdCode(char eaa)
static void useDefaultBackgroundResFreq()
int getResidueTypeCount() const
bool isAligned(char residue, int row) const
vector< double > m_rowWeights
const string & makeConsensus()
set< int > m_colsToSkipOnConsensus
UnqualForConsMap::const_iterator UnqualForConsCit
void adjustConsensusAndGuide()
void traverseColumnsOnMaster(ColumnReader &cr)
BlockModelPair m_guideAlignment
UnqualForConsMap m_numUnqualAfterConsIndex
double calcInformationContent(bool byConsensus=true)
const string getConsensus(bool inNcbieaa=true)
set< int > m_colsToSkipOnMaster
void traverseAlignedColumns(ColumnReader &cr)
void countUnalignedConsensus(UnalignedSegReader &ucr)
const BlockModelPair & getGuideAlignment() const
bool HasUnqualAfterIndex(int index) const
int countColumnsOnMaster(string &seq)
unsigned int GetNumUnqualAfterIndex(int index) const
void calculateRowWeights()
void addOneRow(BlockModelPair &bmp, const string &mSeq, const string &sSeq)
void traverseAllColumns(ColumnReader &cr)
void traverseColumnsOnConsensus(ColumnReader &cr)
vector< CRef< CSeq_id > > m_seqIds
bool skipUnalignedSeg(UnalignedSegReader &ucr, int len)
void segsToSet(vector< UnalignedSegReader::Seg > &segs, set< int > &cols)
double m_frequencyThreshold
void read(ColumnResidueProfile &crp)
string getIndexSequence()
int getLongUnalignedSegs(int length, vector< Seg > &segs)
string subtractSeg(Seg seg, string &in)
vector< Seg > m_unalignedSegs
string subtractLongestSeg(int threshold)
void setIndexSequence(string &seq)
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
const_iterator_pair equal_range(const key_type &key) const
container_type::const_iterator const_iterator
const_iterator end() const
iterator insert(const value_type &val)
const_iterator begin() const
container_type::iterator iterator
container_type::value_type value_type
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
double * m_backgroundResFreqArray
thread_local unique_ptr< FtaMsgPost > bmp
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
unsigned int
A callback function used to compare two keys in a database.
range(_Ty, _Ty) -> range< _Ty >
std::istream & in(std::istream &in_, double &x_)
Int4 delta(size_t dimension_, const Int4 *score_)
#define row(bind, expected)