59 fp = fopen(filename,
"w");
64 fprintf(
fp,
">%s\n", seqIds[
i-startRow].c_str());
81 matrixName(
"BLOSUM62"),
82 requestInformationContent(
false),
83 requestResidueFrequencies(
false),
84 requestWeightedResidueFrequencies(
false),
85 requestFrequencyRatios(
false),
86 requestNumIndepObs(
false),
87 gaplessColumnWeights(
false),
88 unalignedSegThreshold(-1),
89 inclusionThreshold(0.5),
103 : m_profiles(profiles),m_options(0), m_useConsensus(useConsensus), m_diagRequest(),
125 if (
config.pseudoCount > 0 ) {
132 if (SumAInf > 84 ) iPseudo = 10;
133 else if (SumAInf > 55 ) iPseudo = 7;
134 else if (SumAInf > 43 ) iPseudo = 5;
135 else if (SumAInf > 41.5) iPseudo = 4;
136 else if (SumAInf > 40 ) iPseudo = 3;
137 else if (SumAInf > 39 ) iPseudo = 2;
163 vector<char> residuesOnColumn;
321 : m_conMaker(0), m_useConsensus(useConsensus), m_addQuery(addQueryToPssm),
322 m_masterSeqEntry(), m_trunctMaster(), m_cd(cd), m_pssmInput(0)
334 vector<int> seqIndice;
336 if (seqIndice.size() > 0)
373 pssmRef = pssmEngine.
Run();
431 list< double >* freqs = 0;
437 for (
unsigned int col = 0; col < consensus.size(); col++)
439 char c1 = consensus.at(col);
444 scores.push_back(score);
446 freqs->push_back(0.0);
459 static const string commaSpace(
", ");
460 static const string periodSpaceSpace(
". ");
463 list< CRef< CSeq_id > > & ids = bioseq.
SetId();
468 list< CRef< CCdd_id > >& cdids =
m_cd->
SetId().Set();
470 list< CRef< CCdd_id > >::iterator cit = cdids.begin();
471 for (; cit != cdids.end(); cit++)
475 uid = (*cit)->GetUid();
489 ids.push_back(seqId);
491 list< CRef< CSeqdesc > >& descList = bioseq.
SetDescr().Set();
498 if (cdTitle.length() > 0) {
500 cdTitle = cdTitle.substr(0, cdTitle.length() - 1);
505 seqDescTitle += commaSpace;
509 if (cdTitle.length() > 0) {
510 seqDescTitle += commaSpace + cdTitle + periodSpaceSpace;
514 list< CRef< CCdd_descr > >::iterator lit = cddescList.begin();
516 for (; lit != cddescList.end(); lit++)
518 if ((*lit)->IsComment())
520 if (cdTitle.length() == 0) {
521 seqDescTitle += commaSpace;
523 seqDescTitle += (*lit)->GetComment();
533 list< CRef< CSeqdesc > >::iterator it = descList.begin();
534 for(; it != descList.end(); it++)
535 if ( (*it)->IsTitle() ) {
539 descList.push_back(desc);
546 bmp.getSlave().setSeqId(seqId);
575 vector<string> seqIdStr;
578 seqIdStr.push_back(seqIds[0]->AsFastaString());
579 for (
unsigned int i = 1;
i < seqIds.size();
i++)
581 seqIdStr.push_back(seqIds[
i]->AsFastaString());
589 if (fileName.length() == 0) {
594 unsigned int nRows, nCols;
595 vector<string> seqIdStr;
599 FILE*
fp = fopen(fileName.c_str(),
"w");
605 seqIdStr.push_back(seqIds[0]->AsFastaString());
606 for (
unsigned int k = 1; k < seqIds.size(); k++)
608 seqIdStr.push_back(seqIds[k]->AsFastaString());
610 for (
i = 0;
i < seqIdStr.size(); ++
i) {
611 fprintf(
fp,
"row %d: %s\n",
i,seqIdStr[
i].c_str());
617 static const string dash(
"-");
618 fprintf(
fp,
"Query length: %d; Number of rows: %d\n", nCols,
nRows);
619 for (j = 0; j < nCols; j++) {
620 fprintf(
fp,
">column %d\n", j+1);
635 unsigned int i, j,
nRows, nCols;
644 for (j = 0; j < nCols; j++) {
646 colResidues.assign(
nRows + 1,
'-');
651 colResidues[
i] =
'-';
654 columnMap[j] = colResidues;
660 cd_utils::PssmMaker pm(ccd,
true,
true);
661 cd_utils::PssmMakerOptions
config;
662 config.requestFrequencyRatios =
false;
675 bmp.getSlave() =
bmp.getMaster();
User-defined methods of the data storage class.
User-defined methods of the data storage class.
PSIBlastOptions * PSIBlastOptionsFree(PSIBlastOptions *psi_options)
Deallocate PSI BLAST options.
Int2 PSIBlastOptionsNew(PSIBlastOptions **psi_options)
Initialize default options for PSI BLAST.
PSIMsa * PSIMsaFree(PSIMsa *msa)
Deallocates the PSIMsa structure.
PSIMsa * PSIMsaNew(const PSIMsaDimensions *dimensions)
Allocates and initializes the multiple sequence alignment data structure for use as input to the PSSM...
bool GetSeqAlign(int Row, CRef< CSeq_align > &seqAlign)
bool GetSeqIDFromAlignment(int RowIndex, CRef< CSeq_id > &SeqID) const
bool FindConsensusInSequenceList(vector< int > *indices=NULL) const
bool GetSeqEntryForRow(int rowId, CRef< CSeq_entry > &seqEntry) const
bool GetBioseqForRow(int rowId, CRef< CBioseq > &bioseq)
string GetAccession(int &Version) const
bool GetSeqEntryForIndex(int seqIndex, CRef< CSeq_entry > &seqEntry) const
Computes a PSSM as specified in PSI-BLAST.
static const string m_residues
int getIndexByConsensus() const
static char getEaaCode(char stdCode)
void getResiduesByRow(vector< char > &residues, bool byNcbiStd=true) const
static unsigned char getNcbiStdCode(char eaa)
const string & getConsensus()
CRef< CSeq_entry > getConsensusSeqEntry()
ResidueProfiles & getResidueProfiles()
const BlockModelPair & getGuideAlignment() const
void skipUnalignedSeg(int threshold)
const string & getConsensus()
void setOptions(const PssmMakerOptions &option)
CRef< CPssmWithParameters > m_pssmMade
PssmMaker(CCdCore *cd, bool useConsensus=true, bool addQueryToPssm=true)
ConsensusMaker * m_conMaker
CdPssmInput * m_pssmInput
vector< char > m_trunctMaster
bool getTrunctMaster(CRef< CSeq_entry > &seqEntry)
CRef< CPssmWithParameters > makeDefaultPssm()
CRef< CPssmWithParameters > make()
PssmMakerOptions m_config
void printAlignment(string &fileName)
void getPssmColumnResidues(map< unsigned int, string > &columnMap)
void modifyQuery(CRef< CSeq_entry > query)
const BlockModelPair & getGuideAlignment()
void printAlignmentByColumn(string &fileName)
CRef< CSeq_entry > m_masterSeqEntry
int score(const CRef< CSeq_align > align, const CRef< CBioseq > bioseq)
const vector< CRef< CSeq_id > > getSeqIdsByRow() const
void traverseColumnsOnMaster(ColumnReader &cr)
double calcInformationContent(bool byConsensus=true)
const string getConsensus(bool inNcbieaa=true)
int countColumnsOnMaster(string &seq)
void traverseColumnsOnConsensus(ColumnReader &cr)
int GetScore(char i, char j)
The NCBI C++ standard methods for dealing with std::string.
int findHighestScoringRowByPssm(CCdCore *ccd)
static void printMsa(const char *filename, const PSIMsa *msa, vector< string > &seqIds)
string GetScoringMatrixName(EScoreMatrixType type)
void NcbistdaaToNcbieaaString(const vector< char > &vec, string *str)
bool IsConsensus(const CRef< CSeq_id > &seqId)
thread_local unique_ptr< FtaMsgPost > bmp
CRef< objects::CPssmWithParameters > Run()
Runs the PSSM engine to compute the PSSM.
void Reset(void)
Reset reference object.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
void SetId(TId &value)
Assign a value to Id data member.
void SetDescription(TDescription &value)
Assign a value to Description data member.
const TName & GetName(void) const
Get the Name member data.
void SetTag(TTag &value)
Assign a value to Tag data member.
TStr & SetStr(void)
Select the variant.
void SetDb(const TDb &value)
Assign a value to Db data member.
TId & SetId(void)
Select the variant.
void SetPssm(TPssm &value)
Assign a value to Pssm data member.
void SetIntermediateData(TIntermediateData &value)
Assign a value to IntermediateData data member.
void SetFinalData(TFinalData &value)
Assign a value to FinalData data member.
void SetNumColumns(TNumColumns value)
Assign a value to NumColumns data member.
void SetNumRows(TNumRows value)
Assign a value to NumRows data member.
TGeneral & SetGeneral(void)
Select the variant.
TSeq & SetSeq(void)
Select the variant.
TId & SetId(void)
Assign a value to Id data member.
void ResetId(void)
Reset Id data member.
TTitle & SetTitle(void)
Select the variant.
void SetInst(TInst &value)
Assign a value to Inst data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
void SetLength(TLength value)
Assign a value to Length data member.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
void ResetSeq_data(void)
Reset Seq_data data member.
@ e_Ncbieaa
extended ASCII 1 letter aa codes
unsigned int
A callback function used to compare two keys in a database.
#define ASSERT
macro for assert.
Declares the CPSIBlastOptionsHandle class.
C++ API for the PSI-BLAST PSSM engine.
#define row(bind, expected)
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...
Boolean nsg_compatibility_mode
Compatibility option for the NCBI's structure group (note nsg_ prefix, stands for NCBI's structure gr...
double impala_scaling_factor
Scaling factor as used in IMPALA to do the matrix rescaling.
Int4 pseudo_count
Pseudocount constant.
Structure to allow requesting various diagnostics data to be collected by PSSM engine.
Boolean information_content
request information content
Boolean frequency_ratios
request frequency ratios
Boolean independent_observations
request number of independent observations
Boolean weighted_residue_frequencies
request observed weighted residue frequencies
Boolean gapless_column_weights
request gapless column weights
Boolean residue_frequencies
request observed residue frequencies
Structure to describe the characteristics of a position in the multiple sequence alignment data struc...
Boolean is_aligned
Is this letter part of the alignment?
Uint1 letter
Preferred letter at this position, in ncbistdaa encoding.
Uint4 num_seqs
Number of distinct sequences aligned with the query (does not include the query)
Uint4 query_length
Length of the query.
Multiple sequence alignment (msa) data structure containing the raw data needed by the PSSM engine to...
PSIMsaCell ** data
actual data, dimensions are (dimensions->num_seqs+1) by (dimensions->query_length)
PSIMsaDimensions * dimensions
dimensions of the msa
bool IsRequestingIntermediateData()
double inclusionThreshold
bool requestFrequencyRatios
request frequency ratios
int unalignedSegThreshold
bool requestWeightedResidueFrequencies
request observed weighted residue frequencies
bool gaplessColumnWeights
bool requestResidueFrequencies
request observed residue frequencies
bool requestInformationContent
request information content
bool requestNumIndepObs
request number of independent observations per position
voidp calloc(uInt items, uInt size)