62 string program_uc = program;
63 return NStr::ToUpper(program_uc) +
" " + blast::CBlastVersion().Print();
78 blast::CReference::EPublication pub,
81 string reference(
"Reference");
82 if (pub == blast::CReference::eCompAdjustedMatrices) {
83 reference +=
" for compositional score matrix adjustment";
84 }
else if (pub == blast::CReference::eCompBasedStats) {
85 reference +=
" for composition-based statistics";
87 reference +=
" starting in round 2";
89 }
else if (pub == blast::CReference::eIndexedMegablast) {
90 reference +=
" for database indexing";
92 reference +=
" for DELTA-BLAST";
100 string httpProt =
"https:";
101 if(!config_reg.
Empty()) {
102 if(config_reg.
HasEntry(
"BLASTFMTUTIL",
"PROTOCOL")) {
103 httpProt = config_reg.
Get(
"BLASTFMTUTIL",
"PROTOCOL");
106 str <<
"<b><a href=\""
108 << blast::CReference::GetPubmedUrl(pub)
109 <<
"\">" << reference <<
"</a>:</b>"
111 x_WrapOutputLine(
str.str() + blast::CReference::GetString(pub),
116 str << reference <<
": ";
117 x_WrapOutputLine(
str.str() + blast::CReference::GetHTMLFreeString(pub),
125 string definition_line,
135 string dbString = (html) ?
"<b>Database:</b> " :
"Database: ";
136 str << dbString << definition_line << endl;
137 if(!(html && with_links)) x_WrapOutputLine(
str.str(),line_len,
out);
173 (
const objects::CPssmWithParameters& pssm_with_params,
179 vector<double> info_content, gapless_col_weights, sigma;
180 blast::CScorematPssmConverter::GetInformationContent(pssm_with_params,
182 blast::CScorematPssmConverter::GetGaplessColumnWeights(pssm_with_params,
183 gapless_col_weights);
184 blast::CScorematPssmConverter::GetSigma(pssm_with_params, sigma);
188 bool pssm_calculation_done = info_content.empty() ?
false :
true;
190 if (pssm_calculation_done) {
191 out <<
"\nLast position-specific scoring matrix computed, weighted ";
192 out <<
"observed percentages rounded down, information per position, ";
193 out <<
"and relative weight of gapless real matches to pseudocounts\n";
195 out <<
"\nLast position-specific scoring matrix computed\n";
202 const SIZE_TYPE kQueryLength = pssm_with_params.GetPssm().GetQueryLength();
204 (
SIZE_TYPE)pssm_with_params.GetPssm().GetNumColumns());
205 unique_ptr< TNcbiMatrixInt > pssm
206 (blast::CScorematPssmConverter::GetScores(pssm_with_params));
207 unique_ptr< TNcbiMatrixDouble > weighted_res_freqs
208 (blast::CScorematPssmConverter::
209 GetWeightedResidueFrequencies(pssm_with_params));
210 vector<int> interval_sizes, num_matching_seqs;
211 blast::CScorematPssmConverter::GetIntervalSizes(pssm_with_params,
213 blast::CScorematPssmConverter::GetNumMatchingSeqs(pssm_with_params,
224 if (*it > max_score) {
228 if (-*it > max_score) {
235 while (max_score > 0) {
239 int width = num_digits + 2;
246 if (pssm_calculation_done) {
254 pssm_with_params.GetPssm().GetQuerySequenceData(
query);
255 const vector<char>& query_seq =
query.Get();
260 out <<
"\n" << setw(5) << (
i+1) <<
" " <<
273 if (pssm_calculation_done) {
286 out <<
" " << setprecision(2) << info_content[
i] <<
" ";
289 if ((num_matching_seqs[
i] > 1) && (query_seq[
i] != kXResidue)) {
290 out << setprecision(2) << gapless_col_weights[
i];
298 ancillary_data->GetUngappedKarlinBlk();
300 ancillary_data->GetGappedKarlinBlk();
302 ancillary_data->GetPsiUngappedKarlinBlk();
304 ancillary_data->GetPsiGappedKarlinBlk();
305 out <<
"\n\n" << setprecision(4);
306 out <<
" K Lambda\n";
308 out <<
"Standard Ungapped "
309 << ungapped_kbp->
K <<
" "
310 << ungapped_kbp->
Lambda <<
"\n";
313 out <<
"Standard Gapped "
314 << gapped_kbp->
K <<
" "
315 << gapped_kbp->
Lambda <<
"\n";
317 if (psi_ungapped_kbp) {
318 out <<
"PSI Ungapped "
319 << psi_ungapped_kbp->
K <<
" "
320 << psi_ungapped_kbp->
Lambda <<
"\n";
322 if (psi_gapped_kbp) {
324 << psi_gapped_kbp->
K <<
" "
325 << psi_gapped_kbp->
Lambda <<
"\n";
333 const string & db_name,
334 const string & db_title,
341 static const string kHistSeqalign(
"Hist Seqalign");
342 hist_align_obj->
SetType().SetStr(kHistSeqalign);
343 hist_align_obj->
AddField(kHistSeqalign,
true);
348 static const string kBlastType(
"Blast Type");
349 blast_type->
SetType().SetStr(kBlastType);
357 static const string kVDBNames(
"Database Names");
358 blast_db_info->
SetType().SetStr(kVDBNames);
359 blast_db_info->
AddField( db_name,
true );
364 static const string kBlastDBTitle(
"Blast Database Title");
365 blast_db_info->
SetType().SetStr(kBlastDBTitle);
368 blast_db_info->
AddField(
"n/a",
false );
372 blast_db_info->
AddField( db_name,
true );
377 blast_db_info->
AddField( db_title, is_nucl );
386 retval->
SetData().SetAlign().push_back(*itr);
395 const int kAsciiSize = 256;
396 Resize(kAsciiSize, kAsciiSize, INT_MIN);
400 const int kNumValues =
max(ncols, nrows);
401 vector<char> ncbistdaa_values(kNumValues);
402 for (
int index = 0; index < kNumValues; ++index)
403 ncbistdaa_values[index] = (
char) index;
412 vector<char> iupacaa_values(kNumValues);
413 for (
int index = 0; index < kNumValues; ++index)
414 iupacaa_values[index] = iupacaa_seq.
GetIupacaa().
Get()[index];
417 for (
unsigned int row = 0;
row < nrows; ++
row) {
418 for (
unsigned int col = 0; col < ncols; ++col) {
419 if (iupacaa_values[
row] >= 0 && iupacaa_values[col] >= 0) {
420 (*this)((
int)iupacaa_values[
row], (
int)iupacaa_values[col]) =
447 const ncbi::TMaskedQueryRegions& mask_info,
448 align_format::CDisplaySeqalign::SeqLocCharOption mask_char,
452 vector<CRange<int> > segs_v;
453 for (
int index = 0; index < kNumSegs; ++index) {
456 segs_v.push_back(
range);
459 vector<CRange<int> > masks_v;
460 int aln_stop =
static_cast<int>(query_seq.size()) - 1;
461 ITERATE(ncbi::TMaskedQueryRegions, mask_iter, mask_info) {
462 if ((*mask_iter)->GetFrame() != query_frame)
466 (*mask_iter)->GetInterval().GetFrom());
469 (*mask_iter)->GetInterval().GetTo());
471 if (query_frame < 0) {
480 masks_v.push_back(
range);
488 for (
int seg_index = 0;
489 seg_index < (
int) segs_v.size() && mask_index < (
int) masks_v.size();
491 if (segs_v[seg_index].
Empty())
493 int seg_start = segs_v[seg_index].GetFrom();
494 int seg_stop = segs_v[seg_index].GetTo();
496 while (mask_index < (
int) masks_v.size() &&
497 (mask_pos =
max(seg_start, masks_v[mask_index].GetFrom()))
499 int mask_stop =
min(seg_stop, masks_v[mask_index].GetTo());
501 for ( ; mask_pos <= mask_stop; ++mask_pos) {
502 if( query_seq[mask_pos] ==
'-' )
continue;
504 query_seq[mask_pos] =
'X';
506 query_seq[mask_pos]=
'N';
508 query_seq[mask_pos] =
509 tolower((
unsigned char)query_seq[mask_pos]);
514 if (mask_pos < seg_stop)
541 const objects::CDense_seg& ds,
542 objects::CScope& scope,
553 string & masked_query,
555 const objects::CDense_seg & ds,
556 objects::CScope & scope,
559 const ncbi::TMaskedQueryRegions& mask_info,
560 align_format::CDisplaySeqalign::SeqLocCharOption mask_char,
568 masked_query =
query;
569 s_MaskQuerySeq(aln_vec, masked_query, mask_info, mask_char, query_frame);
578 if(!org_align_set.
IsSet() || org_align_set.
Get().empty()) {
579 _TRACE(
"Empty seq_align_set");
584 unsigned int check_type = score_type;
585 if(org_align_set.
Get().front()->GetNamedScore(
"seq_percent_coverage", dont_care)) {
588 if (org_align_set.
Get().front()->GetNamedScore(
"uniq_seq_percent_coverage", dont_care)) {
608 _TRACE(
"Invalid Query Length");
613 list<CRef<CSeq_align> > & tmp_align_list = tmp_align_set.
Set();
614 list<CRef<CSeq_align> > & org_align_list = org_align_set.
Set();
616 list<CRef<CSeq_align> >::iterator left_it = org_align_list.begin();
617 list<CRef<CSeq_align> >::iterator right_it = org_align_list.begin();
619 while(left_it != org_align_list.end())
621 const CSeq_id & cur_id = (*left_it)->GetSeq_id(1);
624 for (; right_it != org_align_list.end(); ++right_it)
626 const CSeq_id &
id = (*right_it)->GetSeq_id(1);
627 if (!
id.Match(cur_id))
631 tmp_align_list.assign(left_it, right_it);
633 int master_coverage = align_format::CAlignFormatUtil::GetMasterCoverage(tmp_align_set);
637 double subj_coverage = 100.0 * (double) master_coverage/ (
double) query_len;
640 if(subj_coverage < 99)
643 (*left_it)->SetNamedScore (
"seq_percent_coverage", (
int) subj_coverage);
647 int uniq_coverage = align_format::CAlignFormatUtil::GetUniqSeqCoverage(tmp_align_set);
650 double uniq_subj_coverage = 100.0 * (double) uniq_coverage/ (
double) query_len;
653 if(uniq_subj_coverage < 99)
654 uniq_subj_coverage +=0.5;
656 (*left_it)->SetNamedScore (
"uniq_seq_percent_coverage", (
int) uniq_subj_coverage);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Boolean Blast_SubjectIsNucleotide(EBlastProgramType p)
Returns true if the subject is nucleotide.
#define BLAST_SCORE_MIN
minimum allowed score (for one letter comparison).
Definitions of special type used in BLAST.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
@ eDeltaBlast
Delta Blast.
CNcbiMatrix< int > TNcbiMatrixInt
static void s_MaskQuerySeq(CAlnVec &alnvec, string &query_seq, const ncbi::TMaskedQueryRegions &mask_info, align_format::CDisplaySeqalign::SeqLocCharOption mask_char, int query_frame)
Masks a query sequence string corresponding to an alignment, given a list of mask locations.
CNcbiMatrix< double > TNcbiMatrixDouble
static int RESIDUE_ORDER[]
Standard order of letters according to S.
static void s_GetQueryAndSubjectStrings(CAlnVec &aln_vec, string &query, string &subject, int master_gen_code, int slave_gen_code)
TSignedSeqPos GetAlnPosFromSeqPos(TNumrow row, TSeqPos seq_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
TSeqPos GetAlnStop(TNumseg seg) const
TSeqPos GetAlnStart(TNumseg seg) const
TNumseg GetNumSegs(void) const
void SetGapChar(TResidue gap_char)
string & GetWholeAlnSeqString(TNumrow row, string &buffer, TSeqPosList *insert_aln_starts=0, TSeqPosList *insert_starts=0, TSeqPosList *insert_lens=0, unsigned int scrn_width=0, TSeqPosList *scrn_lefts=0, TSeqPosList *scrn_rights=0) const
void SetAaCoding(TCoding coding)
void SetGenCode(int gen_code, TNumrow row=-1)
TSeqPos GetLength(void) const
bool IsSetLength(void) const
void Resize(size_t i, size_t j, int val=int())
resize this matrix, filling the empty cells with a known value
void AddUserObject(CUser_object &obj)
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
std::ofstream out("events_result.xml")
main entry point for tests
bool Empty(const CNcbiOstrstream &src)
static const char * str(char *buf, int n)
EBlastProgramType EProgramToEBlastProgramType(EProgram p)
Convert EProgram to EBlastProgramType.
const Uint1 AMINOACID_TO_NCBISTDAA[]
Translates between ncbieaa and ncbistdaa.
string EProgramToTaskName(EProgram p)
Convert a EProgram enumeration value to a task name (as those used in the BLAST command line binaries...
const char NCBISTDAA_TO_AMINOACID[]
Translates between ncbieaa and ncbistdaa.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const TPrim & Get(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
position_type GetLength(void) const
bool NotEmpty(void) const
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
virtual bool HasEntry(const string &section, const string &name=kEmptyStr, TFlags flags=0) const
bool Empty(TFlags flags=fAllLayers) const
Verify if Registry is empty.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
NCBI_NS_STD::string::size_type SIZE_TYPE
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static string & ToUpper(string &str)
Convert string to upper case – string& version.
static string UInt8ToString(Uint8 value, TNumToStringFlags flags=0, int base=10)
Convert UInt8 to string.
@ fWithCommas
Use commas as thousands separator.
TFrom GetFrom(void) const
Get the From member data.
void SetType(TType &value)
Assign a value to Type data member.
Tdata & Set(void)
Assign a value to data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
const TIupacaa & GetIupacaa(void) const
Get the variant data.
void SetData(TData &value)
Assign a value to Data data member.
@ e_Ncbieaa
extended ASCII 1 letter aa codes
@ e_Ncbistdaa
consecutive codes for std aas
@ e_Iupacaa
IUPAC 1 letter amino acid code.
unsigned int
A callback function used to compare two keys in a database.
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
Magic spell ;-) needed for some weird compilers... very empiric.
const GenericPointer< typename T::ValueType > T2 value
#define DIM(A)
dimension of an array.
C++ API for the PSI-BLAST PSSM engine.
Defines BLAST database access classes.
#define row(bind, expected)
Structure to hold the Karlin-Altschul parameters.
double K
K value used in statistics.
double Lambda
Lambda value used in statistics.
Auxiliary structure used for sorting CRange<int> objects in increasing order of starting positions.
bool operator()(CRange< int > const &range1, CRange< int > const &range2)