NCBI C++ ToolKit
blastfmtutil.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blastfmtutil.cpp 100101 2023-06-15 14:10:29Z merezhuk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jian Ye
27  * 12/2004
28  * File Description:
29  * blast formatter utilities
30  *
31  */
32 #include <ncbi_pch.hpp>
33 
35 #include <algo/blast/api/pssm_engine.hpp> // for CScorematPssmConverter
38 
39 #include <stdio.h>
40 #include <sstream>
41 #include <iomanip>
42 
44 
45 #include <algo/blast/api/blast_types.hpp> // for CScorematPssmConverter
49 
51 #include <util/range.hpp>
52 
53 
54 
58 USING_SCOPE(align_format);
59 
60 string CBlastFormatUtil::BlastGetVersion(const string program)
61 {
62  string program_uc = program;
63  return NStr::ToUpper(program_uc) + " " + blast::CBlastVersion().Print();
64 }
65 
66 void CBlastFormatUtil::BlastPrintVersionInfo(const string program, bool html,
68 {
69  if (html)
70  out << "<b>" << BlastGetVersion(program) << "</b>" << "\n";
71  else
72  out << BlastGetVersion(program) << "\n";
73 }
74 
75 void
76 CBlastFormatUtil::BlastPrintReference(bool html, size_t line_len,
77  CNcbiOstream& out,
78  blast::CReference::EPublication pub,
79  bool is_psiblast /* = false */)
80 {
81  string reference("Reference");
82  if (pub == blast::CReference::eCompAdjustedMatrices) {
83  reference += " for compositional score matrix adjustment";
84  } else if (pub == blast::CReference::eCompBasedStats) {
85  reference += " for composition-based statistics";
86  if (is_psiblast) {
87  reference += " starting in round 2";
88  }
89  } else if (pub == blast::CReference::eIndexedMegablast) {
90  reference += " for database indexing";
91  } else if (pub == blast::CReference::eDeltaBlast) {
92  reference += " for DELTA-BLAST";
93  }
94 
95  ostringstream str;
96  if(html)
97  {
98  CNcbiIfstream config_file(".ncbirc");
99  CNcbiRegistry config_reg(config_file);
100  string httpProt = "https:";
101  if(!config_reg.Empty()) {
102  if(config_reg.HasEntry("BLASTFMTUTIL","PROTOCOL")) {
103  httpProt = config_reg.Get("BLASTFMTUTIL","PROTOCOL");
104  }
105  }
106  str << "<b><a href=\""
107  << httpProt
108  << blast::CReference::GetPubmedUrl(pub)
109  << "\">" << reference << "</a>:</b>"
110  << "\n";
111  x_WrapOutputLine(str.str() + blast::CReference::GetString(pub),
112  line_len, out);
113  }
114  else
115  {
116  str << reference << ": ";
117  x_WrapOutputLine(str.str() + blast::CReference::GetHTMLFreeString(pub),
118  line_len, out);
119  }
120 
121  out << "\n";
122 }
123 
125  string definition_line,
126  int nNumSeqs,
127  Uint8 nTotalLength,
128  bool html,
129  bool with_links,
130  CNcbiOstream& out)
131 
132 
133 {
134  ostringstream str;
135  string dbString = (html) ? "<b>Database:</b> " : "Database: ";
136  str << dbString << definition_line << endl;
137  if(!(html && with_links)) x_WrapOutputLine(str.str(),line_len, out);
138 
139  out << " " << NStr::IntToString(nNumSeqs,NStr::fWithCommas) << " sequences; " << NStr::UInt8ToString(nTotalLength,NStr::fWithCommas) << " total letters" << endl;
140 }
141 
142 /** Standard order of letters according to S. Altschul
143  * FIXME: Move to blast_encoding.[hc] ?
144  */
145 static int RESIDUE_ORDER[] = {
146  1, /* A */
147  16, /* R */
148  13, /* N */
149  4, /* D */
150  3, /* C */
151  15, /* Q */
152  5, /* E */
153  7, /* G */
154  8, /* H */
155  9, /* I */
156  11, /* L */
157  10, /* K */
158  12, /* M */
159  6, /* F */
160  14, /* P */
161  17, /* S */
162  18, /* T */
163  20, /* W */
164  22, /* Y */
165  19 /* V */
166 };
167 
170 
171 void
173  (const objects::CPssmWithParameters& pssm_with_params,
175  CNcbiOstream& out)
176 {
177  _ASSERT(ancillary_data.NotEmpty());
178  static const Uint1 kXResidue = AMINOACID_TO_NCBISTDAA[(int)'X'];
179  vector<double> info_content, gapless_col_weights, sigma;
180  blast::CScorematPssmConverter::GetInformationContent(pssm_with_params,
181  info_content);
182  blast::CScorematPssmConverter::GetGaplessColumnWeights(pssm_with_params,
183  gapless_col_weights);
184  blast::CScorematPssmConverter::GetSigma(pssm_with_params, sigma);
185 
186  // We use whether the information content is available to assume whether
187  // the PSSM computation was done or not
188  bool pssm_calculation_done = info_content.empty() ? false : true;
189 
190  if (pssm_calculation_done) {
191  out << "\nLast position-specific scoring matrix computed, weighted ";
192  out << "observed percentages rounded down, information per position, ";
193  out << "and relative weight of gapless real matches to pseudocounts\n";
194  } else {
195  out << "\nLast position-specific scoring matrix computed\n";
196  }
197 
198  // will need psiblast statistics: posCount, intervalSizes,
199  // sigma,
200  // posCounts can be calculated from residue_frequencies
201 
202  const SIZE_TYPE kQueryLength = pssm_with_params.GetPssm().GetQueryLength();
203  _ASSERT(kQueryLength ==
204  (SIZE_TYPE)pssm_with_params.GetPssm().GetNumColumns());
205  unique_ptr< TNcbiMatrixInt > pssm
206  (blast::CScorematPssmConverter::GetScores(pssm_with_params));
207  unique_ptr< TNcbiMatrixDouble > weighted_res_freqs
208  (blast::CScorematPssmConverter::
209  GetWeightedResidueFrequencies(pssm_with_params));
210  vector<int> interval_sizes, num_matching_seqs;
211  blast::CScorematPssmConverter::GetIntervalSizes(pssm_with_params,
212  interval_sizes);
213  blast::CScorematPssmConverter::GetNumMatchingSeqs(pssm_with_params,
214  num_matching_seqs);
215 
216  // find score width
217  // find maximum score
218  int max_score = 0;
219  ITERATE (TNcbiMatrixInt::TData, it, pssm->GetData()) {
220  if (*it <= BLAST_SCORE_MIN) {
221  continue;
222  }
223 
224  if (*it > max_score) {
225  max_score = *it;
226  }
227 
228  if (-*it > max_score) {
229  max_score = -*it;
230  }
231  }
232 
233  // find number of digits in maximum score
234  int num_digits = 0;
235  while (max_score > 0) {
236  max_score /= 10;
237  num_digits++;
238  }
239  int width = num_digits + 2;
240 
241  out << " ";
242  // print the header for the last PSSM computed
243  for (size_t c = 0; c < DIM(RESIDUE_ORDER); c++) {
244  out << setw(width) << NCBISTDAA_TO_AMINOACID[RESIDUE_ORDER[c]];
245  }
246  if (pssm_calculation_done) {
247  // print the header for the weigthed observed percentages
248  for (size_t c = 0; c < DIM(RESIDUE_ORDER); c++) {
250  }
251  }
252 
254  pssm_with_params.GetPssm().GetQuerySequenceData(query);
255  const vector<char>& query_seq = query.Get();
256 
257  out << fixed;
258  for (SIZE_TYPE i = 0; i < kQueryLength; i++) {
259  // print the residue for position i
260  out << "\n" << setw(5) << (i+1) << " " <<
261  NCBISTDAA_TO_AMINOACID[(int)query_seq[i]] << " ";
262 
263  // print the PSSM
264  for (SIZE_TYPE c = 0; c < DIM(RESIDUE_ORDER); c++) {
265  if ((*pssm)(RESIDUE_ORDER[c], i) == BLAST_SCORE_MIN) {
266  out << "-I ";
267  } else {
268  out << setw(width) << (*pssm)(RESIDUE_ORDER[c], i);
269  }
270  }
271  out << " ";
272 
273  if (pssm_calculation_done) {
274  // Print the weighted observed
275  for (SIZE_TYPE c = 0; c < DIM(RESIDUE_ORDER); c++) {
276  if ((*pssm)(RESIDUE_ORDER[c], i) != BLAST_SCORE_MIN) {
277  double value = 100;
278  value *= (*weighted_res_freqs)(RESIDUE_ORDER[c], i);
279  // round to the nearest integer
280  value = (int)(value + (value > 0. ? 0.5 : -0.5));
281  out << setw(4) << (int)value;
282  }
283  }
284 
285  // print the information content
286  out << " " << setprecision(2) << info_content[i] << " ";
287 
288  // print the relative weight of gapless real matches to pseudocounts
289  if ((num_matching_seqs[i] > 1) && (query_seq[i] != kXResidue)) {
290  out << setprecision(2) << gapless_col_weights[i];
291  } else {
292  out << " 0.00";
293  }
294  }
295  }
296 
297  const Blast_KarlinBlk* ungapped_kbp =
298  ancillary_data->GetUngappedKarlinBlk();
299  const Blast_KarlinBlk* gapped_kbp =
300  ancillary_data->GetGappedKarlinBlk();
301  const Blast_KarlinBlk* psi_ungapped_kbp =
302  ancillary_data->GetPsiUngappedKarlinBlk();
303  const Blast_KarlinBlk* psi_gapped_kbp =
304  ancillary_data->GetPsiGappedKarlinBlk();
305  out << "\n\n" << setprecision(4);
306  out << " K Lambda\n";
307  if (ungapped_kbp) {
308  out << "Standard Ungapped "
309  << ungapped_kbp->K << " "
310  << ungapped_kbp->Lambda << "\n";
311  }
312  if (gapped_kbp) {
313  out << "Standard Gapped "
314  << gapped_kbp->K << " "
315  << gapped_kbp->Lambda << "\n";
316  }
317  if (psi_ungapped_kbp) {
318  out << "PSI Ungapped "
319  << psi_ungapped_kbp->K << " "
320  << psi_ungapped_kbp->Lambda << "\n";
321  }
322  if (psi_gapped_kbp) {
323  out << "PSI Gapped "
324  << psi_gapped_kbp->K << " "
325  << psi_gapped_kbp->Lambda << "\n";
326  }
327 }
328 
329 
331 CBlastFormatUtil::CreateSeqAnnotFromSeqAlignSet(const objects::CSeq_align_set & alnset,
332  blast::EProgram program,
333  const string & db_name,
334  const string & db_title,
335  bool vdb_search)
336 {
337  CRef<CSeq_annot> retval(new CSeq_annot);
338 
339  //Fill in Hist Seqalign
340  CRef<CUser_object> hist_align_obj(new CUser_object);
341  static const string kHistSeqalign("Hist Seqalign");
342  hist_align_obj->SetType().SetStr(kHistSeqalign);
343  hist_align_obj->AddField(kHistSeqalign, true);
344  retval->AddUserObject(*hist_align_obj);
345 
346  //Fill in Blast Type
347  CRef<CUser_object> blast_type(new CUser_object);
348  static const string kBlastType("Blast Type");
349  blast_type->SetType().SetStr(kBlastType);
350  blast_type->AddField(blast::EProgramToTaskName(program), program);
351  retval->AddUserObject(*blast_type);
352 
353  //Fill in DB Title
354  CRef<CUser_object> blast_db_info(new CUser_object);
355  if(vdb_search)
356  {
357  static const string kVDBNames("Database Names");
358  blast_db_info->SetType().SetStr(kVDBNames);
359  blast_db_info->AddField( db_name, true );
360 
361  }
362  else
363  {
364  static const string kBlastDBTitle("Blast Database Title");
365  blast_db_info->SetType().SetStr(kBlastDBTitle);
366  if(0 == db_name.size() || 0 == NStr::CompareNocase(db_name, "n/a"))
367  {
368  blast_db_info->AddField( "n/a", false );
369  }
370  else if(0 == NStr::CompareNocase(db_name, "SRA"))
371  {
372  blast_db_info->AddField( db_name, true );
373  }
374  else
375  {
377  blast_db_info->AddField( db_title, is_nucl );
378  }
379  }
380 
381  retval->AddUserObject(*blast_db_info);
382 
383  //Fill in data -- Seq align
384  retval->SetData().SetAlign();
385  ITERATE(CSeq_align_set::Tdata, itr, alnset.Get()) {
386  retval->SetData().SetAlign().push_back(*itr);
387  }
388 
389  return retval;
390 }
391 
393  unsigned int ncols)
394 {
395  const int kAsciiSize = 256;
396  Resize(kAsciiSize, kAsciiSize, INT_MIN);
397 
398  // Create a CSeq_data object from a vector of values from 0 to the size of
399  // the matrix (26).
400  const int kNumValues = max(ncols, nrows);
401  vector<char> ncbistdaa_values(kNumValues);
402  for (int index = 0; index < kNumValues; ++index)
403  ncbistdaa_values[index] = (char) index;
404 
405  CSeq_data ncbistdaa_seq(ncbistdaa_values, CSeq_data::e_Ncbistdaa);
406 
407  // Convert to IUPACaa using the CSeqportUtil::Convert method.
408  CSeq_data iupacaa_seq;
409  CSeqportUtil::Convert(ncbistdaa_seq, &iupacaa_seq, CSeq_data::e_Iupacaa);
410 
411  // Extract the IUPACaa values
412  vector<char> iupacaa_values(kNumValues);
413  for (int index = 0; index < kNumValues; ++index)
414  iupacaa_values[index] = iupacaa_seq.GetIupacaa().Get()[index];
415 
416  // Fill the 256x256 output matrix.
417  for (unsigned int row = 0; row < nrows; ++row) {
418  for (unsigned int col = 0; col < ncols; ++col) {
419  if (iupacaa_values[row] >= 0 && iupacaa_values[col] >= 0) {
420  (*this)((int)iupacaa_values[row], (int)iupacaa_values[col]) =
421  data[row][col];
422  }
423  }
424  }
425 }
426 
427 
428 /// Auxiliary structure used for sorting CRange<int> objects in increasing
429 /// order of starting positions.
431  bool operator()(CRange<int> const& range1, CRange<int> const& range2)
432  {
433  return (range1.GetFrom() < range2.GetFrom());
434  }
435 };
436 
437 /// Masks a query sequence string corresponding to an alignment, given a list
438 /// of mask locations.
439 /// @param alnvec One alignment [in]
440 /// @param query_seq Query string corresponding to this alignment [in] [out]
441 /// @param mask_info List of masking locations [in]
442 /// @param mask_char How should sequence be masked? [in]
443 /// @param query_frame If query is translated, what query frame is this
444 /// alignment for?
445 static void
446 s_MaskQuerySeq(CAlnVec& alnvec, string& query_seq,
447  const ncbi::TMaskedQueryRegions& mask_info,
448  align_format::CDisplaySeqalign::SeqLocCharOption mask_char,
449  int query_frame)
450 {
451  const int kNumSegs = alnvec.GetNumSegs();
452  vector<CRange<int> > segs_v;
453  for (int index = 0; index < kNumSegs; ++index) {
454  CRange<int> range(alnvec.GetAlnStart(index),
455  alnvec.GetAlnStop(index));
456  segs_v.push_back(range);
457  }
458 
459  vector<CRange<int> > masks_v;
460  int aln_stop = static_cast<int>(query_seq.size()) - 1;
461  ITERATE(ncbi::TMaskedQueryRegions, mask_iter, mask_info) {
462  if ((*mask_iter)->GetFrame() != query_frame)
463  continue;
464  int start =
465  alnvec.GetAlnPosFromSeqPos(0,
466  (*mask_iter)->GetInterval().GetFrom());
467  int stop =
468  alnvec.GetAlnPosFromSeqPos(0,
469  (*mask_iter)->GetInterval().GetTo());
470  // For negative frames, start and stop must be swapped.
471  if (query_frame < 0) {
472  int tmp = start;
473  start = stop;
474  stop = tmp;
475  }
476  if (start >= 0) {
477  if (stop < 0)
478  stop = aln_stop;
479  CRange<int> range(start, stop);
480  masks_v.push_back(range);
481  }
482  }
483 
484  sort(masks_v.begin(), masks_v.end(), SRangeStartSort());
485 
486  // Mask the sequence
487  int mask_index = 0;
488  for (int seg_index = 0;
489  seg_index < (int) segs_v.size() && mask_index < (int) masks_v.size();
490  ++seg_index) {
491  if (segs_v[seg_index].Empty())
492  continue;
493  int seg_start = segs_v[seg_index].GetFrom();
494  int seg_stop = segs_v[seg_index].GetTo();
495  int mask_pos;
496  while (mask_index < (int) masks_v.size() &&
497  (mask_pos = max(seg_start, masks_v[mask_index].GetFrom()))
498  <= seg_stop) {
499  int mask_stop = min(seg_stop, masks_v[mask_index].GetTo());
500  // Mask the respective part of the sequence
501  for ( ; mask_pos <= mask_stop; ++mask_pos) {
502  if( query_seq[mask_pos] == '-' ) continue; // preserve gap
503  if (mask_char == CDisplaySeqalign::eX) {
504  query_seq[mask_pos] = 'X';
505  } else if (mask_char == CDisplaySeqalign::eN){
506  query_seq[mask_pos]='N';
507  } else if (mask_char == CDisplaySeqalign::eLowerCase) {
508  query_seq[mask_pos] =
509  tolower((unsigned char)query_seq[mask_pos]);
510  }
511  }
512  // Advance to the next mask if this mask is done with. Otherwise
513  // break out of the loop.
514  if (mask_pos < seg_stop)
515  ++mask_index;
516  else
517  break;
518  }
519  }
520 }
521 
522 static void
524  string & query,
525  string & subject,
526  int master_gen_code,
527  int slave_gen_code)
528 {
529  //Note: do not switch the set order per calnvec specs.
530  aln_vec.SetGenCode(slave_gen_code);
531  aln_vec.SetGenCode(master_gen_code, 0);
532 
533  aln_vec.SetGapChar('-');
534  aln_vec.GetWholeAlnSeqString(0, query);
535  aln_vec.GetWholeAlnSeqString(1, subject);
536 }
537 
538 void
540  string & subject,
541  const objects::CDense_seg& ds,
542  objects::CScope& scope,
543  int master_gen_code,
544  int slave_gen_code)
545 {
546  CAlnVec aln_vec(ds, scope);
548  s_GetQueryAndSubjectStrings(aln_vec, query, subject, master_gen_code, slave_gen_code);
549 }
550 
551 void
553  string & masked_query,
554  string & subject,
555  const objects::CDense_seg & ds,
556  objects::CScope & scope,
557  int master_gen_code,
558  int slave_gen_code,
559  const ncbi::TMaskedQueryRegions& mask_info,
560  align_format::CDisplaySeqalign::SeqLocCharOption mask_char,
561  int query_frame)
562 {
563  CAlnVec aln_vec(ds, scope);
565 
566  s_GetQueryAndSubjectStrings(aln_vec, query, subject, master_gen_code, slave_gen_code);
567 
568  masked_query = query;
569  s_MaskQuerySeq(aln_vec, masked_query, mask_info, mask_char, query_frame);
570 }
571 
573  const CBioseq_Handle & query_handle,
574  TSeqRange query_range,
575  ESubjectScores score_type)
576 {
577 
578  if(!org_align_set.IsSet() || org_align_set.Get().empty()) {
579  _TRACE("Empty seq_align_set");
580  return;
581  }
582  // Seq align set from
583  int dont_care = 0;
584  unsigned int check_type = score_type;
585  if(org_align_set.Get().front()->GetNamedScore("seq_percent_coverage", dont_care)) {
586  check_type &= (~eQueryCovPerSubj);
587  }
588  if (org_align_set.Get().front()->GetNamedScore("uniq_seq_percent_coverage", dont_care)) {
589  check_type &= (~eQueryCovPerUniqSubj);
590  }
591  if(check_type == eNoQuerySubjCov){
592  return;
593  }
594 
595  CConstRef<CBioseq> query_bioseq = query_handle.GetCompleteBioseq();
596  int query_len = 0;
597  if(query_range.NotEmpty())
598  {
599  query_len = query_range.GetLength();
600  }
601  else if (!query_bioseq.Empty() && query_bioseq->IsSetLength())
602  {
603  query_len = query_bioseq->GetLength();
604  }
605 
606  if(query_len <= 0)
607  {
608  _TRACE("Invalid Query Length");
609  return;
610  }
611 
612  CSeq_align_set tmp_align_set;
613  list<CRef<CSeq_align> > & tmp_align_list = tmp_align_set.Set();
614  list<CRef<CSeq_align> > & org_align_list = org_align_set.Set();
615 
616  list<CRef<CSeq_align> >::iterator left_it = org_align_list.begin();
617  list<CRef<CSeq_align> >::iterator right_it = org_align_list.begin();
618 
619  while(left_it != org_align_list.end())
620  {
621  const CSeq_id & cur_id = (*left_it)->GetSeq_id(1);
622  ++ right_it;
623 
624  for (; right_it != org_align_list.end(); ++right_it)
625  {
626  const CSeq_id & id = (*right_it)->GetSeq_id(1);
627  if (!id.Match(cur_id))
628  break;
629  }
630 
631  tmp_align_list.assign(left_it, right_it);
632  if((check_type & eQueryCovPerSubj)) {
633  int master_coverage = align_format::CAlignFormatUtil::GetMasterCoverage(tmp_align_set);
634 
635  if (master_coverage)
636  {
637  double subj_coverage = 100.0 * (double) master_coverage/ (double) query_len;
638  //cerr << "Query Length: " << query_len << endl;
639  //cerr << "Query coverage Length: " << master_coverage << endl;
640  if(subj_coverage < 99)
641  subj_coverage +=0.5;
642 
643  (*left_it)->SetNamedScore ("seq_percent_coverage", (int) subj_coverage);
644  }
645  }
646  if((check_type & eQueryCovPerUniqSubj)) {
647  int uniq_coverage = align_format::CAlignFormatUtil::GetUniqSeqCoverage(tmp_align_set);
648  if (uniq_coverage)
649  {
650  double uniq_subj_coverage = 100.0 * (double) uniq_coverage/ (double) query_len;
651  //cerr << "Query Uniq coverage Length: " << uniq_coverage << endl;
652  //cerr << uniq_coverage << endl;
653  if(uniq_subj_coverage < 99)
654  uniq_subj_coverage +=0.5;
655 
656  (*left_it)->SetNamedScore ("uniq_seq_percent_coverage", (int) uniq_subj_coverage);
657  }
658  }
659  left_it = right_it;
660  }
661 }
662 
664 : m_IterationNum(0)
665 {
666  m_SerialXmlEnd = "";
667 }
User-defined methods of the data storage class.
User-defined methods of the data storage class.
BLAST formatter utilities.
Boolean Blast_SubjectIsNucleotide(EBlastProgramType p)
Returns true if the subject is nucleotide.
Definition: blast_program.c:53
#define BLAST_SCORE_MIN
minimum allowed score (for one letter comparison).
Definition: blast_stat.h:121
Definitions of special type used in BLAST.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Definition: blast_types.hpp:56
@ eDeltaBlast
Delta Blast.
Definition: blast_types.hpp:71
CNcbiMatrix< int > TNcbiMatrixInt
static void s_MaskQuerySeq(CAlnVec &alnvec, string &query_seq, const ncbi::TMaskedQueryRegions &mask_info, align_format::CDisplaySeqalign::SeqLocCharOption mask_char, int query_frame)
Masks a query sequence string corresponding to an alignment, given a list of mask locations.
CNcbiMatrix< double > TNcbiMatrixDouble
USING_SCOPE(ncbi)
static int RESIDUE_ORDER[]
Standard order of letters according to S.
static void s_GetQueryAndSubjectStrings(CAlnVec &aln_vec, string &query, string &subject, int master_gen_code, int slave_gen_code)
TSignedSeqPos GetAlnPosFromSeqPos(TNumrow row, TSeqPos seq_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
Definition: alnmap.cpp:527
TSeqPos GetAlnStop(TNumseg seg) const
Definition: alnmap.hpp:488
TSeqPos GetAlnStart(TNumseg seg) const
Definition: alnmap.hpp:481
TNumseg GetNumSegs(void) const
Definition: alnmap.hpp:510
void SetGapChar(TResidue gap_char)
Definition: alnvec.hpp:339
string & GetWholeAlnSeqString(TNumrow row, string &buffer, TSeqPosList *insert_aln_starts=0, TSeqPosList *insert_starts=0, TSeqPosList *insert_lens=0, unsigned int scrn_width=0, TSeqPosList *scrn_lefts=0, TSeqPosList *scrn_rights=0) const
Definition: alnvec.cpp:199
void SetAaCoding(TCoding coding)
Definition: alnvec.hpp:114
void SetGenCode(int gen_code, TNumrow row=-1)
Definition: alnvec.hpp:397
CBioseq_Handle –.
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
bool IsSetLength(void) const
Definition: Bioseq.cpp:355
CConstRef –.
Definition: ncbiobj.hpp:1266
CNCBIstdaa –.
Definition: NCBIstdaa.hpp:66
void Resize(size_t i, size_t j, int val=int())
resize this matrix, filling the empty cells with a known value
Definition: matrix.hpp:390
vector< int > TData
Definition: matrix.hpp:49
CNcbiRegistry –.
Definition: ncbireg.hpp:913
void AddUserObject(CUser_object &obj)
Definition: Seq_annot.cpp:169
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
std::ofstream out("events_result.xml")
main entry point for tests
bool Empty(const CNcbiOstrstream &src)
Definition: fileutil.cpp:523
#define false
Definition: bool.h:36
static const char * str(char *buf, int n)
Definition: stats.c:84
static char tmp[3200]
Definition: utf8.c:42
char data[12]
Definition: iconv.c:80
EBlastProgramType EProgramToEBlastProgramType(EProgram p)
Convert EProgram to EBlastProgramType.
Definition: blast_aux.cpp:709
const Uint1 AMINOACID_TO_NCBISTDAA[]
Translates between ncbieaa and ncbistdaa.
string EProgramToTaskName(EProgram p)
Convert a EProgram enumeration value to a task name (as those used in the BLAST command line binaries...
Definition: blast_aux.cpp:676
const char NCBISTDAA_TO_AMINOACID[]
Translates between ncbieaa and ncbistdaa.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
SBlastXMLIncremental()
Default ctor()
static void PrintAsciiPssm(const objects::CPssmWithParameters &pssm, CConstRef< blast::CBlastAncillaryData > ancillary_data, CNcbiOstream &out)
Prints the PSSM in ASCII format (as in blastpgp's -Q option)
static void BlastPrintVersionInfo(const string program, bool html, CNcbiOstream &out)
Print out blast engine version.
static void PrintDbInformation(size_t line_len, string definition_line, int nNumSeqs, Uint8 nTotalLength, bool html, bool with_links, CNcbiOstream &out)
static void BlastPrintReference(bool html, size_t line_len, CNcbiOstream &out, blast::CReference::EPublication publication=blast::CReference::eGappedBlast, bool is_psiblast=false)
Print out blast reference.
static void InsertSubjectScores(objects::CSeq_align_set &org_align_set, const objects::CBioseq_Handle &query_handle, TSeqRange query_range=TSeqRange(), ESubjectScores score_type=eQueryCovPerSubj)
string m_SerialXmlEnd
tag to be printed at end.
static CRef< objects::CSeq_annot > CreateSeqAnnotFromSeqAlignSet(const objects::CSeq_align_set &alnset, blast::EProgram program, const string &db_name, const string &db_title, bool vdb_search=false)
static void GetWholeAlnSeqStrings(string &query, string &subject, const objects::CDense_seg &ds, objects::CScope &scope, int master_gen_code, int slave_gen_code)
static string BlastGetVersion(const string program)
Returns the version and release date, e.g.
CBlastFormattingMatrix(int **data, unsigned int nrows, unsigned int ncols)
Constructor - allocates the matrix with appropriate size and populates with the values retrieved from...
#define _TRACE(message)
Definition: ncbidbg.hpp:122
const TPrim & Get(void) const
Definition: serialbase.hpp:347
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
Definition: ncbiobj.hpp:1385
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:1392
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
position_type GetLength(void) const
Definition: range.hpp:158
bool NotEmpty(void) const
Definition: range.hpp:152
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
Definition: ncbireg.cpp:262
virtual bool HasEntry(const string &section, const string &name=kEmptyStr, TFlags flags=0) const
Definition: ncbireg.cpp:290
bool Empty(TFlags flags=fAllLayers) const
Verify if Registry is empty.
Definition: ncbireg.cpp:162
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
static string UInt8ToString(Uint8 value, TNumToStringFlags flags=0, int base=10)
Convert UInt8 to string.
Definition: ncbistr.hpp:5168
@ fWithCommas
Use commas as thousands separator.
Definition: ncbistr.hpp:254
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetType(TType &value)
Assign a value to Type data member.
Tdata & Set(void)
Assign a value to data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
const TIupacaa & GetIupacaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:530
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
@ e_Ncbieaa
extended ASCII 1 letter aa codes
Definition: Seq_data_.hpp:111
@ e_Ncbistdaa
consecutive codes for std aas
Definition: Seq_data_.hpp:113
@ e_Iupacaa
IUPAC 1 letter amino acid code.
Definition: Seq_data_.hpp:105
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
Magic spell ;-) needed for some weird compilers... very empiric.
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
#define DIM(A)
dimension of an array.
Definition: ncbi_std.h:176
int tolower(Uchar c)
Definition: ncbictype.hpp:72
T max(T x_, T y_)
T min(T x_, T y_)
C++ API for the PSI-BLAST PSSM engine.
Defines BLAST database access classes.
#define row(bind, expected)
Definition: string_bind.c:73
Structure to hold the Karlin-Altschul parameters.
Definition: blast_stat.h:66
double K
K value used in statistics.
Definition: blast_stat.h:68
double Lambda
Lambda value used in statistics.
Definition: blast_stat.h:67
Auxiliary structure used for sorting CRange<int> objects in increasing order of starting positions.
bool operator()(CRange< int > const &range1, CRange< int > const &range2)
static string subject
static string query
#define _ASSERT
Modified on Wed Apr 17 13:10:55 2024 by modify_doxy.py rev. 669887