NCBI C++ ToolKit
blast_format.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================*/
25 
26 /*****************************************************************************
27 
28 Author: Jason Papadopoulos
29 
30 ******************************************************************************/
31 
32 /** @file blast_format.cpp
33  * Produce formatted blast output
34 */
35 
36 #include <ncbi_pch.hpp>
41 #include <objmgr/util/sequence.hpp>
44 #include <corelib/ncbiutil.hpp> // for FindBestChoice
47 
49 #include <algo/blast/format/data4xmlformat.hpp> /* NCBI_FAKE_WARNING */
51 #include <algo/blast/format/data4xml2format.hpp> /* NCBI_FAKE_WARNING */
54 #include <objtools/blast/seqdb_reader/seqdb.hpp> // for CSeqDB
55 #include <serial/objostrxml.hpp>
56 
57 #include <corelib/ncbistre.hpp>
58 
59 #ifndef SKIP_DOXYGEN_PROCESSING
61 USING_SCOPE(blast);
63 USING_SCOPE(align_format);
64 USING_SCOPE(sequence);
65 #endif
66 
67 
68 CBlastFormat::CBlastFormat(const blast::CBlastOptions& options,
69  blast::CLocalDbAdapter& db_adapter,
71  bool believe_query, CNcbiOstream& outfile,
72  int num_summary,
73  int num_alignments,
74  CScope & scope,
75  const char *matrix_name /* = BLAST_DEFAULT_MATRIX */,
76  bool show_gi /* = false */,
77  bool is_html /* = false */,
78  int qgencode /* = BLAST_GENETIC_CODE */,
79  int dbgencode /* = BLAST_GENETIC_CODE */,
80  bool use_sum_statistics /* = false */,
81  bool is_remote_search /* = false */,
82  int dbfilt_algorithm /* = -1 */,
83  const string& custom_output_format /* = kEmptyStr */,
84  bool is_megablast /* = false */,
85  bool is_indexed /* = false */,
86  const blast::CIgBlastOptions *ig_opts /* = NULL */,
87  const blast::CLocalDbAdapter* domain_db_adapter /* = NULL*/,
88  const string & cmdline /* =kEMptyStr*/,
89  const string& subjectTag /* =kEmptyStr */)
90  : m_FormatType(format_type), m_IsHTML(is_html),
91  m_DbIsAA(db_adapter.IsProtein()), m_BelieveQuery(believe_query),
92  m_Outfile(outfile), m_NumSummary(num_summary),
93  m_NumAlignments(num_alignments), m_HitlistSize(options.GetHitlistSize()),
94  m_Program(Blast_ProgramNameFromType(options.GetProgramType())),
95  m_DbName(kEmptyStr),
96  m_QueryGenCode(qgencode), m_DbGenCode(dbgencode),
97  m_ShowGi(show_gi), m_ShowLinkedSetSize(false),
98  m_IsUngappedSearch(!options.GetGappedMode()),
99  m_MatrixName(matrix_name),
100  m_Scope(& scope),
101  m_IsBl2Seq(false),
102  m_IsDbScan(false),
103  m_SubjectTag(subjectTag),
104  m_IsRemoteSearch(is_remote_search),
105  m_QueriesFormatted(0),
106  m_Megablast(is_megablast),
107  m_IndexedMegablast(is_indexed),
108  m_CustomOutputFormatSpec(custom_output_format),
109  m_IgOptions(ig_opts),
110  m_Options(&options),
111  m_IsVdb(false),
112  m_IsIterative(false),
113  m_BaseFile(kEmptyStr),
114  m_XMLFileCount(0),
115  m_LineLength(align_format::kDfltLineLength),
116  m_OrigExceptionMask(outfile.exceptions()),
117  m_Cmdline(cmdline)
118 {
119  m_Outfile.exceptions(NcbiBadbit);
120  m_DbName = db_adapter.GetDatabaseName();
121  m_IsBl2Seq = (m_DbName == kEmptyStr ? true : false);
122  m_IsDbScan = db_adapter.IsDbScanMode();
123  if (m_IsBl2Seq) {
124  m_SeqInfoSrc.Reset(db_adapter.MakeSeqInfoSrc());
125  }
126  else {
127  m_SearchDb = db_adapter.GetSearchDatabase();
128  }
129  if(m_IsDbScan) {
130  int num_seqs=0;
131  int total_length=0;
132  if (!is_remote_search)
133  {
134  BlastSeqSrc* seqsrc = db_adapter.MakeSeqSrc();
135  num_seqs=BlastSeqSrcGetNumSeqs(seqsrc);
136  total_length=static_cast<int>(BlastSeqSrcGetTotLen(seqsrc));
137  }
138  CBlastFormatUtil::FillScanModeBlastDbInfo(m_DbInfo, m_DbIsAA,
139  num_seqs, total_length, m_SubjectTag);
140  } else {
141  int filteringAlgorithmId = db_adapter.GetFilteringAlgorithm();
142  if(filteringAlgorithmId == -1) {
143  CRef <CSearchDatabase> db_Info = db_adapter.GetSearchDatabase();
144  if (db_Info && db_Info.NotEmpty()) {
145  ESubjectMaskingType maskType = db_Info->GetMaskType();
146  if(maskType != eNoSubjMasking) {
148  ERR_POST(Warning << "Subject mask not found in " + m_DbName +", proceeding without subject masking.");
149  }
150  }
151  }
152  CBlastFormatUtil::GetBlastDbInfo(m_DbInfo, m_DbName, m_DbIsAA,
153  dbfilt_algorithm, is_remote_search);
154  }
158  }
159 
163  }
164 
165  if (use_sum_statistics && m_IsUngappedSearch) {
166  m_ShowLinkedSetSize = true;
167  }
168  if ( m_Program == "blastn" &&
169  options.GetMatchReward() == 0 &&
170  options.GetMismatchPenalty() == 0 )
171  {
172  /* This combination is an indicator that we have used matrices
173  * solely to develop the hsp score. Also for the time being it
174  * indicates that KA stats are not available. -RMH-
175  */
176  m_DisableKAStats = true;
177  }
178  else
179  {
180  m_DisableKAStats = false;
181  }
182 
184 
185  if (options.GetProgram() == eDeltaBlast) {
186  _ASSERT(options.GetProgramType() == eBlastTypePsiBlast);
187  m_Program = "deltablast";
188 
189  if (domain_db_adapter) {
190  CBlastFormatUtil::GetBlastDbInfo(m_DomainDbInfo,
191  domain_db_adapter->GetDatabaseName(),
192  true, -1, is_remote_search);
193  }
194  }
195 
196  m_IsIterative = options.IsIterativeSearch();
199  }
200 
202  if (app) {
203  const CNcbiRegistry& registry = app->GetConfig();
204  m_LongSeqId = (registry.Get("BLAST", "LONG_SEQID") == "1");
205  }
206  m_HitsSortOption = -1;
207  m_HspsSortOption = -1;
208 }
209 
210 CBlastFormat::CBlastFormat(const blast::CBlastOptions& opts,
211  const vector< CBlastFormatUtil::SDbInfo >& dbinfo_list,
213  bool believe_query, CNcbiOstream& outfile,
214  int num_summary,
215  int num_alignments,
216  CScope& scope,
217  bool show_gi,
218  bool is_html,
219  bool is_remote_search,
220  const string& custom_output_format,
221  bool is_vdb,
222  const string & cmdline)
223  : m_FormatType(format_type),
224  m_IsHTML(is_html),
225  m_DbIsAA(!Blast_SubjectIsNucleotide(opts.GetProgramType())),
226  m_BelieveQuery(believe_query),
227  m_Outfile(outfile),
228  m_NumSummary(num_summary),
229  m_NumAlignments(num_alignments),
230  m_HitlistSize(opts.GetHitlistSize()),
231  m_Program(Blast_ProgramNameFromType(opts.GetProgramType())),
232  m_DbName(kEmptyStr),
233  m_QueryGenCode(opts.GetQueryGeneticCode()),
234  m_DbGenCode(opts.GetDbGeneticCode()),
235  m_ShowGi(show_gi),
236  m_ShowLinkedSetSize(false),
237  m_IsUngappedSearch(!opts.GetGappedMode()),
238  m_MatrixName(opts.GetMatrixName()),
239  m_Scope(&scope),
240  m_IsBl2Seq(false),
241  m_IsDbScan (false),
242  m_IsRemoteSearch(is_remote_search),
243  m_QueriesFormatted(0),
244  m_Megablast(opts.GetProgram() == eMegablast ||
245  opts.GetProgram() == eDiscMegablast),
246  m_IndexedMegablast(opts.GetMBIndexLoaded()),
247  m_CustomOutputFormatSpec(custom_output_format),
248  m_Options(&opts),
249  m_IsVdb(is_vdb),
250  m_IsIterative(false),
251  m_BaseFile(kEmptyStr),
252  m_XMLFileCount(0),
253  m_LineLength(align_format::kDfltLineLength),
254  m_OrigExceptionMask(outfile.exceptions()),
255  m_Cmdline(cmdline)
256 {
257  m_Outfile.exceptions(NcbiBadbit);
258  m_DbInfo.assign(dbinfo_list.begin(), dbinfo_list.end());
259  vector< CBlastFormatUtil::SDbInfo >::const_iterator itInfo;
260  for (itInfo = m_DbInfo.begin(); itInfo != m_DbInfo.end(); itInfo++)
261  {
262  if(itInfo != m_DbInfo.begin())
263  m_DbName += " ";
264 
265  m_DbName += itInfo->name;
266  }
267 
268  m_IsBl2Seq = false;
269 
273  }
274 
278  }
279 
280  if (opts.GetSumStatisticsMode() && m_IsUngappedSearch) {
281  m_ShowLinkedSetSize = true;
282  }
283 
284  if ( m_Program == "blastn" &&
285  opts.GetMatchReward() == 0 &&
286  opts.GetMismatchPenalty() == 0 )
287  {
288  /* This combination is an indicator that we have used matrices
289  * solely to develop the hsp score. Also for the time being it
290  * indicates that KA stats are not available. -RMH-
291  */
292  m_DisableKAStats = true;
293  }
294  else
295  {
296  m_DisableKAStats = false;
297  }
298 
300 
301  if (opts.GetProgram() == eDeltaBlast) {
302  _ASSERT(opts.GetProgramType() == eBlastTypePsiBlast);
303  m_Program = "deltablast";
304  }
305  m_IsIterative = opts.IsIterativeSearch();
308  }
310  if (app) {
311  const CNcbiRegistry& registry = app->GetConfig();
312  m_LongSeqId = (registry.Get("BLAST", "LONG_SEQID") == "1");
313  }
314  m_HitsSortOption = -1;
315  m_HspsSortOption = -1;
316 }
317 
319 {
320  try {
321  m_Outfile.exceptions(m_OrigExceptionMask);
322  } catch (...) {/*ignore exceptions*/}
323  m_Outfile.flush();
324 }
325 
326 static const string kHTML_Prefix =
327 "<HTML>\n"
328 "<HEAD><TITLE>BLAST Search Results</TITLE></HEAD>\n"
329 "<BODY BGCOLOR=\"#FFFFFF\" LINK=\"#0000FF\" VLINK=\"#660099\" ALINK=\"#660099\">\n"
330 "<PRE>\n";
331 
332 static const string kHTML_Suffix =
333 "</PRE>\n"
334 "</BODY>\n"
335 "</HTML>";
336 
337 Int8
339 {
340  Int8 retv = 0L;
341  for (size_t i = 0; i < m_DbInfo.size(); i++) {
342  retv += m_DbInfo[i].total_length;
343  }
344  return retv;
345 }
346 
347 void
349 {
350  // no header for some output types
354  }
357  }
358  return;
359  }
360 
361  if (m_IsHTML) {
362  m_Outfile << kHTML_Prefix << "\n";
363  }
364  // Make sure no-one confuses us with the standard BLASTN
365  // algorithm. -RMH-
366  if ( m_Program == "blastn" &&
367  m_DisableKAStats == true )
368  {
370  m_Outfile);
371  m_Outfile << "\n\n";
372  m_Outfile << "Reference: Robert M. Hubley, Arian Smit\n";
373  m_Outfile << "RMBlast - RepeatMasker Search Engine\n";
374  m_Outfile << "2010 <http://www.repeatmasker.org>";
375  }else
376  {
378  m_Outfile);
379  }
380 
381  if (m_IsBl2Seq && !m_IsDbScan) {
382  return;
383  }
384 
386  if (m_Program == "deltablast") {
389  m_Outfile << "\n";
390  }
391 
392  if (m_Megablast)
395  else
397  m_Outfile);
398 
400  {
401  m_Outfile << "\n";
404  }
405 
406  if (m_Program == "psiblast" || m_Program == "deltablast") {
407  m_Outfile << "\n";
410  }
411  if (m_Program == "psiblast" || m_Program == "blastp") {
412  m_Outfile << "\n";
415  (bool)(m_Program == "psiblast"));
416  }
417 
418  if (m_Program == "deltablast" || !m_DomainDbInfo.empty()) {
419  m_Outfile << "\n\n";
420  if (!m_DomainDbInfo.empty()) {
421  m_Outfile << "\n\n" << "Conserved Domain ";
422  CBlastFormatUtil::PrintDbReport(m_DomainDbInfo, kFormatLineLength,
423  m_Outfile, true);
424  }
425  }
426  else {
427  m_Outfile << "\n\n";
428  }
429  if (!m_IsBl2Seq || m_IsDbScan)
430  CBlastFormatUtil::PrintDbReport(m_DbInfo, kFormatLineLength,
431  m_Outfile, true);
432 }
433 
434 void
435 CBlastFormat::x_PrintOneQueryFooter(const blast::CBlastAncillaryData& summary)
436 {
437  /* Skip printing KA parameters if the program is rmblastn -RMH- */
438  if ( m_DisableKAStats )
439  return;
440 
441  const Blast_KarlinBlk *kbp_ungap =
442  (m_Program == "psiblast" || m_Program == "deltablast")
443  ? summary.GetPsiUngappedKarlinBlk()
444  : summary.GetUngappedKarlinBlk();
445  const Blast_GumbelBlk *gbp = summary.GetGumbelBlk();
446  m_Outfile << NcbiEndl;
447  if (kbp_ungap) {
448  CBlastFormatUtil::PrintKAParameters(kbp_ungap->Lambda,
449  kbp_ungap->K, kbp_ungap->H,
451  false, gbp);
452  }
453 
454  const Blast_KarlinBlk *kbp_gap =
455  (m_Program == "psiblast" || m_Program == "deltablast")
456  ? summary.GetPsiGappedKarlinBlk()
457  : summary.GetGappedKarlinBlk();
458  m_Outfile << "\n";
459  if (kbp_gap) {
460  CBlastFormatUtil::PrintKAParameters(kbp_gap->Lambda,
461  kbp_gap->K, kbp_gap->H,
463  true, gbp);
464  }
465 
466  m_Outfile << "\n";
467  m_Outfile << "Effective search space used: " <<
468  summary.GetSearchSpace() << "\n";
469 }
470 
471 /// Auxialiary function to determine if there are local IDs in the identifiers
472 /// of the query sequences
473 /// @param queries query sequence(s) [in]
474 static bool
476 {
477  bool retval = false;
478  ITERATE(CBlastQueryVector, itr, *queries) {
479  if (blast::IsLocalId((*itr)->GetQuerySeqLoc()->GetId())) {
480  retval = true;
481  break;
482  }
483  }
484  return retval;
485 }
486 
487 void
489  int skip_from, int skip_to, int index,
490  int num_descriptions_to_show /* = -1 */)
491 {
492  int flags = 0;
495  if (m_IsHTML){
497  if (index >= 0) {
498  showdef.SetResultPosIndex(index);
499  }
500  }
501  if (m_ShowGi)
503  if (num_descriptions_to_show == 0)
505  if (m_LongSeqId) {
507  }
508  if(m_HitsSortOption >= 0) {
512  }
513  showdef.SetOption(flags);
514  showdef.SetDbName(m_DbName);
515  showdef.SetDbType(!m_DbIsAA);
516  showdef.SetSkipRange(skip_from, skip_to);
517 }
518 
519 void
521  CSeq_align_set& repeated_seqs,
522  CSeq_align_set& new_seqs,
523  blast::CPsiBlastIterationState::TSeqIds& prev_seqids)
524 {
525  static const CSeq_align::TDim kSubjRow = 1;
526  _ASSERT( !prev_seqids.empty() );
527  _ASSERT( !full_alignment->IsEmpty() );
528  _ASSERT(repeated_seqs.IsEmpty());
529  _ASSERT(new_seqs.IsEmpty());
530 
531  unsigned int count = 0;
532  ITERATE(CSeq_align_set::Tdata, alignment, full_alignment->Get()) {
533  CSeq_id_Handle subj_id =
534  CSeq_id_Handle::GetHandle((*alignment)->GetSeq_id(kSubjRow));
535  if (prev_seqids.find(subj_id) != prev_seqids.end()) {
536  // if found among previously seen Seq-ids...
537  repeated_seqs.Set().push_back(*alignment);
538  } else {
539  // ... else add them as new
540  new_seqs.Set().push_back(*alignment);
541  }
542  count++;
543  if(count >= (unsigned int)m_NumSummary)
544  break;
545  }
546 }
547 
548 bool
550 {
551  bool kIsGlobal = (seqalign_set->IsSet() && seqalign_set->CanGet() &&
552  seqalign_set->Get().front()->CanGetType() &&
553  seqalign_set->Get().front()->GetType() == CSeq_align_Base::eType_global);
554 
555  return kIsGlobal;
556 }
557 
558 
559 void
561  unsigned int itr_num,
562  blast::CPsiBlastIterationState::TSeqIds& prev_seqids,
563  int additional,
564  int index,
565  int defline_length )
566 {
567 
568  if (itr_num != numeric_limits<unsigned int>::max() &&
569  !prev_seqids.empty()) {
570  // Split seq-align-set
571  CSeq_align_set repeated_seqs, new_seqs;
572  x_SplitSeqAlign(aln_set, repeated_seqs, new_seqs, prev_seqids);
573 
574  // Show deflines for 'repeat' sequences
575  {{
576  CShowBlastDefline showdef(repeated_seqs, *m_Scope,
578  repeated_seqs.Size());
579  x_ConfigCShowBlastDefline(showdef);
582  }}
583  m_Outfile << "\n";
584 
585  // Show deflines for 'new' sequences
586  {{
587  CShowBlastDefline showdef(new_seqs, *m_Scope, kFormatLineLength,
588  new_seqs.Size());
589  x_ConfigCShowBlastDefline(showdef);
592  }}
593 
594  } else {
595 
596  CShowBlastDefline showdef(*aln_set, *m_Scope,
597  defline_length == -1 ? kFormatLineLength:defline_length,
598  m_NumSummary + additional);
599  x_ConfigCShowBlastDefline(showdef, -1, -1, index,
600  m_NumSummary+additional);
602  }
603  m_Outfile << "\n";
604 }
605 
606 int
607 s_SetFlags(string& program,
609  bool html, bool showgi, bool isbl2seq, bool disableKAStats)
610 {
611  // set the alignment flags
613 
614  if ( isbl2seq ) {
616  }
617 
618  if (html)
620  if (showgi)
622 
623  if (format_type >= CFormattingArgs::eQueryAnchoredIdentities &&
626  }
627  else {
630  }
631 
632  if (format_type == CFormattingArgs::eQueryAnchoredIdentities ||
635  }
636  if (format_type == CFormattingArgs::eQueryAnchoredIdentities ||
639  }
640  if (program == "tblastx") {
642  }
643 
644  if (disableKAStats)
646 
647  return flags;
648 }
649 
650 bool
652 {
653  return m_IsVdb;
654 }
655 // Port of jzmisc.c's AddAlignInfoToSeqAnnotEx (CVS revision 6.11)
658  const string& db_title) const
659 {
662  m_DbName, db_title,
663  x_IsVdbSearch());
664 }
665 
666 void
667 CBlastFormat::x_PrintStructuredReport(const blast::CSearchResults& results,
669 {
670  string db_title;
671  if (!m_DbInfo.empty()) {
672  db_title = m_DbInfo.front().definition;
673  for (size_t i=1;i < m_DbInfo.size();i++) {
674  db_title += "; ";
675  db_title += m_DbInfo[i].definition;
676  }
677  }
678 
679  // ASN.1 formatting is straightforward
681  if (results.HasAlignments()) {
682  CRef<CSeq_align_set> aln_set (new CSeq_align_set);
683  CBlastFormatUtil::PruneSeqalign(*(results.GetSeqAlign()), *aln_set, m_HitlistSize);
685  m_Outfile << MSerial_AsnText << *x_WrapAlignmentInSeqAnnot(aln_set, db_title);
686  else
687  m_Outfile << MSerial_Json << *x_WrapAlignmentInSeqAnnot(aln_set, db_title);
688  }
689  return;
691  if (results.HasAlignments()) {
692  CRef<CSeq_align_set> aln_set (new CSeq_align_set);
693  CBlastFormatUtil::PruneSeqalign(*(results.GetSeqAlign()), *aln_set, m_HitlistSize);
695  *x_WrapAlignmentInSeqAnnot(aln_set, db_title);
696  }
697  return;
698  } else if (m_FormatType == CFormattingArgs::eXml) {
699  CRef<CSearchResults> res(const_cast<CSearchResults*>(&results));
700  res->TrimSeqAlign(m_HitlistSize);
701  m_AccumulatedResults.push_back(res);
702  CConstRef<CSeq_id> query_id = results.GetSeqId();
703  // FIXME: this can be a bottleneck with large numbers of queries
704  ITERATE(CBlastQueryVector, itr, *queries) {
705  if (query_id->Match(*(*itr)->GetQueryId())) {
706  m_AccumulatedQueries->push_back(*itr);
707  break;
708  }
709  }
710 
711  objects::CBlastOutput xml_output;
712  if(x_IsVdbSearch()) {
718  BlastXML_FormatReport(xml_output, &report_data, &m_Outfile,
720 
721  }
722  else {
728  BlastXML_FormatReport(xml_output, &report_data, &m_Outfile,
730  }
731  m_AccumulatedResults.clear();
732  m_AccumulatedQueries->clear();
733  return;
734  }
737  x_PrintXML2Report(results, queries);
738  return;
739  }
740  else if (m_FormatType == CFormattingArgs::eSAM) {
741  if(results.HasAlignments()) {
742  m_SamFormatter->Print(*(results.GetSeqAlign()));
743  }
744  return;
745  }
746 }
747 
748 void
749 CBlastFormat::x_PrintTabularReport(const blast::CSearchResults& results,
750  unsigned int itr_num)
751 {
752  CConstRef<CSeq_align_set> aln_set = results.GetSeqAlign();
753  if (m_IsUngappedSearch && results.HasAlignments()) {
755  }
756  // other output types will need a bioseq handle
757  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*results.GetSeqId(),
759 
760  // tabular formatting just prints each alignment in turn
761  // (plus a header)
765  const CBlastTabularInfo::EFieldDelimiter kDelim =
768 
770  if(!m_CustomDelim.empty()) {
771  tabinfo.SetCustomDelim(m_CustomDelim);
772  }
775  tabinfo.SetParseSubjectDefline(true);
776  }
777  tabinfo.SetQueryRange(m_QueryRange);
778  if (ncbi::NStr::ToLower(m_Program) == string("blastn"))
779  tabinfo.SetNoFetch(true);
780 
782  string strProgVersion =
783  NStr::ToUpper(m_Program) + " " + blast::CBlastVersion().Print();
784  string dbname;
785  if (m_IsDbScan)
786  dbname = string("User specified sequence set (Input: ") + m_SubjectTag + string(")");
787  else
788  dbname = m_DbName;
789  CConstRef<CBioseq> subject_bioseq;
790  // dbname used in place of Bioseq in most cases.
791  if (dbname.empty())
792  subject_bioseq.Reset(x_CreateSubjectBioseq());
793  tabinfo.PrintHeader(strProgVersion, *(bhandle.GetBioseqCore()),
794  dbname, results.GetRID(), itr_num, aln_set,
795  subject_bioseq);
796  }
797 
798  if (results.HasAlignments()) {
799  CSeq_align_set copy_aln_set;
800  CBlastFormatUtil::PruneSeqalign(*aln_set, copy_aln_set, m_HitlistSize);
801 
802  {
803  unsigned int scores = CBlastFormatUtil::eNoQuerySubjCov;
804  if(string::npos != m_CustomOutputFormatSpec.find("qcovs"))
806  if(string::npos != m_CustomOutputFormatSpec.find("qcovus") &&
807  ncbi::NStr::ToLower(m_Program) == string("blastn"))
809 
812  }
814  tabinfo.SetDbGeneticCode(m_DbGenCode);
815  ITERATE(CSeq_align_set::Tdata, itr, copy_aln_set.Get()) {
816  const CSeq_align& s = **itr;
817  tabinfo.SetFields(s, *m_Scope, &m_ScoringMatrix);
818  tabinfo.Print();
819  }
820  }
821  return;
822  }
823 }
824 
825 static void s_SetCloneInfo(const CIgBlastTabularInfo& tabinfo,
826  const CBioseq_Handle& handle,
827  CBlastFormat::SClone& clone_info) {
828 
829  if (handle.GetSeqId()->Which() == CSeq_id::e_Local){
830  CDeflineGenerator defline (handle.GetSeq_entry_Handle());
831  clone_info.seqid = defline.GenerateDefline(handle).substr(0, 45);
832 
833  // clone_info.seqid = CDeflineGenerator.substr(0, 45);
834  } else {
835  string seqid;
837  wid->GetLabel(&seqid, CSeq_id::eContent);
838  clone_info.seqid = seqid.substr(0, 45);
839  }
840  tabinfo.GetIgInfo (clone_info.v_gene, clone_info.d_gene, clone_info.j_gene,
841  clone_info.c_gene,
842  clone_info.chain_type, clone_info.na, clone_info.aa, clone_info.productive);
843  clone_info.identity = 0;
844  const vector<CIgBlastTabularInfo::SIgDomain*>& domains = tabinfo.GetIgDomains();
845  int length = 0;
846  int num_match = 0;
847  for (unsigned int i=0; i<domains.size(); ++i) {
848  if (domains[i]->length > 0) {
849  length += domains[i]->length;
850  num_match += domains[i]->num_match;
851  }
852  }
853  if (length > 0){
854  clone_info.identity = ((double)num_match)/length;
855 
856  }
857 
858 }
859 
860 void
861 CBlastFormat::x_PrintTaxReport(const blast::CSearchResults& results)
862 {
863  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*results.GetSeqId(),
865  CConstRef<CBioseq> bioseq = bhandle.GetBioseqCore();
866  if(m_IsHTML) {
867  m_Outfile << "<pre>";
868  }
869  else {
870  m_Outfile << "\n";
871  }
872  CBlastFormatUtil::AcknowledgeBlastQuery(*bioseq, kFormatLineLength,
874  m_IsHTML, false,
875  results.GetRID());
876 
877  if(m_IsHTML) {
878  m_Outfile << "</pre>";
879  }
880  CConstRef<CSeq_align_set> aln_set = results.GetSeqAlign();
881  if (m_IsUngappedSearch && results.HasAlignments()) {
883  }
884 
885  CRef<CSeq_align_set> new_aln_set(const_cast<CSeq_align_set*>(aln_set.GetPointer()));
887  taxFormatRes->DisplayOrgReport(m_Outfile);
888 }
889 
890 void
891 CBlastFormat::x_PrintIgTabularReport(const blast::CIgBlastResults& results,
892  SClone& clone_info,
893  bool fill_clone_info)
894 {
895  CConstRef<CSeq_align_set> aln_set = results.GetSeqAlign();
896  /* TODO do we support ungapped Igblast search?
897  if (m_IsUngappedSearch && results.HasAlignments()) {
898  aln_set.Reset(CDisplaySeqalign::PrepareBlastUngappedSeqalign(*aln_set));
899  } */
900  // other output types will need a bioseq handle
901  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*results.GetSeqId(),
903 
904  // tabular formatting just prints each alignment in turn
905  // (plus a header)
909 
913 
916 
917  string strProgVersion =
918  "IG" + NStr::ToUpper(m_Program);
919  CConstRef<CBioseq> subject_bioseq = x_CreateSubjectBioseq();
920 
921  if (m_IsHTML) {
922  m_Outfile << "<html><body><pre>\n";
923  }
924  if (results.HasAlignments()) {
925  const CRef<CIgAnnotation> & annots = results.GetIgAnnotation();
926  CSeq_align_set::Tdata::const_iterator itr = aln_set->Get().begin();
927  tabinfo.SetMasterFields(**itr, *m_Scope,
928  annots->m_ChainType[0],
929  annots->m_ChainTypeToShow,
930  &m_ScoringMatrix);
931  tabinfo.SetIgAnnotation(annots, m_IgOptions, aln_set, *m_Scope);
932  if (fill_clone_info) {
933  s_SetCloneInfo(tabinfo, bhandle, clone_info);
934  }
935  tabinfo.PrintHeader(m_IgOptions, strProgVersion, *(bhandle.GetBioseqCore()),
936  m_DbName,
937  m_IgOptions->m_DomainSystem,
938  results.GetRID(),
940  aln_set, subject_bioseq);
941 
942  int j = 1;
943  for (; itr != aln_set->Get().end(); ++itr) {
944  tabinfo.SetFields(**itr, *m_Scope,
945  annots->m_ChainType[j++],
946  annots->m_ChainTypeToShow,
947  &m_ScoringMatrix);
948  tabinfo.Print();
949  }
950  } else {
951  tabinfo.PrintHeader(m_IgOptions, strProgVersion, *(bhandle.GetBioseqCore()),
952  m_DbName,
953  m_IgOptions->m_DomainSystem,
954  results.GetRID(),
956  0, subject_bioseq);
957  }
958  if (m_IsHTML) {
959  m_Outfile << "\n</pre></body></html>\n";
960  }
961 }
962 
963 
964 void CBlastFormat::x_PrintAirrRearrangement(const blast::CIgBlastResults& results,
965  SClone& clone_info,
966  bool fill_clone_info,
967  bool print_airr_format_header)
968 {
969  CConstRef<CSeq_align_set> aln_set = results.GetSeqAlign();
970 
971  // other output types will need a bioseq handle
972  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*results.GetSeqId(),
974 
975  // tabular formatting just prints each alignment in turn
976  // (plus a header)
977 
979 
982 
983  string strProgVersion =
984  "IG" + NStr::ToUpper(m_Program);
985  CConstRef<CBioseq> subject_bioseq = x_CreateSubjectBioseq();
986 
987  CRef<CIgAnnotation> annots(null);
988  if (results.HasAlignments()) {
989  annots = results.GetIgAnnotation();
990  tabinfo.SetIgAnnotation(annots, m_IgOptions, aln_set, *m_Scope);
991  if (fill_clone_info) {
992  s_SetCloneInfo(tabinfo, bhandle, clone_info);
993  }
994  }
995  tabinfo.SetAirrFormatData(*m_Scope, annots,
996  bhandle, aln_set, m_IgOptions);
997 
998 
999  tabinfo.PrintAirrRearrangement(*m_Scope, annots, strProgVersion,
1000  *(bhandle.GetBioseqCore()),
1001  m_DbName,
1002  m_IgOptions->m_DomainSystem,
1003  results.GetRID(),
1005  aln_set, subject_bioseq, &m_ScoringMatrix,
1006  print_airr_format_header,
1007  m_IgOptions);
1008 
1009 }
1010 
1012 {
1013  if ( !m_IsBl2Seq && !m_IsDbScan) {
1014  return CConstRef<CBioseq>();
1015  }
1016 
1019  static Uint4 subj_index = 0;
1020 
1021  list< CRef<CSeq_id> > ids = m_SeqInfoSrc->GetId(subj_index++);
1023  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*id,
1025  // If this assertion fails, we're not able to get the subject, possibly a
1026  // programming error (see @note in this function's declaration - was the
1027  // order of calls altered?)
1028  _ASSERT(bhandle);
1029 
1030  // reset the subject index if necessary
1031  if (subj_index >= m_SeqInfoSrc->Size()) {
1032  subj_index = 0;
1033  }
1034  return bhandle.GetBioseqCore();
1035 }
1036 
1037 /// Auxiliary function to print the BLAST Archive in multiple output formats
1039 {
1040  if (archive.Empty()) {
1041  return;
1042  }
1043  string outfmt = CNcbiEnvironment().Get("ARCHIVE_FORMAT");
1044  if (outfmt.empty()) {
1045  out << MSerial_AsnText << *archive;
1046  } else if (!NStr::CompareNocase(outfmt, "xml")) {
1047  out << MSerial_Xml << *archive;
1048  } else if (NStr::StartsWith(outfmt, "bin", NStr::eNocase)) {
1049  out << MSerial_AsnBinary << *archive;
1050  }
1051 }
1052 
1053 void
1054 CBlastFormat::WriteArchive(blast::IQueryFactory& queries,
1055  blast::CBlastOptionsHandle& options_handle,
1056  const CSearchResultSet& results,
1057  unsigned int num_iters,
1058  const list<CRef<CBlast4_error> > & msg)
1059 {
1061  if (m_IsBl2Seq)
1062  {
1063  CRef<CBlastQueryVector> query_vector(new CBlastQueryVector);
1064  for (unsigned int i=0; i<m_SeqInfoSrc->Size(); i++)
1065  {
1066  list< CRef<CSeq_id> > ids = m_SeqInfoSrc->GetId(i);
1068  CRef<CSeq_loc> seq_loc(new CSeq_loc);
1069  seq_loc->SetWhole(*id);
1070  CRef<CBlastSearchQuery> search_query(new CBlastSearchQuery(*seq_loc, *m_Scope));
1071  query_vector->AddQuery(search_query);
1072  }
1073  CObjMgr_QueryFactory subjects(*query_vector);
1074  archive = BlastBuildArchive(queries, options_handle, results, subjects);
1075 
1076  }
1077  else if (!m_SearchDb.Empty())
1078  {
1079  // Use only by psi blast
1080  if(num_iters != 0) {
1081  archive = BlastBuildArchive(queries, options_handle, results, m_SearchDb , num_iters);
1082  }
1083  else {
1084  archive = BlastBuildArchive(queries, options_handle, results, m_SearchDb );
1085  }
1086  }
1087  else
1088  {
1089  if(m_DbInfo.empty()) {
1090  NCBI_THROW(CException, eUnknown, "Subject or DB info not available");
1091  }
1092  string db_list = kEmptyStr;
1094  for (unsigned int i=0; i < m_DbInfo.size(); i++) {
1095  db_list += m_DbInfo[i].name;
1096  }
1097  CRef<CSearchDatabase> sdb (new CSearchDatabase(db_list, mol_type));
1098  archive = BlastBuildArchive(queries, options_handle, results, sdb);
1099  }
1100 
1101  if(msg.size() > 0) {
1102  archive->SetMessages() = msg;
1103  }
1104  PrintArchive(archive, m_Outfile);
1105 }
1106 
1107 void
1108 CBlastFormat::WriteArchive(objects::CPssmWithParameters & pssm,
1109  blast::CBlastOptionsHandle& options_handle,
1110  const CSearchResultSet& results,
1111  unsigned int num_iters,
1112  const list<CRef<CBlast4_error> > & msg)
1113 {
1114  CRef<objects::CBlast4_archive> archive(BlastBuildArchive(pssm, options_handle, results, m_SearchDb, num_iters));
1115 
1116  if(msg.size() > 0) {
1117  archive->SetMessages() = msg;
1118  }
1119  PrintArchive(archive, m_Outfile);
1120 }
1121 
1122 
1124 {
1125 
1126  int delineFormatOption = 0;
1128 
1129  deflines.SetQueryNumber(1);//m_Query_number
1130  deflines.SetDbType (!m_DbIsAA);
1131  deflines.SetDbName(m_DbName);
1132  delineFormatOption |= CShowBlastDefline::eHtml;
1133  delineFormatOption |= CShowBlastDefline::eShowPercentIdent;
1134  deflines.SetOption(delineFormatOption); //m_defline_option
1135 
1136  //Next three lines are for proper initialization in formatting of defline
1138  deflineTemplates->advancedView = true;
1139  deflines.SetDeflineTemplates (deflineTemplates);
1140 
1141 
1142  vector <CShowBlastDefline::SDeflineFormattingInfo *> sdlFortInfoVec = deflines.GetFormattingInfo();
1143  CJson_Document doc;
1144  CJson_Object top_obj = doc.SetObject();
1145  CJson_Array defline_array = top_obj.insert_array("deflines");
1146 
1147  for(size_t i = 0; i < sdlFortInfoVec.size(); i++) {
1148  CJson_Object obj = defline_array.push_back_object();
1149 
1150  obj.insert("dfln_url",sdlFortInfoVec[i]->dfln_url);
1151  obj.insert("dfln_rid",sdlFortInfoVec[i]->dfln_rid);
1152  obj.insert("dfln_gi",sdlFortInfoVec[i]->dfln_gi);
1153  obj.insert("dfln_seqid",sdlFortInfoVec[i]->dfln_seqid);
1154  obj.insert("full_dfln_defline",sdlFortInfoVec[i]->full_dfln_defline);
1155  obj.insert("dfln_defline",sdlFortInfoVec[i]->dfln_defline);
1156  obj.insert("dfln_id",sdlFortInfoVec[i]->dfln_id);
1157  obj.insert("dflnFrm_id",sdlFortInfoVec[i]->dflnFrm_id);
1158  obj.insert("dflnFASTA_id",sdlFortInfoVec[i]->dflnFASTA_id);
1159  obj.insert("dflnAccs",sdlFortInfoVec[i]->dflnAccs);
1160 
1161  obj.insert("score_info",sdlFortInfoVec[i]->score_info);
1162  obj.insert("dfln_hspnum",sdlFortInfoVec[i]->dfln_hspnum);
1163  obj.insert("dfln_alnLen",sdlFortInfoVec[i]->dfln_alnLen);
1164  obj.insert("dfln_blast_rank",sdlFortInfoVec[i]->dfln_blast_rank);
1165  obj.insert("total_bit_string",sdlFortInfoVec[i]->total_bit_string);
1166  obj.insert("percent_coverage",sdlFortInfoVec[i]->percent_coverage);
1167  obj.insert("evalue_string",sdlFortInfoVec[i]->evalue_string);
1168  obj.insert("percent_identity",sdlFortInfoVec[i]->percent_identity);
1169  }
1170  doc.Write(m_Outfile);
1171 }
1172 
1173 
1175 {
      // Body of the routine that prints the description (defline) section
      // through the `deflines` object and writes it to m_Outfile.
      // NOTE(review): the signature line and listing lines 1176-1177, 1180 and
      // 1188 are absent from this extract, so the declaration of `deflines`
      // and any further setup are not visible here - confirm against the
      // complete file before editing.
1178 
1179  int delineFormatOption = 0;
1181 
      // The query number is hard-coded to 1 (the original author's trailing
      // note suggests a member, m_Query_number, was considered instead).
1182  deflines.SetQueryNumber(1);//m_Query_number
1183  deflines.SetDbType (!m_DbIsAA);
1184  deflines.SetDbName(m_DbName);
      // Request HTML output with the percent-identity option enabled.
1185  delineFormatOption |= CShowBlastDefline::eHtml;
1186  delineFormatOption |= CShowBlastDefline::eShowPercentIdent;
1187  deflines.SetOption(delineFormatOption); //m_defline_option
1189 
1190  deflines.Init();
1191  deflines.Display(m_Outfile);
1192 }
1193 
1194 
1196 {
      // Body of the routine that renders the alignments with CDisplaySeqalign
      // and writes them to m_Outfile.
      // NOTE(review): the signature line and listing lines 1197-1198, 1208 and
      // 1213 are absent from this extract; `results` and `aln_set` are
      // presumably parameters - confirm against the complete file.
1199 
1200  TMaskedQueryRegions masklocs;
1201  results.GetMaskedQueryRegions(masklocs);
1202 
      // Prune into a local copy, passing m_NumAlignments (presumably the
      // maximum number of alignments to keep - TODO confirm).
1203  CSeq_align_set copy_aln_set;
1204  CBlastFormatUtil::PruneSeqalign(*aln_set, copy_aln_set, m_NumAlignments);
1205 
      // NOTE(review): seqAlnSet is built from the address of the local object
      // copy_aln_set, so it is only meaningful inside this function.
1206  CRef<CSeq_align_set> seqAlnSet(const_cast<CSeq_align_set*>(&copy_aln_set));
1207  if(!m_AlignSeqList.empty()) {
      // NOTE(review): the statement of this branch (listing line 1208) is
      // missing from this extract.
1209  }
1210 
1211  CDisplaySeqalign display(*seqAlnSet, *m_Scope, &masklocs, NULL, m_MatrixName);
1212  x_SetAlignParameters(display);
1214 
1215  display.DisplaySeqalign(m_Outfile);
1216 }
1217 
1219 {
      // Body of the routine that loads the defline templates from the
      // "Templates" section of the application registry into
      // m_DeflineTemplates.
      // NOTE(review): the signature line and listing lines 1220, 1225 and 1231
      // are absent from this extract, so the origin of `app` and the
      // allocation of m_DeflineTemplates are not visible here.
      // Nothing is loaded when `app` is null.
1221  if(!app) return;
1222  const CNcbiRegistry& reg = app->GetConfig();
1223 
1224 
      // NOTE(review): defLineTmpl is not used in any of the visible lines.
1226  string defLineTmpl;
1227 
1228  m_DeflineTemplates->defLineTmpl = reg.Get("Templates", "DFL_TABLE_ROW");
1229  m_DeflineTemplates->scoreInfoTmpl = reg.Get("Templates", "DFL_TABLE_SCORE_INFO");
1230  m_DeflineTemplates->seqInfoTmpl = reg.Get("Templates", "DFL_TABLE_SEQ_INFO");
1232 }
1233 
1235 {
      // Body of the routine that loads the alignment templates from the
      // "Templates" section of the application registry into
      // m_AlignTemplates.
      // NOTE(review): the signature line and listing lines 1236 and 1240 are
      // absent from this extract, so the origin of `app` and the allocation
      // of m_AlignTemplates are not visible here.
      // Nothing is loaded when `app` is null.
1237  if(!app) return;
1238  const CNcbiRegistry& reg = app->GetConfig();
1239 
1241 
1242  m_AlignTemplates->alignHeaderTmpl = reg.Get("Templates", "BLAST_ALIGN_HEADER");
      // The alignment-info template is assembled from two registry entries:
      // the parameter table (nucleotide variant only when m_Program is exactly
      // "blastn", protein variant otherwise) is mapped into the
      // "align_params" slot of BLAST_ALIGN_PARAMS.
1243  string blastAlignParamsTemplData = reg.Get("Templates", "BLAST_ALIGN_PARAMS");
1244  string blastAlignParamsTag = (m_Program == "blastn") ? "ALIGN_PARAMS_NUC" : "ALIGN_PARAMS_PROT";
      // Despite its name, this variable holds whichever table the tag above
      // selected (nucleotide or protein).
1245  string blastAlignProtParamsTable = reg.Get("Templates", blastAlignParamsTag);
1246  m_AlignTemplates->alignInfoTmpl = CAlignFormatUtil::MapTemplate(blastAlignParamsTemplData,"align_params",blastAlignProtParamsTable);
1247  m_AlignTemplates->sortInfoTmpl = reg.Get("Templates", "SORT_ALIGNS_SEQ");
1248  m_AlignTemplates->alignFeatureTmpl = reg.Get("Templates", "ALN_FEATURES");
1249  m_AlignTemplates->alignFeatureLinkTmpl = reg.Get("Templates", "ALN_FEATURES_LINK");
1250 
1251  m_AlignTemplates->alnDefLineTmpl = reg.Get("Templates", "ALN_DEFLINE_ROW");
1252  m_AlignTemplates->alnTitlesLinkTmpl = reg.Get("Templates", "ALN_DEFLINE_TITLES_LNK");
1253  m_AlignTemplates->alnTitlesTmpl = reg.Get("Templates", "ALN_DEFLINE_TITLES");
1254  m_AlignTemplates->alnSeqInfoTmpl = reg.Get("Templates", "ALN_DEFLINE_SEQ_INFO");
1255  m_AlignTemplates->alignRowTmpl = reg.Get("Templates", "BLAST_ALIGN_ROWS");
1256  m_AlignTemplates->alignRowTmplLast = reg.Get("Templates", "BLAST_ALIGN_ROWS_LST");
1257 }
1258 
1259 
1260 
1262 {
      // Body of the routine that configures the alignment display object
      // `cds`: display option flags, database name/type, line length and
      // query number.
      // NOTE(review): the signature line and listing lines 1269, 1283-1284,
      // 1286-1287 and 1290-1293 are absent from this extract, so the bodies of
      // the tblastx branch and of the blastn/megablast branches are not
      // visible here.
1263 
1264  int AlignOption = 0;
1265 
      // Flags are accumulated with +=; each one is added exactly once in the
      // visible lines.
1266  AlignOption += CDisplaySeqalign::eShowMiddleLine;
1267 
1268  if (m_Program == "tblastx") {
1270  }
1271  AlignOption += CDisplaySeqalign::eShowBlastInfo;
1272  AlignOption += CDisplaySeqalign::eShowBlastStyleId;
1273  AlignOption += CDisplaySeqalign::eHtml;
1274  AlignOption += CDisplaySeqalign::eShowSortControls;//*******????
1275  AlignOption += CDisplaySeqalign::eDynamicFeature;
1276  cds.SetAlignOption(AlignOption);
1277 
1278  cds.SetDbName(m_DbName);
1279  cds.SetDbType(!m_DbIsAA);
1280  cds.SetLineLen(m_LineLength);
1281 
1282  if (m_Program == "blastn" || m_Program == "megablast") {
1285  } else {
1288  }
      // The query number is hard-coded to 1 (the original author's trailing
      // note suggests a member, m_Query_number, was considered instead).
1289  cds.SetQueryNumber(1); //m_Query_number
1294 }
1295 
1296 
1297 
/// Translate the molecule type reported by a bioseq handle into a short
/// display string.
///
/// @param bioseqHandle  Handle whose GetBioseqMolType() value is examined
/// @return "dna", "rna", "amino acid" or "nucleic acid" for the corresponding
///         CSeq_inst::eMol_* values, "cdna" for the first case, and
///         "Unknown" for any other value
///
/// NOTE(review): the case label that selects "cdna" (listing line 1304) is
/// absent from this extract - restore it from the complete file.
1298 static string s_GetMolType(const CBioseq_Handle& bioseqHandle)
1299 {
1300  int molType = bioseqHandle.GetBioseqMolType();
1301  string molTypeString;
1302 
1303  switch(molType) {
1305  molTypeString = "cdna";
1306  break;
1307  case CSeq_inst::eMol_dna:
1308  molTypeString = "dna";
1309  break;
1310  case CSeq_inst::eMol_rna:
1311  molTypeString = "rna";
1312  break;
1313  case CSeq_inst::eMol_aa:
1314  molTypeString = "amino acid";
1315  break;
1316  case CSeq_inst::eMol_na:
1317  molTypeString = "nucleic acid";
1318  break;
1319  default:
1320  molTypeString = "Unknown";
1321  }
1322  return molTypeString;
1323 }
1324 
/// Print one part of a report for a single set of search results.
///
/// With displayOption == eMetadata a JSON object describing the query
/// (Query, Query_descr, IsQueryLocal, Length, Moltype), the database
/// (Database, Database_descr, IsDBProtein) and the program is written to
/// m_Outfile; "Error" and "Warning" entries are added when the results carry
/// them. For any other option the alignments are fetched from `results` and
/// the output is selected by displayOption.
///
/// @param results        Search results to report on
/// @param displayOption  Which part of the report to produce
///
/// NOTE(review): listing lines 1330, 1361, 1382, 1386 and 1392 are absent
/// from this extract (declaration of `bhandle`, creation of `seqdb`, and the
/// bodies of several branches).
1325 void
1326 CBlastFormat::PrintReport(const blast::CSearchResults& results,
1327  CBlastFormat::DisplayOption displayOption)
1328 {
1329  if (displayOption == eMetadata) {//Metadata in json format
1331  CConstRef<CBioseq> bioseq = bhandle.GetBioseqCore();
1332 
1333  //string seqID = CAlignFormatUtil::GetSeqIdString(*bioseq, m_BelieveQuery);
1334  string seqID;
1335  CConstRef <CSeq_id> queryID = sequence::GetId(bhandle).GetSeqId();
      // Local IDs are labelled with eDefault, all other IDs with eContent.
1336  CSeq_id::ELabelType labelType = (queryID->IsLocal()) ? CSeq_id::eDefault : CSeq_id::eContent;
1337  queryID->GetLabel(&seqID,labelType);
1338 
1339 
      // An empty description is reported as the literal string "None".
1340  string seqDescr = CBlastFormatUtil::GetSeqDescrString(*bioseq);
1341  seqDescr = seqDescr.empty() ? "None" : seqDescr;
1342 
1343  string molType = s_GetMolType(bhandle);
1344 
      // Length stays 0 when the bioseq has no instance or no length set.
1345  int length = 0;
1346  if(bioseq->IsSetInst() && bioseq->GetInst().CanGetLength()){
1347  length = bioseq->GetInst().GetLength();
1348  }
1349 
1350  CJson_Document doc;
1351  CJson_Object obj = doc.SetObject();
1352  obj.insert("Query",seqID);
1353  obj.insert("Query_descr",seqDescr);
1354  obj.insert("IsQueryLocal",queryID->IsLocal());
      // The length is emitted as a string, not as a JSON number.
1355  obj.insert("Length",NStr::IntToString(length));
1356  obj.insert("Moltype",molType);
1357  obj.insert("Database",m_DbName);
      // The database title is best effort: any exception raised while
      // obtaining it is swallowed and an empty title is reported.
1358  string dbTitle;
1359  try {
1360  CRef<CSeqDB> seqdb;
1362  dbTitle = seqdb->GetTitle();
1363  }
1364  catch (...) {/*ignore exceptions for now*/}
1365  obj.insert("Database_descr",dbTitle);
1366  obj.insert("IsDBProtein",m_DbIsAA);
1367  obj.insert("Program",m_Program);
1368 
1369 
1370  if (results.HasErrors()) {
1371  obj.insert("Error",results.GetErrorStrings());
1372  }
1373  if (results.HasWarnings()) {
1374  obj.insert("Warning",results.GetWarningStrings());
1375  }
1376  doc.Write(m_Outfile);
1377  }
1378  else {
1379  CConstRef<CSeq_align_set> aln_set = results.GetSeqAlign();
      // Callers are expected to pass results that contain alignments.
1380  _ASSERT(results.HasAlignments());
1381  if (m_IsUngappedSearch) {
1383  }
1384 
1385  if (displayOption == eDescriptionsWithTemplates) {//Descriptions with html templates
1387  }
1388  if (displayOption == eDescriptions) {//Descriptions in json format
1389  x_CreateDeflinesJson(aln_set);
1390  }
1391  else if (displayOption == eAlignments) {// print the alignments with html templates
1393  }
1394  }
1395 }
1396 
/// Format the results of one query and write them to m_Outfile.
///
/// Visible flow: structured and tabular reports are delegated to
/// x_PrintStructuredReport / x_PrintTabularReport; errors are posted and end
/// the call, warnings are posted and formatting continues. For the remaining
/// (text/HTML) output the query preamble is printed, then either the
/// "no hits found" message or the one-line summaries followed by the
/// alignments, and finally the per-query footer.
///
/// @param results   Search results for one query
/// @param itr_num   Iteration number; numeric_limits<unsigned int>::max()
///                  means "no iteration" and suppresses the round header
/// @param prev_seqids  Sequence IDs passed on to x_DisplayDeflines
/// @param is_deltablast_domain_result  true to print the
///                  "Results from domain search" header
///
/// @throws CException (eUnknown) when the query Seq-id cannot be resolved to
///         a bioseq handle
///
/// NOTE(review): many listing lines are absent from this extract (for
/// example 1399 with the `queries` parameter, 1413, 1417, 1419-1427,
/// 1441-1443, 1447, 1449, 1451, 1466, 1479, 1489, 1506, 1514-1515, 1539,
/// 1557-1558, 1561-1562, 1565-1567); restore them from the complete file
/// before changing any code.
1397 void
1398 CBlastFormat::PrintOneResultSet(const blast::CSearchResults& results,
1400  unsigned int itr_num
1401  /* = numeric_limits<unsigned int>::max() */,
1402  blast::CPsiBlastIterationState::TSeqIds prev_seqids
1403  /* = CPsiBlastIterationState::TSeqIds() */,
1404  bool is_deltablast_domain_result /* = false */)
1405 {
1406  // For remote searches, we don't retrieve the sequence data for the query
1407  // sequence when initially sending the request to the BLAST server (if it's
1408  // a GI/accession/TI), so we flush the scope so that it can be retrieved
1409  // (needed if a self-hit is found) again. This is not applicable if the
1410  // query sequence(s) are specified as FASTA (will be identified by local
1411  // IDs).
1412  if (m_IsRemoteSearch && !s_HasLocalIDs(queries)) {
1414  }
1415 
1416  // Used with tabular output to print number of searches formatted at end.
1418 
1428  {
1429  x_PrintStructuredReport(results, queries);
1430  return;
1431  }
1432 
1433  if (results.HasErrors()) {
1434  ERR_POST(Error << results.GetErrorStrings());
1435  return; // errors are deemed fatal
1436  }
1437  if (results.HasWarnings()) {
1438  ERR_POST(Warning << results.GetWarningStrings());
1439  }
1440 
1444  x_PrintTabularReport(results, itr_num);
1445  return;
1446  }
1448  string reportCaption = "Tax BLAST report";
1450  m_Outfile << reportCaption;
1452  return;
1453  }
1454  const bool kIsTabularOutput = false;
1455 
1456  if (is_deltablast_domain_result) {
1457  m_Outfile << "Results from domain search" << "\n";
1458  }
1459 
1460  if (itr_num != numeric_limits<unsigned int>::max()) {
1461  m_Outfile << "Results from round " << itr_num << "\n";
1462  }
1463 
1464  // other output types will need a bioseq handle
1465  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*results.GetSeqId(),
1467  // If we're not able to get the query, most likely a bug. SB-981 , GP-2207
1468  if( !bhandle ){
1469  string message = "Failed to resolve SeqId: "+results.GetSeqId()->AsFastaString();
1470  ERR_POST(message);
1471  NCBI_THROW(CException, eUnknown, message);
1472  }
1473  CConstRef<CBioseq> bioseq = bhandle.GetBioseqCore();
1474 
1475  // print the preamble for this query
1476 
1477  m_Outfile << "\n\n";
1478  CBlastFormatUtil::AcknowledgeBlastQuery(*bioseq, kFormatLineLength,
1480  m_IsHTML, kIsTabularOutput,
1481  results.GetRID());
1482 
      // The subject is acknowledged only for BLAST2Sequences runs that are
      // not database scans.
1483  if (m_IsBl2Seq && !m_IsDbScan) {
1484  m_Outfile << "\n";
1485  // FIXME: this might be configurable in the future
1486  const bool kBelieveSubject = false;
1487  CConstRef<CBioseq> subject_bioseq = x_CreateSubjectBioseq();
1488  CBlastFormatUtil::AcknowledgeBlastSubject(*subject_bioseq,
1490  m_Outfile, kBelieveSubject,
1491  m_IsHTML, kIsTabularOutput);
1492  }
1493 
1494  // quit early if there are no hits
1495  if ( !results.HasAlignments() ) {
1496  m_Outfile << "\n\n"
1497  << "***** " << CBlastFormatUtil::kNoHitsFound << " *****" << "\n"
1498  << "\n\n";
1499  x_PrintOneQueryFooter(*results.GetAncillaryData());
1500  return;
1501  }
1502 
1503  CConstRef<CSeq_align_set> aln_set = results.GetSeqAlign();
1504  _ASSERT(results.HasAlignments());
1505  if (m_IsUngappedSearch) {
1507  }
1508 
1509  //invoke sorting only for m_HitsSortOption > CAlignFormatUtil::eEvalue or m_HspsSortOption > CAlignFormatUtil::eHspEvalue
1510  if(m_HitsSortOption > 0 || m_HspsSortOption > 0) {
1511  aln_set = CBlastFormatUtil::SortSeqalignForSortableFormat(
1512  *(const_cast<CSeq_align_set*>(aln_set.GetPointer())),
1513  (m_Program == "tblastx") ? true : false,
1516  }
1517 
1518  const bool kIsGlobal = s_IsGlobalSeqAlign(aln_set);
1519 
1520  //-------------------------------------------------
1521  // print 1-line summaries
1522  // Also disable when program is rmblastn. At this time
1523  // we do not want summary bit scores/evalues for this
1524  // program. -RMH-
1525  if ( (!m_IsBl2Seq || m_IsDbScan) && !(m_DisableKAStats || kIsGlobal) ) {
1526  x_DisplayDeflines(aln_set, itr_num, prev_seqids);
1527  }
1528 
1529  //-------------------------------------------------
1530  // print the alignments
1531  m_Outfile << "\n";
1532 
1533  TMaskedQueryRegions masklocs;
1534  results.GetMaskedQueryRegions(masklocs);
1535 
      // The display works on a pruned local copy of the alignments;
      // m_NumAlignments is passed to PruneSeqalign (presumably the maximum
      // number to keep - TODO confirm).
1536  CSeq_align_set copy_aln_set;
1537  CBlastFormatUtil::PruneSeqalign(*aln_set, copy_aln_set, m_NumAlignments);
1538 
1540  (m_IsBl2Seq && !m_IsDbScan), (m_DisableKAStats || kIsGlobal));
1541 
1542  CDisplaySeqalign display(copy_aln_set, *m_Scope, &masklocs, NULL, m_MatrixName);
1543  display.SetDbName(m_DbName);
1544  display.SetDbType(!m_DbIsAA);
1545  display.SetLineLen(m_LineLength);
1546  int kAlignToShow=2000000000; // Nice large number per SB-1817
1547  display.SetNumAlignToShow(kAlignToShow);
1548 
1549  // set the alignment flags
1550  display.SetAlignOption(flags);
1551 
1552  if (m_LongSeqId) {
1553  display.UseLongSequenceIds();
1554  }
1555 
1556  if (m_Program == "blastn" || m_Program == "megablast") {
1559  }
1560  else {
1563  }
1564 
1568  TSeqLocInfoVector subj_masks;
1569  results.GetSubjectMasks(subj_masks);
1570  display.SetSubjectMasks(subj_masks);
1571  display.DisplaySeqalign(m_Outfile);
1572 
1573  // print the ancillary data for this query
1574 
1575  x_PrintOneQueryFooter(*results.GetAncillaryData());
1576 }
1577 
/// Format the results of one IgBLAST query and write them to m_Outfile.
///
/// Visible flow: clone_info.na / clone_info.aa are cleared first. Structured,
/// Ig-tabular and AIRR reports are delegated to x_PrintStructuredReport,
/// x_PrintIgTabularReport and x_PrintAirrRearrangement (AIRR only when
/// m_Program is "blastn" or "BLASTN"). For the remaining text/HTML output
/// the query preamble, the one-line summaries, the master alignment summary,
/// the domain-annotated alignments and the per-query footer are printed.
///
/// @param clone_info                Clone summary; reset on entry and filled
///                                  when fill_clone_info is true
/// @param fill_clone_info           Whether to fill clone_info
/// @param print_airr_format_header  Forwarded to x_PrintAirrRearrangement
/// @param index                     Forwarded to x_DisplayDeflines and, for
///                                  HTML, to SetResultPositionIndex
///
/// NOTE(review): the function name line and the leading parameters (listing
/// lines 1579-1580) as well as many statements (for example 1596, 1600,
/// 1602-1609, 1624, 1637-1640, 1645, 1655, 1657, 1659, 1667, 1678, 1694,
/// 1700, 1711-1712, 1714-1715, 1740-1744, 1746-1747, 1751-1752, 1775, 1798,
/// 1818, 1841, 1878-1879, 1881-1882, 1896, 1898, 1900-1902) are absent from
/// this extract; `results`, `queries`, `tabinfo`, `temp`, `flags` and
/// `prev_ids` are declared on lines that are not visible here.
1578 void
1581  SClone& clone_info,
1582  bool fill_clone_info,
1583  bool print_airr_format_header,
1584  int index)
1585 {
1586  clone_info.na = NcbiEmptyString;
1587  clone_info.aa = NcbiEmptyString;
1588 
1589  // For remote searches, we don't retrieve the sequence data for the query
1590  // sequence when initially sending the request to the BLAST server (if it's
1591  // a GI/accession/TI), so we flush the scope so that it can be retrieved
1592  // (needed if a self-hit is found) again. This is not applicable if the
1593  // query sequence(s) are specified as FASTA (will be identified by local
1594  // IDs).
1595  if (m_IsRemoteSearch && !s_HasLocalIDs(queries)) {
1597  }
1598 
1599  // Used with tabular output to print number of searches formatted at end.
1601 
1610  {
1611  x_PrintStructuredReport(results, queries);
1612  return;
1613  }
1614 
1615  if (results.HasErrors()) {
1616  ERR_POST(Error << results.GetErrorStrings());
1617  return; // errors are deemed fatal
1618  }
1619  if (results.HasWarnings()) {
1620  ERR_POST(Warning << results.GetWarningStrings());
1621  }
1622 
1623  if (results.GetIgAnnotation()->m_MinusStrand) {
1625  }
1626  //set j domain
      // m_JDomain[0..1] is later labelled CDR3 and m_JDomain[2..3] FR4 (see
      // the "J domain" section below). When the J-domain end lies beyond
      // m_DomainInfo[9], the CDR3 start is placed right after
      // m_DomainInfo[9], and, if an FR4 end is known, the FR4 start right
      // after the CDR3 end.
1627  CRef<CIgAnnotation> & annots_edit = results.SetIgAnnotation();
1628  if (annots_edit->m_JDomain[1] > 0 && annots_edit->m_DomainInfo[9] > 0 &&
1629  annots_edit->m_JDomain[1] > annots_edit->m_DomainInfo[9]){
1630  annots_edit->m_JDomain[0] = annots_edit->m_DomainInfo[9] + 1 ;
1631  //fwr4
1632  if (annots_edit->m_JDomain[3] > 0) {
1633  annots_edit->m_JDomain[2] = annots_edit->m_JDomain[1] + 1 ;
1634  }
1635  }
1636 
1641  x_PrintIgTabularReport(results, clone_info, fill_clone_info);
1642  return;
1643  }
1644 
1646 
1647  if (m_Program == "blastn" || m_Program == "BLASTN") {
1648  x_PrintAirrRearrangement(results, clone_info, fill_clone_info, print_airr_format_header);
1649  } else {
1650  m_Outfile << "The AIRR format is only available for nucleotide sequence search" << endl;
1651  }
1652  return;
1653  }
1654 
1656  string reportCaption = "Tax BLAST report";
1658  m_Outfile << reportCaption;
1660  return;
1661  }
1662 
1663  const bool kIsTabularOutput = false;
1664 
1665  // other output types will need a bioseq handle
1666  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*results.GetSeqId(),
1668  // If this assertion fails, we're not able to get the query, most likely a
1669  // bug
1670  _ASSERT(bhandle);
1671  CConstRef<CBioseq> bioseq = bhandle.GetBioseqCore();
1672 
1673  // print the preamble for this query
1674 
1675  m_Outfile << "\n\n";
1676 
1677  CBlastFormatUtil::AcknowledgeBlastQuery(*bioseq, kFormatLineLength,
1679  m_IsHTML, kIsTabularOutput,
1680  results.GetRID());
1681 
1682  // quit early if there are no hits
1683  if ( !results.HasAlignments() ) {
1684  m_Outfile << "\n\n"
1685  << "***** " << CBlastFormatUtil::kNoHitsFound << " *****" << "\n"
1686  << "\n\n";
1687  x_PrintOneQueryFooter(*results.GetAncillaryData());
1688  return;
1689  }
1690 
1691  CConstRef<CSeq_align_set> aln_set = results.GetSeqAlign();
1692  _ASSERT(results.HasAlignments());
1693  if (m_IsUngappedSearch) {
1695  }
1696 
1697  //-------------------------------------------------
1698  // print 1-line summaries
1699  if ( !m_IsBl2Seq ) {
      // `additional` is the sum of the V, D, J and C counts held by
      // `results`; it is handed to x_DisplayDeflines together with `index`.
1701  int additional = results.m_NumActualV +results.m_NumActualD + results.m_NumActualJ +
1702  results.m_NumActualC;
1703  x_DisplayDeflines(aln_set, numeric_limits<unsigned int>::max(), prev_ids, additional, index, 100);
1704  }
1705 
1706  //-------------------------------------------------
1707  // print the alignments
1708  m_Outfile << "\n";
1709 
1710  const CBlastTabularInfo::EFieldDelimiter kDelim =
1713 
1716 
1717  // print the master alignment
1718  if (results.HasAlignments()) {
1719  const CRef<CIgAnnotation> & annots = results.GetIgAnnotation();
1720  CSeq_align_set::Tdata::const_iterator itr = aln_set->Get().begin();
1721  tabinfo.SetMasterFields(**itr, *m_Scope,
1722  annots->m_ChainType[0],
1723  annots->m_ChainTypeToShow,
1724  &m_ScoringMatrix);
1725  tabinfo.SetIgAnnotation(annots, m_IgOptions, aln_set, *m_Scope);
1726  if (fill_clone_info) {
1727  s_SetCloneInfo(tabinfo, bhandle, clone_info);
1728  }
1729  m_Outfile << "Domain classification requested: " << m_IgOptions->m_DomainSystem << endl << endl;
1730  if (m_IsHTML) {
1731  tabinfo.PrintHtmlSummary(m_IgOptions);
1732  } else {
1733  tabinfo.PrintMasterAlign(m_IgOptions, "");
1734  }
1735  }
1736 
1737  TMaskedQueryRegions masklocs;
1738  results.GetMaskedQueryRegions(masklocs);
1739 
1745 
1748  }
1749 
1750  if (m_IsHTML) {
1753  }
1754 
1755  list < CRef<CDisplaySeqalign::DomainInfo> > domain;
1756 
      // Domain labels: the Kabat names are used when the domain system is
      // "kabat", the IMGT names for every other value.
1757  string kabat_domain_name[] = {"FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "C region"};
1758  string imgt_domain_name[] = {"FR1-IMGT", "CDR1-IMGT", "FR2-IMGT", "CDR2-IMGT", "FR3-IMGT", "CDR3-IMGT", "FR4-IMGT", "C region"};
1759  int domain_name_length = 8;
1760  vector<string> domain_name;
1761  if (m_IgOptions->m_DomainSystem == "kabat") {
1762  for (int i = 0; i < domain_name_length; i ++) {
1763  domain_name.push_back(kabat_domain_name[i]);
1764  }
1765  } else {
1766  for (int i = 0; i < domain_name_length; i ++) {
1767  domain_name.push_back(imgt_domain_name[i]);
1768  }
1769  }
1770 
1771  const CRef<CIgAnnotation> & annots = results.GetIgAnnotation();
1772 
      // m_DomainInfo / m_DomainInfo_S are read as (start, stop) pairs at
      // indices i and i+1 for i = 0, 2, 4, 6, 8; pair i is labelled
      // domain_name[i/2], i.e. the first five names (FR1 .. FR3).
1773  for (int i=0; i<9; i = i + 2) {
1774  if (annots->m_DomainInfo[i] >= 0){
1776  int start = annots->m_DomainInfo[i];
1777  int subject_start = annots->m_DomainInfo_S[i];
1778 
1779  int stop = annots->m_DomainInfo[i+1];
1780  int subject_stop = annots->m_DomainInfo_S[i+1];
1781 
1782  temp->seqloc = new CSeq_loc((CSeq_loc::TId &) aln_set->Get().front()->GetSeq_id(0),
1783  (CSeq_loc::TPoint) start,
1784  (CSeq_loc::TPoint) stop);
1785  temp->subject_seqloc = new CSeq_loc((CSeq_loc::TId &) aln_set->Get().front()->GetSeq_id(1),
1786  (CSeq_loc::TPoint) subject_start,
1787  (CSeq_loc::TPoint) subject_stop);
1788  temp->is_subject_start_valid = subject_start > 0 ? true:false;
1789  temp->is_subject_stop_valid = subject_stop > 0 ? true:false;
1790  temp->domain_name = domain_name[i/2];
1791  domain.push_back(temp);
1792  }
1793  }
1794 
1795  //J domain
1796  //cdr3
      // For the J-domain and C-region entries below no subject coordinates
      // are available: subject_start / subject_stop are fixed at -1, so both
      // validity flags end up false and the subject location is built on an
      // empty CSeq_id.
1797  if (annots->m_JDomain[0] > 0 && annots->m_JDomain[1] > 0){
1799  int start = annots->m_JDomain[0];
1800  int subject_start = -1;
1801  int stop = annots->m_JDomain[1];
1802  int subject_stop = -1;
1803 
1804  temp->seqloc = new CSeq_loc((CSeq_loc::TId &) aln_set->Get().front()->GetSeq_id(0),
1805  (CSeq_loc::TPoint) start,
1806  (CSeq_loc::TPoint) stop);
1807  CRef<CSeq_id> id_holder (new CSeq_id);
1808  temp->subject_seqloc = new CSeq_loc(*id_holder,
1809  (CSeq_loc::TPoint) subject_start,
1810  (CSeq_loc::TPoint) subject_stop);
1811  temp->is_subject_start_valid = subject_start > 0 ? true:false;
1812  temp->is_subject_stop_valid = subject_stop > 0 ? true:false;
1813  temp->domain_name = domain_name[5];
1814  domain.push_back(temp);
1815  }
1816  //fwr4
1817  if (annots->m_JDomain[2] > 0 && annots->m_JDomain[3] > 0){
1819  int start = annots->m_JDomain[2];
1820  int subject_start = -1;
1821  int stop = annots->m_JDomain[3];
1822  int subject_stop = -1;
1823 
1824  temp->seqloc = new CSeq_loc((CSeq_loc::TId &) aln_set->Get().front()->GetSeq_id(0),
1825  (CSeq_loc::TPoint) start,
1826  (CSeq_loc::TPoint) stop);
1827  CRef<CSeq_id> id_holder (new CSeq_id);
1828  temp->subject_seqloc = new CSeq_loc(*id_holder,
1829  (CSeq_loc::TPoint) subject_start,
1830  (CSeq_loc::TPoint) subject_stop);
1831  temp->is_subject_start_valid = subject_start > 0 ? true:false;
1832  temp->is_subject_stop_valid = subject_stop > 0 ? true:false;
1833  temp->domain_name = domain_name[6];
1834  domain.push_back(temp);
1835  }
1836 
1837  //C region
1838 
      // The C region is only added when the FR4 boundaries are known too.
1839  if (annots->m_CDomain[0] > 0 && annots->m_CDomain[1] > 0 &&
1840  annots->m_JDomain[2] > 0 && annots->m_JDomain[3] > 0){
1842  int start = annots->m_CDomain[0];
1843  int subject_start = -1;
1844  int stop = annots->m_CDomain[1];
1845  int subject_stop = -1;
1846 
1847  temp->seqloc = new CSeq_loc((CSeq_loc::TId &) aln_set->Get().front()->GetSeq_id(0),
1848  (CSeq_loc::TPoint) start,
1849  (CSeq_loc::TPoint) stop);
1850  CRef<CSeq_id> id_holder (new CSeq_id);
1851  temp->subject_seqloc = new CSeq_loc(*id_holder,
1852  (CSeq_loc::TPoint) subject_start,
1853  (CSeq_loc::TPoint) subject_stop);
1854  temp->is_subject_start_valid = subject_start > 0 ? true:false;
1855  temp->is_subject_stop_valid = subject_stop > 0 ? true:false;
1856  temp->domain_name = domain_name[7];
1857  domain.push_back(temp);
1858  }
1859 
1860 
1861  CDisplaySeqalign display(*aln_set, *m_Scope, &masklocs, NULL, m_MatrixName);
      // Show the V/D/J/C hits, plus m_NumAlignments more when the search
      // database differs from the first germline database in m_IgOptions.
1862  int num_align_to_show = results.m_NumActualV + results.m_NumActualD +
1863  results.m_NumActualJ + results.m_NumActualC;
1864  if (m_DbName != m_IgOptions->m_Db[0]->GetDatabaseName()){
1865  num_align_to_show += m_NumAlignments;
1866  }
1867  display.SetNumAlignToShow(num_align_to_show);
1868  display.SetMasterDomain(&domain);
1869  display.SetDbName(m_DbName);
1870  display.SetDbType(!m_DbIsAA);
      // Unlike the generic overload, the line length is fixed at 90 here.
1871  display.SetLineLen(90);
1872 
1873  if (m_LongSeqId) {
1874  display.UseLongSequenceIds();
1875  }
1876 
1877  if (annots->m_FrameInfo[0] >= 0 && m_IgOptions->m_Translate) {
1880  }
      // Chain types reported as "N/A" are passed on as empty labels.
1883  vector<string> chain_type_list;
1884  ITERATE(vector<string>, iter, annots->m_ChainType) {
1885  if (*iter=="N/A"){
1886  chain_type_list.push_back(NcbiEmptyString);
1887  } else {
1888  chain_type_list.push_back(*iter);
1889  }
1890  }
1891  display.SetSequencePropertyLabel(&chain_type_list);
1892  // set the alignment flags
1893 
1894  display.SetAlignOption(flags);
1895  if (m_Program == "blastn" || m_Program == "BLASTN") {
1897  } else {
1899  }
1903  TSeqLocInfoVector subj_masks;
1904  results.GetSubjectMasks(subj_masks);
1905  display.SetSubjectMasks(subj_masks);
1906 
1907  if (m_IsHTML) {
1908  display.SetResultPositionIndex(index);
1909  m_Outfile << "\n<CENTER><b><FONT color=\"green\">Alignments</FONT></b></CENTER>"
1910  << endl;
1911 
1912  } else {
1913  m_Outfile << "\nAlignments" << endl;
1914  }
1915 
1916  display.DisplaySeqalign(m_Outfile);
1917 
1918  // print the ancillary data for this query
1919 
1920  x_PrintOneQueryFooter(*results.GetAncillaryData());
1921  if (m_IsHTML) {
1922  m_Outfile << "<hr>" << endl;
1923  }
1924 }
1925 
/// Re-express IgBLAST results on a reversed copy of the query.
///
/// A new bioseq covering the minus strand of the original query is added to
/// m_Scope under the ID "<original id>_reversed", every alignment in
/// `results` is mapped onto that bioseq, and the positions stored in the Ig
/// annotation (m_GeneInfo, m_DomainInfo, m_FrameInfo) are mirrored to match.
/// Nothing happens when `results` has no alignments.
///
/// @param results  IgBLAST results, modified in place
///
/// NOTE(review): listing line 1946 (the condition that opens the
/// "reversed|" title branch) is absent from this extract.
1926 void
1927 CBlastFormat::x_ReverseQuery(blast::CIgBlastResults& results)
1928 {
1929  if (!results.HasAlignments()){
1930  return;
1931  }
1932  // create a temporary seq_id
1933  CConstRef<CSeq_id> qid = results.GetSeqId();
1934  string new_id = qid->AsFastaString() + "_reversed";
1935 
1936  // create a bioseq
1937  CBioseq_Handle q_bh = m_Scope->GetBioseqHandle(*qid);
1938  int len = q_bh.GetBioseqLength();
1939  CSeq_loc loc(*(const_cast<CSeq_id *>(&*qid)), 0, len-1, eNa_strand_minus);
1940  CRef<CBioseq> q_new(new CBioseq(loc, new_id));
1941  CConstRef<CSeq_id> new_qid = m_Scope->AddBioseq(*q_new).GetSeqId();
      // For local IDs the generated defline of the original query is copied
      // to the new bioseq as a title descriptor (only when it is non-empty).
1942  if (qid->IsLocal()) {
1943  string title = sequence::CDeflineGenerator().GenerateDefline(q_bh);
1944  if (title != "") {
1945  CRef<CSeqdesc> des(new CSeqdesc());
1947  des->SetTitle("reversed|" + title);
1948  } else {
1949  des->SetTitle(title);
1950  }
1951  m_Scope->GetBioseqEditHandle(*q_new).SetDescr().Set().push_back(des);
1952  }
1953  }
1954 
1955  // set up the mapping
1956  CSeq_loc new_loc(*(const_cast<CSeq_id *>(&*new_qid)), 0, len-1, eNa_strand_plus);
1957  CSeq_loc_Mapper mapper(loc, new_loc, &*m_Scope);
1958 
1959  // replace the alignment with the new query
1960  CRef<CSeq_align_set> align_set(new CSeq_align_set());
1961  ITERATE(CSeq_align_set::Tdata, align, results.GetSeqAlign()->Get()) {
1962  CRef<CSeq_align> new_align = mapper.Map(**align, 0);
1963  align_set->Set().push_back(new_align);
1964  }
1965  results.SetSeqAlign().Reset(&*align_set);
1966 
1967  // reverse IgAnnotations
      // m_GeneInfo is handled as three (start, end) pairs: the pair is
      // swapped and mirrored with `len - x`, whereas the single positions in
      // m_DomainInfo and m_FrameInfo below use `len - 1 - x`.
      // NOTE(review): the differing offsets suggest m_GeneInfo ends are
      // exclusive - confirm against CIgAnnotation.
1968  CRef<CIgAnnotation> &annots = results.SetIgAnnotation();
1969  for (int i=0; i<6; i+=2) {
1970  int start = annots->m_GeneInfo[i];
1971  if (start >= 0) {
1972  annots->m_GeneInfo[i] = len - annots->m_GeneInfo[i+1];
1973  annots->m_GeneInfo[i+1] = len - start;
1974  }
1975  }
1976 
      // Negative entries are left untouched; mirrored domain positions are
      // clamped at 0.
1977  for (int i=0; i<12; ++i) {
1978  int pos = annots->m_DomainInfo[i];
1979  if (pos >= 0) {
1980  annots->m_DomainInfo[i] = max(0, len - 1 - pos);
1981  }
1982  }
1983 
1984  for (int i=0; i<3; ++i) {
1985  int pos = annots->m_FrameInfo[i];
1986  if (pos >= 0) {
1987  annots->m_FrameInfo[i] = len -1 - pos;
1988  }
1989  }
1990 }
1991 
/// Format the results of a PHI-BLAST search and write them to m_Outfile.
///
/// Visible flow: structured and tabular reports are delegated per result to
/// x_PrintStructuredReport / x_PrintTabularReport. Error and warning strings
/// are written to m_Outfile, and the first result with errors ends the call.
/// For text output the query preamble and the pattern information of the
/// first result are printed, followed by the one-line summaries of every
/// result, one alignment section per pattern occurrence and the footer of
/// the first result.
///
/// @param result_set   Results, indexed by pattern occurrence in the
///                     alignment section
/// @param itr_num      Iteration number; numeric_limits<unsigned int>::max()
///                     suppresses the round header
/// @param prev_seqids  Sequence IDs passed on to x_DisplayDeflines
///
/// NOTE(review): many listing lines are absent from this extract (for
/// example 1994 with the `queries` parameter, 2007, 2010-2017, 2035-2037,
/// 2043, 2045, 2048, 2060, 2067, 2071, 2073, 2076, 2118, 2130, 2161-2162,
/// 2165-2166, 2169-2171); `flags` and `feature_info` are declared on lines
/// that are not visible here.
1992 void
1993 CBlastFormat::PrintPhiResult(const blast::CSearchResultSet& result_set,
1995  unsigned int itr_num
1996  /* = numeric_limits<unsigned int>::max() */,
1997  blast::CPsiBlastIterationState::TSeqIds prev_seqids
1998  /* = CPsiBlastIterationState::TSeqIds() */)
1999 {
2000  // For remote searches, we don't retrieve the sequence data for the query
2001  // sequence when initially sending the request to the BLAST server (if it's
2002  // a GI/accession/TI), so we flush the scope so that it can be retrieved
2003  // (needed if a self-hit is found) again. This is not applicable if the
2004  // query sequence(s) are specified as FASTA (will be identified by local
2005  // IDs).
2006  if (m_IsRemoteSearch && !s_HasLocalIDs(queries)) {
2008  }
2009 
2018  {
2019  ITERATE(CSearchResultSet, result, result_set) {
2020  x_PrintStructuredReport(**result, queries);
2021  }
2022  return;
2023  }
2024 
      // Unlike PrintOneResultSet, errors and warnings go to the report
      // stream itself rather than to ERR_POST.
2025  ITERATE(CSearchResultSet, result, result_set) {
2026  if ((**result).HasErrors()) {
2027  m_Outfile << "\n" << (**result).GetErrorStrings() << "\n";
2028  return; // errors are deemed fatal
2029  }
2030  if ((**result).HasWarnings()) {
2031  m_Outfile << "\n" << (**result).GetWarningStrings() << "\n";
2032  }
2033  }
2034 
2038  ITERATE(CSearchResultSet, result, result_set) {
2039  x_PrintTabularReport(**result, itr_num);
2040  }
2041  return;
2042  }
2044  string reportCaption = "Tax BLAST report";
2046  m_Outfile << reportCaption;
2047  ITERATE(CSearchResultSet, result, result_set) {
2049  }
2050  return;
2051  }
2052 
2053  const CSearchResults& first_results = result_set[0];
2054 
2055  if (itr_num != numeric_limits<unsigned int>::max()) {
2056  m_Outfile << "Results from round " << itr_num << "\n";
2057  }
2058 
2059  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*first_results.GetSeqId(),
2061  CConstRef<CBioseq> bioseq = bhandle.GetBioseqCore();
2062 
2063  // print the preamble for this query
2064 
2065  m_Outfile << "\n\n";
2066  CBlastFormatUtil::AcknowledgeBlastQuery(*bioseq, kFormatLineLength,
2068  m_IsHTML, false,
2069  first_results.GetRID());
2070 
      // NOTE(review): this is a second "Tax BLAST report" section; its
      // condition (listing line 2071) is missing here, so whether it merely
      // duplicates the one above cannot be judged from this extract.
2072  string reportCaption = "Tax BLAST report";
2074  m_Outfile << reportCaption;
2075  ITERATE(CSearchResultSet, result, result_set) {
2077  }
2078  return;
2079  }
2080 
2081  const SPHIQueryInfo *phi_query_info = first_results.GetPhiQueryInfo();
2082 
      // Collect the offset of every pattern occurrence for the summary.
2083  if (phi_query_info)
2084  {
2085  vector<int> offsets;
2086  for (int index=0; index<phi_query_info->num_patterns; index++)
2087  offsets.push_back(phi_query_info->occurrences[index].offset);
2088 
2089  CBlastFormatUtil::PrintPhiInfo(phi_query_info->num_patterns,
2090  string(phi_query_info->pattern),
2091  phi_query_info->probability,
2092  offsets, m_Outfile);
2093  }
2094 
2095  // quit early if there are no hits
2096  if ( !first_results.HasAlignments() ) {
2097  m_Outfile << "\n\n"
2098  << "***** " << CBlastFormatUtil::kNoHitsFound << " *****" << "\n"
2099  << "\n\n";
2100  x_PrintOneQueryFooter(*first_results.GetAncillaryData());
2101  return;
2102  }
2103 
2104  _ASSERT(first_results.HasAlignments());
2105  //-------------------------------------------------
2106 
2107  ITERATE(CSearchResultSet, result, result_set)
2108  {
2109  CConstRef<CSeq_align_set> aln_set = (**result).GetSeqAlign();
2110  x_DisplayDeflines(aln_set, itr_num, prev_seqids);
2111  }
2112 
2113  //-------------------------------------------------
2114  // print the alignments
2115  m_Outfile << "\n";
2116 
2117 
2119  (m_IsBl2Seq && !m_IsDbScan), false);
2120 
2121  if (phi_query_info)
2122  {
2123  SPHIPatternInfo *occurrences = phi_query_info->occurrences;
2124  int index;
      // One alignment section per pattern occurrence; result_set is indexed
      // with the occurrence index.
      // NOTE(review): this assumes result_set holds at least num_patterns
      // entries - confirm against the caller.
2125  for (index=0; index<phi_query_info->num_patterns; index++)
2126  {
2127  list <CDisplaySeqalign::FeatureInfo*> phiblast_pattern;
2128  CSeq_id* id = new CSeq_id;
2129  id->Assign(*(result_set[index]).GetSeqId());
      // Mark the occurrence on the query as a feature drawn with '*',
      // spanning offset .. offset + length - 1.
2131  feature_info->seqloc = new CSeq_loc(*id, (TSeqPos) occurrences[index].offset,
2132  (TSeqPos) (occurrences[index].offset + occurrences[index].length - 1));
2133  feature_info->feature_char = '*';
2134  feature_info->feature_id = "pattern";
2135  phiblast_pattern.push_back(feature_info);
2136 
      // Occurrence number and position are printed 1-based.
2137  m_Outfile << "\nSignificant alignments for pattern occurrence " << index+1
2138  << " at position " << 1+occurrences[index].offset << "\n\n";
2139 
2140  TMaskedQueryRegions masklocs;
2141  result_set[index].GetMaskedQueryRegions(masklocs);
2142  CConstRef<CSeq_align_set> aln_set = result_set[index].GetSeqAlign();
2143  CSeq_align_set copy_aln_set;
2144  CBlastFormatUtil::PruneSeqalign(*aln_set, copy_aln_set, m_NumAlignments);
2145 
2146  CDisplaySeqalign display(copy_aln_set, *m_Scope, &masklocs, &phiblast_pattern,
2147  m_MatrixName);
2148 
2149  display.SetDbName(m_DbName);
2150  display.SetDbType(!m_DbIsAA);
2151  display.SetLineLen(m_LineLength);
2152 
2153  // set the alignment flags
2154  display.SetAlignOption(flags);
2155 
2156  if (m_LongSeqId) {
2157  display.UseLongSequenceIds();
2158  }
2159 
2160  if (m_Program == "blastn" || m_Program == "megablast") {
2163  }
2164  else {
2167  }
2168 
2172  display.DisplaySeqalign(m_Outfile);
2173  m_Outfile << "\n";
2174 
      // The feature objects in the list are raw pointers and are released
      // here, after the display has been written.
2175  NON_CONST_ITERATE(list<CDisplaySeqalign::FeatureInfo*>, itr, phiblast_pattern) {
2176  delete *itr;
2177  }
2178  }
2179  }
2180 
2181  // print the ancillary data for this query
2182 
2183  x_PrintOneQueryFooter(*first_results.GetAncillaryData());
2184 }
2185 
2186 
2187 
/// Write the closing part of the report.
///
/// Visible flow: accumulated results are flushed through x_WriteXML2 and the
/// accumulators are cleared. Formats at or above CFormattingArgs::eTabular
/// get no footer. For text output the database report(s), the scoring
/// matrix, the gap penalties (gapped searches only), the word threshold and
/// the multiple-hit window are printed, followed by kHTML_Suffix when
/// m_IsHTML is set.
///
/// @param options  Search options; the visible lines read the match reward,
///                 mismatch penalty, matrix name, gapped mode, gap costs,
///                 word threshold and window size from it
///
/// NOTE(review): the conditions that select between the format-specific
/// sections (listing lines 2191-2192, 2196-2197, 2201, 2208-2210, 2213,
/// 2218-2220, 2227-2228) are absent from this extract, so the braces shown
/// below do not pair up as printed.
2188 void
2189 CBlastFormat::PrintEpilog(const blast::CBlastOptions& options)
2190 {
2193  if(!m_AccumulatedResults.empty()) {
2194  CRef <CBlastSearchQuery> q = m_AccumulatedQueries->GetBlastSearchQuery(0);
2195  if(m_IsBl2Seq) {
2198  x_WriteXML2(report_data);
2199  }
2200  else if(m_IsIterative){
2202  m_Scope, m_DbInfo);
2203  x_WriteXML2(report_data);
2204  }
2205  m_AccumulatedResults.clear();
2206  m_AccumulatedQueries->clear();
2207  }
2211  }
2212  else {
2214  }
2215  return;
2216  }
2217 
2221  return;
2222  } else if (m_FormatType >= CFormattingArgs::eTabular)
2223  return; // No footer for these.
2224 
2225  // Most of XML is printed as it's finished.
2226  // the epilog closes the report.
2229  m_AccumulatedResults.clear();
2230  m_AccumulatedQueries->clear();
2231  return;
2232  }
2233 
2234  m_Outfile << NcbiEndl << NcbiEndl;
      // deltablast additionally reports its domain database, when one is set.
2235  if (m_Program == "deltablast" && !m_DomainDbInfo.empty()) {
2236  m_Outfile << "Conserved Domain";
2237  CBlastFormatUtil::PrintDbReport(m_DomainDbInfo, kFormatLineLength,
2238  m_Outfile, false);
2239  }
2240 
2241  if ( !m_IsBl2Seq || m_IsDbScan) {
2242  CBlastFormatUtil::PrintDbReport(m_DbInfo, kFormatLineLength,
2243  m_Outfile, false);
2244  }
2245 
      // Nucleotide programs report reward/penalty instead of a matrix name.
2246  if (m_Program == "blastn" || m_Program == "megablast") {
2247  m_Outfile << "\n\nMatrix: " << "blastn matrix " <<
2248  options.GetMatchReward() << " " <<
2249  options.GetMismatchPenalty() << "\n";
2250  }
2251  else {
2252  m_Outfile << "\n\nMatrix: " << options.GetMatrixName() << "\n";
2253  }
2254 
2255  if (options.GetGappedMode() == true) {
2256  double gap_extension = (double) options.GetGapExtensionCost();
      // NOTE(review): the original comment below says "both gap values", but
      // only the extension cost is tested against zero here.
2257  if ((m_Program == "megablast" || m_Program == "blastn") && options.GetGapExtensionCost() == 0)
2258  { // Formula from PMID 10890397 applies if both gap values are zero.
2259  gap_extension = -2*options.GetMismatchPenalty() + options.GetMatchReward();
2260  gap_extension /= 2.0;
2261  }
2262  m_Outfile << "Gap Penalties: Existence: "
2263  << options.GetGapOpeningCost() << ", Extension: "
2264  << gap_extension << "\n";
2265  }
      // Threshold and window size are printed only when non-zero.
2266  if (options.GetWordThreshold()) {
2267  m_Outfile << "Neighboring words threshold: " <<
2268  options.GetWordThreshold() << "\n";
2269  }
2270  if (options.GetWindowSize()) {
2271  m_Outfile << "Window for multiple hits: " <<
2272  options.GetWindowSize() << "\n";
2273  }
2274 
2275  if (m_IsHTML) {
2276  m_Outfile << kHTML_Suffix << "\n";
2277  }
2278 }
2279 
2281 {
      // Body of the routine that resets the scope between queries; it returns
      // immediately for BLAST2Sequences runs.
      // NOTE(review): the signature line (listing line 2280) and listing
      // lines 2295 and 2297 (the condition and the statement of the final
      // block) are absent from this extract.
2282  // Do not reset the scope for BLAST2Sequences or else we'll lose the
2283  // sequence data! (see x_CreateSubjectBioseq)
2284  if (m_IsBl2Seq){
2285  return;
2286  }
2287 
2288  // Our current XML/ASN.1 libraries do not have provisions for
2289  // incremental object input/output, so with XML output format we
2290  // need to accumulate the whole document before writing any data.
2291 
2292  // This means that XML output requires more memory than other
2293  // output formats.
2294 
2296  {
2298  }
2299 }
2300 
// Build the base name used for numbered XML2/JSON part files.
//
// baseFile is split with CDirEntry::SplitPath into directory, base name and
// extension. The extension is dropped only when it matches the requested
// format (".xml" when isXML is true, ".json" when isXML is false, compared
// case-insensitively); any other extension is appended back onto the base.
//
// @param baseFile  path to derive the name from
// @param isXML     true to strip a ".xml" extension, false to strip ".json"
// @param withPath  true to prefix the result with the directory part
// @return          directory + base when withPath is true, otherwise base only
2301 static string s_GetBaseName(const string & baseFile, bool isXML, bool withPath)
2302 {
2303  string dir = kEmptyStr;
2304  string base = kEmptyStr;
2305  string ext = kEmptyStr;
      // The directory component is only requested when the caller wants it.
2306  CDirEntry::SplitPath(baseFile, withPath ? &dir:NULL, &base, &ext );
      // Keep the extension unless it is the one belonging to the output format.
2307  if(!((isXML && NStr::CompareNocase(ext, ".xml") == 0 ) ||
2308  (!isXML && NStr::CompareNocase(ext, ".json") == 0))){
2309  base += ext;
2310  }
2311  if(withPath)
2312  return dir + base;
2313 
2314  return base;
2315 }
2316 
2318 {
      // Writes one XML2/JSON report, either to m_Outfile or to a numbered
      // part file derived from m_BaseFile.
      // NOTE(review): the signature (listing line 2317) and the first branch
      // condition (line 2319) are absent from this extract; the first branch
      // presumably handles the single-file XML2 format -- confirm.
2320  BlastXML2_FormatReport(&report_data, &m_Outfile);
2321  }
2322  else if (m_FormatType == CFormattingArgs::eJson_S) {
      // Single-file JSON: reports after the first are separated by a comma.
2323  m_XMLFileCount++;
2324  if(m_XMLFileCount > 1) {
2325  m_Outfile << ",\n";
2326  }
2327  BlastJSON_FormatReport(&report_data, &m_Outfile);
2328  }
2329  else {
      // Multi-file output: each report goes to "<base>_<count>.xml" or
      // "<base>_<count>.json"; m_XMLFileCount is the running part number.
2330  m_XMLFileCount++;
2331 
      // NOTE(review): listing line 2332 (the condition choosing XML over
      // JSON) is absent from this extract.
2333  string file_name = s_GetBaseName(m_BaseFile, true, true) + "_" + NStr::IntToString(m_XMLFileCount) + ".xml";
2334  BlastXML2_FormatReport(&report_data, file_name);
2335  }
2336  else {
2337  string file_name = s_GetBaseName(m_BaseFile, false, true) + "_" + NStr::IntToString(m_XMLFileCount) + ".json";
2338  BlastJSON_FormatReport(&report_data, file_name);
2339  }
2340  }
2341 }
2342 
// Emit the XML2/JSON report for one query's results.
//
// For iterative or BLAST2Sequences runs, results that share a query id are
// accumulated in m_AccumulatedResults / m_AccumulatedQueries and written as a
// single report once a result for a different query id arrives. Otherwise the
// matching query is looked up and the report is written immediately.
//
// NOTE(review): the second signature line (listing line 2344, the `queries`
// parameter) and several report_data constructor lines are absent from this
// extract.
2343 void CBlastFormat::x_PrintXML2Report(const blast::CSearchResults& results,
2345 {
      // NOTE(review): the const_cast lets TrimSeqAlign modify the caller's
      // results object, and the CRef assumes that object is managed by
      // reference counting -- confirm against callers.
2346  CRef<CSearchResults> res(const_cast<CSearchResults*>(&results));
2347  res->TrimSeqAlign(m_HitlistSize);
2348  if((m_IsIterative) || (m_IsBl2Seq)) {
2349  if(m_AccumulatedResults.empty()) {
      // First result: start a new accumulation group for this query.
2350  _ASSERT(m_AccumulatedQueries->size() == 0);
2351  m_AccumulatedResults.push_back(res);
2352  CConstRef<CSeq_id> query_id = results.GetSeqId();
2353  ITERATE(CBlastQueryVector, itr, *queries) {
2354  if (query_id->Match(*(*itr)->GetQueryId())) {
2355  m_AccumulatedQueries->push_back(*itr);
2356  break;
2357  }
2358  }
2359  }
2360  else {
2361  CConstRef<CSeq_id> query_id = results.GetSeqId();
2362  if(m_AccumulatedResults[0].GetSeqId()->Match(*query_id)) {
      // Same query as the current group: keep accumulating.
2363  m_AccumulatedResults.push_back(res);
2364  }
2365  else {
      // Different query: write out the accumulated group, then start a
      // new group with this result.
2366  CRef <CBlastSearchQuery> q = m_AccumulatedQueries->GetBlastSearchQuery(0);
2367  if(m_IsBl2Seq) {
2370  x_WriteXML2(report_data);
2371  }
2372  else {
2374  m_Scope, m_DbInfo);
2375  x_WriteXML2(report_data);
2376  }
2377  m_AccumulatedResults.clear();
2378  m_AccumulatedQueries->clear();
2379 
2380  m_AccumulatedResults.push_back(res);
2381  ITERATE(CBlastQueryVector, itr, *queries) {
2382  if (query_id->Match(*(*itr)->GetQueryId())) {
2383  m_AccumulatedQueries->push_back(*itr);
2384  break;
2385  }
2386  }
2387  }
2388  }
2389  }
2390  else {
      // Non-accumulating case: find the query whose id matches these
      // results and write its report right away.
2392  CConstRef<CSeq_id> query_id = results.GetSeqId();
2393  ITERATE(CBlastQueryVector, itr, *queries) {
2394  if (query_id->Match(*(*itr)->GetQueryId())) {
2395  q = *itr;
2396  break;
2397  }
2398  }
2399  CCmdLineBlastXML2ReportData report_data (q, *res, m_Options, m_Scope, m_DbInfo);
2400  x_WriteXML2(report_data);
2401  }
2402 }
2403 
2405 {
      // NOTE(review): the signature (listing line 2404) and the guard
      // condition (line 2406) are absent from this extract; the early-return
      // branch below presumably applies to the single-file XML2 format, where
      // only the closing root tag remains to be written -- confirm.
2407  m_Outfile << "</BlastXML2>\n";
2408  return;
2409  }
2410 
      // Multi-file case: write a master document that pulls in every numbered
      // part file through XInclude.
2411  m_Outfile << "<?xml version=\"1.0\"?>\n<BlastXML2\n"
2412  "xmlns=\"http://www.ncbi.nlm.nih.gov\"\n"
2413  "xmlns:xi=\"http://www.w3.org/2003/XInclude\"\n"
2414  "xmlns:xs=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
2415  "xs:schemaLocation=\"http://www.ncbi.nlm.nih.gov http://www.ncbi.nlm.nih.gov/data_specs/schema_alt/NCBI_BlastOutput2.xsd\">\n";
2416 
      // Part files are referenced by bare name (withPath == false); the
      // numbering runs from 1 to m_XMLFileCount.
2417  string base = s_GetBaseName(m_BaseFile, true, false);
2418  for(int i = 1; i <= m_XMLFileCount; i ++) {
2419  string file_name = base + "_" + NStr::IntToString(i) + ".xml";
2420  m_Outfile << "\t<xi:include href=\"" + file_name + "\"/>\n";
2421  }
2422  m_Outfile << "</BlastXML2>\n";
2423 }
2424 
2426 {
      // NOTE(review): the signature (listing line 2425) and the guard
      // condition (line 2427) are absent from this extract; the early-return
      // branch below presumably applies to the single-file JSON format, where
      // only the closing of the array and object remains to be written.
2428  m_Outfile << "]\n}\n";
2429  return;
2430  }
2431 
      // Multi-file case: write a master JSON document listing every numbered
      // part file.
2432  m_Outfile << "{\n\t\"BlastJSON\": [\n";
2433 
      // The JSON part files are named with isXML == false when they are
      // written, so the same flag must be used here. With isXML == true a
      // base file ending in ".json" kept its extension (and one ending in
      // ".xml" lost it), so the master file listed names that did not match
      // the files on disk.
2434  string base = s_GetBaseName(m_BaseFile, false, false);
2435  for(int i = 1; i <= m_XMLFileCount; i ++) {
2436  string file_name = base + "_" + NStr::IntToString(i) + ".json";
2437  m_Outfile << "\t\t{\"File\": \"" + file_name + "\" }";
      // No trailing comma after the last entry.
2438  if(i != m_XMLFileCount)
2439  m_Outfile << ",";
2440  m_Outfile << "\n";
2441  }
2442  m_Outfile << "\t]\n}";
2443 }
2444 
2446 {
      // Describe the producing program for the SAM output: id "0", the BLAST
      // version string, the command line, and the program name.
2447  CSAM_Formatter::SProgramInfo pg("0", blast::CBlastVersion().Print(), m_Cmdline);
2448  pg.m_Name = m_Program;
      // NOTE(review): the signature (listing line 2445) and lines 2449-2450
      // are absent from this extract; presumably they create the SAM
      // formatter using pg -- confirm against the repository.
2451 }
2452 
2454 {
      // Returns true when `program` is one of blastp, tblastn, psiblast,
      // psitblastn, rpsblast, rpstblastn, blastx or deltablast; false for
      // every other program.
      // NOTE(review): the signature (listing line 2453) is absent from this
      // extract.
2455  if (program == eBlastp || program == eTblastn ||
2456  program == ePSIBlast || program == ePSITblastn ||
2457  program == eRPSBlast || program == eRPSTblastn ||
2458  program == eBlastx || program == eDeltaBlast) {
2459  return true;
2460  }
2461  return false;
2462 }
2463 
2465 {
      // Adds search parameters and subject/database statistics to the usage
      // report. Nothing is recorded unless report.IsEnabled() is true.
      // NOTE(review): the signature (listing line 2464) and a number of
      // interior lines are absent from this extract (gaps in the listing
      // numbers mark where).
2466  if (report.IsEnabled()) {
2468  EProgram task = m_Options->GetProgram();
2469  report.AddParam(CBlastUsageReport::eEvalueThreshold, m_Options->GetEvalueThreshold());
2470  report.AddParam(CBlastUsageReport::eHitListSize, m_Options->GetHitlistSize());
2472 
      // Composition-based statistics are reported only for the programs
      // accepted by s_SetCompBasedStats.
2473  if (s_SetCompBasedStats(task)) {
2474  report.AddParam(CBlastUsageReport::eCompBasedStats, m_Options->GetCompositionBasedStats());
2475  }
2476 
      // Total sequence count over all database entries.
      // NOTE(review): accumulated in an int; could overflow if number_seqs is
      // a wider type and the total is large -- confirm.
2477  int num_seqs = 0;
2478  for (size_t i = 0; i < m_DbInfo.size(); i++) {
2479  num_seqs += m_DbInfo[i].number_seqs;
2480  }
2481  if( m_IsBl2Seq) {
2482  report.AddParam(CBlastUsageReport::eBl2seq, "true");
2483  if (m_IsDbScan) {
2484  report.AddParam(CBlastUsageReport::eNumSubjects, num_seqs);
2486  }
2487  else if (m_SeqInfoSrc.NotEmpty()){
      // Subject count and summed subject length come from m_SeqInfoSrc.
      // NOTE(review): the length sum is narrowed to int -- confirm this
      // cannot overflow for large subject sets.
2488  report.AddParam(CBlastUsageReport::eNumSubjects, (int) m_SeqInfoSrc->Size());
2489  int total_subj_length = 0;
2490  for (size_t i = 0; i < m_SeqInfoSrc->Size(); i++) {
2491  total_subj_length += (int)m_SeqInfoSrc->GetLength(static_cast<Uint4>(i));
2492  }
2493  report.AddParam(CBlastUsageReport::eSubjectsLength, total_subj_length);
2494  }
2495  }
2496  else {
      // Database search: report the database name without its directory
      // part, limited to 500 characters.
2497  string dir = kEmptyStr;
2498  CFile::SplitPath(m_DbName, &dir);
2499  string db_name = m_DbName;
2500  if (dir != kEmptyStr) {
2501  db_name = m_DbName.substr(dir.length());
2502  }
2503 
2504  if (db_name.size() > 500) {
2505  db_name.resize(500);
2507  }
2508  report.AddParam(CBlastUsageReport::eDBName, db_name);
2510  report.AddParam(CBlastUsageReport::eDBNumSeqs, num_seqs);
      // NOTE(review): m_DbInfo[0] is read without an emptiness check;
      // presumably m_DbInfo is always populated for database searches --
      // confirm.
2511  report.AddParam(CBlastUsageReport::eDBDate, m_DbInfo[0].date);
2512  if(m_SearchDb.NotEmpty()){
      // Record which kinds of id restriction lists are in use.
2513  if(m_SearchDb->GetGiList().NotEmpty()) {
2514  CRef<CSeqDBGiList> l = m_SearchDb->GetGiList();
2515  if (l->GetNumGis()) {
2516  report.AddParam(CBlastUsageReport::eGIList, true);
2517  }
2518  if (l->GetNumSis()){
2520  }
2521  if (l->GetNumTaxIds()){
2523  }
2524  if (l->GetNumPigs()) {
2525  report.AddParam(CBlastUsageReport::eIPGList, true);
2526  }
2527  }
      // Same checks for the negative (exclusion) list.
2528  if(m_SearchDb->GetNegativeGiList().NotEmpty()) {
2529  CRef<CSeqDBGiList> l = m_SearchDb->GetNegativeGiList();
2530  if (l->GetNumGis()) {
2532  }
2533  if (l->GetNumSis()){
2535  }
2536  if (l->GetNumTaxIds()){
2538  }
2539  if (l->GetNumPigs()) {
2541  }
2542  }
2543  }
2544  }
2545  }
2546 }
static CRef< CScope > m_Scope
Produce formatted blast output for command line applications.
Formatting of pairwise sequence alignments in XML form.
ESubjectMaskingType
Define the possible subject masking types.
Definition: blast_def.h:235
@ eNoSubjMasking
Definition: blast_def.h:236
bool s_SetCompBasedStats(EProgram program)
int s_SetFlags(string &program, blast::CFormattingArgs::EOutputFormat format_type, bool html, bool showgi, bool isbl2seq, bool disableKAStats)
static const string kHTML_Suffix
USING_SCOPE(blast)
static string s_GetBaseName(const string &baseFile, bool isXML, bool withPath)
static void s_SetCloneInfo(const CIgBlastTabularInfo &tabinfo, const CBioseq_Handle &handle, CBlastFormat::SClone &clone_info)
static string s_GetMolType(const CBioseq_Handle &bioseqHandle)
static const string kHTML_Prefix
bool s_IsGlobalSeqAlign(CConstRef< objects::CSeq_align_set > seqalign_set)
USING_NCBI_SCOPE
static bool s_HasLocalIDs(CConstRef< CBlastQueryVector > queries)
Auxiliary function to determine if there are local IDs in the identifiers of the query sequences.
Boolean Blast_SubjectIsNucleotide(EBlastProgramType p)
Returns true if the subject is nucleotide.
Definition: blast_program.c:53
@ eBlastTypePsiBlast
Definition: blast_program.h:82
Int4 BlastSeqSrcGetNumSeqs(const BlastSeqSrc *seq_src)
Get the number of sequences contained in the sequence source.
Definition: blast_seqsrc.c:177
Int8 BlastSeqSrcGetTotLen(const BlastSeqSrc *seq_src)
Get the total length of all sequences in the sequence source.
Definition: blast_seqsrc.c:219
Definitions and prototypes used by blast_stat.c to calculate BLAST statistics.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Definition: blast_types.hpp:56
@ eRPSBlast
protein-pssm (reverse-position-specific BLAST)
Definition: blast_types.hpp:63
@ eBlastp
Protein-Protein.
Definition: blast_types.hpp:59
@ ePSIBlast
PSI Blast.
Definition: blast_types.hpp:67
@ eTblastn
Protein-Translated nucl.
Definition: blast_types.hpp:61
@ eMegablast
Nucl-Nucl (traditional megablast)
Definition: blast_types.hpp:65
@ eDeltaBlast
Delta Blast.
Definition: blast_types.hpp:71
@ ePSITblastn
PSI Tblastn.
Definition: blast_types.hpp:68
@ eDiscMegablast
Nucl-Nucl using discontiguous megablast.
Definition: blast_types.hpp:66
@ eRPSTblastn
nucleotide-pssm (RPS blast with translated query)
Definition: blast_types.hpp:64
@ eBlastx
Translated nucl-Protein.
Definition: blast_types.hpp:60
build_archive declarations
static void GetAsciiProteinMatrix(const char *matrix_name, CNcbiMatrix< int > &retval)
Retrieve a scoring matrix for the provided matrix name.
@ eAddEOLAtLineEnd
add EOL at the beginning of the string
@ eAddEOLAtLineStart
add spaces at the end of the string
static string AddSpaces(string paramVal, size_t maxParamLength, int spacesFormatFlag=eSpacePosToCenter)
Calculate the number of spaces and add them to paramVal.
static string MapTemplate(string inpString, string tmplParamName, Int8 templParamVal)
Replace template tags by real data.
static void ExtractSeqAlignForSeqList(CRef< objects::CSeq_align_set > &all_aln_set, string alignSeqList)
Extract the seq_align_set corresponding to the seqid list.
CBioseq_Handle –.
void LogBlastSearchInfo(blast::CBlastUsageReport &report)
void x_PrintOneQueryFooter(const blast::CBlastAncillaryData &summary)
Output the ancillary data for one query that was searched.
size_t m_LineLength
TSeqRange m_QueryRange
blast::CFormattingArgs::EOutputFormat m_FormatType
Format type.
string m_SubjectTag
Tag for subject sequences (e.g., name of input file)
void x_CreateDeflinesJson(CConstRef< CSeq_align_set > aln_set)
bool m_IsVdb
Flag indicate vdb search.
CConstRef< blast::CIgBlastOptions > m_IgOptions
Used by Igblast formatting.
string m_CustomOutputFormatSpec
The custom output format specification.
vector< CBlastFormatUtil::SDbInfo > m_DomainDbInfo
Information about DELTA-BLAST domain database.
void x_DisplayDeflinesWithTemplates(CConstRef< CSeq_align_set > aln_set)
bool m_IsUngappedSearch
true if the search was ungapped
void x_WriteXML2(CCmdLineBlastXML2ReportData &report_data)
void PrintOneResultSet(const blast::CSearchResults &results, CConstRef< blast::CBlastQueryVector > queries, unsigned int itr_num=numeric_limits< unsigned int >::max(), blast::CPsiBlastIterationState::TSeqIds prev_seqids=blast::CPsiBlastIterationState::TSeqIds(), bool is_deltablast_domain_result=false)
Print all alignment information for a single query sequence along with any errors or warnings (errors...
CRef< SBlastXMLIncremental > m_BlastXMLIncremental
Structure for incremental XML BLAST output.
CRef< blast::IBlastSeqInfoSrc > m_SeqInfoSrc
Used to retrieve subject sequence information.
CRef< blast::CSearchDatabase > m_SearchDb
CRef< CScope > m_Scope
Scope containing query and subject sequences.
bool m_IsBl2Seq
True if we are formatting for BLAST2Sequences.
void PrintReport(const blast::CSearchResults &results, CBlastFormat::DisplayOption displayOption)
Print Metadata in json format or descriptions in html format or alignments in html format app....
CConstRef< objects::CBioseq > x_CreateSubjectBioseq()
Creates a bioseq to be able to print the acknowledgement for the subject bioseq when formatting bl2se...
CRef< objects::CSeq_annot > x_WrapAlignmentInSeqAnnot(CConstRef< objects::CSeq_align_set > alnset, const string &db_title) const
Wrap the Seq-align-set to be printed in a Seq-annot (as the C toolkit binaries)
void x_ConfigCShowBlastDefline(align_format::CShowBlastDefline &showdef, int skip_from=-1, int skip_to=-1, int index=-1, int num_descriptions_to_show=-1)
Configure the CShowBlastDefline instance passed to it.
const char * m_MatrixName
name of scoring matrix
bool m_IsRemoteSearch
True if this object is formatting the results of a remote search.
void PrintEpilog(const blast::CBlastOptions &options)
Print the footer of the blast report.
void x_InitAlignTemplates(void)
void x_ReverseQuery(blast::CIgBlastResults &results)
Replace the query with its reverse complement.
CBlastFormat(const blast::CBlastOptions &opts, blast::CLocalDbAdapter &db_adapter, blast::CFormattingArgs::EOutputFormat format_type, bool believe_query, CNcbiOstream &outfile, int num_summary, int num_alignments, CScope &scope, const char *matrix_name=BLAST_DEFAULT_MATRIX, bool show_gi=false, bool is_html=false, int qgencode=BLAST_GENETIC_CODE, int dbgencode=BLAST_GENETIC_CODE, bool use_sum_statistics=false, bool is_remote_search=false, int dbfilt_algorithm=-1, const string &custom_output_format=kEmptyStr, bool is_megablast=false, bool is_indexed=false, const blast::CIgBlastOptions *ig_opts=NULL, const blast::CLocalDbAdapter *domain_db_adapter=NULL, const string &cmdline=kEmptyStr, const string &subjectTag=kEmptyStr)
Constructor.
void x_DisplayAlignsWithTemplates(CConstRef< CSeq_align_set > aln_set, const blast::CSearchResults &results)
int m_DbGenCode
database genetic code
int m_NumAlignments
number of database sequences to present alignments for.
void ResetScopeHistory()
Resets the scope history for some output formats.
bool m_DbIsAA
true if database has protein sequences
void x_PrintStructuredReport(const blast::CSearchResults &results, CConstRef< blast::CBlastQueryVector > queries)
Prints XML and both species of ASN.1.
void x_DisplayDeflines(CConstRef< CSeq_align_set > aln_set, unsigned int itr_num, blast::CPsiBlastIterationState::TSeqIds &prev_seqids, int additional=0, int index=-1, int defline_length=-1)
Display the BLAST deflines in the traditional BLAST report.
void x_GenerateXML2MasterFile(void)
bool m_ShowGi
add GI number of database sequence IDs
CShowBlastDefline::SDeflineTemplates * m_DeflineTemplates
CConstRef< blast::CBlastOptions > m_Options
BLAST options.
DisplayOption
Display options for blast_report.
@ eDescriptionsWithTemplates
int m_HitlistSize
number of database sequences to save results for
string m_Program
blast program
int m_QueryGenCode
query genetic code
int m_QueriesFormatted
Used to count number of searches formatted.
void x_PrintXML2Report(const blast::CSearchResults &results, CConstRef< blast::CBlastQueryVector > queries)
CNcbiOstream & m_Outfile
stream to receive output
vector< CBlastFormatUtil::SDbInfo > m_DbInfo
internal representation of database information
bool x_IsVdbSearch() const
static void PrintArchive(CRef< objects::CBlast4_archive > archive, CNcbiOstream &out)
Auxiliary function to print the BLAST Archive in multiple output formats.
void WriteArchive(blast::IQueryFactory &queries, blast::CBlastOptionsHandle &options_handle, const blast::CSearchResultSet &results, unsigned int num_iters=0, const list< CRef< objects::CBlast4_error > > &msg=list< CRef< objects::CBlast4_error > >())
Writes out the query and results as an "archive" format.
~CBlastFormat()
Class destructor.
int m_NumSummary
number of 1-line summaries
bool m_IsHTML
true if HTML output desired
bool m_DisableKAStats
Modify format to accommodate results that do not contain KA stats such as bitscores and evalues.
bool m_Megablast
true if megablast was used.
CNcbiMatrix< int > m_ScoringMatrix
Scoring matrix instantiated from m_MatrixName.
CDisplaySeqalign::SAlignTemplates * m_AlignTemplates
void x_InitDeflineTemplates(void)
string m_DbName
name of blast database
bool m_IndexedMegablast
true if indexed megablast was used.
void x_PrintTaxReport(const blast::CSearchResults &results)
void x_GenerateJSONMasterFile(void)
bool m_LongSeqId
If true, print long sequence ids (database|accession)
string m_AlignSeqList
void x_InitSAMFormatter()
CRef< blast::CBlastQueryVector > m_AccumulatedQueries
Queries are required for XML format only.
@ kFormatLineLength
The line length of pairwise blast output.
bool m_ShowLinkedSetSize
If the output format supports 1-line summaries, the search is ungapped and the alignments have had HS...
string m_CustomDelim
Int8 GetDbTotalLength()
Get total length of the database.
IOS_BASE::iostate m_OrigExceptionMask
bool m_BelieveQuery
true if query sequence IDs are parsed
void PrintProlog()
Print the header of the blast report.
void x_PrintAirrRearrangement(const blast::CIgBlastResults &results, SClone &clone_info, bool fill_clone_info, bool print_airr_format_header)
Prints AirrRearrangement format.
unique_ptr< CBlast_SAM_Formatter > m_SamFormatter
Pointer to the SAM formatting object.
bool m_IsDbScan
True if it is m_IsBl2Seq in dbscan mode.
void x_PrintIgTabularReport(const blast::CIgBlastResults &results, SClone &clone_info, bool fill_clone_info)
Prints IgTabular report for one query.
void x_SplitSeqAlign(CConstRef< CSeq_align_set > full_alignment, CSeq_align_set &repeated_seqs, CSeq_align_set &new_seqs, blast::CPsiBlastIterationState::TSeqIds &prev_seqids)
Split the full alignment into two sets of alignments: one for those seen in the previous iteration an...
blast::CSearchResultSet m_AccumulatedResults
Accumulated results to display in XML format.
void x_PrintTabularReport(const blast::CSearchResults &results, unsigned int itr_num)
Prints Tabular report for one query.
void x_SetAlignParameters(CDisplaySeqalign &cds)
void PrintPhiResult(const blast::CSearchResultSet &result_set, CConstRef< blast::CBlastQueryVector > queries, unsigned int itr_num=numeric_limits< unsigned int >::max(), blast::CPsiBlastIterationState::TSeqIds prev_seqids=blast::CPsiBlastIterationState::TSeqIds())
Print all alignment information for aa PHI-BLAST run.
Query Vector.
Definition: sseqloc.hpp:276
Search Query.
Definition: sseqloc.hpp:147
Class containing information needed for tabular formatting of BLAST results.
Definition: tabular.hpp:55
void SetQueryGeneticCode(int q_gc)
Definition: tabular.hpp:184
void SetParseSubjectDefline(bool val)
Should the subject defline be parsed for id or not?
Definition: tabular.hpp:174
void PrintNumProcessed(int num_queries)
Prints number of queries processed.
Definition: tabular.cpp:1280
EFieldDelimiter
What delimiter to use between fields in each row of the tabular output.
Definition: tabular.hpp:66
@ eComma
Comma.
Definition: tabular.hpp:69
void SetCustomDelim(string customDelim)
Definition: tabular.hpp:135
void SetQueryRange(TSeqRange &q_range)
Set query range.
Definition: tabular.hpp:189
void SetParseLocalIds(bool val)
Should local IDs be parsed or not?
Definition: tabular.hpp:170
virtual void Print(void)
Print one line of tabular output.
Definition: tabular.cpp:1094
int SetFields(const objects::CSeq_align &sal, objects::CScope &scope, CNcbiMatrix< int > *matrix=0)
Set all member fields, given a Seq-align.
Definition: tabular.cpp:743
void PrintHeader(const string &program, const objects::CBioseq &bioseq, const string &dbname, const string &rid=kEmptyStr, unsigned int iteration=numeric_limits< unsigned int >::max(), const objects::CSeq_align_set *align_set=0, CConstRef< objects::CBioseq > subj_bioseq=CConstRef< objects::CBioseq >())
Print the tabular output header.
Definition: tabular.cpp:1225
void SetNoFetch(bool nofetch)
Avoid fetching sequence (if possible) If the sequence is needed (e.g., will be formatted,...
Definition: tabular.hpp:502
void SetDbGeneticCode(int db_gc)
Definition: tabular.hpp:185
void AddParam(EUsageParams p, int val)
Strategy class to gather the data for generating BLAST XML output.
Strategy class to gather the data for generating BLAST XML output.
CConstRef –.
Definition: ncbiobj.hpp:1266
Class for computing sequences' titles ("definitions").
@ eShowAlignStatsForMultiAlignView
Definition: showalign.hpp:180
void SetLineLen(size_t len)
number of bases or amino acids per line
Definition: showalign.hpp:305
void SetQueryNumber(int number)
for linking to mapviewer
Definition: showalign.hpp:388
void SetSeqLocColor(SeqLocColorOption option=eBlack)
color for seqloc display such as masked region
Definition: showalign.hpp:298
void SetMasterDomain(list< CRef< DomainInfo > > *domain)
Definition: showalign.hpp:455
void SetMiddleLineStyle(MiddleLineStyle option=eBar)
set middle line style
Definition: showalign.hpp:323
void SetSeqLocChar(SeqLocCharOption option=eX)
character style for seqloc display such as masked region
Definition: showalign.hpp:291
void SetAlignOption(int option)
Set functions.
Definition: showalign.hpp:283
void SetTranslatedFrameForLocalSeq(TranslatedFrameForLocalSeq frame)
Definition: showalign.hpp:191
void DisplaySeqalign(CNcbiOstream &out)
call this to display seqalign
Definition: showalign.cpp:1910
void SetDbType(bool is_na)
database type.
Definition: showalign.hpp:360
void SetResultPositionIndex(int index)
Definition: showalign.hpp:466
void SetDbName(string name)
set blast database name
Definition: showalign.hpp:353
void SetSequencePropertyLabel(const vector< string > *SequencePropertyLabel, EOwnership ownership=eNoOwnership)
Definition: showalign.hpp:460
void SetAlignType(AlignType type)
Needed only if you want to display positives and strand.
Definition: showalign.hpp:346
static CRef< objects::CSeq_align_set > PrepareBlastUngappedSeqalign(const objects::CSeq_align_set &alnset)
static functions Need to call this if the seqalign is stdseg or dendiag for ungapped blast alignment ...
Definition: showalign.cpp:3159
void SetMasterGeneticCode(int code)
Set genetic code for master sequence.
Definition: showalign.hpp:330
void SetSlaveGeneticCode(int code)
Set genetic code for slaves.
Definition: showalign.hpp:337
void SetNumAlignToShow(int num)
Display top num seqalign Note this only limit the number of seqalign regardless of the seqids.
Definition: showalign.hpp:316
void SetSubjectMasks(const TSeqLocInfoVector &masks)
Sets the masks and the masking algorithm used for the subject sequences.
Definition: showalign.cpp:1047
void UseLongSequenceIds(void)
Sets usage of long sequence ids (database|accession)
Definition: showalign.hpp:420
void SetAlignTemplates(SAlignTemplates *alignTemplates)
Definition: showalign.hpp:452
@ eJsonSeqalign
JSON seq-align.
@ eJson
JSON XInclude.
@ eTabular
Tabular output.
@ eXml2
XML2 XInclude.
@ eSAM
SAM format.
@ eCommaSeparatedValues
Comma-separated values.
@ eAsnText
ASN.1 text output.
@ eAirrRearrangement
igblast AIRR rearrangement, 19
@ eXml2_S
XML2 single file.
@ eJson_S
JSON2 single file.
@ eXml
XML output.
@ eAsnBinary
ASN.1 binary output.
@ eFlatQueryAnchoredNoIdentities
@ eFlatQueryAnchoredIdentities
Flat query anchored no identities.
@ eQueryAnchoredNoIdentities
Flat query anchored showing identities.
@ eQueryAnchoredIdentities
Query anchored no identities.
@ eTabularWithComments
Tabular output with comments.
Class containing information needed for tabular formatting of BLAST results.
Definition: tabular.hpp:515
void PrintMasterAlign(const CConstRef< blast::CIgBlastOptions > &ig_opts, const string &header="# ") const
Print domain information.
Definition: tabular.cpp:2701
int SetFields(const objects::CSeq_align &align, objects::CScope &scope, const string &chain_type, const string &master_chain_type_to_show, CNcbiMatrix< int > *matrix=0)
Set fields for all other alignments.
Definition: tabular.cpp:2397
const vector< SIgDomain * > & GetIgDomains() const
Get Ig domain.
Definition: tabular.hpp:698
void PrintHeader(const CConstRef< blast::CIgBlastOptions > &ig_opts, const string &program, const objects::CBioseq &bioseq, const string &dbname, const string &domain_sys, const string &rid=kEmptyStr, unsigned int iteration=numeric_limits< unsigned int >::max(), const objects::CSeq_align_set *align_set=0, CConstRef< objects::CBioseq > subj_bioseq=CConstRef< objects::CBioseq >())
Definition: tabular.cpp:1480
virtual void Print(void)
Override the print method.
Definition: tabular.cpp:2695
void SetIgAnnotation(const CRef< blast::CIgAnnotation > &annot, const CConstRef< blast::CIgBlastOptions > &ig_opts, CConstRef< CSeq_align_set > &align_result, CScope &scope)
One method to set all annotation information.
Definition: tabular.cpp:2500
int SetMasterFields(const objects::CSeq_align &align, objects::CScope &scope, const string &chain_type, const string &master_chain_type_to_show, CNcbiMatrix< int > *matrix=0)
Set fields for master alignment.
Definition: tabular.cpp:2374
void PrintHtmlSummary(const CConstRef< blast::CIgBlastOptions > &ig_opts) const
Print Html style summary.
Definition: tabular.cpp:2774
void SetAirrFormatData(CScope &scope, const CRef< blast::CIgAnnotation > &annot, const CBioseq_Handle &query_handle, CConstRef< CSeq_align_set > align_result, const CConstRef< blast::CIgBlastOptions > &ig_opts)
Definition: tabular.cpp:1982
void GetIgInfo(string &v, string &d, string &j, string &c, string &master_chain_to_show, string &cdr3_nuc, string &cdr3_aa, string &productive) const
Getter.
Definition: tabular.hpp:679
void PrintAirrRearrangement(CScope &scope, const CRef< blast::CIgAnnotation > &annot, const string &program_version, const CBioseq &query_bioseq, const string &dbname, const string &domain_sys, const string &rid, unsigned int iteration, const CSeq_align_set *align_set, CConstRef< CBioseq > subj_bioseq, CNcbiMatrix< int > *matrix, bool print_airr_format_header, const CConstRef< blast::CIgBlastOptions > &ig_opts)
Definition: tabular.cpp:2333
CJson_Object push_back_object(void)
Add object type element to the end of the array.
bool Write(std::ostream &out, TJson_Write_Flags flags=fJson_Write_IndentWithSpace, unsigned int indent_char_count=4) const
Write JSON data into a stream.
CJson_Object SetObject(void)
Get JSON object contents of the node.
CJson_Array insert_array(const CJson_Node::TKeyType &name)
Insert array type element into the object.
void insert(const CJson_Node::TKeyType &name)
Insert null element into the object.
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
CNcbiEnvironment –.
Definition: ncbienv.hpp:110
CNcbiRegistry –.
Definition: ncbireg.hpp:913
NCBI C++ Object Manager dependant implementation of IQueryFactory.
CScope –.
Definition: scope.hpp:92
Blast Search Subject.
Search Results for All Queries.
Search Results for One Query.
CSeqDB.
Definition: seqdb.hpp:161
@ eNucleotide
Definition: seqdb.hpp:175
@ eProtein
Definition: seqdb.hpp:174
string GetTitle() const
Returns the database title.
Definition: seqdb.cpp:630
bool IsEmpty() const
Tdata::size_type Size() const
CSeq_loc_Mapper –.
This class displays the defline for BLAST result.
Definition: showdefline.hpp:67
void SetDeflineTemplates(SDeflineTemplates *deflineTemplates)
Set this if defline templates are used. Param deflineTemplates: struct containing defline templates info...
void SetDbType(bool is_na)
Set this for constructing seqid url.
void DisplayBlastDefline(CNcbiOstream &out)
Display defline.
void SetResultPosIndex(int index)
set and add result position index to <name=seqid> in score quick link for multiple result case
void SetOption(int option)
options per DisplayOption
void SetupPsiblast(TIdString2SeqStatus *seq_status=NULL, PsiblastStatus status=eFirstPass)
Set psiblast specific options.
void Init(void)
Initialize defline params.
void SetQueryNumber(int number)
Set this for linking to mapviewer.
vector< CShowBlastDefline::SDeflineFormattingInfo * > GetFormattingInfo(void)
Get deflines formatting info.
void SetSkipRange(int from, int to)
Skip certain seqaligns (only used in Igblast)
void Display(CNcbiOstream &out)
Display defline.
@ eNewPass
Sequences are newly found in current pass.
@ eRepeatPass
Sequences were found in previous pass.
void SetDbName(string database)
Set this for constructing seqid url.
void DisplayOrgReport(CNcbiOstream &out)
Displays Organism Report.
Definition: taxFormat.cpp:382
Collection of masked regions for a single query sequence.
Definition: seqlocinfo.hpp:113
static CMemoryRegistry registry
Definition: cn3d_tools.cpp:81
void Print(const CCompactSAMApplication::AlignInfo &ai)
API (CDeflineGenerator) for computing sequences' titles ("definitions").
static uch flags
Implementation of interface class to produce data required for generating BLAST XML2 output.
Implementation of interface class to produce data required for generating BLAST XML output.
const char * file_name[]
std::ofstream out("events_result.xml")
main entry point for tests
const size_t kDfltLineLength
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
int offset
Definition: replacements.h:160
static void des(const char *src, const char *out)
Definition: challenge.c:132
EOutputFormat
Definition: grid_cli.hpp:276
int m_DomainInfo_S[10]
Definition: igblast.hpp:108
string m_ChainTypeToShow
Definition: igblast.hpp:99
CRef< objects::CBlast4_archive > BlastBuildArchive(blast::IQueryFactory &queries, blast::CBlastOptionsHandle &options_handle, const CSearchResultSet &results, CRef< CSearchDatabase > search_db, unsigned int num_iters=0)
Returns a blast archive object.
vector< string > m_ChainType
Definition: igblast.hpp:98
int m_FrameInfo[3]
Definition: igblast.hpp:103
int m_GeneInfo[8]
Definition: igblast.hpp:102
int m_DomainInfo[12]
Definition: igblast.hpp:105
string GetRID() const
Returns the RID for these results (if applicable), otherwise returns an empty string.
CRef< CBlastAncillaryData > GetAncillaryData() const
Accessor for the query's search ancillary.
bool IsLocalId(const objects::CSeq_id *seqid)
Returns true if the CSeq_id is a local id.
Definition: blast_aux.cpp:1094
const SPHIQueryInfo * GetPhiQueryInfo() const
Retrieves PHI-BLAST information about pattern on query.
set< objects::CSeq_id_Handle > TSeqIds
List of CSeq_ids.
ESubjectMaskingType GetMaskType() const
int m_CDomain[2]
Definition: igblast.hpp:113
int m_JDomain[5]
Definition: igblast.hpp:111
string Blast_ProgramNameFromType(EBlastProgramType program)
Returns a string program name, given a blast::EBlastProgramType enumeration.
Definition: blast_aux.cpp:813
CConstRef< objects::CSeq_id > GetSeqId() const
Accessor for the query's sequence identifier.
EProgram ProgramNameToEnum(const std::string &program_name)
Map a string into an element of the ncbi::blast::EProgram enumeration (except eBlastProgramMax).
Definition: blast_aux.cpp:757
void SetFilteringAlgorithm(int filt_algorithm_id)
Temporary fix for backwards compatibility with other 6.0 SCs.
EMoleculeType
Molecule of the BLAST database.
bool HasAlignments() const
Return true if there are any alignments for this query.
@ eCompBasedStats
2001 NAR paper
Definition: version.hpp:74
@ eIndexedMegablast
2008 Bioinformatics on indexed megablast
Definition: version.hpp:76
@ eMegaBlast
2000 J Comput Biol paper
Definition: version.hpp:73
@ eDeltaBlast
2012 Biology Direct on DeltaBLAST
Definition: version.hpp:77
@ eCompAdjustedMatrices
submitted for publication
Definition: version.hpp:75
@ eBlastDbIsNucleotide
nucleotide
@ eBlastDbIsProtein
protein
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
static void BlastPrintVersionInfo(const string program, bool html, CNcbiOstream &out)
Print out blast engine version.
static void BlastPrintReference(bool html, size_t line_len, CNcbiOstream &out, blast::CReference::EPublication publication=blast::CReference::eGappedBlast, bool is_psiblast=false)
Print out blast reference.
static void InsertSubjectScores(objects::CSeq_align_set &org_align_set, const objects::CBioseq_Handle &query_handle, TSeqRange query_range=TSeqRange(), ESubjectScores score_type=eQueryCovPerSubj)
string m_SerialXmlEnd
tag to be printed at end.
static CRef< objects::CSeq_annot > CreateSeqAnnotFromSeqAlignSet(const objects::CSeq_align_set &alnset, blast::EProgram program, const string &db_name, const string &db_title, bool vdb_search=false)
void BlastXML2_FormatReport(const IBlastXML2ReportData *data, CNcbiOstream *out_stream)
Fills all fields in the XML BLAST v2 output object.
void BlastJSON_FormatReport(const IBlastXML2ReportData *data, string file_name)
void BlastXML2_PrintHeader(CNcbiOstream *out_stream)
void BlastXML_FormatReport(objects::CBlastOutput &bxmlout, const IBlastXMLReportData *data, CNcbiOstream *out_stream=NULL, SBlastXMLIncremental *incremental_struct=NULL)
Fills all fields in the XML BLAST output object.
void BlastJSON_PrintHeader(CNcbiOstream *out_stream)
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
const string & Get(const string &name, bool *found=NULL) const
Get environment value by name.
Definition: ncbienv.cpp:109
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
static void SplitPath(const string &path, string *dir=0, string *base=0, string *ext=0)
Split a path string into its basic components.
Definition: ncbifile.cpp:358
#define MSerial_Json
Definition: serialbase.hpp:699
#define MSerial_AsnBinary
Definition: serialbase.hpp:697
#define MSerial_Xml
Definition: serialbase.hpp:698
#define MSerial_AsnText
I/O stream manipulators.
Definition: serialbase.hpp:696
string m_Name
PN - program name.
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1065
static int WorstRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:776
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
static int BestRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:774
ELabelType
return the label for a given string
Definition: Seq_id.hpp:603
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
@ eDefault
default is to show type + content
Definition: Seq_id.hpp:611
TSeqPos TPoint
Definition: Seq_loc.hpp:102
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
string GenerateDefline(const CBioseq_Handle &bsh, TUserFlags flags=0)
Main method.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
Definition: scope.cpp:530
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void ResetDataAndHistory(void)
Clear all information in the scope except added data loaders.
Definition: scope.cpp:331
CBioseq_EditHandle GetBioseqEditHandle(const CBioseq &bioseq)
Get edit handle for the specified object. Throw an exception if object is not found,...
Definition: scope.cpp:229
@ eGetBioseq_All
Search bioseq, load if not loaded yet.
Definition: scope.hpp:128
void SetDescr(TDescr &v) const
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
TSeqPos GetBioseqLength(void) const
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle. Throws an exception if none is available.
TMol GetBioseqMolType(void) const
Get some values from core:
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:1684
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
Definition: ncbireg.cpp:262
bool IsEnabled(void)
Indicates whether application usage statistics collection is enabled for a current reporter instance.
#define NcbiEndl
Definition: ncbistre.hpp:548
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NcbiBadbit
Definition: ncbistre.hpp:571
#define kEmptyStr
Definition: ncbistr.hpp:123
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
Definition: ncbistr.cpp:3192
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5086
#define NcbiEmptyString
Definition: ncbistr.hpp:122
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5414
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
@ eTrunc_End
Truncate trailing whitespace only.
Definition: ncbistr.hpp:2241
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Seq_id_.hpp:775
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Local
local use
Definition: Seq_id_.hpp:95
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
bool CanGetLength(void) const
Check if it is safe to call GetLength method.
Definition: Seq_inst_.hpp:646
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
bool IsSetInst(void) const
The sequence data. Check if a value has been assigned to Inst data member.
Definition: Bioseq_.hpp:324
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
@ eMol_not_set
> cdna = rna
Definition: Seq_inst_.hpp:109
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
int len
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
Useful/utility classes and methods.
T max(T x_, T y_)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
static PCRE2_SIZE * offsets
Definition: pcre2grep.c:266
#define count
static int * results[]
static FILE * outfile
Definition: pcre2test.c:951
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
Defines BLAST database access classes.
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
Definition: seqlocinfo.hpp:139
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Definition of SSeqLoc structure.
Complete type definition of Blast Sequence Source ADT.
Definition: blast_seqsrc.c:43
Structure to hold the Gumbel parameters (for FSC).
Definition: blast_stat.h:94
Structure to hold the Karlin-Altschul parameters.
Definition: blast_stat.h:66
double K
K value used in statistics.
Definition: blast_stat.h:68
double Lambda
Lambda value used in statistics.
Definition: blast_stat.h:67
double H
H value used in statistics.
Definition: blast_stat.h:70
igblast clone info
structure for showing domains on the master sequence
Definition: showalign.hpp:112
structure for store feature display info
Definition: showalign.hpp:105
CConstRef< objects::CSeq_loc > seqloc
Definition: showalign.hpp:106
string alignRowTmpl
Template for displaying actual pairwise alignment - BLAST_ALIGN_ROWS.
Definition: showalign.hpp:271
string alnTitlesTmpl
Template for displaying multiple defline titles.
Definition: showalign.hpp:264
string alignFeatureLinkTmpl
Template for displaying align features link -ALN_FEATURES_LINK.
Definition: showalign.hpp:270
string alnSeqInfoTmpl
Template for displaying sequence link in defline.
Definition: showalign.hpp:265
string alnTitlesLinkTmpl
Template for displaying link for more defline titles.
Definition: showalign.hpp:263
string alignFeatureTmpl
Template for displaying align features -ALN_FEATURES.
Definition: showalign.hpp:269
string sortInfoTmpl
Template for displaying Sort by header - SORT_ALIGNS_SEQ.
Definition: showalign.hpp:261
string alnDefLineTmpl
Template for displaying one defline ALN_DEFLINE_ROW.
Definition: showalign.hpp:262
string alignInfoTmpl
Template for displaying single align params - BLAST_ALIGN_PARAMS_NUC,BLAST_ALIGN_PARAMS_PROT.
Definition: showalign.hpp:266
string alignHeaderTmpl
Template for displaying header,deflines and gene info - BLAST_ALIGN_HEADER.
Definition: showalign.hpp:260
string alignRowTmplLast
Template for displaying actual last pairwise alignment - BLAST_ALIGN_ROWS_LST.
Definition: showalign.hpp:272
string scoreInfoTmpl
score info template
string seqInfoTmpl
sequence info template
string defLineTmpl
whole defline template
Structure to hold data for incremental XML formatting.
Information about a single pattern occurrence in the query.
Definition: blast_def.h:292
Int4 offset
Starting offset of this pattern occurrence.
Definition: blast_def.h:293
In PHI BLAST, structure containing information about all pattern occurrences in query.
Definition: blast_def.h:300
char * pattern
Pattern used, saved here for formatting purposes.
Definition: blast_def.h:306
double probability
Estimated probability of the pattern.
Definition: blast_def.h:305
Int4 num_patterns
Number of pattern occurrences in query.
Definition: blast_def.h:301
SPHIPatternInfo * occurrences
Array of pattern occurrence information structures.
Definition: blast_def.h:302
#define _ASSERT
else result
Definition: token2.c:20
Modified on Wed Sep 04 15:05:44 2024 by modify_doxy.py rev. 669887