36 #include <ncbi_pch.hpp>
41 #include <objmgr/util/sequence.hpp>
44 #include <corelib/ncbiutil.hpp> // for FindBestChoice
49 #include <algo/blast/format/data4xmlformat.hpp> /* NCBI_FAKE_WARNING */
51 #include <algo/blast/format/data4xml2format.hpp> /* NCBI_FAKE_WARNING */
54 #include <objtools/blast/seqdb_reader/seqdb.hpp> // for CSeqDB
55 #include <serial/objostrxml.hpp>
57 #include <corelib/ncbistre.hpp>
61 USING_SCOPE(blast);
63 USING_SCOPE(align_format);
64 USING_SCOPE(sequence);
65 #endif
/// Constructs a formatter from BLAST options and a local database adapter.
///
/// Copies the formatting settings into members, saves the output stream's
/// current exception mask in m_OrigExceptionMask and then switches the stream
/// to NcbiBadbit, and derives the database/subject description from
/// db_adapter. An empty database name marks the search as bl2seq.
///
/// NOTE(review): the embedded line numbers skip values inside this definition
/// (for example, no declaration of `format_type` or `app` is visible), so
/// some parameters and statements are not shown here.
68 CBlastFormat::CBlastFormat(const blast::CBlastOptions& options,
69  blast::CLocalDbAdapter& db_adapter,
71  bool believe_query, CNcbiOstream& outfile,
72  int num_summary,
73  int num_alignments,
74  CScope & scope,
75  const char *matrix_name /* = BLAST_DEFAULT_MATRIX */,
76  bool show_gi /* = false */,
77  bool is_html /* = false */,
78  int qgencode /* = BLAST_GENETIC_CODE */,
79  int dbgencode /* = BLAST_GENETIC_CODE */,
80  bool use_sum_statistics /* = false */,
81  bool is_remote_search /* = false */,
82  int dbfilt_algorithm /* = -1 */,
83  const string& custom_output_format /* = kEmptyStr */,
84  bool is_megablast /* = false */,
85  bool is_indexed /* = false */,
86  const blast::CIgBlastOptions *ig_opts /* = NULL */,
87  const blast::CLocalDbAdapter* domain_db_adapter /* = NULL*/,
88  const string & cmdline /* = kEmptyStr */,
89  const string& subjectTag /* =kEmptyStr */)
90  : m_FormatType(format_type), m_IsHTML(is_html),
91  m_DbIsAA(db_adapter.IsProtein()), m_BelieveQuery(believe_query),
92  m_Outfile(outfile), m_NumSummary(num_summary),
93  m_NumAlignments(num_alignments), m_HitlistSize(options.GetHitlistSize()),
94  m_Program(Blast_ProgramNameFromType(options.GetProgramType())),
95  m_DbName(kEmptyStr),
96  m_QueryGenCode(qgencode), m_DbGenCode(dbgencode),
97  m_ShowGi(show_gi), m_ShowLinkedSetSize(false),
98  m_IsUngappedSearch(!options.GetGappedMode()),
99  m_MatrixName(matrix_name),
100  m_Scope(& scope),
101  m_IsBl2Seq(false),
102  m_IsDbScan(false),
103  m_SubjectTag(subjectTag),
104  m_IsRemoteSearch(is_remote_search),
105  m_QueriesFormatted(0),
106  m_Megablast(is_megablast),
107  m_IndexedMegablast(is_indexed),
108  m_CustomOutputFormatSpec(custom_output_format),
109  m_IgOptions(ig_opts),
110  m_Options(&options),
111  m_IsVdb(false),
112  m_IsIterative(false),
113  m_BaseFile(kEmptyStr),
114  m_XMLFileCount(0),
115  m_LineLength(align_format::kDfltLineLength),
    // Remember the caller's exception mask before it is replaced below.
116  m_OrigExceptionMask(outfile.exceptions()),
117  m_Cmdline(cmdline)
118 {
119  m_Outfile.exceptions(NcbiBadbit);
120  m_DbName = db_adapter.GetDatabaseName();
    // No database name means the search is flagged as bl2seq.
121  m_IsBl2Seq = (m_DbName == kEmptyStr ? true : false);
122  m_IsDbScan = db_adapter.IsDbScanMode();
123  if (m_IsBl2Seq) {
124  m_SeqInfoSrc.Reset(db_adapter.MakeSeqInfoSrc());
125  }
126  else {
127  m_SearchDb = db_adapter.GetSearchDatabase();
128  }
129  if(m_IsDbScan) {
    // Counts stay at 0 for a remote search, where no sequence source is
    // queried here.
130  int num_seqs=0;
131  int total_length=0;
132  if (!is_remote_search)
133  {
134  BlastSeqSrc* seqsrc = db_adapter.MakeSeqSrc();
135  num_seqs=BlastSeqSrcGetNumSeqs(seqsrc);
136  total_length=static_cast<int>(BlastSeqSrcGetTotLen(seqsrc));
137  }
138  CBlastFormatUtil::FillScanModeBlastDbInfo(m_DbInfo, m_DbIsAA,
139  num_seqs, total_length, m_SubjectTag);
140  } else {
141  int filteringAlgorithmId = db_adapter.GetFilteringAlgorithm();
    // The adapter reports no filtering algorithm (-1) although the search
    // database asks for subject masking: warn and continue.
142  if(filteringAlgorithmId == -1) {
143  CRef <CSearchDatabase> db_Info = db_adapter.GetSearchDatabase();
144  if (db_Info && db_Info.NotEmpty()) {
145  ESubjectMaskingType maskType = db_Info->GetMaskType();
146  if(maskType != eNoSubjMasking) {
148  ERR_POST(Warning << "Subject mask not found in " + m_DbName +", proceeding without subject masking.");
149  }
150  }
151  }
152  CBlastFormatUtil::GetBlastDbInfo(m_DbInfo, m_DbName, m_DbIsAA,
153  dbfilt_algorithm, is_remote_search);
154  }
158  }
163  }
    // The linked-set size flag is set only when sum statistics were
    // requested for an ungapped search.
165  if (use_sum_statistics && m_IsUngappedSearch) {
166  m_ShowLinkedSetSize = true;
167  }
168  if ( m_Program == "blastn" &&
169  options.GetMatchReward() == 0 &&
170  options.GetMismatchPenalty() == 0 )
171  {
172  /* This combination is an indicator that we have used matrices
173  * solely to develop the hsp score. Also for the time being it
174  * indicates that KA stats are not available. -RMH-
175  */
176  m_DisableKAStats = true;
177  }
178  else
179  {
180  m_DisableKAStats = false;
181  }
    // DELTA-BLAST has program type eBlastTypePsiBlast (asserted below), so
    // the program name is overridden explicitly.
185  if (options.GetProgram() == eDeltaBlast) {
186  _ASSERT(options.GetProgramType() == eBlastTypePsiBlast);
187  m_Program = "deltablast";
189  if (domain_db_adapter) {
190  CBlastFormatUtil::GetBlastDbInfo(m_DomainDbInfo,
191  domain_db_adapter->GetDatabaseName(),
192  true, -1, is_remote_search);
193  }
194  }
196  m_IsIterative = options.IsIterativeSearch();
199  }
    // Long sequence IDs are enabled when [BLAST] LONG_SEQID is "1" in the
    // application registry.
202  if (app) {
203  const CNcbiRegistry& registry = app->GetConfig();
204  m_LongSeqId = (registry.Get("BLAST", "LONG_SEQID") == "1");
205  }
    // NOTE(review): -1 presumably means "no explicit sort order" - confirm.
206  m_HitsSortOption = -1;
207  m_HspsSortOption = -1;
208 }
/// Constructs a formatter from BLAST options and a ready-made list of
/// database descriptions.
///
/// Unlike the adapter-based overload, genetic codes, matrix name, megablast
/// flags and sum-statistics mode are all read from `opts`. m_DbName becomes
/// the space-separated concatenation of the names in `dbinfo_list`, and the
/// search is never flagged as bl2seq.
///
/// NOTE(review): the embedded line numbers skip values inside this definition
/// (for example, no declaration of `format_type` or `app` is visible), so
/// some parameters and statements are not shown here.
210 CBlastFormat::CBlastFormat(const blast::CBlastOptions& opts,
211  const vector< CBlastFormatUtil::SDbInfo >& dbinfo_list,
213  bool believe_query, CNcbiOstream& outfile,
214  int num_summary,
215  int num_alignments,
216  CScope& scope,
217  bool show_gi,
218  bool is_html,
219  bool is_remote_search,
220  const string& custom_output_format,
221  bool is_vdb,
222  const string & cmdline)
223  : m_FormatType(format_type),
224  m_IsHTML(is_html),
225  m_DbIsAA(!Blast_SubjectIsNucleotide(opts.GetProgramType())),
226  m_BelieveQuery(believe_query),
227  m_Outfile(outfile),
228  m_NumSummary(num_summary),
229  m_NumAlignments(num_alignments),
230  m_HitlistSize(opts.GetHitlistSize()),
231  m_Program(Blast_ProgramNameFromType(opts.GetProgramType())),
232  m_DbName(kEmptyStr),
233  m_QueryGenCode(opts.GetQueryGeneticCode()),
234  m_DbGenCode(opts.GetDbGeneticCode()),
235  m_ShowGi(show_gi),
236  m_ShowLinkedSetSize(false),
237  m_IsUngappedSearch(!opts.GetGappedMode()),
238  m_MatrixName(opts.GetMatrixName()),
239  m_Scope(&scope),
240  m_IsBl2Seq(false),
241  m_IsDbScan (false),
242  m_IsRemoteSearch(is_remote_search),
243  m_QueriesFormatted(0),
244  m_Megablast(opts.GetProgram() == eMegablast ||
245  opts.GetProgram() == eDiscMegablast),
246  m_IndexedMegablast(opts.GetMBIndexLoaded()),
247  m_CustomOutputFormatSpec(custom_output_format),
248  m_Options(&opts),
249  m_IsVdb(is_vdb),
250  m_IsIterative(false),
251  m_BaseFile(kEmptyStr),
252  m_XMLFileCount(0),
253  m_LineLength(align_format::kDfltLineLength),
    // Remember the caller's exception mask before it is replaced below.
254  m_OrigExceptionMask(outfile.exceptions()),
255  m_Cmdline(cmdline)
256 {
257  m_Outfile.exceptions(NcbiBadbit);
258  m_DbInfo.assign(dbinfo_list.begin(), dbinfo_list.end());
    // Build the display name by joining all database names with spaces.
259  vector< CBlastFormatUtil::SDbInfo >::const_iterator itInfo;
260  for (itInfo = m_DbInfo.begin(); itInfo != m_DbInfo.end(); itInfo++)
261  {
262  if(itInfo != m_DbInfo.begin())
263  m_DbName += " ";
265  m_DbName += itInfo->name;
266  }
268  m_IsBl2Seq = false;
273  }
278  }
    // The linked-set size flag is set only when sum statistics are enabled
    // for an ungapped search.
280  if (opts.GetSumStatisticsMode() && m_IsUngappedSearch) {
281  m_ShowLinkedSetSize = true;
282  }
284  if ( m_Program == "blastn" &&
285  opts.GetMatchReward() == 0 &&
286  opts.GetMismatchPenalty() == 0 )
287  {
288  /* This combination is an indicator that we have used matrices
289  * solely to develop the hsp score. Also for the time being it
290  * indicates that KA stats are not available. -RMH-
291  */
292  m_DisableKAStats = true;
293  }
294  else
295  {
296  m_DisableKAStats = false;
297  }
    // DELTA-BLAST has program type eBlastTypePsiBlast (asserted below), so
    // the program name is overridden explicitly.
301  if (opts.GetProgram() == eDeltaBlast) {
302  _ASSERT(opts.GetProgramType() == eBlastTypePsiBlast);
303  m_Program = "deltablast";
304  }
305  m_IsIterative = opts.IsIterativeSearch();
308  }
    // Long sequence IDs are enabled when [BLAST] LONG_SEQID is "1" in the
    // application registry.
310  if (app) {
311  const CNcbiRegistry& registry = app->GetConfig();
312  m_LongSeqId = (registry.Get("BLAST", "LONG_SEQID") == "1");
313  }
    // NOTE(review): -1 presumably means "no explicit sort order" - confirm.
314  m_HitsSortOption = -1;
315  m_HspsSortOption = -1;
316 }
319 {
320  try {
321  m_Outfile.exceptions(m_OrigExceptionMask);
322  } catch (...) {/*ignore exceptions*/}
323  m_Outfile.flush();
324 }
/// Opening HTML boilerplate for a report: starts the document, sets the page
/// title and body colors, and opens a <PRE> element that is left unclosed.
326 static const string kHTML_Prefix =
327 "<HTML>\n"
328 "<HEAD><TITLE>BLAST Search Results</TITLE></HEAD>\n"
329 "<BODY BGCOLOR=\"#FFFFFF\" LINK=\"#0000FF\" VLINK=\"#660099\" ALINK=\"#660099\">\n"
330 "<PRE>\n";
/// Closing HTML boilerplate: closes the PRE, BODY and HTML elements, in that
/// order. Note there is no trailing newline after the final tag.
332 static const string kHTML_Suffix =
333 "</PRE>\n"
334 "</BODY>\n"
335 "</HTML>";
337 Int8
339 {
340  Int8 retv = 0L;
341  for (size_t i = 0; i < m_DbInfo.size(); i++) {
342  retv += m_DbInfo[i].total_length;
343  }
344  return retv;
345 }
347 void
349 {
350  // no header for some output types
354  }
357  }
358  return;
359  }
361  if (m_IsHTML) {
362  m_Outfile << kHTML_Prefix << "\n";
363  }
364  // Make sure no-one confuses us with the standard BLASTN
365  // algorithm. -RMH-
366  if ( m_Program == "blastn" &&
367  m_DisableKAStats == true )
368  {
370  m_Outfile);
371  m_Outfile << "\n\n";
372  m_Outfile << "Reference: Robert M. Hubley, Arian Smit\n";
373  m_Outfile << "RMBlast - RepeatMasker Search Engine\n";
374  m_Outfile << "2010 <>";
375  }else
376  {
378  m_Outfile);
379  }
381  if (m_IsBl2Seq && !m_IsDbScan) {
382  return;
383  }
386  if (m_Program == "deltablast") {
389  m_Outfile << "\n";
390  }
392  if (m_Megablast)
395  else
397  m_Outfile);
400  {
401  m_Outfile << "\n";
404  }
406  if (m_Program == "psiblast" || m_Program == "deltablast") {
407  m_Outfile << "\n";
410  }
411  if (m_Program == "psiblast" || m_Program == "blastp") {
412  m_Outfile << "\n";
415  (bool)(m_Program == "psiblast"));
416  }
418  if (m_Program == "deltablast" || !m_DomainDbInfo.empty()) {
419  m_Outfile << "\n\n";
420  if (!m_DomainDbInfo.empty()) {
421  m_Outfile << "\n\n" << "Conserved Domain ";
422  CBlastFormatUtil::PrintDbReport(m_DomainDbInfo, kFormatLineLength,
423  m_Outfile, true);
424  }
425  }
426  else {
427  m_Outfile << "\n\n";
428  }
429  if (!m_IsBl2Seq || m_IsDbScan)
430  CBlastFormatUtil::PrintDbReport(m_DbInfo, kFormatLineLength,
431  m_Outfile, true);
432 }
/// Writes the per-query statistics footer to m_Outfile: the ungapped and
/// gapped Karlin-Altschul parameters (Lambda, K, H), when the corresponding
/// block is available, followed by the effective search space.
///
/// Nothing is written when m_DisableKAStats is set. For "psiblast" and
/// "deltablast" the PSI-specific Karlin blocks are used instead of the
/// standard ones.
///
/// @param summary ancillary data holding the Karlin/Gumbel blocks and the
///                search space for one query [in]
///
/// NOTE(review): the embedded line numbers skip 450 and 462, so part of the
/// argument list of each PrintKAParameters call is not visible here.
434 void
435 CBlastFormat::x_PrintOneQueryFooter(const blast::CBlastAncillaryData& summary)
436 {
437  /* Skip printing KA parameters if the program is rmblastn -RMH- */
438  if ( m_DisableKAStats )
439  return;
    // Ungapped parameters; PSI variants apply to psiblast/deltablast.
441  const Blast_KarlinBlk *kbp_ungap =
442  (m_Program == "psiblast" || m_Program == "deltablast")
443  ? summary.GetPsiUngappedKarlinBlk()
444  : summary.GetUngappedKarlinBlk();
445  const Blast_GumbelBlk *gbp = summary.GetGumbelBlk();
446  m_Outfile << NcbiEndl;
447  if (kbp_ungap) {
448  CBlastFormatUtil::PrintKAParameters(kbp_ungap->Lambda,
449  kbp_ungap->K, kbp_ungap->H,
451  false, gbp);
452  }
    // Gapped parameters, selected the same way.
454  const Blast_KarlinBlk *kbp_gap =
455  (m_Program == "psiblast" || m_Program == "deltablast")
456  ? summary.GetPsiGappedKarlinBlk()
457  : summary.GetGappedKarlinBlk();
458  m_Outfile << "\n";
459  if (kbp_gap) {
460  CBlastFormatUtil::PrintKAParameters(kbp_gap->Lambda,
461  kbp_gap->K, kbp_gap->H,
463  true, gbp);
464  }
466  m_Outfile << "\n";
467  m_Outfile << "Effective search space used: " <<
468  summary.GetSearchSpace() << "\n";
469 }
471 /// Auxiliary function to determine if there are local IDs in the identifiers
472 /// of the query sequences
473 /// @param queries query sequence(s) [in]
474 static bool
476 {
477  bool retval = false;
478  ITERATE(CBlastQueryVector, itr, *queries) {
479  if (blast::IsLocalId((*itr)->GetQuerySeqLoc()->GetId())) {
480  retval = true;
481  break;
482  }
483  }
484  return retval;
485 }
487 void
489  int skip_from, int skip_to, int index,
490  int num_descriptions_to_show /* = -1 */)
491 {
492  int flags = 0;
495  if (m_IsHTML){
497  if (index >= 0) {
498  showdef.SetResultPosIndex(index);
499  }
500  }
501  if (m_ShowGi)
503  if (num_descriptions_to_show == 0)
505  if (m_LongSeqId) {
507  }
508  if(m_HitsSortOption >= 0) {
512  }
513  showdef.SetOption(flags);
514  showdef.SetDbName(m_DbName);
515  showdef.SetDbType(!m_DbIsAA);
516  showdef.SetSkipRange(skip_from, skip_to);
517 }
519 void
521  CSeq_align_set& repeated_seqs,
522  CSeq_align_set& new_seqs,
523  blast::CPsiBlastIterationState::TSeqIds& prev_seqids)
524 {
525  static const CSeq_align::TDim kSubjRow = 1;
526  _ASSERT( !prev_seqids.empty() );
527  _ASSERT( !full_alignment->IsEmpty() );
528  _ASSERT(repeated_seqs.IsEmpty());
529  _ASSERT(new_seqs.IsEmpty());
531  unsigned int count = 0;
532  ITERATE(CSeq_align_set::Tdata, alignment, full_alignment->Get()) {
533  CSeq_id_Handle subj_id =
534  CSeq_id_Handle::GetHandle((*alignment)->GetSeq_id(kSubjRow));
535  if (prev_seqids.find(subj_id) != prev_seqids.end()) {
536  // if found among previously seen Seq-ids...
537  repeated_seqs.Set().push_back(*alignment);
538  } else {
539  // ... else add them as new
540  new_seqs.Set().push_back(*alignment);
541  }
542  count++;
543  if(count >= (unsigned int)m_NumSummary)
544  break;
545  }
546 }
548 bool
550 {
551  bool kIsGlobal = (seqalign_set->IsSet() && seqalign_set->CanGet() &&
552  seqalign_set->Get().front()->CanGetType() &&
553  seqalign_set->Get().front()->GetType() == CSeq_align_Base::eType_global);
555  return kIsGlobal;
556 }
559 void
561  unsigned int itr_num,
562  blast::CPsiBlastIterationState::TSeqIds& prev_seqids,
563  int additional,
564  int index,
565  int defline_length )
566 {
568  if (itr_num != numeric_limits<unsigned int>::max() &&
569  !prev_seqids.empty()) {
570  // Split seq-align-set
571  CSeq_align_set repeated_seqs, new_seqs;
572  x_SplitSeqAlign(aln_set, repeated_seqs, new_seqs, prev_seqids);
574  // Show deflines for 'repeat' sequences
575  {{
576  CShowBlastDefline showdef(repeated_seqs, *m_Scope,
578  repeated_seqs.Size());
579  x_ConfigCShowBlastDefline(showdef);
582  }}
583  m_Outfile << "\n";
585  // Show deflines for 'new' sequences
586  {{
587  CShowBlastDefline showdef(new_seqs, *m_Scope, kFormatLineLength,
588  new_seqs.Size());
589  x_ConfigCShowBlastDefline(showdef);
592  }}
594  } else {
596  CShowBlastDefline showdef(*aln_set, *m_Scope,
597  defline_length == -1 ? kFormatLineLength:defline_length,
598  m_NumSummary + additional);
599  x_ConfigCShowBlastDefline(showdef, -1, -1, index,
600  m_NumSummary+additional);
602  }
603  m_Outfile << "\n";
604 }
/// Computes and returns an integer flag set (`flags`) from the program name,
/// the requested format type and several boolean switches.
///
/// The visible conditions branch on bl2seq mode, HTML output, GI display,
/// the query-anchored format types, the "tblastx" program and disabled
/// Karlin-Altschul statistics.
///
/// NOTE(review): the embedded line numbers skip most of this body (the
/// declaration of `flags`, the `format_type` parameter and every statement
/// that sets a flag are not visible), so the individual flag values cannot
/// be documented from here.
606 int
607 s_SetFlags(string& program,
609  bool html, bool showgi, bool isbl2seq, bool disableKAStats)
610 {
611  // set the alignment flags
614  if ( isbl2seq ) {
616  }
618  if (html)
620  if (showgi)
623  if (format_type >= CFormattingArgs::eQueryAnchoredIdentities &&
626  }
627  else {
630  }
632  if (format_type == CFormattingArgs::eQueryAnchoredIdentities ||
635  }
636  if (format_type == CFormattingArgs::eQueryAnchoredIdentities ||
639  }
640  if (program == "tblastx") {
642  }
644  if (disableKAStats)
647  return flags;
648 }
650 bool
652 {
653  return m_IsVdb;
654 }
655 // Port of jzmisc.c's AddAlignInfoToSeqAnnotEx (CVS revision 6.11)
658  const string& db_title) const
659 {
662  m_DbName, db_title,
663  x_IsVdbSearch());
664 }
/// Emits one query's results in one of the structured formats (ASN.1 text,
/// JSON, XML, XML2, SAM, ...) chosen by m_FormatType. Every branch returns
/// as soon as its format has been handled.
///
/// @param results search results for a single query [in]
///
/// NOTE(review): the embedded line numbers skip values inside this
/// definition (the second parameter and several branch conditions are not
/// visible), so the format handled by some branches cannot be named here.
666 void
667 CBlastFormat::x_PrintStructuredReport(const blast::CSearchResults& results,
669 {
    // Database title: all database definitions joined with "; ".
670  string db_title;
671  if (!m_DbInfo.empty()) {
672  db_title = m_DbInfo.front().definition;
673  for (size_t i=1;i < m_DbInfo.size();i++) {
674  db_title += "; ";
675  db_title += m_DbInfo[i].definition;
676  }
677  }
679  // ASN.1 formatting is straightforward
681  if (results.HasAlignments()) {
    // Work on a copy pruned with m_HitlistSize, then wrap it in a Seq-annot.
682  CRef<CSeq_align_set> aln_set (new CSeq_align_set);
683  CBlastFormatUtil::PruneSeqalign(*(results.GetSeqAlign()), *aln_set, m_HitlistSize);
685  m_Outfile << MSerial_AsnText << *x_WrapAlignmentInSeqAnnot(aln_set, db_title);
686  else
687  m_Outfile << MSerial_Json << *x_WrapAlignmentInSeqAnnot(aln_set, db_title);
688  }
689  return;
691  if (results.HasAlignments()) {
692  CRef<CSeq_align_set> aln_set (new CSeq_align_set);
693  CBlastFormatUtil::PruneSeqalign(*(results.GetSeqAlign()), *aln_set, m_HitlistSize);
695  *x_WrapAlignmentInSeqAnnot(aln_set, db_title);
696  }
697  return;
698  } else if (m_FormatType == CFormattingArgs::eXml) {
    // NOTE(review): constness is cast away and TrimSeqAlign is called on the
    // caller's results object itself, not on a copy.
699  CRef<CSearchResults> res(const_cast<CSearchResults*>(&results));
700  res->TrimSeqAlign(m_HitlistSize);
701  m_AccumulatedResults.push_back(res);
    // Find the query whose ID matches these results and accumulate it too.
702  CConstRef<CSeq_id> query_id = results.GetSeqId();
703  // FIXME: this can be a bottleneck with large numbers of queries
704  ITERATE(CBlastQueryVector, itr, *queries) {
705  if (query_id->Match(*(*itr)->GetQueryId())) {
706  m_AccumulatedQueries->push_back(*itr);
707  break;
708  }
709  }
711  objects::CBlastOutput xml_output;
712  if(x_IsVdbSearch()) {
718  BlastXML_FormatReport(xml_output, &report_data, &m_Outfile,
721  }
722  else {
728  BlastXML_FormatReport(xml_output, &report_data, &m_Outfile,
730  }
    // The accumulators are emptied once the report has been formatted.
731  m_AccumulatedResults.clear();
732  m_AccumulatedQueries->clear();
733  return;
734  }
737  x_PrintXML2Report(results, queries);
738  return;
739  }
740  else if (m_FormatType == CFormattingArgs::eSAM) {
741  if(results.HasAlignments()) {
742  m_SamFormatter->Print(*(results.GetSeqAlign()));
743  }
744  return;
745  }
746 }
/// Prints one query's results in tabular form: an optional header followed
/// by one entry per alignment, after pruning the alignment set with
/// m_HitlistSize.
///
/// @param results search results for a single query [in]
/// @param itr_num iteration number, forwarded to the header printer [in]
///
/// NOTE(review): the embedded line numbers skip values inside this
/// definition (the declaration of `tabinfo` and several conditions are not
/// visible), so some steps are not shown here.
748 void
749 CBlastFormat::x_PrintTabularReport(const blast::CSearchResults& results,
750  unsigned int itr_num)
751 {
752  CConstRef<CSeq_align_set> aln_set = results.GetSeqAlign();
753  if (m_IsUngappedSearch && results.HasAlignments()) {
755  }
756  // other output types will need a bioseq handle
757  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*results.GetSeqId(),
760  // tabular formatting just prints each alignment in turn
761  // (plus a header)
765  const CBlastTabularInfo::EFieldDelimiter kDelim =
    // A user-supplied delimiter, when present, is passed to the formatter.
770  if(!m_CustomDelim.empty()) {
771  tabinfo.SetCustomDelim(m_CustomDelim);
772  }
775  tabinfo.SetParseSubjectDefline(true);
776  }
777  tabinfo.SetQueryRange(m_QueryRange);
    // For blastn (case-insensitive match) the formatter is put in no-fetch mode.
778  if (ncbi::NStr::ToLower(m_Program) == string("blastn"))
779  tabinfo.SetNoFetch(true);
    // Header label: upper-cased program name followed by the BLAST version.
782  string strProgVersion =
783  NStr::ToUpper(m_Program) + " " + blast::CBlastVersion().Print();
784  string dbname;
785  if (m_IsDbScan)
786  dbname = string("User specified sequence set (Input: ") + m_SubjectTag + string(")");
787  else
788  dbname = m_DbName;
789  CConstRef<CBioseq> subject_bioseq;
790  // dbname used in place of Bioseq in most cases.
791  if (dbname.empty())
792  subject_bioseq.Reset(x_CreateSubjectBioseq());
793  tabinfo.PrintHeader(strProgVersion, *(bhandle.GetBioseqCore()),
794  dbname, results.GetRID(), itr_num, aln_set,
795  subject_bioseq);
796  }
798  if (results.HasAlignments()) {
799  CSeq_align_set copy_aln_set;
800  CBlastFormatUtil::PruneSeqalign(*aln_set, copy_aln_set, m_HitlistSize);
802  {
    // The coverage score selection depends on whether the custom format
    // spec mentions "qcovs" / "qcovus"; "qcovus" is honored for blastn only.
803  unsigned int scores = CBlastFormatUtil::eNoQuerySubjCov;
804  if(string::npos != m_CustomOutputFormatSpec.find("qcovs"))
806  if(string::npos != m_CustomOutputFormatSpec.find("qcovus") &&
807  ncbi::NStr::ToLower(m_Program) == string("blastn"))
812  }
814  tabinfo.SetDbGeneticCode(m_DbGenCode);
815  ITERATE(CSeq_align_set::Tdata, itr, copy_aln_set.Get()) {
816  const CSeq_align& s = **itr;
817  tabinfo.SetFields(s, *m_Scope, &m_ScoringMatrix);
818  tabinfo.Print();
819  }
820  }
821  return;
822  }
823 }
/// Fills `clone_info` for one query from IgBLAST tabular data.
///
/// The sequence label is cut to at most 45 characters: for a local ID it is
/// taken from the generated defline, otherwise from a Seq-id content label.
/// Gene assignments and related fields are copied from `tabinfo`, and
/// `identity` is the ratio of matches to total length summed over all
/// domains with a positive length (0 when there is none).
///
/// @param tabinfo    IgBLAST tabular information for the query [in]
/// @param handle     bioseq handle of the query [in]
/// @param clone_info structure to fill [out]
///
/// NOTE(review): the embedded line numbers skip values inside this
/// definition (the declaration of `wid` is not visible), and the GetIgInfo
/// argument list contains an empty slot (",,"), which suggests an argument
/// was lost - verify against the full source.
825 static void s_SetCloneInfo(const CIgBlastTabularInfo& tabinfo,
826  const CBioseq_Handle& handle,
827  CBlastFormat::SClone& clone_info) {
829  if (handle.GetSeqId()->Which() == CSeq_id::e_Local){
830  CDeflineGenerator defline (handle.GetSeq_entry_Handle());
831  clone_info.seqid = defline.GenerateDefline(handle).substr(0, 45);
833  // clone_info.seqid = CDeflineGenerator.substr(0, 45);
834  } else {
835  string seqid;
837  wid->GetLabel(&seqid, CSeq_id::eContent);
838  clone_info.seqid = seqid.substr(0, 45);
839  }
840  tabinfo.GetIgInfo (clone_info.v_gene, clone_info.d_gene, clone_info.j_gene,
841  clone_info.c_gene,
842  clone_info.chain_type,, clone_info.aa, clone_info.productive);
    // Identity defaults to 0 and is only replaced when some domain has a
    // positive length, which also avoids dividing by zero.
843  clone_info.identity = 0;
844  const vector<CIgBlastTabularInfo::SIgDomain*>& domains = tabinfo.GetIgDomains();
845  int length = 0;
846  int num_match = 0;
847  for (unsigned int i=0; i<domains.size(); ++i) {
848  if (domains[i]->length > 0) {
849  length += domains[i]->length;
850  num_match += domains[i]->num_match;
851  }
852  }
853  if (length > 0){
854  clone_info.identity = ((double)num_match)/length;
856  }
858 }
/// Prints the taxonomy (organism) report for one query: first the query
/// acknowledgement, wrapped in <pre>...</pre> for HTML output or preceded by
/// a newline otherwise, then the organism report via DisplayOrgReport.
///
/// @param results search results for a single query [in]
///
/// NOTE(review): the embedded line numbers skip values inside this
/// definition (the construction of `taxFormatRes` is not visible), so some
/// steps are not shown here.
860 void
861 CBlastFormat::x_PrintTaxReport(const blast::CSearchResults& results)
862 {
863  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*results.GetSeqId(),
865  CConstRef<CBioseq> bioseq = bhandle.GetBioseqCore();
866  if(m_IsHTML) {
867  m_Outfile << "<pre>";
868  }
869  else {
870  m_Outfile << "\n";
871  }
872  CBlastFormatUtil::AcknowledgeBlastQuery(*bioseq, kFormatLineLength,
874  m_IsHTML, false,
875  results.GetRID());
877  if(m_IsHTML) {
878  m_Outfile << "</pre>";
879  }
880  CConstRef<CSeq_align_set> aln_set = results.GetSeqAlign();
881  if (m_IsUngappedSearch && results.HasAlignments()) {
883  }
    // Constness is cast away to obtain a non-const CRef to the same
    // alignment set; no copy is made.
885  CRef<CSeq_align_set> new_aln_set(const_cast<CSeq_align_set*>(aln_set.GetPointer()));
887  taxFormatRes->DisplayOrgReport(m_Outfile);
888 }
/// Prints the IgBLAST tabular report for one query: a header, then one entry
/// per alignment. With HTML output the report is wrapped in
/// <html><body><pre> ... </pre></body></html>.
///
/// @param results         IgBLAST results for a single query [in]
/// @param clone_info      clone summary, filled only when there are
///                        alignments and fill_clone_info is true [out]
/// @param fill_clone_info whether to populate clone_info [in]
///
/// NOTE(review): the embedded line numbers skip values inside this
/// definition (the declaration of `tabinfo` is not visible), so some steps
/// are not shown here.
890 void
891 CBlastFormat::x_PrintIgTabularReport(const blast::CIgBlastResults& results,
892  SClone& clone_info,
893  bool fill_clone_info)
894 {
895  CConstRef<CSeq_align_set> aln_set = results.GetSeqAlign();
896  /* TODO do we support ungapped Igblast search?
897  if (m_IsUngappedSearch && results.HasAlignments()) {
898  aln_set.Reset(CDisplaySeqalign::PrepareBlastUngappedSeqalign(*aln_set));
899  } */
900  // other output types will need a bioseq handle
901  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*results.GetSeqId(),
904  // tabular formatting just prints each alignment in turn
905  // (plus a header)
    // Header label: "IG" followed by the upper-cased program name.
917  string strProgVersion =
918  "IG" + NStr::ToUpper(m_Program);
919  CConstRef<CBioseq> subject_bioseq = x_CreateSubjectBioseq();
921  if (m_IsHTML) {
922  m_Outfile << "<html><body><pre>\n";
923  }
924  if (results.HasAlignments()) {
925  const CRef<CIgAnnotation> & annots = results.GetIgAnnotation();
    // The first alignment supplies the master fields, with chain type
    // index 0.
926  CSeq_align_set::Tdata::const_iterator itr = aln_set->Get().begin();
927  tabinfo.SetMasterFields(**itr, *m_Scope,
928  annots->m_ChainType[0],
929  annots->m_ChainTypeToShow,
930  &m_ScoringMatrix);
931  tabinfo.SetIgAnnotation(annots, m_IgOptions, aln_set, *m_Scope);
932  if (fill_clone_info) {
933  s_SetCloneInfo(tabinfo, bhandle, clone_info);
934  }
935  tabinfo.PrintHeader(m_IgOptions, strProgVersion, *(bhandle.GetBioseqCore()),
936  m_DbName,
937  m_IgOptions->m_DomainSystem,
938  results.GetRID(),
940  aln_set, subject_bioseq);
    // Chain type indices for the printed alignments start at 1.
942  int j = 1;
943  for (; itr != aln_set->Get().end(); ++itr) {
944  tabinfo.SetFields(**itr, *m_Scope,
945  annots->m_ChainType[j++],
946  annots->m_ChainTypeToShow,
947  &m_ScoringMatrix);
948  tabinfo.Print();
949  }
950  } else {
    // No alignments: only the header is printed, with a null alignment set.
951  tabinfo.PrintHeader(m_IgOptions, strProgVersion, *(bhandle.GetBioseqCore()),
952  m_DbName,
953  m_IgOptions->m_DomainSystem,
954  results.GetRID(),
956  0, subject_bioseq);
957  }
958  if (m_IsHTML) {
959  m_Outfile << "\n</pre></body></html>\n";
960  }
961 }
/// Prints one query's IgBLAST results in AIRR rearrangement form.
///
/// @param results                  IgBLAST results for a single query [in]
/// @param clone_info               clone summary, filled only when there are
///                                 alignments and fill_clone_info is true [out]
/// @param fill_clone_info          whether to populate clone_info [in]
/// @param print_airr_format_header forwarded to PrintAirrRearrangement [in]
///
/// NOTE(review): the embedded line numbers skip values inside this
/// definition (the declaration of `tabinfo` is not visible), so some steps
/// are not shown here.
964 void CBlastFormat::x_PrintAirrRearrangement(const blast::CIgBlastResults& results,
965  SClone& clone_info,
966  bool fill_clone_info,
967  bool print_airr_format_header)
968 {
969  CConstRef<CSeq_align_set> aln_set = results.GetSeqAlign();
971  // other output types will need a bioseq handle
972  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*results.GetSeqId(),
975  // tabular formatting just prints each alignment in turn
976  // (plus a header)
    // Program label: "IG" followed by the upper-cased program name.
983  string strProgVersion =
984  "IG" + NStr::ToUpper(m_Program);
985  CConstRef<CBioseq> subject_bioseq = x_CreateSubjectBioseq();
    // annots stays null when there are no alignments; it is still passed to
    // the two calls below in that case.
987  CRef<CIgAnnotation> annots(null);
988  if (results.HasAlignments()) {
989  annots = results.GetIgAnnotation();
990  tabinfo.SetIgAnnotation(annots, m_IgOptions, aln_set, *m_Scope);
991  if (fill_clone_info) {
992  s_SetCloneInfo(tabinfo, bhandle, clone_info);
993  }
994  }
995  tabinfo.SetAirrFormatData(*m_Scope, annots,
996  bhandle, aln_set, m_IgOptions);
999  tabinfo.PrintAirrRearrangement(*m_Scope, annots, strProgVersion,
1000  *(bhandle.GetBioseqCore()),
1001  m_DbName,
1002  m_IgOptions->m_DomainSystem,
1003  results.GetRID(),
1005  aln_set, subject_bioseq, &m_ScoringMatrix,
1006  print_airr_format_header,
1007  m_IgOptions);
1009 }
1012 {
1013  if ( !m_IsBl2Seq && !m_IsDbScan) {
1014  return CConstRef<CBioseq>();
1015  }
1019  static Uint4 subj_index = 0;
1021  list< CRef<CSeq_id> > ids = m_SeqInfoSrc->GetId(subj_index++);
1023  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*id,
1025  // If this assertion fails, we're not able to get the subject, possibly a
1026  // programming error (see @note in this function's declaration - was the
1027  // order of calls altered?)
1028  _ASSERT(bhandle);
1030  // reset the subject index if necessary
1031  if (subj_index >= m_SeqInfoSrc->Size()) {
1032  subj_index = 0;
1033  }
1034  return bhandle.GetBioseqCore();
1035 }
1037 /// Auxiliary function to print the BLAST Archive in multiple output formats
1039 {
1040  if (archive.Empty()) {
1041  return;
1042  }
1043  string outfmt = CNcbiEnvironment().Get("ARCHIVE_FORMAT");
1044  if (outfmt.empty()) {
1045  out << MSerial_AsnText << *archive;
1046  } else if (!NStr::CompareNocase(outfmt, "xml")) {
1047  out << MSerial_Xml << *archive;
1048  } else if (NStr::StartsWith(outfmt, "bin", NStr::eNocase)) {
1049  out << MSerial_AsnBinary << *archive;
1050  }
1051 }
/// Builds a BLAST archive for a query-based search and prints it to
/// m_Outfile with PrintArchive.
///
/// The subject side of the archive is chosen in this order:
///  - bl2seq: a query vector built from every ID in m_SeqInfoSrc;
///  - m_SearchDb set: the search database (passing num_iters when non-zero);
///  - otherwise: a CSearchDatabase created from the names in m_DbInfo.
///
/// @param queries        query factory [in]
/// @param options_handle BLAST options handle [in]
/// @param results        search results [in]
/// @param num_iters      number of iterations; 0 selects the overload
///                       without an iteration count [in]
/// @param msg            messages attached to the archive when non-empty [in]
/// @throws CException (eUnknown) when neither subjects, a search database
///         nor database info is available.
///
/// NOTE(review): the embedded line numbers skip values inside this
/// definition (the declarations of `archive`, `id` and `mol_type` are not
/// visible), so some steps are not shown here.
1053 void
1054 CBlastFormat::WriteArchive(blast::IQueryFactory& queries,
1055  blast::CBlastOptionsHandle& options_handle,
1056  const CSearchResultSet& results,
1057  unsigned int num_iters,
1058  const list<CRef<CBlast4_error> > & msg)
1059 {
1061  if (m_IsBl2Seq)
1062  {
    // Turn every subject sequence into a whole-sequence query so that the
    // subjects can be handed over as a query factory.
1063  CRef<CBlastQueryVector> query_vector(new CBlastQueryVector);
1064  for (unsigned int i=0; i<m_SeqInfoSrc->Size(); i++)
1065  {
1066  list< CRef<CSeq_id> > ids = m_SeqInfoSrc->GetId(i);
1068  CRef<CSeq_loc> seq_loc(new CSeq_loc);
1069  seq_loc->SetWhole(*id);
1070  CRef<CBlastSearchQuery> search_query(new CBlastSearchQuery(*seq_loc, *m_Scope));
1071  query_vector->AddQuery(search_query);
1072  }
1073  CObjMgr_QueryFactory subjects(*query_vector);
1074  archive = BlastBuildArchive(queries, options_handle, results, subjects);
1076  }
1077  else if (!m_SearchDb.Empty())
1078  {
1079  // Use only by psi blast
1080  if(num_iters != 0) {
1081  archive = BlastBuildArchive(queries, options_handle, results, m_SearchDb , num_iters);
1082  }
1083  else {
1084  archive = BlastBuildArchive(queries, options_handle, results, m_SearchDb );
1085  }
1086  }
1087  else
1088  {
1089  if(m_DbInfo.empty()) {
1090  NCBI_THROW(CException, eUnknown, "Subject or DB info not available");
1091  }
1092  string db_list = kEmptyStr;
    // NOTE(review): names are appended with no separator, so several
    // databases end up as one run-together string - confirm this is intended.
1094  for (unsigned int i=0; i < m_DbInfo.size(); i++) {
1095  db_list += m_DbInfo[i].name;
1096  }
1097  CRef<CSearchDatabase> sdb (new CSearchDatabase(db_list, mol_type));
1098  archive = BlastBuildArchive(queries, options_handle, results, sdb);
1099  }
1101  if(msg.size() > 0) {
1102  archive->SetMessages() = msg;
1103  }
1104  PrintArchive(archive, m_Outfile);
1105 }
/// Builds a BLAST archive for a PSSM-based search against m_SearchDb and
/// prints it to m_Outfile with PrintArchive.
///
/// @param pssm           PSSM used as the query [in]
/// @param options_handle BLAST options handle [in]
/// @param results        search results [in]
/// @param num_iters      number of iterations, forwarded to
///                       BlastBuildArchive [in]
/// @param msg            messages attached to the archive when non-empty [in]
1107 void
1108 CBlastFormat::WriteArchive(objects::CPssmWithParameters & pssm,
1109  blast::CBlastOptionsHandle& options_handle,
1110  const CSearchResultSet& results,
1111  unsigned int num_iters,
1112  const list<CRef<CBlast4_error> > & msg)
1113 {
1114  CRef<objects::CBlast4_archive> archive(BlastBuildArchive(pssm, options_handle, results, m_SearchDb, num_iters));
    // Messages are only set on the archive when there is at least one.
1116  if(msg.size() > 0) {
1117  archive->SetMessages() = msg;
1118  }
1119  PrintArchive(archive, m_Outfile);
1120 }
1124 {
1126  int delineFormatOption = 0;
1129  deflines.SetQueryNumber(1);//m_Query_number
1130  deflines.SetDbType (!m_DbIsAA);
1131  deflines.SetDbName(m_DbName);
1132  delineFormatOption |= CShowBlastDefline::eHtml;
1133  delineFormatOption |= CShowBlastDefline::eShowPercentIdent;
1134  deflines.SetOption(delineFormatOption); //m_defline_option
1136  //Next three lines are for proper initialization in formatting of defline
1138  deflineTemplates->advancedView = true;
1139  deflines.SetDeflineTemplates (deflineTemplates);
1142  vector <CShowBlastDefline::SDeflineFormattingInfo *> sdlFortInfoVec = deflines.GetFormattingInfo();
1143  CJson_Document doc;
1144  CJson_Object top_obj = doc.SetObject();
1145  CJson_Array defline_array = top_obj.insert_array("deflines");
1147  for(size_t i = 0; i < sdlFortInfoVec.size(); i++) {
1148  CJson_Object obj = defline_array.push_back_object();
1150  obj.insert("dfln_url",sdlFortInfoVec[i]->dfln_url);
1151  obj.insert("dfln_rid",sdlFortInfoVec[i]->dfln_rid);
1152  obj.insert("dfln_gi",sdlFortInfoVec[i]->dfln_gi);
1153  obj.insert("dfln_seqid",sdlFortInfoVec[i]->dfln_seqid);
1154  obj.insert("full_dfln_defline",sdlFortInfoVec[i]->full_dfln_defline);
1155  obj.insert("dfln_defline",sdlFortInfoVec[i]->dfln_defline);
1156  obj.insert("dfln_id",sdlFortInfoVec[i]->dfln_id);
1157  obj.insert("dflnFrm_id",sdlFortInfoVec[i]->dflnFrm_id);
1158  obj.insert("dflnFASTA_id",sdlFortInfoVec[i]->dflnFASTA_id);
1159  obj.insert("dflnAccs",sdlFortInfoVec[i]->dflnAccs);
1161  obj.insert("score_info",sdlFortInfoVec[i]->score_info);
1162  obj.insert("dfln_hspnum",sdlFortInfoVec[i]->dfln_hspnum);
1163  obj.insert("dfln_alnLen",sdlFortInfoVec[i]->dfln_alnLen);
1164  obj.insert("dfln_blast_rank",sdlFortInfoVec[i]->dfln_blast_rank);
1165  obj.insert("total_bit_string",sdlFortInfoVec[i]->total_bit_string);
1166  obj.insert("percent_coverage",sdlFortInfoVec[i]->percent_coverage);
1167  obj.insert("evalue_string",sdlFortInfoVec[i]->evalue_string);
1168  obj.insert("percent_identity",sdlFortInfoVec[i]->percent_identity);
1169  }
1170  doc.Write(m_Outfile);
1171 }
1175 {
1179  int delineFormatOption = 0;
1182  deflines.SetQueryNumber(1);//m_Query_number
1183  deflines.SetDbType (!m_DbIsAA);
1184  deflines.SetDbName(m_DbName);
1185  delineFormatOption |= CShowBlastDefline::eHtml;
1186  delineFormatOption |= CShowBlastDefline::eShowPercentIdent;
1187  deflines.SetOption(delineFormatOption); //m_defline_option
1190  deflines.Init();
1191  deflines.Display(m_Outfile);
1192 }
1196 {
1200  TMaskedQueryRegions masklocs;
1201  results.GetMaskedQueryRegions(masklocs);
1203  CSeq_align_set copy_aln_set;
1204  CBlastFormatUtil::PruneSeqalign(*aln_set, copy_aln_set, m_NumAlignments);
1206  CRef<CSeq_align_set> seqAlnSet(const_cast<CSeq_align_set*>(&copy_aln_set));
1207  if(!m_AlignSeqList.empty()) {
1209  }
1211  CDisplaySeqalign display(*seqAlnSet, *m_Scope, &masklocs, NULL, m_MatrixName);
1212  x_SetAlignParameters(display);
1215  display.DisplaySeqalign(m_Outfile);
1216 }
1219 {
1221  if(!app) return;
1222  const CNcbiRegistry& reg = app->GetConfig();
1226  string defLineTmpl;
1228  m_DeflineTemplates->defLineTmpl = reg.Get("Templates", "DFL_TABLE_ROW");
1229  m_DeflineTemplates->scoreInfoTmpl = reg.Get("Templates", "DFL_TABLE_SCORE_INFO");
1230  m_DeflineTemplates->seqInfoTmpl = reg.Get("Templates", "DFL_TABLE_SEQ_INFO");
1232 }
1235 {
1237  if(!app) return;
1238  const CNcbiRegistry& reg = app->GetConfig();
1242  m_AlignTemplates->alignHeaderTmpl = reg.Get("Templates", "BLAST_ALIGN_HEADER");
1243  string blastAlignParamsTemplData = reg.Get("Templates", "BLAST_ALIGN_PARAMS");
1244  string blastAlignParamsTag = (m_Program == "blastn") ? "ALIGN_PARAMS_NUC" : "ALIGN_PARAMS_PROT";
1245  string blastAlignProtParamsTable = reg.Get("Templates", blastAlignParamsTag);
1246  m_AlignTemplates->alignInfoTmpl = CAlignFormatUtil::MapTemplate(blastAlignParamsTemplData,"align_params",blastAlignProtParamsTable);
1247  m_AlignTemplates->sortInfoTmpl = reg.Get("Templates", "SORT_ALIGNS_SEQ");
1248  m_AlignTemplates->alignFeatureTmpl = reg.Get("Templates", "ALN_FEATURES");
1249  m_AlignTemplates->alignFeatureLinkTmpl = reg.Get("Templates", "ALN_FEATURES_LINK");
1251  m_AlignTemplates->alnDefLineTmpl = reg.Get("Templates", "ALN_DEFLINE_ROW");
1252  m_AlignTemplates->alnTitlesLinkTmpl = reg.Get("Templates", "ALN_DEFLINE_TITLES_LNK");
1253  m_AlignTemplates->alnTitlesTmpl = reg.Get("Templates", "ALN_DEFLINE_TITLES");
1254  m_AlignTemplates->alnSeqInfoTmpl = reg.Get("Templates", "ALN_DEFLINE_SEQ_INFO");
1255  m_AlignTemplates->alignRowTmpl = reg.Get("Templates", "BLAST_ALIGN_ROWS");
1256  m_AlignTemplates->alignRowTmplLast = reg.Get("Templates", "BLAST_ALIGN_ROWS_LST");
1257 }
1262 {
1264  int AlignOption = 0;
1266  AlignOption += CDisplaySeqalign::eShowMiddleLine;
1268  if (m_Program == "tblastx") {
1270  }
1271  AlignOption += CDisplaySeqalign::eShowBlastInfo;
1272  AlignOption += CDisplaySeqalign::eShowBlastStyleId;
1273  AlignOption += CDisplaySeqalign::eHtml;
1274  AlignOption += CDisplaySeqalign::eShowSortControls;//*******????
1275  AlignOption += CDisplaySeqalign::eDynamicFeature;
1276  cds.SetAlignOption(AlignOption);
1278  cds.SetDbName(m_DbName);
1279  cds.SetDbType(!m_DbIsAA);
1280  cds.SetLineLen(m_LineLength);
1282  if (m_Program == "blastn" || m_Program == "megablast") {
1285  } else {
1288  }
1289  cds.SetQueryNumber(1); //m_Query_number
1294 }
// Maps the molecule type reported by bioseqHandle.GetBioseqMolType() to a
// display string: "dna", "rna", "amino acid" or "nucleic acid" for the
// matching CSeq_inst::eMol_* case labels, and "Unknown" for any value that
// has no case label.
1298 static string s_GetMolType(const CBioseq_Handle& bioseqHandle)
1299 {
1300  int molType = bioseqHandle.GetBioseqMolType();
1301  string molTypeString;
1303  switch(molType) {
      // NOTE(review): no case label is visible in front of this assignment
      // (the listing skips a line here), so as shown "cdna" can never be
      // selected. Confirm which CSeq_inst::eMol_* value it belongs to.
1305  molTypeString = "cdna";
1306  break;
1307  case CSeq_inst::eMol_dna:
1308  molTypeString = "dna";
1309  break;
1310  case CSeq_inst::eMol_rna:
1311  molTypeString = "rna";
1312  break;
1313  case CSeq_inst::eMol_aa:
1314  molTypeString = "amino acid";
1315  break;
1316  case CSeq_inst::eMol_na:
1317  molTypeString = "nucleic acid";
1318  break;
1319  default:
1320  molTypeString = "Unknown";
1321  }
1322  return molTypeString;
1323 }
// Writes one section of a report for a single query's results; the section
// is selected by displayOption.
//  - eMetadata: builds a CJson_Document holding the query id, description,
//    length and molecule type, the database name and title, the program
//    name and any error / warning strings, then writes it to m_Outfile.
//  - any other option: works from results.GetSeqAlign(); eDescriptions is
//    handled by x_CreateDeflinesJson(). The statements of the remaining
//    branches are not part of this listing.
1325 void
1326 CBlastFormat::PrintReport(const blast::CSearchResults& results,
1327  CBlastFormat::DisplayOption displayOption)
1328 {
1329  if (displayOption == eMetadata) {//Metadata in json format
      // NOTE(review): the declaration of bhandle is not visible in this
      // listing (a line is skipped here).
1331  CConstRef<CBioseq> bioseq = bhandle.GetBioseqCore();
1333  //string seqID = CAlignFormatUtil::GetSeqIdString(*bioseq, m_BelieveQuery);
1334  string seqID;
1335  CConstRef <CSeq_id> queryID = sequence::GetId(bhandle).GetSeqId();
      // Local ids are labelled with eDefault, every other id with eContent.
1336  CSeq_id::ELabelType labelType = (queryID->IsLocal()) ? CSeq_id::eDefault : CSeq_id::eContent;
1337  queryID->GetLabel(&seqID,labelType);
1340  string seqDescr = CBlastFormatUtil::GetSeqDescrString(*bioseq);
      // An empty description is reported as the literal string "None".
1341  seqDescr = seqDescr.empty() ? "None" : seqDescr;
1343  string molType = s_GetMolType(bhandle);
      // Length stays 0 when the Bioseq has no Seq-inst or no length set.
1345  int length = 0;
1346  if(bioseq->IsSetInst() && bioseq->GetInst().CanGetLength()){
1347  length = bioseq->GetInst().GetLength();
1348  }
1350  CJson_Document doc;
1351  CJson_Object obj = doc.SetObject();
1352  obj.insert("Query",seqID);
1353  obj.insert("Query_descr",seqDescr);
1354  obj.insert("IsQueryLocal",queryID->IsLocal());
      // The length is stored as a string, not as a JSON number.
1355  obj.insert("Length",NStr::IntToString(length));
1356  obj.insert("Moltype",molType);
1357  obj.insert("Database",m_DbName);
1358  string dbTitle;
      // Best effort: any exception while fetching the title is swallowed and
      // "Database_descr" is then written as an empty string.
1359  try {
1360  CRef<CSeqDB> seqdb;
      // NOTE(review): the statement that initializes seqdb is not visible in
      // this listing; as shown the CRef is dereferenced while still empty.
1362  dbTitle = seqdb->GetTitle();
1363  }
1364  catch (...) {/*ignore exceptions for now*/}
1365  obj.insert("Database_descr",dbTitle);
1366  obj.insert("IsDBProtein",m_DbIsAA);
1367  obj.insert("Program",m_Program);
      // Errors and warnings are reported inside the JSON document rather
      // than aborting the metadata output.
1370  if (results.HasErrors()) {
1371  obj.insert("Error",results.GetErrorStrings());
1372  }
1373  if (results.HasWarnings()) {
1374  obj.insert("Warning",results.GetWarningStrings());
1375  }
1376  doc.Write(m_Outfile);
1377  }
1378  else {
1379  CConstRef<CSeq_align_set> aln_set = results.GetSeqAlign();
      // Debug-only check: this branch expects the result to have alignments.
1380  _ASSERT(results.HasAlignments());
1381  if (m_IsUngappedSearch) {
1383  }
1385  if (displayOption == eDescriptionsWithTemplates) {//Descriptions with html templates
1387  }
1388  if (displayOption == eDescriptions) {// Descriptions produced by x_CreateDeflinesJson
1389  x_CreateDeflinesJson(aln_set);
1390  }
1391  else if (displayOption == eAlignments) {// print the alignments with html templates
1393  }
1394  }
1395 }
// Formats the results of one query to m_Outfile.
//
// Visible flow: structured, tabular and "Tax BLAST report" outputs return
// early (the conditions selecting them are not part of this listing).
// Otherwise the query is acknowledged, a "no hits" message plus footer is
// written when there are no alignments, and else the one-line summaries,
// the pairwise alignments and the per-query footer are printed.
//
// results      results of a single query
// itr_num      when different from numeric_limits<unsigned int>::max(), a
//              "Results from round N" line is printed; also forwarded to
//              x_PrintTabularReport() and x_DisplayDeflines()
// prev_seqids  forwarded to x_DisplayDeflines()
// is_deltablast_domain_result
//              when true, "Results from domain search" is printed first
//
// Errors in `results` are logged with ERR_POST and end the call; warnings
// are logged and formatting continues. Throws CException (eUnknown) when
// the query Seq-id cannot be resolved through m_Scope.
1397 void
1398 CBlastFormat::PrintOneResultSet(const blast::CSearchResults& results,
     // NOTE(review): a parameter line is skipped by the listing here; the
     // body below uses a `queries` argument.
1400  unsigned int itr_num
1401  /* = numeric_limits<unsigned int>::max() */,
1402  blast::CPsiBlastIterationState::TSeqIds prev_seqids
1403  /* = CPsiBlastIterationState::TSeqIds() */,
1404  bool is_deltablast_domain_result /* = false */)
1405 {
1406  // For remote searches, we don't retrieve the sequence data for the query
1407  // sequence when initially sending the request to the BLAST server (if it's
1408  // a GI/accession/TI), so we flush the scope so that it can be retrieved
1409  // (needed if a self-hit is found) again. This is not applicable if the
1410  // query sequence(s) are specified as FASTA (will be identified by local
1411  // IDs).
1412  if (m_IsRemoteSearch && !s_HasLocalIDs(queries)) {
1414  }
1416  // Used with tabular output to print number of searches formatted at end.
1428  {
1429  x_PrintStructuredReport(results, queries);
1430  return;
1431  }
1433  if (results.HasErrors()) {
1434  ERR_POST(Error << results.GetErrorStrings());
1435  return; // errors are deemed fatal
1436  }
1437  if (results.HasWarnings()) {
1438  ERR_POST(Warning << results.GetWarningStrings());
1439  }
1444  x_PrintTabularReport(results, itr_num);
1445  return;
1446  }
1448  string reportCaption = "Tax BLAST report";
1450  m_Outfile << reportCaption;
1452  return;
1453  }
1454  const bool kIsTabularOutput = false;
1456  if (is_deltablast_domain_result) {
1457  m_Outfile << "Results from domain search" << "\n";
1458  }
      // The maximum unsigned value is the "not an iteration" marker.
1460  if (itr_num != numeric_limits<unsigned int>::max()) {
1461  m_Outfile << "Results from round " << itr_num << "\n";
1462  }
1464  // other output types will need a bioseq handle
1465  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*results.GetSeqId(),
1467  // If we're not able to get the query, most likely a bug. SB-981 , GP-2207
1468  if( !bhandle ){
1469  string message = "Failed to resolve SeqId: "+results.GetSeqId()->AsFastaString();
1470  ERR_POST(message);
1471  NCBI_THROW(CException, eUnknown, message);
1472  }
1473  CConstRef<CBioseq> bioseq = bhandle.GetBioseqCore();
1475  // print the preamble for this query
1477  m_Outfile << "\n\n";
1478  CBlastFormatUtil::AcknowledgeBlastQuery(*bioseq, kFormatLineLength,
1480  m_IsHTML, kIsTabularOutput,
1481  results.GetRID());
      // The subject is acknowledged only for bl2seq runs that are not
      // database scans.
1483  if (m_IsBl2Seq && !m_IsDbScan) {
1484  m_Outfile << "\n";
1485  // FIXME: this might be configurable in the future
1486  const bool kBelieveSubject = false;
1487  CConstRef<CBioseq> subject_bioseq = x_CreateSubjectBioseq();
1488  CBlastFormatUtil::AcknowledgeBlastSubject(*subject_bioseq,
1490  m_Outfile, kBelieveSubject,
1491  m_IsHTML, kIsTabularOutput);
1492  }
1494  // quit early if there are no hits
1495  if ( !results.HasAlignments() ) {
1496  m_Outfile << "\n\n"
1497  << "***** " << CBlastFormatUtil::kNoHitsFound << " *****" << "\n"
1498  << "\n\n";
1499  x_PrintOneQueryFooter(*results.GetAncillaryData());
1500  return;
1501  }
1503  CConstRef<CSeq_align_set> aln_set = results.GetSeqAlign();
1504  _ASSERT(results.HasAlignments());
1505  if (m_IsUngappedSearch) {
1507  }
1509  //invoke sorting only for m_HitsSortOption > CAlignFormatUtil::eEvalue or m_HspsSortOption > CAlignFormatUtil::eHspEvalue
1510  if(m_HitsSortOption > 0 || m_HspsSortOption > 0) {
      // The sorter is handed a non-const view of the const Seq-align set;
      // aln_set is then rebound to whatever it returns.
1511  aln_set = CBlastFormatUtil::SortSeqalignForSortableFormat(
1512  *(const_cast<CSeq_align_set*>(aln_set.GetPointer())),
1513  (m_Program == "tblastx") ? true : false,
1516  }
1518  const bool kIsGlobal = s_IsGlobalSeqAlign(aln_set);
1520  //-------------------------------------------------
1521  // print 1-line summaries
1522  // Also disable when program is rmblastn. At this time
1523  // we do not want summary bit scores/evalues for this
1524  // program. -RMH-
1525  if ( (!m_IsBl2Seq || m_IsDbScan) && !(m_DisableKAStats || kIsGlobal) ) {
1526  x_DisplayDeflines(aln_set, itr_num, prev_seqids);
1527  }
1529  //-------------------------------------------------
1530  // print the alignments
1531  m_Outfile << "\n";
1533  TMaskedQueryRegions masklocs;
1534  results.GetMaskedQueryRegions(masklocs);
      // The displayed set is a pruned copy limited by m_NumAlignments; the
      // set held by `results` is not modified by this step.
1536  CSeq_align_set copy_aln_set;
1537  CBlastFormatUtil::PruneSeqalign(*aln_set, copy_aln_set, m_NumAlignments);
      // NOTE(review): the start of the statement that ends on the next line
      // (it appears to compute `flags`, used below) is not in this listing.
1540  (m_IsBl2Seq && !m_IsDbScan), (m_DisableKAStats || kIsGlobal));
1542  CDisplaySeqalign display(copy_aln_set, *m_Scope, &masklocs, NULL, m_MatrixName);
1543  display.SetDbName(m_DbName);
1544  display.SetDbType(!m_DbIsAA);
1545  display.SetLineLen(m_LineLength);
1546  int kAlignToShow=2000000000; // Nice large number per SB-1817
1547  display.SetNumAlignToShow(kAlignToShow);
1549  // set the alignment flags
1550  display.SetAlignOption(flags);
1552  if (m_LongSeqId) {
1553  display.UseLongSequenceIds();
1554  }
1556  if (m_Program == "blastn" || m_Program == "megablast") {
1559  }
1560  else {
1563  }
1568  TSeqLocInfoVector subj_masks;
1569  results.GetSubjectMasks(subj_masks);
1570  display.SetSubjectMasks(subj_masks);
1571  display.DisplaySeqalign(m_Outfile);
1573  // print the ancillary data for this query
1575  x_PrintOneQueryFooter(*results.GetAncillaryData());
1576 }
1578 void
1581  SClone& clone_info,
1582  bool fill_clone_info,
1583  bool print_airr_format_header,
1584  int index)
1585 {
1586 = NcbiEmptyString;
1587  clone_info.aa = NcbiEmptyString;
1589  // For remote searches, we don't retrieve the sequence data for the query
1590  // sequence when initially sending the request to the BLAST server (if it's
1591  // a GI/accession/TI), so we flush the scope so that it can be retrieved
1592  // (needed if a self-hit is found) again. This is not applicable if the
1593  // query sequence(s) are specified as FASTA (will be identified by local
1594  // IDs).
1595  if (m_IsRemoteSearch && !s_HasLocalIDs(queries)) {
1597  }
1599  // Used with tabular output to print number of searches formatted at end.
1610  {
1611  x_PrintStructuredReport(results, queries);
1612  return;
1613  }
1615  if (results.HasErrors()) {
1616  ERR_POST(Error << results.GetErrorStrings());
1617  return; // errors are deemed fatal
1618  }
1619  if (results.HasWarnings()) {
1620  ERR_POST(Warning << results.GetWarningStrings());
1621  }
1623  if (results.GetIgAnnotation()->m_MinusStrand) {
1625  }
1626  //set j domain
1627  CRef<CIgAnnotation> & annots_edit = results.SetIgAnnotation();
1628  if (annots_edit->m_JDomain[1] > 0 && annots_edit->m_DomainInfo[9] > 0 &&
1629  annots_edit->m_JDomain[1] > annots_edit->m_DomainInfo[9]){
1630  annots_edit->m_JDomain[0] = annots_edit->m_DomainInfo[9] + 1 ;
1631  //fwr4
1632  if (annots_edit->m_JDomain[3] > 0) {
1633  annots_edit->m_JDomain[2] = annots_edit->m_JDomain[1] + 1 ;
1634  }
1635  }
1641  x_PrintIgTabularReport(results, clone_info, fill_clone_info);
1642  return;
1643  }
1647  if (m_Program == "blastn" || m_Program == "BLASTN") {
1648  x_PrintAirrRearrangement(results, clone_info, fill_clone_info, print_airr_format_header);
1649  } else {
1650  m_Outfile << "The AIRR format is only available for nucleotide sequence search" << endl;
1651  }
1652  return;
1653  }
1656  string reportCaption = "Tax BLAST report";
1658  m_Outfile << reportCaption;
1660  return;
1661  }
1663  const bool kIsTabularOutput = false;
1665  // other output types will need a bioseq handle
1666  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*results.GetSeqId(),
1668  // If this assertion fails, we're not able to get the query, most likely a
1669  // bug
1670  _ASSERT(bhandle);
1671  CConstRef<CBioseq> bioseq = bhandle.GetBioseqCore();
1673  // print the preamble for this query
1675  m_Outfile << "\n\n";
1677  CBlastFormatUtil::AcknowledgeBlastQuery(*bioseq, kFormatLineLength,
1679  m_IsHTML, kIsTabularOutput,
1680  results.GetRID());
1682  // quit early if there are no hits
1683  if ( !results.HasAlignments() ) {
1684  m_Outfile << "\n\n"
1685  << "***** " << CBlastFormatUtil::kNoHitsFound << " *****" << "\n"
1686  << "\n\n";
1687  x_PrintOneQueryFooter(*results.GetAncillaryData());
1688  return;
1689  }
1691  CConstRef<CSeq_align_set> aln_set = results.GetSeqAlign();
1692  _ASSERT(results.HasAlignments());
1693  if (m_IsUngappedSearch) {
1695  }
1697  //-------------------------------------------------
1698  // print 1-line summaries
1699  if ( !m_IsBl2Seq ) {
1701  int additional = results.m_NumActualV +results.m_NumActualD + results.m_NumActualJ +
1702  results.m_NumActualC;
1703  x_DisplayDeflines(aln_set, numeric_limits<unsigned int>::max(), prev_ids, additional, index, 100);
1704  }
1706  //-------------------------------------------------
1707  // print the alignments
1708  m_Outfile << "\n";
1710  const CBlastTabularInfo::EFieldDelimiter kDelim =
1717  // print the master alignment
1718  if (results.HasAlignments()) {
1719  const CRef<CIgAnnotation> & annots = results.GetIgAnnotation();
1720  CSeq_align_set::Tdata::const_iterator itr = aln_set->Get().begin();
1721  tabinfo.SetMasterFields(**itr, *m_Scope,
1722  annots->m_ChainType[0],
1723  annots->m_ChainTypeToShow,
1724  &m_ScoringMatrix);
1725  tabinfo.SetIgAnnotation(annots, m_IgOptions, aln_set, *m_Scope);
1726  if (fill_clone_info) {
1727  s_SetCloneInfo(tabinfo, bhandle, clone_info);
1728  }
1729  m_Outfile << "Domain classification requested: " << m_IgOptions->m_DomainSystem << endl << endl;
1730  if (m_IsHTML) {
1731  tabinfo.PrintHtmlSummary(m_IgOptions);
1732  } else {
1733  tabinfo.PrintMasterAlign(m_IgOptions, "");
1734  }
1735  }
1737  TMaskedQueryRegions masklocs;
1738  results.GetMaskedQueryRegions(masklocs);
1748  }
1750  if (m_IsHTML) {
1753  }
1755  list < CRef<CDisplaySeqalign::DomainInfo> > domain;
1757  string kabat_domain_name[] = {"FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "C region"};
1758  string imgt_domain_name[] = {"FR1-IMGT", "CDR1-IMGT", "FR2-IMGT", "CDR2-IMGT", "FR3-IMGT", "CDR3-IMGT", "FR4-IMGT", "C region"};
1759  int domain_name_length = 8;
1760  vector<string> domain_name;
1761  if (m_IgOptions->m_DomainSystem == "kabat") {
1762  for (int i = 0; i < domain_name_length; i ++) {
1763  domain_name.push_back(kabat_domain_name[i]);
1764  }
1765  } else {
1766  for (int i = 0; i < domain_name_length; i ++) {
1767  domain_name.push_back(imgt_domain_name[i]);
1768  }
1769  }
1771  const CRef<CIgAnnotation> & annots = results.GetIgAnnotation();
1773  for (int i=0; i<9; i = i + 2) {
1774  if (annots->m_DomainInfo[i] >= 0){
1776  int start = annots->m_DomainInfo[i];
1777  int subject_start = annots->m_DomainInfo_S[i];
1779  int stop = annots->m_DomainInfo[i+1];
1780  int subject_stop = annots->m_DomainInfo_S[i+1];
1782  temp->seqloc = new CSeq_loc((CSeq_loc::TId &) aln_set->Get().front()->GetSeq_id(0),
1783  (CSeq_loc::TPoint) start,
1784  (CSeq_loc::TPoint) stop);
1785  temp->subject_seqloc = new CSeq_loc((CSeq_loc::TId &) aln_set->Get().front()->GetSeq_id(1),
1786  (CSeq_loc::TPoint) subject_start,
1787  (CSeq_loc::TPoint) subject_stop);
1788  temp->is_subject_start_valid = subject_start > 0 ? true:false;
1789  temp->is_subject_stop_valid = subject_stop > 0 ? true:false;
1790  temp->domain_name = domain_name[i/2];
1791  domain.push_back(temp);
1792  }
1793  }
1795  //J domain
1796  //cdr3
1797  if (annots->m_JDomain[0] > 0 && annots->m_JDomain[1] > 0){
1799  int start = annots->m_JDomain[0];
1800  int subject_start = -1;
1801  int stop = annots->m_JDomain[1];
1802  int subject_stop = -1;
1804  temp->seqloc = new CSeq_loc((CSeq_loc::TId &) aln_set->Get().front()->GetSeq_id(0),
1805  (CSeq_loc::TPoint) start,
1806  (CSeq_loc::TPoint) stop);
1807  CRef<CSeq_id> id_holder (new CSeq_id);
1808  temp->subject_seqloc = new CSeq_loc(*id_holder,
1809  (CSeq_loc::TPoint) subject_start,
1810  (CSeq_loc::TPoint) subject_stop);
1811  temp->is_subject_start_valid = subject_start > 0 ? true:false;
1812  temp->is_subject_stop_valid = subject_stop > 0 ? true:false;
1813  temp->domain_name = domain_name[5];
1814  domain.push_back(temp);
1815  }
1816  //fwr4
1817  if (annots->m_JDomain[2] > 0 && annots->m_JDomain[3] > 0){
1819  int start = annots->m_JDomain[2];
1820  int subject_start = -1;
1821  int stop = annots->m_JDomain[3];
1822  int subject_stop = -1;
1824  temp->seqloc = new CSeq_loc((CSeq_loc::TId &) aln_set->Get().front()->GetSeq_id(0),
1825  (CSeq_loc::TPoint) start,
1826  (CSeq_loc::TPoint) stop);
1827  CRef<CSeq_id> id_holder (new CSeq_id);
1828  temp->subject_seqloc = new CSeq_loc(*id_holder,
1829  (CSeq_loc::TPoint) subject_start,
1830  (CSeq_loc::TPoint) subject_stop);
1831  temp->is_subject_start_valid = subject_start > 0 ? true:false;
1832  temp->is_subject_stop_valid = subject_stop > 0 ? true:false;
1833  temp->domain_name = domain_name[6];
1834  domain.push_back(temp);
1835  }
1837  //C region
1839  if (annots->m_CDomain[0] > 0 && annots->m_CDomain[1] > 0 &&
1840  annots->m_JDomain[2] > 0 && annots->m_JDomain[3] > 0){
1842  int start = annots->m_CDomain[0];
1843  int subject_start = -1;
1844  int stop = annots->m_CDomain[1];
1845  int subject_stop = -1;
1847  temp->seqloc = new CSeq_loc((CSeq_loc::TId &) aln_set->Get().front()->GetSeq_id(0),
1848  (CSeq_loc::TPoint) start,
1849  (CSeq_loc::TPoint) stop);
1850  CRef<CSeq_id> id_holder (new CSeq_id);
1851  temp->subject_seqloc = new CSeq_loc(*id_holder,
1852  (CSeq_loc::TPoint) subject_start,
1853  (CSeq_loc::TPoint) subject_stop);
1854  temp->is_subject_start_valid = subject_start > 0 ? true:false;
1855  temp->is_subject_stop_valid = subject_stop > 0 ? true:false;
1856  temp->domain_name = domain_name[7];
1857  domain.push_back(temp);
1858  }
1861  CDisplaySeqalign display(*aln_set, *m_Scope, &masklocs, NULL, m_MatrixName);
1862  int num_align_to_show = results.m_NumActualV + results.m_NumActualD +
1863  results.m_NumActualJ + results.m_NumActualC;
1864  if (m_DbName != m_IgOptions->m_Db[0]->GetDatabaseName()){
1865  num_align_to_show += m_NumAlignments;
1866  }
1867  display.SetNumAlignToShow(num_align_to_show);
1868  display.SetMasterDomain(&domain);
1869  display.SetDbName(m_DbName);
1870  display.SetDbType(!m_DbIsAA);
1871  display.SetLineLen(90);
1873  if (m_LongSeqId) {
1874  display.UseLongSequenceIds();
1875  }
1877  if (annots->m_FrameInfo[0] >= 0 && m_IgOptions->m_Translate) {
1880  }
1883  vector<string> chain_type_list;
1884  ITERATE(vector<string>, iter, annots->m_ChainType) {
1885  if (*iter=="N/A"){
1886  chain_type_list.push_back(NcbiEmptyString);
1887  } else {
1888  chain_type_list.push_back(*iter);
1889  }
1890  }
1891  display.SetSequencePropertyLabel(&chain_type_list);
1892  // set the alignment flags
1894  display.SetAlignOption(flags);
1895  if (m_Program == "blastn" || m_Program == "BLASTN") {
1897  } else {
1899  }
1903  TSeqLocInfoVector subj_masks;
1904  results.GetSubjectMasks(subj_masks);
1905  display.SetSubjectMasks(subj_masks);
1907  if (m_IsHTML) {
1908  display.SetResultPositionIndex(index);
1909  m_Outfile << "\n<CENTER><b><FONT color=\"green\">Alignments</FONT></b></CENTER>"
1910  << endl;
1912  } else {
1913  m_Outfile << "\nAlignments" << endl;
1914  }
1916  display.DisplaySeqalign(m_Outfile);
1918  // print the ancillary data for this query
1920  x_PrintOneQueryFooter(*results.GetAncillaryData());
1921  if (m_IsHTML) {
1922  m_Outfile << "<hr>" << endl;
1923  }
1924 }
// Replaces the query of `results` by a new Bioseq built from the minus
// strand of the original query, then remaps the alignments and the Ig
// annotation coordinates onto that new sequence. Returns immediately when
// `results` has no alignments.
//
// Side effects visible here: a Bioseq with id "<original id>_reversed" is
// added to m_Scope, results' Seq-align set is replaced by the remapped
// copy, and m_GeneInfo / m_DomainInfo / m_FrameInfo of the Ig annotation
// are rewritten in place.
1926 void
1927 CBlastFormat::x_ReverseQuery(blast::CIgBlastResults& results)
1928 {
1929  if (!results.HasAlignments()){
1930  return;
1931  }
1932  // create a temporary seq_id
1933  CConstRef<CSeq_id> qid = results.GetSeqId();
1934  string new_id = qid->AsFastaString() + "_reversed";
1936  // create a bioseq
1937  CBioseq_Handle q_bh = m_Scope->GetBioseqHandle(*qid);
1938  int len = q_bh.GetBioseqLength();
      // `loc` covers the whole original query (0 .. len-1) on the minus strand.
1939  CSeq_loc loc(*(const_cast<CSeq_id *>(&*qid)), 0, len-1, eNa_strand_minus);
1940  CRef<CBioseq> q_new(new CBioseq(loc, new_id));
1941  CConstRef<CSeq_id> new_qid = m_Scope->AddBioseq(*q_new).GetSeqId();
      // For local ids, the defline generated for the original query is
      // attached to the new Bioseq as a title descriptor.
1942  if (qid->IsLocal()) {
1943  string title = sequence::CDeflineGenerator().GenerateDefline(q_bh);
1944  if (title != "") {
1945  CRef<CSeqdesc> des(new CSeqdesc());
      // NOTE(review): the `if` that pairs with the `else` below is not part
      // of this listing, so the condition that chooses between the
      // "reversed|"-prefixed title and the plain title cannot be read here.
1947  des->SetTitle("reversed|" + title);
1948  } else {
1949  des->SetTitle(title);
1950  }
1951  m_Scope->GetBioseqEditHandle(*q_new).SetDescr().Set().push_back(des);
1952  }
1953  }
1955  // set up the mapping
1956  CSeq_loc new_loc(*(const_cast<CSeq_id *>(&*new_qid)), 0, len-1, eNa_strand_plus);
1957  CSeq_loc_Mapper mapper(loc, new_loc, &*m_Scope);
1959  // replace the alignment with the new query
1960  CRef<CSeq_align_set> align_set(new CSeq_align_set());
1961  ITERATE(CSeq_align_set::Tdata, align, results.GetSeqAlign()->Get()) {
1962  CRef<CSeq_align> new_align = mapper.Map(**align, 0);
1963  align_set->Set().push_back(new_align);
1964  }
1965  results.SetSeqAlign().Reset(&*align_set);
1967  // reverse IgAnnotations
1968  CRef<CIgAnnotation> &annots = results.SetIgAnnotation();
      // m_GeneInfo is processed as three (i, i+1) pairs: each pair is
      // swapped and subtracted from len. Pairs whose first entry is
      // negative are left untouched.
1969  for (int i=0; i<6; i+=2) {
1970  int start = annots->m_GeneInfo[i];
1971  if (start >= 0) {
1972  annots->m_GeneInfo[i] = len - annots->m_GeneInfo[i+1];
1973  annots->m_GeneInfo[i+1] = len - start;
1974  }
1975  }
      // The first 12 m_DomainInfo entries are mirrored individually as
      // len-1-pos, clamped at 0; negative entries are left untouched.
      // NOTE(review): unlike m_GeneInfo, the entries of a start/stop pair
      // are not swapped here — confirm that consumers expect this.
1977  for (int i=0; i<12; ++i) {
1978  int pos = annots->m_DomainInfo[i];
1979  if (pos >= 0) {
1980  annots->m_DomainInfo[i] = max(0, len - 1 - pos);
1981  }
1982  }
      // The first 3 m_FrameInfo entries are mirrored as len-1-pos (no
      // clamping); negative entries are left untouched.
1984  for (int i=0; i<3; ++i) {
1985  int pos = annots->m_FrameInfo[i];
1986  if (pos >= 0) {
1987  annots->m_FrameInfo[i] = len -1 - pos;
1988  }
1989  }
1990 }
// Formats a PHI-BLAST result set to m_Outfile.
//
// Visible flow: structured, tabular and "Tax BLAST report" outputs iterate
// over every result in result_set and return early (the conditions
// selecting them are not part of this listing). Otherwise the query of
// result_set[0] is acknowledged, the pattern information is printed, the
// one-line summaries of every result are shown and then one alignment
// section is written per pattern occurrence, followed by the footer of the
// first result.
//
// result_set   results of the search
// itr_num      when different from numeric_limits<unsigned int>::max(), a
//              "Results from round N" line is printed; also forwarded to
//              x_PrintTabularReport() and x_DisplayDeflines()
// prev_seqids  forwarded to x_DisplayDeflines()
//
// Unlike errors logged elsewhere with ERR_POST, errors and warnings here
// are written to m_Outfile; the first result with errors ends the call.
1992 void
1993 CBlastFormat::PrintPhiResult(const blast::CSearchResultSet& result_set,
     // NOTE(review): a parameter line is skipped by the listing here; the
     // body below uses a `queries` argument.
1995  unsigned int itr_num
1996  /* = numeric_limits<unsigned int>::max() */,
1997  blast::CPsiBlastIterationState::TSeqIds prev_seqids
1998  /* = CPsiBlastIterationState::TSeqIds() */)
1999 {
2000  // For remote searches, we don't retrieve the sequence data for the query
2001  // sequence when initially sending the request to the BLAST server (if it's
2002  // a GI/accession/TI), so we flush the scope so that it can be retrieved
2003  // (needed if a self-hit is found) again. This is not applicable if the
2004  // query sequence(s) are specified as FASTA (will be identified by local
2005  // IDs).
2006  if (m_IsRemoteSearch && !s_HasLocalIDs(queries)) {
2008  }
2018  {
2019  ITERATE(CSearchResultSet, result, result_set) {
2020  x_PrintStructuredReport(**result, queries);
2021  }
2022  return;
2023  }
2025  ITERATE(CSearchResultSet, result, result_set) {
2026  if ((**result).HasErrors()) {
2027  m_Outfile << "\n" << (**result).GetErrorStrings() << "\n";
2028  return; // errors are deemed fatal
2029  }
2030  if ((**result).HasWarnings()) {
2031  m_Outfile << "\n" << (**result).GetWarningStrings() << "\n";
2032  }
2033  }
2038  ITERATE(CSearchResultSet, result, result_set) {
2039  x_PrintTabularReport(**result, itr_num);
2040  }
2041  return;
2042  }
2044  string reportCaption = "Tax BLAST report";
2046  m_Outfile << reportCaption;
2047  ITERATE(CSearchResultSet, result, result_set) {
2049  }
2050  return;
2051  }
      // NOTE(review): result_set[0] is read without a visible emptiness
      // check — confirm callers never pass an empty result set.
2053  const CSearchResults& first_results = result_set[0];
2055  if (itr_num != numeric_limits<unsigned int>::max()) {
2056  m_Outfile << "Results from round " << itr_num << "\n";
2057  }
      // NOTE(review): no check of bhandle is visible before it is used
      // below — confirm the query always resolves here.
2059  CBioseq_Handle bhandle = m_Scope->GetBioseqHandle(*first_results.GetSeqId(),
2061  CConstRef<CBioseq> bioseq = bhandle.GetBioseqCore();
2063  // print the preamble for this query
2065  m_Outfile << "\n\n";
2066  CBlastFormatUtil::AcknowledgeBlastQuery(*bioseq, kFormatLineLength,
2068  m_IsHTML, false,
2069  first_results.GetRID());
      // Second "Tax BLAST report" section in this function; the condition
      // that selects it is not part of this listing.
2072  string reportCaption = "Tax BLAST report";
2074  m_Outfile << reportCaption;
2075  ITERATE(CSearchResultSet, result, result_set) {
2077  }
2078  return;
2079  }
2081  const SPHIQueryInfo *phi_query_info = first_results.GetPhiQueryInfo();
      // Print the pattern, its probability and the offset of every occurrence.
2083  if (phi_query_info)
2084  {
2085  vector<int> offsets;
2086  for (int index=0; index<phi_query_info->num_patterns; index++)
2087  offsets.push_back(phi_query_info->occurrences[index].offset);
2089  CBlastFormatUtil::PrintPhiInfo(phi_query_info->num_patterns,
2090  string(phi_query_info->pattern),
2091  phi_query_info->probability,
2092  offsets, m_Outfile);
2093  }
2095  // quit early if there are no hits
2096  if ( !first_results.HasAlignments() ) {
2097  m_Outfile << "\n\n"
2098  << "***** " << CBlastFormatUtil::kNoHitsFound << " *****" << "\n"
2099  << "\n\n";
2100  x_PrintOneQueryFooter(*first_results.GetAncillaryData());
2101  return;
2102  }
2104  _ASSERT(first_results.HasAlignments());
2105  //-------------------------------------------------
      // One-line summaries are printed for every result in the set.
2107  ITERATE(CSearchResultSet, result, result_set)
2108  {
2109  CConstRef<CSeq_align_set> aln_set = (**result).GetSeqAlign();
2110  x_DisplayDeflines(aln_set, itr_num, prev_seqids);
2111  }
2113  //-------------------------------------------------
2114  // print the alignments
2115  m_Outfile << "\n";
      // NOTE(review): the start of the statement that ends on the next line
      // (it appears to compute `flags`, used below) is not in this listing.
2119  (m_IsBl2Seq && !m_IsDbScan), false);
2121  if (phi_query_info)
2122  {
2123  SPHIPatternInfo *occurrences = phi_query_info->occurrences;
2124  int index;
      // One alignment section per pattern occurrence.
      // NOTE(review): result_set[index] assumes the set holds at least
      // num_patterns results — confirm against the producer of result_set.
2125  for (index=0; index<phi_query_info->num_patterns; index++)
2126  {
      // The list holds raw FeatureInfo pointers; they are deleted by hand
      // at the end of each iteration.
2127  list <CDisplaySeqalign::FeatureInfo*> phiblast_pattern;
2128  CSeq_id* id = new CSeq_id;
2129  id->Assign(*(result_set[index]).GetSeqId());
      // NOTE(review): the declaration / allocation of feature_info is not
      // part of this listing.
      // The feature marks the pattern occurrence on the query:
      // [offset, offset + length - 1], drawn with '*'.
2131  feature_info->seqloc = new CSeq_loc(*id, (TSeqPos) occurrences[index].offset,
2132  (TSeqPos) (occurrences[index].offset + occurrences[index].length - 1));
2133  feature_info->feature_char = '*';
2134  feature_info->feature_id = "pattern";
2135  phiblast_pattern.push_back(feature_info);
      // Occurrence number and position are shown 1-based.
2137  m_Outfile << "\nSignificant alignments for pattern occurrence " << index+1
2138  << " at position " << 1+occurrences[index].offset << "\n\n";
2140  TMaskedQueryRegions masklocs;
2141  result_set[index].GetMaskedQueryRegions(masklocs);
2142  CConstRef<CSeq_align_set> aln_set = result_set[index].GetSeqAlign();
2143  CSeq_align_set copy_aln_set;
2144  CBlastFormatUtil::PruneSeqalign(*aln_set, copy_aln_set, m_NumAlignments);
2146  CDisplaySeqalign display(copy_aln_set, *m_Scope, &masklocs, &phiblast_pattern,
2147  m_MatrixName);
2149  display.SetDbName(m_DbName);
2150  display.SetDbType(!m_DbIsAA);
2151  display.SetLineLen(m_LineLength);
2153  // set the alignment flags
2154  display.SetAlignOption(flags);
2156  if (m_LongSeqId) {
2157  display.UseLongSequenceIds();
2158  }
2160  if (m_Program == "blastn" || m_Program == "megablast") {
2163  }
2164  else {
2167  }
2172  display.DisplaySeqalign(m_Outfile);
2173  m_Outfile << "\n";
      // NOTE(review): this cleanup is skipped if anything above throws, in
      // which case the FeatureInfo objects would not be deleted.
2175  NON_CONST_ITERATE(list<CDisplaySeqalign::FeatureInfo*>, itr, phiblast_pattern) {
2176  delete *itr;
2177  }
2178  }
2179  }
2181  // print the ancillary data for this query
2183  x_PrintOneQueryFooter(*first_results.GetAncillaryData());
2184 }
// Writes the closing part of the report.
//
// Visible flow: results still accumulated for XML2/JSON output are written
// with x_WriteXML2() and the accumulators are cleared; formats at or above
// CFormattingArgs::eTabular get no footer; for the remaining text/HTML
// output the database report(s), the scoring matrix, gap penalties, word
// threshold and window size are printed, followed by kHTML_Suffix for HTML.
//
// options  search options the footer values are read from
//
// NOTE(review): several conditions and closing lines in the first half of
// this function are skipped by the listing, so the exact branch structure
// there cannot be read from here.
2188 void
2189 CBlastFormat::PrintEpilog(const blast::CBlastOptions& options)
2190 {
      // Flush the results accumulated for the last query. The construction
      // of report_data is not part of this listing.
2193  if(!m_AccumulatedResults.empty()) {
2194  CRef <CBlastSearchQuery> q = m_AccumulatedQueries->GetBlastSearchQuery(0);
2195  if(m_IsBl2Seq) {
2198  x_WriteXML2(report_data);
2199  }
2200  else if(m_IsIterative){
2202  m_Scope, m_DbInfo);
2203  x_WriteXML2(report_data);
2204  }
2205  m_AccumulatedResults.clear();
2206  m_AccumulatedQueries->clear();
2207  }
2211  }
2212  else {
2214  }
2215  return;
2216  }
2221  return;
2222  } else if (m_FormatType >= CFormattingArgs::eTabular)
2223  return; // No footer for these.
2225  // Most of XML is printed as it's finished.
2226  // the epilog closes the report.
2229  m_AccumulatedResults.clear();
2230  m_AccumulatedQueries->clear();
2231  return;
2232  }
2234  m_Outfile << NcbiEndl << NcbiEndl;
      // deltablast additionally reports the conserved-domain database.
2235  if (m_Program == "deltablast" && !m_DomainDbInfo.empty()) {
2236  m_Outfile << "Conserved Domain";
2237  CBlastFormatUtil::PrintDbReport(m_DomainDbInfo, kFormatLineLength,
2238  m_Outfile, false);
2239  }
      // The database report is skipped for bl2seq runs unless they are
      // database scans.
2241  if ( !m_IsBl2Seq || m_IsDbScan) {
2242  CBlastFormatUtil::PrintDbReport(m_DbInfo, kFormatLineLength,
2243  m_Outfile, false);
2244  }
      // Nucleotide programs print match reward / mismatch penalty in place
      // of a matrix name.
2246  if (m_Program == "blastn" || m_Program == "megablast") {
2247  m_Outfile << "\n\nMatrix: " << "blastn matrix " <<
2248  options.GetMatchReward() << " " <<
2249  options.GetMismatchPenalty() << "\n";
2250  }
2251  else {
2252  m_Outfile << "\n\nMatrix: " << options.GetMatrixName() << "\n";
2253  }
2255  if (options.GetGappedMode() == true) {
2256  double gap_extension = (double) options.GetGapExtensionCost();
      // For megablast/blastn with a zero extension cost the printed value is
      // derived as (reward - 2*penalty) / 2.
2257  if ((m_Program == "megablast" || m_Program == "blastn") && options.GetGapExtensionCost() == 0)
2258  { // Formula from PMID 10890397 applies if both gap values are zero.
2259  gap_extension = -2*options.GetMismatchPenalty() + options.GetMatchReward();
2260  gap_extension /= 2.0;
2261  }
2262  m_Outfile << "Gap Penalties: Existence: "
2263  << options.GetGapOpeningCost() << ", Extension: "
2264  << gap_extension << "\n";
2265  }
      // Word threshold and window size are printed only when non-zero.
2266  if (options.GetWordThreshold()) {
2267  m_Outfile << "Neighboring words threshold: " <<
2268  options.GetWordThreshold() << "\n";
2269  }
2270  if (options.GetWindowSize()) {
2271  m_Outfile << "Window for multiple hits: " <<
2272  options.GetWindowSize() << "\n";
2273  }
2275  if (m_IsHTML) {
2276  m_Outfile << kHTML_Suffix << "\n";
2277  }
2278 }
2281 {
2282  // Do not reset the scope for BLAST2Sequences or else we'll loose the
2283  // sequence data! (see x_CreateSubjectBioseq)
2284  if (m_IsBl2Seq){
2285  return;
2286  }
2288  // Our current XML/ASN.1 libraries do not have provisions for
2289  // incremental object input/output, so with XML output format we
2290  // need to accumulate the whole document before writing any data.
2292  // This means that XML output requires more memory than other
2293  // output formats.
2296  {
2298  }
2299 }
2301 static string s_GetBaseName(const string & baseFile, bool isXML, bool withPath)
2302 {
2303  string dir = kEmptyStr;
2304  string base = kEmptyStr;
2305  string ext = kEmptyStr;
2306  CDirEntry::SplitPath(baseFile, withPath ? &dir:NULL, &base, &ext );
2307  if(!((isXML && NStr::CompareNocase(ext, ".xml") == 0 ) ||
2308  (!isXML && NStr::CompareNocase(ext, ".json") == 0))){
2309  base += ext;
2310  }
2311  if(withPath)
2312  return dir + base;
2314  return base;
2315 }
2318 {
2320  BlastXML2_FormatReport(&report_data, &m_Outfile);
2321  }
2322  else if (m_FormatType == CFormattingArgs::eJson_S) {
2323  m_XMLFileCount++;
2324  if(m_XMLFileCount > 1) {
2325  m_Outfile << ",\n";
2326  }
2327  BlastJSON_FormatReport(&report_data, &m_Outfile);
2328  }
2329  else {
2330  m_XMLFileCount++;
2333  string file_name = s_GetBaseName(m_BaseFile, true, true) + "_" + NStr::IntToString(m_XMLFileCount) + ".xml";
2334  BlastXML2_FormatReport(&report_data, file_name);
2335  }
2336  else {
2337  string file_name = s_GetBaseName(m_BaseFile, false, true) + "_" + NStr::IntToString(m_XMLFileCount) + ".json";
2338  BlastJSON_FormatReport(&report_data, file_name);
2339  }
2340  }
2341 }
// Produces XML2/JSON output for one result.
//
// For iterative or bl2seq searches the results are accumulated per query in
// m_AccumulatedResults / m_AccumulatedQueries: a result whose query id
// matches the first accumulated result is appended; a result for a
// different query first writes the accumulated batch with x_WriteXML2(),
// clears the accumulators and then starts a new batch. Whatever is still
// accumulated when this function returns is not written here.
// For all other searches a report is written immediately for this result.
//
// results  result to report; its alignments are trimmed to m_HitlistSize
2343 void CBlastFormat::x_PrintXML2Report(const blast::CSearchResults& results,
     // NOTE(review): a parameter line is skipped by the listing here; the
     // body below uses a `queries` argument.
2345 {
      // NOTE(review): constness is cast away and the caller's results
      // object is modified by TrimSeqAlign(); the CRef also takes a
      // reference on an object this function did not create — confirm both
      // are intended.
2346  CRef<CSearchResults> res(const_cast<CSearchResults*>(&results));
2347  res->TrimSeqAlign(m_HitlistSize);
2348  if((m_IsIterative) || (m_IsBl2Seq)) {
2349  if(m_AccumulatedResults.empty()) {
      // First result of a batch: remember it together with its query.
2350  _ASSERT(m_AccumulatedQueries->size() == 0);
2351  m_AccumulatedResults.push_back(res);
2352  CConstRef<CSeq_id> query_id = results.GetSeqId();
2353  ITERATE(CBlastQueryVector, itr, *queries) {
2354  if (query_id->Match(*(*itr)->GetQueryId())) {
2355  m_AccumulatedQueries->push_back(*itr);
2356  break;
2357  }
2358  }
2359  }
2360  else {
2361  CConstRef<CSeq_id> query_id = results.GetSeqId();
      // Same query as the current batch: just append.
2362  if(m_AccumulatedResults[0].GetSeqId()->Match(*query_id)) {
2363  m_AccumulatedResults.push_back(res);
2364  }
2365  else {
      // Query changed: write the finished batch, then start a new one.
      // The construction of report_data is not part of this listing.
2366  CRef <CBlastSearchQuery> q = m_AccumulatedQueries->GetBlastSearchQuery(0);
2367  if(m_IsBl2Seq) {
2370  x_WriteXML2(report_data);
2371  }
2372  else {
2374  m_Scope, m_DbInfo);
2375  x_WriteXML2(report_data);
2376  }
2377  m_AccumulatedResults.clear();
2378  m_AccumulatedQueries->clear();
2380  m_AccumulatedResults.push_back(res);
2381  ITERATE(CBlastQueryVector, itr, *queries) {
2382  if (query_id->Match(*(*itr)->GetQueryId())) {
2383  m_AccumulatedQueries->push_back(*itr);
2384  break;
2385  }
2386  }
2387  }
2388  }
2389  }
2390  else {
      // NOTE(review): the declaration of q is not part of this listing; if
      // no entry of `queries` matches the result's id, q keeps whatever
      // value that declaration gave it — confirm report_data copes with that.
2392  CConstRef<CSeq_id> query_id = results.GetSeqId();
2393  ITERATE(CBlastQueryVector, itr, *queries) {
2394  if (query_id->Match(*(*itr)->GetQueryId())) {
2395  q = *itr;
2396  break;
2397  }
2398  }
2399  CCmdLineBlastXML2ReportData report_data (q, *res, m_Options, m_Scope, m_DbInfo);
2400  x_WriteXML2(report_data);
2401  }
2402 }
/// NOTE(review): the signature line of this function is absent from this
/// listing (presumably the XML2 master-file writer -- confirm).
///
/// Writes to m_Outfile either just the closing </BlastXML2> tag (early-return
/// path; its guarding condition is on a line not shown here) or a complete
/// document: XML declaration, <BlastXML2> root element, one <xi:include>
/// element per file index 1..m_XMLFileCount, and the closing tag.
2405 {
      // NOTE(review): the statement that opens this early-return block is not
      // present in this listing.
2407  m_Outfile << "</BlastXML2>\n";
2408  return;
2409  }
      // NOTE(review): every namespace / schemaLocation attribute value below is
      // an empty string in this listing; confirm against the intended URIs.
2411  m_Outfile << "<?xml version=\"1.0\"?>\n<BlastXML2\n"
2412  "xmlns=\"\"\n"
2413  "xmlns:xi=\"\"\n"
2414  "xmlns:xs=\"\"\n"
2415  "xs:schemaLocation=\"\">\n";
      // Each included file is named "<base>_<i>.xml" for i = 1..m_XMLFileCount,
      // where <base> comes from s_GetBaseName(m_BaseFile, true, false).
2417  string base = s_GetBaseName(m_BaseFile, true, false);
2418  for(int i = 1; i <= m_XMLFileCount; i ++) {
2419  string file_name = base + "_" + NStr::IntToString(i) + ".xml";
2420  m_Outfile << "\t<xi:include href=\"" + file_name + "\"/>\n";
2421  }
2422  m_Outfile << "</BlastXML2>\n";
2423 }
/// NOTE(review): the signature line of this function is absent from this
/// listing (presumably the JSON master-file writer -- confirm).
///
/// Writes to m_Outfile either just the closing "]\n}\n" (early-return path;
/// its guarding condition is on a line not shown here) or a complete JSON
/// object whose "BlastJSON" array holds one {"File": ...} entry per file
/// index 1..m_XMLFileCount.
2426 {
      // NOTE(review): the statement that opens this early-return block is not
      // present in this listing.
2428  m_Outfile << "]\n}\n";
2429  return;
2430  }
2432  m_Outfile << "{\n\t\"BlastJSON\": [\n";
      // Each entry names "<base>_<i>.json", where <base> comes from
      // s_GetBaseName(m_BaseFile, true, false).
2434  string base = s_GetBaseName(m_BaseFile, true, false);
2435  for(int i = 1; i <= m_XMLFileCount; i ++) {
2436  string file_name = base + "_" + NStr::IntToString(i) + ".json";
2437  m_Outfile << "\t\t{\"File\": \"" + file_name + "\" }";
      // Comma after every entry except the last one.
2438  if(i != m_XMLFileCount)
2439  m_Outfile << ",";
2440  m_Outfile << "\n";
2441  }
      // Unlike the early-return path, no newline is written after the final '}'.
2442  m_Outfile << "\t]\n}";
2443 }
/// NOTE(review): the signature line and the final statement line(s) of this
/// function are absent from this listing (presumably the SAM formatter
/// initialisation -- confirm).
///
/// Builds a CSAM_Formatter::SProgramInfo from the literal "0", the printed
/// BLAST version and m_Cmdline, then sets its m_Name to m_Program. What is
/// done with that object afterwards is not visible here.
2446 {
2447  CSAM_Formatter::SProgramInfo pg("0", blast::CBlastVersion().Print(), m_Cmdline);
2448  pg.m_Name = m_Program;
2451 }
/// NOTE(review): the signature line of this function is absent from this
/// listing (presumably the s_SetCompBasedStats predicate that is consulted
/// before the eCompBasedStats usage parameter is added -- confirm).
///
/// Returns true when `program` is one of eBlastp, eTblastn, ePSIBlast,
/// ePSITblastn, eRPSBlast, eRPSTblastn, eBlastx or eDeltaBlast, and false
/// for every other value.
2454 {
2455  if (program == eBlastp || program == eTblastn ||
2456  program == ePSIBlast || program == ePSITblastn ||
2457  program == eRPSBlast || program == eRPSTblastn ||
2458  program == eBlastx || program == eDeltaBlast) {
2459  return true;
2460  }
2461  return false;
2462 }
/// NOTE(review): the signature line of this function is absent from this
/// listing (presumably the usage-report logger taking `report` -- confirm),
/// and a number of statement lines inside the body are absent as well
/// (marked where they occur).
///
/// When `report` is enabled, adds search parameters to it: e-value
/// threshold, hit-list size, composition-based statistics (only for programs
/// accepted by s_SetCompBasedStats), and then either subject statistics
/// (m_IsBl2Seq set) or database name / size / date and id-list flags.
/// Does nothing when report.IsEnabled() is false.
2465 {
2466  if (report.IsEnabled()) {
2468  EProgram task = m_Options->GetProgram();
2469  report.AddParam(CBlastUsageReport::eEvalueThreshold, m_Options->GetEvalueThreshold());
2470  report.AddParam(CBlastUsageReport::eHitListSize, m_Options->GetHitlistSize());
      // NOTE(review): original lines between here and the next statement are
      // not present in this listing.
2473  if (s_SetCompBasedStats(task)) {
2474  report.AddParam(CBlastUsageReport::eCompBasedStats, m_Options->GetCompositionBasedStats());
2475  }
      // Total sequence count over all m_DbInfo entries.
      // NOTE(review): accumulated in an int -- confirm it cannot overflow.
2477  int num_seqs = 0;
2478  for (size_t i = 0; i < m_DbInfo.size(); i++) {
2479  num_seqs += m_DbInfo[i].number_seqs;
2480  }
2481  if( m_IsBl2Seq) {
      // Subject-based search: report subject count (and total length when
      // the subjects come from m_SeqInfoSrc).
2482  report.AddParam(CBlastUsageReport::eBl2seq, "true");
2483  if (m_IsDbScan) {
2484  report.AddParam(CBlastUsageReport::eNumSubjects, num_seqs);
      // NOTE(review): one original line of this branch is not present in
      // this listing.
2486  }
2487  else if (m_SeqInfoSrc.NotEmpty()){
2488  report.AddParam(CBlastUsageReport::eNumSubjects, (int) m_SeqInfoSrc->Size());
      // Sum of all subject lengths, each narrowed to int before adding.
      // NOTE(review): confirm the int total cannot overflow.
2489  int total_subj_length = 0;
2490  for (size_t i = 0; i < m_SeqInfoSrc->Size(); i++) {
2491  total_subj_length += (int)m_SeqInfoSrc->GetLength(static_cast<Uint4>(i));
2492  }
2493  report.AddParam(CBlastUsageReport::eSubjectsLength, total_subj_length);
2494  }
2495  }
2496  else {
      // Database search: report the database name without its directory
      // part, as obtained from CFile::SplitPath.
2497  string dir = kEmptyStr;
2498  CFile::SplitPath(m_DbName, &dir);
2499  string db_name = m_DbName;
2500  if (dir != kEmptyStr) {
2501  db_name = m_DbName.substr(dir.length());
2502  }
      // Cap the reported name at 500 characters.
2504  if (db_name.size() > 500) {
2505  db_name.resize(500);
      // NOTE(review): one original line of this branch is not present in
      // this listing.
2507  }
2508  report.AddParam(CBlastUsageReport::eDBName, db_name);
2510  report.AddParam(CBlastUsageReport::eDBNumSeqs, num_seqs);
      // NOTE(review): m_DbInfo[0] is read without an emptiness check in this
      // block -- confirm m_DbInfo is never empty on this path.
2511  report.AddParam(CBlastUsageReport::eDBDate, m_DbInfo[0].date);
2512  if(m_SearchDb.NotEmpty()){
      // Flag which kinds of ids the positive list contains.
2513  if(m_SearchDb->GetGiList().NotEmpty()) {
2514  CRef<CSeqDBGiList> l = m_SearchDb->GetGiList();
2515  if (l->GetNumGis()) {
2516  report.AddParam(CBlastUsageReport::eGIList, true);
2517  }
      // NOTE(review): the bodies of the next two branches are not present
      // in this listing.
2518  if (l->GetNumSis()){
2520  }
2521  if (l->GetNumTaxIds()){
2523  }
2524  if (l->GetNumPigs()) {
2525  report.AddParam(CBlastUsageReport::eIPGList, true);
2526  }
2527  }
      // Same checks for the negative list.
      // NOTE(review): the bodies of all four branches below are not present
      // in this listing.
2528  if(m_SearchDb->GetNegativeGiList().NotEmpty()) {
2529  CRef<CSeqDBGiList> l = m_SearchDb->GetNegativeGiList();
2530  if (l->GetNumGis()) {
2532  }
2533  if (l->GetNumSis()){
2535  }
2536  if (l->GetNumTaxIds()){
2538  }
2539  if (l->GetNumPigs()) {
2541  }
2542  }
2543  }
2544  }
2545  }
2546 }
