NCBI C++ ToolKit
blast_app_util.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_app_util.cpp 100888 2023-09-25 14:06:54Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /** @file blast_app_util.cpp
31  * Utility functions for BLAST command line applications
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include "blast_app_util.hpp"
36 
37 #include <serial/serial.hpp>
38 #include <serial/objostr.hpp>
39 
42 #include <algo/blast/api/objmgr_query_data.hpp> // for CObjMgr_QueryFactory
45 #include <algo/blast/blastinput/blast_input.hpp> // for CInputException
49 #include <objmgr/util/sequence.hpp>
50 
52 #include <serial/typeinfo.hpp> // for CTypeInfo, needed by SerialClone
56 
57 #if defined(NCBI_OS_LINUX) && HAVE_MALLOC_H
58 #include <malloc.h>
59 #endif
60 
63 USING_SCOPE(blast);
64 
66 {
67  if (hits >= 0) {
68  double ratio = 1.0 * (hits+1) / m_BatchSize;
69  m_Ratio = (m_Ratio < 0) ? ratio
70  : k_MixIn * ratio + (1.0 - k_MixIn) * m_Ratio;
71  m_BatchSize = (Int4) (1.0 * m_TargetHits / m_Ratio);
72  }
75  m_Ratio = -1.0;
76  } else if (m_BatchSize < k_MinBatchSize) {
78  m_Ratio = -1.0;
79  }
80  return m_BatchSize;
81 }
82 
87  bool verbose_output,
88  const string& client_id /* = kEmptyStr */,
90  /* = CRef<objects::CPssmWithParameters>() */)
91 {
92  _ASSERT(queries || pssm);
93  _ASSERT(db_args);
94  _ASSERT(opts_hndl);
95 
96  CRef<CRemoteBlast> retval;
97 
98  CRef<CSearchDatabase> search_db = db_args->GetSearchDatabase();
99  if (search_db.NotEmpty()) {
100  if (pssm.NotEmpty()) {
101  _ASSERT(queries.Empty());
102  retval.Reset(new CRemoteBlast(pssm, opts_hndl, *search_db));
103  } else {
104  retval.Reset(new CRemoteBlast(queries, opts_hndl, *search_db));
105  }
106  } else {
107  if (pssm.NotEmpty()) {
108  NCBI_THROW(CInputException, eInvalidInput,
109  "Remote PSI-BL2SEQ is not supported");
110  } else {
111  // N.B.: there is NO scope needed in the GetSubjects call because
112  // the subjects (if any) should have already been added in
113  // InitializeSubject
114  retval.Reset(new CRemoteBlast(queries, opts_hndl,
115  db_args->GetSubjects()));
116  }
117  }
118  if (verbose_output) {
119  retval->SetVerbose();
120  }
121  if (client_id != kEmptyStr) {
122  retval->SetClientId(client_id);
123  }
124  return retval;
125 }
126 
127 blast::SDataLoaderConfig
129  CRef<blast::CLocalDbAdapter> db_adapter)
130 {
131  SDataLoaderConfig retval(query_is_protein);
133 
134  /* Load the BLAST database into the data loader configuration for the query
135  * so that if the query sequence(s) are specified as seq-ids, these can be
136  * fetched from the BLAST database being searched */
137  if (db_adapter->IsBlastDb() && /* this is a BLAST database search */
138  retval.m_UseBlastDbs && /* the BLAST database data loader is requested */
139  (query_is_protein == db_adapter->IsProtein())) { /* the same database type is used for both queries and subjects */
140  // Make sure we don't add the same database more than once
141  vector<string> default_dbs;
142  NStr::Split(retval.m_BlastDbName, " ", default_dbs);
143  if (default_dbs.size() &&
144  (find(default_dbs.begin(), default_dbs.end(),
145  db_adapter->GetDatabaseName()) == default_dbs.end())) {
146  CNcbiOstrstream oss;
147  oss << db_adapter->GetDatabaseName() << " " << retval.m_BlastDbName;
149  }
150  }
151  if (retval.m_UseBlastDbs) {
152  _TRACE("Initializing query data loader to '" << retval.m_BlastDbName
153  << "' (" << (query_is_protein ? "protein" : "nucleotide")
154  << " BLAST database)");
155  }
156  if (retval.m_UseGenbank) {
157  _TRACE("Initializing query data loader to use GenBank data loader");
158  }
159  return retval;
160 }
161 
162 void
165  bool is_remote_search,
166  CRef<blast::CLocalDbAdapter>& db_adapter,
167  CRef<objects::CScope>& scope)
168 {
169  string dl = kEmptyStr;
170  db_adapter.Reset();
171 
172  _ASSERT(db_args.NotEmpty());
173  CRef<CSearchDatabase> search_db = db_args->GetSearchDatabase();
174 
175  // Initialize the scope...
176  if (is_remote_search) {
177  const bool is_protein =
178  Blast_SubjectIsProtein(opts_hndl->GetOptions().GetProgramType())
179  ? true : false;
180  CRef<CBlastScopeSource> scope_src;
181  if (search_db.Empty()) {
182  SDataLoaderConfig config(is_protein);
183  scope_src.Reset(new CBlastScopeSource(config));
184  }
185  else {
186  SDataLoaderConfig config(search_db->GetDatabaseName(), is_protein);
187  scope_src.Reset(new CBlastScopeSource(config));
188  }
189  // configure scope to fetch sequences remotely for formatting
190  if (scope.NotEmpty()) {
191  scope_src->AddDataLoaders(scope);
192  } else {
193  scope = scope_src->NewScope();
194  }
195  } else {
196  if (scope.Empty()) {
198  }
199  }
200  _ASSERT(scope.NotEmpty());
201 
202  // ... and then the subjects
203  CRef<IQueryFactory> subjects;
204  if ( (subjects = db_args->GetSubjects(scope)) ) {
205  _ASSERT(search_db.Empty());
206  char* bl2seq_legacy = getenv("BL2SEQ_LEGACY");
207  if (bl2seq_legacy)
208  db_adapter.Reset(new CLocalDbAdapter(subjects, opts_hndl, false));
209  else
210  db_adapter.Reset(new CLocalDbAdapter(subjects, opts_hndl, true));
211  } else {
212  _ASSERT(search_db.NotEmpty());
213  try {
214  // Try to open the BLAST database even for remote searches, as if
215  // it is available locally, it will be better to fetch the
216  // sequence data for formatting from this (local) source
217  CRef<CSeqDB> seqdb = search_db->GetSeqDb();
218  db_adapter.Reset(new CLocalDbAdapter(*search_db));
219  dl = RegisterOMDataLoader(seqdb);
220  scope->AddDataLoader(dl);
221  } catch (const CSeqDBException&) {
222  // The BLAST database couldn't be found, report this for local
223  // searches, but for remote searches go on.
224  dl = kEmptyStr;
225  if (is_remote_search ) {
226  db_adapter.Reset(new CLocalDbAdapter(*search_db));
227  } else {
228  throw;
229  }
230  }
231  }
232 
233  /// Set the BLASTDB data loader as the default data loader (if applicable)
234  if (search_db.NotEmpty()) {
235  if ( dl != kEmptyStr) {
236  // FIXME: will this work with multiple BLAST DBs?
237  scope->AddDataLoader(dl, CBlastDatabaseArgs::kSubjectsDataLoaderPriority);
238  _TRACE("Setting " << dl << " priority to "
240  << " for subjects");
241  }
242  }
243  return;
244 }
245 
247 {
248  // the blast formatter requires that the database coexist in
249  // the same scope with the query sequences
254  CBlastDbDataLoader::SBlastDbParam param(db_handle);
255  string retval(CBlastDbDataLoader::GetLoaderNameFromArgs(param));
256  _TRACE("Registering " << retval << " at priority " <<
258  << " for subjects");
259  return retval;
260 }
261 
262 
267  const string& client_id /* = kEmptyStr */,
269  /* = CRef<objects::CPssmWithParameters>() */,
270  unsigned int num_iters
271  /* = 0 */)
272 {
273  _ASSERT(queries || pssm);
274  _ASSERT(db_args);
275  _ASSERT(opts_hndl);
276 
277  CRef<CExportStrategy> retval;
278 
279  CRef<CSearchDatabase> search_db = db_args->GetSearchDatabase();
280  if (search_db.NotEmpty())
281  {
282  if (pssm.NotEmpty())
283  {
284  _ASSERT(queries.Empty());
285  if(num_iters != 0)
286  retval.Reset(new blast::CExportStrategy(pssm, opts_hndl, search_db, client_id, num_iters));
287  else
288  retval.Reset(new blast::CExportStrategy(pssm, opts_hndl, search_db, client_id));
289  }
290  else
291  {
292  if(num_iters != 0)
293  retval.Reset(new blast::CExportStrategy(queries, opts_hndl, search_db, client_id, num_iters));
294  else
295  retval.Reset(new blast::CExportStrategy(queries, opts_hndl, search_db, client_id));
296  }
297  }
298  else
299  {
300  if (pssm.NotEmpty())
301  {
302  NCBI_THROW(CInputException, eInvalidInput,
303  "Remote PSI-BL2SEQ is not supported");
304  }
305  else
306  {
307  retval.Reset(new blast::CExportStrategy(queries, opts_hndl,
308  db_args->GetSubjects(), client_id));
309  }
310  }
311 
312  return retval;
313 }
314 
315 
316 /// Real implementation of search strategy extraction
317 /// @todo refactor this code so that it can be reused in other contexts
318 static void
321  CRef<blast::CBlastOptionsHandle> options_handle,
324  /* = CRef<objects::CPssmWithParameters>() */
325  unsigned int num_iters /* = 0 */)
326 {
327  if ( !out )
328  return;
329 
330  _ASSERT(db_args);
331  _ASSERT(options_handle);
332 
333  try
334  {
335  CRef<CExportStrategy> export_strategy =
336  s_InitializeExportStrategy(queries, db_args, options_handle,
337  kEmptyStr, pssm, num_iters);
338  export_strategy->ExportSearchStrategy_ASN1(out);
339  }
340  catch (const CBlastException& e)
341  {
343  NCBI_THROW(CInputException, eInvalidInput,
344  "Saving search strategies with gi lists is currently "
345  "not supported");
346  }
347  throw;
348  }
349 }
350 
351 /// Converts a list of Bioseqs into a TSeqLocVector. All Bioseqs are added to
352 /// the same CScope object
353 /// @param subjects Bioseqs to convert
354 static TSeqLocVector
356 {
357  TSeqLocVector retval;
358  CRef<CScope> subj_scope(new CScope(*CObjectManager::GetInstance()));
359  ITERATE(CBlast4_subject::TSequences, bioseq, subjects) {
360  subj_scope->AddBioseq(**bioseq);
361  CRef<CSeq_id> seqid = FindBestChoice((*bioseq)->GetId(),
363  const TSeqPos length = (*bioseq)->GetInst().GetLength();
364  CRef<CSeq_loc> sl(new CSeq_loc(*seqid, 0, length-1));
365  retval.push_back(SSeqLoc(sl, subj_scope));
366  }
367  return retval;
368 }
369 
370 /// Import PSSM into the command line arguments object
371 static void
374  blast::CBlastAppArgs* cmdline_args)
375 {
377  (const_cast<CPssmWithParameters*>(&queries.GetPssm()));
378  CPsiBlastAppArgs* psi_args = NULL;
379  CTblastnAppArgs* tbn_args = NULL;
380 
381  if ( (psi_args = dynamic_cast<CPsiBlastAppArgs*>(cmdline_args)) ) {
382  psi_args->SetInputPssm(pssm);
383  } else if ( (tbn_args =
384  dynamic_cast<CTblastnAppArgs*>(cmdline_args))) {
385  tbn_args->SetInputPssm(pssm);
386  } else {
387  EBlastProgramType p = opts_hndl->GetOptions().GetProgramType();
388  string msg("PSSM found in saved strategy, but not supported ");
389  msg += "for " + Blast_ProgramNameFromType(p);
390  NCBI_THROW(CBlastException, eNotSupported, msg);
391  }
392 }
393 
394 /// Import queries into command line arguments object
395 static void
398  blast::CBlastAppArgs* cmdline_args)
399 {
401 
402  // Stuff the query bioseq or seqloc list in the input stream of the
403  // cmdline_args
404  if (queries.IsSeq_loc_list()) {
405  const CBlast4_queries::TSeq_loc_list& seqlocs =
406  queries.GetSeq_loc_list();
409 
410  EBlastProgramType prog = opts_hndl->GetOptions().GetProgramType();
413  CBlastScopeSource scope_src(dlconfig);
414  CRef<CScope> scope(scope_src.NewScope());
415 
416  ITERATE(CBlast4_queries::TSeq_loc_list, itr, seqlocs) {
417  if ((*itr)->GetId()) {
418  CBioseq_Handle bh = scope->GetBioseqHandle(*(*itr)->GetId());
419  out.Write(bh);
420  }
421  }
422  scope.Reset();
423  scope_src.RevokeBlastDbDataLoader();
424 
425  } else {
426  _ASSERT(queries.IsBioseq_set());
427  const CBlast4_queries::TBioseq_set& bioseqs =
428  queries.GetBioseq_set();
431 
432  ITERATE(CBioseq_set::TSeq_set, seq_entry, bioseqs.GetSeq_set()){
433  out.Write(**seq_entry);
434  }
435  }
436 
437  const string& fname = tmpfile->GetFileName();
438  tmpfile.Reset(new CTmpFile(fname));
439  cmdline_args->SetInputStream(tmpfile);
440 }
441 
442 /// Import the database and return it in a CBlastDatabaseArgs object
445  CBlastOptionsBuilder& opts_builder,
446  bool subject_is_protein,
447  bool is_remote_search)
448 {
449  _ASSERT(subj.IsDatabase());
451  const CSearchDatabase::EMoleculeType mol = subject_is_protein
454  const string dbname(subj.GetDatabase());
455  CRef<CSearchDatabase> search_db(new CSearchDatabase(dbname, mol));
456 
457  if (opts_builder.HaveEntrezQuery()) {
458  string limit(opts_builder.GetEntrezQuery());
459  search_db->SetEntrezQueryLimitation(limit);
460  if ( !is_remote_search ) {
461  string msg("Entrez query '");
462  msg += limit + string("' will not be processed locally.\n");
463  msg += string("Please use the -remote option.");
464  throw runtime_error(msg);
465  }
466  }
467 
468  if (opts_builder.HaveGiList() || opts_builder.HaveTaxidList()) {
469  CSeqDBGiList *gilist = new CSeqDBGiList();
470  if (opts_builder.HaveGiList()) {
471  ITERATE(list<TGi>, gi, opts_builder.GetGiList()) {
472  gilist->AddGi(*gi);
473  }
474  }
475  if (opts_builder.HaveTaxidList()) {
476  list<TTaxId> list = opts_builder.GetTaxidList();
477  set<TTaxId> taxids(list.begin(), list.end());
478  gilist->AddTaxIds(taxids);
479  }
480  search_db->SetGiList(gilist);
481  }
482 
483  if (opts_builder.HaveNegativeGiList() || opts_builder.HaveNegativeTaxidList()) {
484  CSeqDBGiList *gilist = new CSeqDBGiList();
485  if (opts_builder.HaveNegativeGiList()) {
486  ITERATE(list<TGi>, gi, opts_builder.GetNegativeGiList()) {
487  gilist->AddGi(*gi);
488  }
489  }
490  if (opts_builder.HaveNegativeTaxidList()) {
491  list<TTaxId> list = opts_builder.GetNegativeTaxidList();
492  set<TTaxId> taxids(list.begin(), list.end());
493  gilist->AddTaxIds(taxids);
494  }
495  search_db->SetNegativeGiList(gilist);
496  }
497 
498  if (opts_builder.HasDbFilteringAlgorithmKey()) {
499  string algo_key = opts_builder.GetDbFilteringAlgorithmKey();
501  if(opts_builder.HasSubjectMaskingType())
502  mask_type = opts_builder.GetSubjectMaskingType();
503  search_db->SetFilteringAlgorithm(algo_key, mask_type);
504 
505  } else if (opts_builder.HasDbFilteringAlgorithmId()) {
506  int algo_id = opts_builder.GetDbFilteringAlgorithmId();
508  if(opts_builder.HasSubjectMaskingType())
509  mask_type = opts_builder.GetSubjectMaskingType();
510  search_db->SetFilteringAlgorithm(algo_id, mask_type);
511  }
512 
513  db_args->SetSearchDatabase(search_db);
514  return db_args;
515 }
516 
517 /// Import the subject sequences into a CBlastDatabaseArgs object
519 s_ImportSubjects(const CBlast4_subject& subj, bool subject_is_protein)
520 {
521  _ASSERT(subj.IsSequences());
523  TSeqLocVector subjects =
525  CRef<CScope> subj_scope = subjects.front().scope;
526  CRef<IQueryFactory> subject_factory(new CObjMgr_QueryFactory(subjects));
527  db_args->SetSubjects(subject_factory, subj_scope, subject_is_protein);
528  return db_args;
529 }
530 
531 /// Imports search strategy, using CImportStrategy.
532 static void
534  blast::CBlastAppArgs* cmdline_args,
535  bool is_remote_search,
536  bool override_query,
537  bool override_subject)
538 {
539  if ( !in ) {
540  return;
541  }
542 
543  CRef<CBlast4_request> b4req;
544  try {
545  b4req = ExtractBlast4Request(*in);
546  } catch (const CSerialException&) {
547  NCBI_THROW(CInputException, eInvalidInput,
548  "Failed to read search strategy");
549  }
550 
551  CImportStrategy strategy(b4req);
552 
553  CRef<blast::CBlastOptionsHandle> opts_hndl = strategy.GetOptionsHandle();
554  cmdline_args->SetOptionsHandle(opts_hndl);
555  const EBlastProgramType prog = opts_hndl->GetOptions().GetProgramType();
556  cmdline_args->SetTask(strategy.GetTask());
557 #if _DEBUG
558  {
559  char* program_string = 0;
560  BlastNumber2Program(prog, &program_string);
561  _TRACE("EBlastProgramType=" << program_string << " task=" << strategy.GetTask());
562  sfree(program_string);
563  }
564 #endif
565 
566  // Get the subject
567  if (override_subject) {
568  ERR_POST(Warning << "Overriding database/subject in saved strategy");
569  } else {
571  CRef<CBlast4_subject> subj = strategy.GetSubject();
572  const bool subject_is_protein = Blast_SubjectIsProtein(prog) ? true : false;
573 
574  if (subj->IsDatabase()) {
575  db_args = s_ImportDatabase(*subj, strategy.GetOptionsBuilder(),
576  subject_is_protein, is_remote_search);
577  } else {
578  db_args = s_ImportSubjects(*subj, subject_is_protein);
579  }
580  _ASSERT(db_args.NotEmpty());
581  cmdline_args->SetBlastDatabaseArgs(db_args);
582  }
583 
584  // Get the query, queries, or pssm
585  if (override_query) {
586  ERR_POST(Warning << "Overriding query in saved strategy");
587  } else {
588  CRef<CBlast4_queries> queries = strategy.GetQueries();
589  if (queries->IsPssm()) {
590  s_ImportPssm(*queries, opts_hndl, cmdline_args);
591  } else {
592  s_ImportQueries(*queries, opts_hndl, cmdline_args);
593  }
594  // Set the range restriction for the query, if applicable
595  const TSeqRange query_range = strategy.GetQueryRange();
596  if (query_range != TSeqRange::GetEmpty()) {
597  cmdline_args->GetQueryOptionsArgs()->SetRange(query_range);
598  }
599  }
600 
601  if ( CPsiBlastAppArgs* psi_args = dynamic_cast<CPsiBlastAppArgs*>(cmdline_args) )
602  {
603  psi_args->SetNumberOfIterations(strategy.GetPsiNumOfIterations());
604  }
605 }
606 
607 bool
608 RecoverSearchStrategy(const CArgs& args, blast::CBlastAppArgs* cmdline_args)
609 {
610  CNcbiIstream* in = cmdline_args->GetImportSearchStrategyStream(args);
611  if ( !in ) {
612  return false;
613  }
614  const bool is_remote_search =
615  (args.Exist(kArgRemote) && args[kArgRemote].HasValue() && args[kArgRemote].AsBoolean());
616  const bool override_query = (args[kArgQuery].HasValue() &&
617  args[kArgQuery].AsString() != kDfltArgQuery);
618  const bool override_subject = CBlastDatabaseArgs::HasBeenSet(args);
619 
620  if (CMbIndexArgs::HasBeenSet(args)) {
621  if (args[kArgUseIndex].AsBoolean() != kDfltArgUseIndex)
622  ERR_POST(Warning << "Overriding megablast BLAST DB indexed options in saved strategy");
623  }
624 
625  s_ImportSearchStrategy(in, cmdline_args, is_remote_search, override_query,
626  override_subject);
627 
628  return true;
629 }
630 
631 // Process search strategies
632 // FIXME: save program options,
633 // Save task if provided, no other options (only those in the cmd line) should
634 // be saved
635 void
637  blast::CBlastAppArgs* cmdline_args,
641  /* = CRef<objects::CPssmWithParameters>() */,
642  unsigned int num_iters /* =0 */)
643 {
644  CNcbiOstream* out = cmdline_args->GetExportSearchStrategyStream(args);
645  if ( !out ) {
646  return;
647  }
648 
649  s_ExportSearchStrategy(out, queries, opts_hndl,
650  cmdline_args->GetBlastDatabaseArgs(),
651  pssm, num_iters);
652 }
653 
654 /// Extracts the subject sequence IDs and ranges from the BLAST results
655 /// @note if this ever needs to be refactored for popular developer
656 /// consumption, this function should operate on CSeq_align_set as opposed to
657 /// blast::CSearchResultSet
658 static void
659 s_ExtractSeqidsAndRanges(const blast::CSearchResultSet& results,
660  CScope::TIds& ids, vector<TSeqRange>& ranges)
661 {
662  static const CSeq_align::TDim kQueryRow = 0;
663  static const CSeq_align::TDim kSubjRow = 1;
664  ids.clear();
665  ranges.clear();
666 
667  typedef map< CConstRef<CSeq_id>,
668  vector<TSeqRange>,
670  > TSeqIdRanges;
671  TSeqIdRanges id_ranges;
672 
673  ITERATE(blast::CSearchResultSet, result, results) {
674  if ( !(*result)->HasAlignments() ) {
675  continue;
676  }
677  ITERATE(CSeq_align_set::Tdata, aln, (*result)->GetSeqAlign()->Get()) {
678  CConstRef<CSeq_id> subj(&(*aln)->GetSeq_id(kSubjRow));
679  TSeqRange subj_range((*aln)->GetSeqRange(kSubjRow));
680  if ((*aln)->GetSeqStrand(kQueryRow) == eNa_strand_minus &&
681  (*aln)->GetSeqStrand(kSubjRow) == eNa_strand_plus) {
682  TSeqRange r(subj_range);
683  // flag the range as needed to be flipped once the sequence
684  // length is known
685  subj_range.SetFrom(r.GetToOpen());
686  subj_range.SetToOpen(r.GetFrom());
687  }
688  id_ranges[subj].push_back(subj_range);
689  }
690  }
691 
692  ITERATE(TSeqIdRanges, itr, id_ranges) {
693  ITERATE(vector<TSeqRange>, range, itr->second) {
694  ids.push_back(CSeq_id_Handle::GetHandle(*itr->first));
695  ranges.push_back(*range);
696  }
697  }
698  _ASSERT(ids.size() == ranges.size());
699 }
700 
701 /// Returns true if the remote BLAST DB data loader is being used
702 static bool
704 {
707  ITERATE(CObjectManager::TRegisteredNames, name, data_loaders) {
708  if (NStr::StartsWith(*name, objects::CRemoteBlastDbDataLoader::kNamePrefix)) {
709  return true;
710  }
711  }
712  return false;
713 }
714 
715 static bool
717 {
718  if ((format_type == CFormattingArgs::eAsnText) ||
719  (format_type == CFormattingArgs::eAsnBinary) ||
720  (format_type == CFormattingArgs::eArchiveFormat)||
721  (format_type == CFormattingArgs::eJsonSeqalign)) {
722  return false;
723  }
724  return true;
725 }
726 
/// Decides whether sequence data should be pre-fetched for these results.
///
/// When the PRE_FETCH_SEQS_LIMIT environment variable is set, its integer
/// value is consulted first: 0 returns false, INT_MAX returns true, and any
/// other value returns false if the summed Size() of the alignment sets of
/// all results exceeds it. In every remaining case the answer comes from
/// s_IsPrefetchFormat(format_type).
///
/// NOTE(review): the line declaring the second parameter is not visible in
/// this listing; the body refers to it as `format_type`.
727 static bool
728 s_PreFetchSeqs(const blast::CSearchResultSet& results,
730 {
731  {
732  char * pre_fetch_limit_str = getenv("PRE_FETCH_SEQS_LIMIT");
733  if (pre_fetch_limit_str) {
734  int pre_fetch_limit = NStr::StringToInt(pre_fetch_limit_str);
    // A limit of zero turns pre-fetching off
735  if(pre_fetch_limit == 0) {
736  return false;
737  }
    // INT_MAX turns it on without looking at the output format
738  if(pre_fetch_limit == INT_MAX){
739  return true;
740  }
    // Otherwise total the alignment set sizes over all results that have
    // alignments and compare against the limit
741  int num_of_seqs = 0;
742  for(unsigned int i=0; i < results.GetNumResults(); i++) {
743  if(results[i].HasAlignments()) {
744  num_of_seqs += results[i].GetSeqAlign()->Size();
745  }
746  }
747  if(num_of_seqs > pre_fetch_limit) {
748  return false;
749  }
750  }
751  }
752 
753  return s_IsPrefetchFormat(format_type);
754 }
755 
756 void BlastFormatter_PreFetchSequenceData(const blast::CSearchResultSet& results,
757  CRef<CScope> scope,
759 {
760  _ASSERT(scope.NotEmpty());
761  if (results.size() == 0) {
762  return;
763  }
764  if(!s_PreFetchSeqs(results, format_type)){
765  return;
766  }
767  try {
768  CScope::TIds ids;
769  vector<TSeqRange> ranges;
770  s_ExtractSeqidsAndRanges(results, ids, ranges);
771  _TRACE("Prefetching " << ids.size() << " sequence lengths");
772  LoadSequencesToScope(ids, ranges, scope);
773  } catch (CException& e) {
775  ERR_POST(Warning << "Error fetching sequence data from BLAST databases at NCBI, "
776  "please try again later");
777  }
778  else {
779  ERR_POST(Warning << "Error pre-fetching sequence data");
780  }
781  }
782 
783 }
784 
785 /// Auxiliary function to extract the ancillary data from the PSSM.
788 {
789  _ASSERT(pssm.CanGetPssm());
790  pair<double, double> lambda, k, h;
791  lambda.first = pssm.GetPssm().GetLambdaUngapped();
792  lambda.second = pssm.GetPssm().GetLambda();
793  k.first = pssm.GetPssm().GetKappaUngapped();
794  k.second = pssm.GetPssm().GetKappa();
795  h.first = pssm.GetPssm().GetHUngapped();
796  h.second = pssm.GetPssm().GetH();
798  true));
799 }
800 
801 void
802 CheckForFreqRatioFile(const string& rps_dbname, CRef<CBlastOptionsHandle> & opt_handle, bool isRpsblast)
803 {
804  bool use_cbs = (opt_handle->GetOptions().GetCompositionBasedStats() == eNoCompositionBasedStats) ? false : true;
805  if(use_cbs) {
806  vector<string> db;
807  NStr::Split(rps_dbname, " ", db);
808  list<string> failed_db;
809  for (unsigned int i=0; i < db.size(); i++) {
810  string path;
811  try {
812  vector<string> dbpath;
814  path = *dbpath.begin();
815  } catch (const CSeqDBException& e) {
816  NCBI_RETHROW(e, CBlastException, eRpsInit,
817  "Cannot retrieve path to RPS database");
818  }
819 
820  CFile f(path + ".freq");
821  if(!f.Exists()) {
822  failed_db.push_back(db[i]);
823  }
824 
825  }
826  if(!failed_db.empty()) {
828  string all_failed = NStr::Join(failed_db, ", ");
829  string prog_str = isRpsblast ? "RPSBLAST": "DELTABLAST";
830  string msg = all_failed + " contain(s) no freq ratios " \
831  + "needed for composition-based statistics.\n" \
832  + prog_str + " will be run without composition-based statistics.";
833  ERR_POST(Warning << msg);
834  }
835 
836  }
837  return;
838 }
839 
// Reports whether the input stream `in` has no non-whitespace character left
// to read. It does so by trying to extract one character with skipws enabled.
//
// NOTE(review): the line carrying the function name and parameter list is
// not visible in this listing; the body refers to the stream as `in`.
840 bool
842 {
843 #ifdef NCBI_OS_MSWIN
    // Windows branch: extract one character and push it back on success
844  char c;
845  in.setf(ios::skipws);
846  if (!(in >> c))
847  return true;
848  in.unget();
849  return false;
850 #else
851  char c;
852  CNcbiStreampos orig_p = in.tellg();
853  // Piped input
    // A negative position is treated as "not empty" without reading anything
854  if(orig_p < 0)
855  return false;
856 
    // Remember state and format flags so they can be put back after the probe
857  IOS_BASE::iostate orig_state = in.rdstate();
858  IOS_BASE::fmtflags orig_flags = in.setf(ios::skipws);
859 
    // NOTE(review): on this early return the position, flags and state are
    // not restored - confirm that callers do not use the stream afterwards
860  if(! (in >> c))
861  return true;
862 
    // Rewind to the original position and restore the saved flags and state
863  in.seekg(orig_p);
864  in.flags(orig_flags);
865  in.clear();
866  in.setstate(orig_state);
867 
868  return false;
869 #endif
870 }
871 
872 string
874 {
875  string cmd = kEmptyStr;
876  for(unsigned int i=0; i < a.Size(); i++) {
877  cmd += a[i] + " ";
878  }
879  return cmd;
880 }
881 
882 bool
883 UseXInclude(const CFormattingArgs & f, const string & s)
884 {
885  CFormattingArgs::EOutputFormat fmt = f.GetFormattedOutputChoice();
886  if((fmt == CFormattingArgs::eXml2) || (fmt == CFormattingArgs::eJson)) {
887  if (s == "-"){
888  string f_str = (fmt == CFormattingArgs::eXml2) ? "14.": "13.";
889  NCBI_THROW(CInputException, eEmptyUserInput,
890  "Please provide a file name for outfmt " + f_str);
891  }
892  return true;
893  }
894  return false;
895 }
896 
897 string
898 GetSubjectFile(const CArgs& args)
899 {
900  string filename="";
901 
902  if (args.Exist(kArgSubject) && args[kArgSubject].HasValue())
903  filename = args[kArgSubject].AsString();
904 
905  return filename;
906 }
907 
908 void PrintErrorArchive(const CArgs & a, const list<CRef<CBlast4_error> > & msg)
909 {
910  try {
913 
914  CBlast4_request & req = archive->SetRequest();
915  CBlast4_get_request_info_request & info= req.SetBody().SetGet_request_info();
916  info.SetRequest_id("Error");
917  CBlast4_get_search_results_reply & results = archive->SetResults();
918  // Pacify unused variable warning; the set above is used to populate a mandatory field
919  (void) results;
920  archive->SetMessages() = msg;
921  CBlastFormat::PrintArchive(archive, a[kArgOutput].AsOutputFile());
922  }
923  } catch (...) {}
924 }
925 
/// Clean-up hook called between query batches.
/// On Linux builds with <malloc.h> available it calls malloc_trim(0) so the
/// allocator can hand unused heap memory back to the operating system; on
/// every other platform it does nothing.
void QueryBatchCleanup()
{
#if defined(NCBI_OS_LINUX) && HAVE_MALLOC_H
    malloc_trim(0);
#endif
}
934 
935 void LogQueryInfo(CBlastUsageReport & report, const CBlastInput & q_info)
936 {
939 }
940 
941 
942 void LogBlastOptions(blast::CBlastUsageReport & report, const CBlastOptions & opt)
943 {
944  EBlastProgramType prog_type = opt.GetProgramType();
945  report.AddParam(CBlastUsageReport::eProgram, Blast_ProgramNameFromType(prog_type));
947  report.AddParam(CBlastUsageReport::eHitListSize, opt.GetHitlistSize());
948  if (!Blast_ProgramIsNucleotide(prog_type)) {
950  }
951 }
952 
/// Adds database-related parameters (name, total length, number of
/// sequences, date) to the usage report when the database arguments are
/// available.
///
/// NOTE(review): several lines of this function are not visible in this
/// listing (the rest of the first condition, the declaration of `db`, and the
/// body of the formatting-arguments branch), so only the visible statements
/// are described here.
953 void LogCmdOptions(blast::CBlastUsageReport & report, const CBlastAppArgs & args)
954 {
955  if (args.GetBlastDatabaseArgs().NotEmpty() &&
958 
960  string db_name = db->GetDBNameList();
    // Strip everything up to and including the last path separator so that
    // only the trailing name component is reported.
    // NOTE(review): find_last_of() returns an unsigned size type; storing it
    // in an int and testing against -1 relies on npos converting to -1 -
    // consider comparing against string::npos instead.
961  int off = db_name.find_last_of(CFile::GetPathSeparator());
962  if (off != -1) {
963  db_name.erase(0, off+1);
964  }
965  report.AddParam(CBlastUsageReport::eDBName, db_name);
966  report.AddParam(CBlastUsageReport::eDBLength, (Int8) db->GetTotalLength());
967  report.AddParam(CBlastUsageReport::eDBNumSeqs, db->GetNumSeqs());
968  report.AddParam(CBlastUsageReport::eDBDate, db->GetDate());
969  }
970 
971  if(args.GetFormattingArgs().NotEmpty()){
973  }
974 }
975 
976 int GetMTByQueriesBatchSize(EProgram p, int num_threads, const string & task)
977 {
978  int batch_size = 0;
979 
980  char * mt_query_batch_env = getenv("BLAST_MT_QUERY_BATCH_SIZE");
981  if (mt_query_batch_env) {
982  batch_size = NStr::StringToInt(mt_query_batch_env);
983  }
984  else {
985  batch_size = GetQueryBatchSize(p);
986  }
987  if (task == "blastx-fast")
988  { // Set batch_size to 20004
989  batch_size *= 2;
990  }
991  return batch_size;
992 }
993 
994 void MTByQueries_DBSize_Warning(const Int8 length_limit, bool is_db_protein)
995 {
996  string warn = "This database is probably too large to benefit from -mt_mode=1. " \
997  "We suggest using -mt_mode=1 only if the database is less than " \
998  + NStr::Int8ToString(length_limit, NStr::fWithCommas);
999  if (is_db_protein) {
1000  warn += + " residues ";
1001  }
1002  else {
1003  warn += " bases ";
1004  }
1005  ERR_POST(Warning << warn + "or if the search is limited by an option such as -taxids, -taxidlist or -gilist.");
1006  return;
1007 }
1008 
1010 {
1011  string warning = "This set of queries is too small to fully benefit from the -mt_mode=1 option. " \
1012  "The total number of letters should be at least ";
1013  warning += NStr::IntToString(batch_size);
1015  if (Blast_QueryIsProtein(p)) {
1016  warning += " residues";
1017  }
1018  else {
1019  warning += " bases";
1020  }
1021  warning += " per thread, and there should be at least one query of this length per thread.";
1022  ERR_POST(Warning << warning);
1023 }
1024 
User-defined methods of the data storage class.
Produce formatted blast output for command line applications.
Data loader implementation that uses the blast databases.
Data loader implementation that uses the blast databases remotely.
void LogCmdOptions(blast::CBlastUsageReport &report, const CBlastAppArgs &args)
USING_SCOPE(objects)
void CheckForFreqRatioFile(const string &rps_dbname, CRef< CBlastOptionsHandle > &opt_handle, bool isRpsblast)
static TSeqLocVector s_ConvertBioseqs2TSeqLocVector(const CBlast4_subject::TSequences &subjects)
Converts a list of Bioseqs into a TSeqLocVector.
CRef< blast::CRemoteBlast > InitializeRemoteBlast(CRef< blast::IQueryFactory > queries, CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, bool verbose_output, const string &client_id, CRef< objects::CPssmWithParameters > pssm)
Initializes a CRemoteBlast instance for usage by command line BLAST binaries.
blast::SDataLoaderConfig InitializeQueryDataLoaderConfiguration(bool query_is_protein, CRef< blast::CLocalDbAdapter > db_adapter)
Initialize the data loader configuration for the query.
void SaveSearchStrategy(const CArgs &args, blast::CBlastAppArgs *cmdline_args, CRef< blast::IQueryFactory > queries, CRef< blast::CBlastOptionsHandle > opts_hndl, CRef< objects::CPssmWithParameters > pssm, unsigned int num_iters)
Save the search strategy corresponding to the current command line search.
void QueryBatchCleanup()
Clean up formatter scope and release.
static bool s_IsPrefetchFormat(blast::CFormattingArgs::EOutputFormat format_type)
string RegisterOMDataLoader(CRef< CSeqDB > db_handle)
Register the BLAST database data loader using the already initialized CSeqDB object.
static void s_ImportQueries(const CBlast4_queries &queries, CRef< blast::CBlastOptionsHandle > opts_hndl, blast::CBlastAppArgs *cmdline_args)
Import queries into command line arguments object.
CRef< CBlastAncillaryData > ExtractPssmAncillaryData(const CPssmWithParameters &pssm)
Auxiliary function to extract the ancillary data from the PSSM.
string GetSubjectFile(const CArgs &args)
Get the name of the subject file. @param args Arguments class [in].
bool RecoverSearchStrategy(const CArgs &args, blast::CBlastAppArgs *cmdline_args)
Recover search strategy from input file.
int GetMTByQueriesBatchSize(EProgram p, int num_threads, const string &task)
void MTByQueries_DBSize_Warning(const Int8 length_limit, bool is_db_protein)
static void s_ExportSearchStrategy(CNcbiOstream *out, CRef< blast::IQueryFactory > queries, CRef< blast::CBlastOptionsHandle > options_handle, CRef< blast::CBlastDatabaseArgs > db_args, CRef< objects::CPssmWithParameters > pssm, unsigned int num_iters)
Real implementation of search strategy extraction.
void CheckMTByQueries_QuerySize(EProgram prog, int batch_size)
static bool s_IsUsingRemoteBlastDbDataLoader()
Returns true if the remote BLAST DB data loader is being used.
void PrintErrorArchive(const CArgs &a, const list< CRef< CBlast4_error > > &msg)
Function to print blast archive with only error messages (search failed) to output stream.
static void s_ImportPssm(const CBlast4_queries &queries, CRef< blast::CBlastOptionsHandle > opts_hndl, blast::CBlastAppArgs *cmdline_args)
Import PSSM into the command line arguments object.
void InitializeSubject(CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, bool is_remote_search, CRef< blast::CLocalDbAdapter > &db_adapter, CRef< objects::CScope > &scope)
Initializes the subject/database as well as its scope.
string GetCmdlineArgs(const CNcbiArguments &a)
static CRef< blast::CBlastDatabaseArgs > s_ImportSubjects(const CBlast4_subject &subj, bool subject_is_protein)
Import the subject sequences into a CBlastDatabaseArgs object.
void BlastFormatter_PreFetchSequenceData(const blast::CSearchResultSet &results, CRef< CScope > scope, blast::CFormattingArgs::EOutputFormat format_type)
This method optimizes the retrieval of sequence data to scope.
void LogBlastOptions(blast::CBlastUsageReport &report, const CBlastOptions &opt)
static void s_ExtractSeqidsAndRanges(const blast::CSearchResultSet &results, CScope::TIds &ids, vector< TSeqRange > &ranges)
Extracts the subject sequence IDs and ranges from the BLAST results.
static void s_ImportSearchStrategy(CNcbiIstream *in, blast::CBlastAppArgs *cmdline_args, bool is_remote_search, bool override_query, bool override_subject)
Imports search strategy, using CImportStrategy.
bool UseXInclude(const CFormattingArgs &f, const string &s)
static CRef< blast::CBlastDatabaseArgs > s_ImportDatabase(const CBlast4_subject &subj, CBlastOptionsBuilder &opts_builder, bool subject_is_protein, bool is_remote_search)
Import the database and return it in a CBlastDatabaseArgs object.
static CRef< blast::CExportStrategy > s_InitializeExportStrategy(CRef< blast::IQueryFactory > queries, CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, const string &client_id, CRef< objects::CPssmWithParameters > pssm, unsigned int num_iters)
bool IsIStreamEmpty(CNcbiIstream &in)
static bool s_PreFetchSeqs(const blast::CSearchResultSet &results, blast::CFormattingArgs::EOutputFormat format_type)
void LogQueryInfo(CBlastUsageReport &report, const CBlastInput &q_info)
Utility functions for BLAST command line applications.
ESubjectMaskingType
Define the possible subject masking types.
Definition: blast_def.h:235
@ eSoftSubjMasking
Definition: blast_def.h:237
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
Interface for converting sources of sequence data into blast sequence input.
int GetQueryBatchSize(EProgram program, bool is_ungapped=false, bool remote=false, bool use_default=true, string task="", bool mt_mode=false)
Retrieve the appropriate batch size for the specified task.
Declares the CBlastOptionsBuilder class.
Boolean Blast_SubjectIsProtein(EBlastProgramType p)
Returns true if the subject is protein.
Definition: blast_program.c:50
Boolean Blast_ProgramIsNucleotide(EBlastProgramType p)
Definition: blast_program.c:82
Boolean Blast_QueryIsProtein(EBlastProgramType p)
Returns true if the query is protein.
Definition: blast_program.c:40
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
Declares CBlastScopeSource class to create properly configured CScope objects to invoke the BLAST dat...
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Definition: blast_types.hpp:56
Int2 BlastNumber2Program(EBlastProgramType number, char **program)
Return string name for program given a number.
Definition: blast_util.c:312
CArgs –.
Definition: ncbiargs.hpp:379
const Int4 k_MinBatchSize
Int4 GetBatchSize(Int4 hits=-1)
const double k_MixIn
const Int4 k_MaxBatchSize
CBioseq_Handle –.
CBlast4_archive –.
CBlast4_get_request_info_request –.
CBlast4_get_search_results_reply –.
CBlast4_request –.
CBlast4_subject –.
Class used to return ancillary data from a blast search, i.e.
Base command line argument class for a generic BLAST command line binary.
CRef< CBlastDatabaseArgs > GetBlastDatabaseArgs() const
Get the BLAST database arguments.
CRef< CFormattingArgs > GetFormattingArgs() const
Get the formatting options.
Argument class to collect database/subject arguments.
Definition: blast_args.hpp:889
static bool HasBeenSet(const CArgs &args)
Auxiliary function to determine if the database/subject sequence has been set.
CRef< CSearchDatabase > GetSearchDatabase() const
Retrieve the search database information.
Definition: blast_args.hpp:936
static const int kSubjectsDataLoaderPriority
The default priority for subjects, should be used for subjects/databases.
Definition: blast_args.hpp:893
void SetSubjects(CRef< IQueryFactory > subjects, CRef< CScope > scope, bool is_protein)
Sets the subject sequences.
Definition: blast_args.hpp:946
void SetSearchDatabase(CRef< CSearchDatabase > search_db)
Set the search database information.
Definition: blast_args.hpp:939
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: bdbloader.cpp:52
static string GetLoaderNameFromArgs(CConstRef< CSeqDB > db_handle)
Definition: bdbloader.cpp:164
Defines BLAST error codes (user errors included)
static void PrintArchive(CRef< objects::CBlast4_archive > archive, CNcbiOstream &out)
Auxiliary function to print the BLAST Archive in multiple output formats.
Generalized converter from an abstract source of biological sequence data to collections of blast inp...
Int8 GetNumSeqsProcessed() const
Int8 GetTotalLengthProcessed() const
Class to build CBlastOptionsHandle from blast4 ASN objects.
Encapsulates ALL the BLAST algorithm's options.
Class whose purpose is to create CScope objects which have data loaders added with different prioriti...
void AddDataLoaders(CRef< objects::CScope > scope)
Add the data loader configured in the object to the provided scope.
CRef< objects::CScope > NewScope()
Create a new, properly configured CScope.
void RevokeBlastDbDataLoader()
Removes the BLAST database data loader from the object manager.
void AddParam(EUsageParams p, int val)
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
Definition: sequence.hpp:770
CFile –.
Definition: ncbifile.hpp:1604
Argument class to collect formatting options, use this to create a CBlastFormat object.
EOutputFormat GetFormattedOutputChoice() const
Get the choice of formatted output.
EOutputFormat
Defines the output formats supported by our command line formatter.
@ eJsonSeqalign
JSON seq-align.
@ eJson
JSON XInclude.
@ eXml2
XML2 XInclude.
@ eAsnText
ASN.1 text output.
@ eArchiveFormat
BLAST archive format.
@ eAsnBinary
ASN.1 binary output.
Class to return parts of the CBlast4_request, or data associated with a CBlast4_request,...
Defines user input exceptions.
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
static bool HasBeenSet(const CArgs &args)
Auxiliary function to determine if the megablast database indexing options have been set.
CNcbiArguments –.
Definition: ncbienv.hpp:236
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
NCBI C++ Object Manager dependent implementation of IQueryFactory.
Handle command line arguments for psiblast binary Programs supported: psiblast, phi-blastn,...
void SetInputPssm(CRef< objects::CPssmWithParameters > pssm)
Set the PSSM from the saved search strategy.
double GetH() const
Definition: Pssm.cpp:120
double GetKappa() const
Definition: Pssm.cpp:111
double GetKappaUngapped() const
Definition: Pssm.cpp:138
double GetLambdaUngapped() const
Definition: Pssm.cpp:129
double GetHUngapped() const
Definition: Pssm.cpp:147
double GetLambda() const
Definition: Pssm.cpp:102
API for Remote Blast Requests.
CScope –.
Definition: scope.hpp:92
Blast Search Subject.
CSeqDBException.
Definition: seqdbcommon.hpp:73
CSeqDBGiList.
void AddTaxIds(const set< TTaxId > &tax_ids)
void AddGi(TGi gi)
Add a new GI to the list.
static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)
Find volume paths.
Definition: seqdb.cpp:1040
Uint8 GetTotalLength() const
Returns the sum of the lengths of all available sequences.
Definition: seqdb.cpp:685
const string & GetDBNameList() const
Get list of database names.
Definition: seqdb.cpp:760
@ eProtein
Definition: seqdb.hpp:174
int GetNumSeqs() const
Returns the number of sequences available.
Definition: seqdb.cpp:670
string GetDate() const
Returns the construction date of the database.
Definition: seqdb.cpp:635
Root class for all serialization exceptions.
Definition: exception.hpp:50
Handles command line arguments for Tblastn binary.
void SetInputPssm(CRef< objects::CPssmWithParameters > pssm)
Set the PSSM from the saved search strategy.
CTmpFile –.
Definition: ncbifile.hpp:2352
Definition: map.hpp:338
const string kArgOutput
Output file name.
const string kArgRemote
Argument to determine whether searches should be run locally or remotely.
const string kArgUseIndex
Flag to force using or not using megablast database index.
const bool kDfltArgUseIndex
Default value for megablast database index flag.
const string kDfltArgQuery
Default value for query sequence input.
const string kArgQuery
Query sequence(s)
const string kArgSubject
Subject input file to search.
@ eNoCompositionBasedStats
Don't use composition based statistics.
struct config config
std::ofstream out("events_result.xml")
main entry point for tests
const string kArgOutputFormat
Argument to select formatted output type.
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
#define true
Definition: bool.h:35
EOutputFormat
Definition: grid_cli.hpp:276
void SetEntrezQueryLimitation(const string &entrez_query)
Mutator for the entrez query.
string GetDatabaseName() const
Accessor for the database name.
void SetCompositionBasedStats(ECompoAdjustModes mode)
double GetEvalueThreshold() const
void SetNegativeGiList(CSeqDBGiList *gilist)
Mutator for the negative gi list.
int GetDbFilteringAlgorithmId()
Get the database filtering algorithm ID.
bool HaveNegativeTaxidList()
Check whether a negative tax id list is specified.
CRef< CSeqDB > GetSeqDb() const
Obtain a reference to the database.
void SetVerbose(EDebugMode verb=eDebug)
Adjust the debugging level.
list< TGi > GetNegativeGiList()
Get the negative GI list.
ESubjectMaskingType GetSubjectMaskingType()
int GetHitlistSize() const
bool HaveGiList()
Check whether a GI list is specified.
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
void SetClientId(const string &client_id)
Sets the client ID used by this object to send requests.
list< TTaxId > GetTaxidList()
Get the Tax list.
EBlastProgramType EProgramToEBlastProgramType(EProgram p)
Convert EProgram to EBlastProgramType.
Definition: blast_aux.cpp:709
bool HaveEntrezQuery()
Check whether an Entrez query is specified.
list< TTaxId > GetNegativeTaxidList()
Get the negative tax id list.
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
CRef< objects::CBlast4_request > ExtractBlast4Request(CNcbiIstream &in)
Extract a Blast4-request (a.k.a.
bool HasDbFilteringAlgorithmKey()
Check whether a database filtering algorithm key is specified.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
list< TGi > GetGiList()
Get the GI list.
bool HasSubjectMaskingType()
Get Subject Masking Type (soft/hard)
ECompoAdjustModes GetCompositionBasedStats() const
string Blast_ProgramNameFromType(EBlastProgramType program)
Returns a string program name, given a blast::EBlastProgramType enumeration.
Definition: blast_aux.cpp:813
void LoadSequencesToScope(objects::CScope::TIds &ids, vector< TSeqRange > &ranges, CRef< objects::CScope > &scope)
This method retrieves sequence data in bulk to scope @ids seq id list [in] @ranges seq range list [in]...
bool HaveNegativeGiList()
Check whether a negative GI list is specified.
string GetEntrezQuery()
Get the Entrez query.
void SetFilteringAlgorithm(int filt_algorithm_id)
Temporary fix for backwards compatibility with other 6.0 SCs.
string GetDbFilteringAlgorithmKey()
Get the database filtering algorithm key.
bool HasDbFilteringAlgorithmId()
Check whether a database filtering algorithm ID is specified.
EMoleculeType
Molecule of the BLAST database.
void SetGiList(CSeqDBGiList *gilist)
Mutator for the gi list.
@ eNotSupported
Feature not supported.
@ eBlastDbIsNucleotide
nucleotide
@ eBlastDbIsProtein
protein
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
bool Exist(const string &name) const
Check existence of argument description.
Definition: ncbiargs.cpp:1813
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define _TRACE(message)
Definition: ncbidbg.hpp:122
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbiexpt.cpp:453
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
#define NCBI_RETHROW(prev_exception, exception_class, err_code, message)
Generic macro to re-throw an exception.
Definition: ncbiexpt.hpp:737
CNcbiOstream & AsOutputFile(EIfExists if_exists, IOS_BASE::openmode mode=IOS_BASE::out)
Definition: ncbifile.cpp:5455
const string & GetFileName(void) const
Return used file name (generated or given in the constructor).
Definition: ncbifile.cpp:5429
static char GetPathSeparator(void)
Get path separator symbol specific for the current platform.
Definition: ncbifile.cpp:433
@ eIfExists_Throw
You can make call of AsInputFile/AsOutputFile only once, on each following call throws CFileException...
Definition: ncbifile.hpp:2377
@ eNoRemove
Do not remove file.
Definition: ncbifile.hpp:2357
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
static int BestRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:774
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
Definition: scope.cpp:530
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void GetRegisteredNames(TRegisteredNames &names)
Get names of all registered data loaders.
vector< string > TRegisteredNames
vector< CSeq_id_Handle > TIds
Definition: scope.hpp:143
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
TThisType & SetToOpen(position_type toOpen)
Definition: range.hpp:175
static TThisType GetEmpty(void)
Definition: range.hpp:306
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::streampos CNcbiStreampos
Portable alias for streampos.
Definition: ncbistre.hpp:134
static string Int8ToString(Int8 value, TNumToStringFlags flags=0, int base=10)
Convert Int8 to string.
Definition: ncbistr.hpp:5159
#define kEmptyStr
Definition: ncbistr.hpp:123
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3186
@ fWithCommas
Use commas as thousands separator.
Definition: ncbistr.hpp:254
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
strategy
Block allocation strategies.
Definition: bmconst.h:146
const TSequences & GetSequences(void) const
Get the variant data.
bool IsSeq_loc_list(void) const
Check if variant Seq_loc_list is selected.
bool IsDatabase(void) const
Check if variant Database is selected.
void SetResults(TResults &value)
Assign a value to Results data member.
const TDatabase & GetDatabase(void) const
Get the variant data.
void SetRequest(TRequest &value)
Assign a value to Request data member.
bool IsPssm(void) const
Check if variant Pssm is selected.
void SetBody(TBody &value)
Assign a value to Body data member.
bool IsSequences(void) const
Check if variant Sequences is selected.
list< CRef< CSeq_loc > > TSeq_loc_list
const TBioseq_set & GetBioseq_set(void) const
Get the variant data.
bool IsBioseq_set(void) const
Check if variant Bioseq_set is selected.
const TSeq_loc_list & GetSeq_loc_list(void) const
Get the variant data.
TMessages & SetMessages(void)
Assign a value to Messages data member.
const TPssm & GetPssm(void) const
Get the variant data.
list< CRef< CBioseq > > TSequences
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
bool CanGetPssm(void) const
Check if it is safe to call GetPssm method.
const TPssm & GetPssm(void) const
Get the Pssm member data.
list< CRef< CSeq_align > > Tdata
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
list< CRef< CSeq_entry > > TSeq_set
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
int i
static MDB_envinfo info
Definition: mdb_load.c:37
static char * prog
Definition: mdb_load.c:33
range(_Ty, _Ty) -> range< _Ty >
unsigned int a
Definition: ncbi_localip.c:102
std::istream & in(std::istream &in_, double &x_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double lambda(size_t dimMatrix_, const Int4 *const *scoreMatrix_, const double *q_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Main argument class for PSI-BLAST application.
Declares the CRemoteBlast class.
Declares the CImportStrategy and CExportStrategy.
CRef< objects::CObjectManager > om
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Configuration structure for the CBlastScopeSource.
bool m_UseGenbank
Use the Genbank data loader.
string m_BlastDbName
Name of the BLAST database to use (non-empty if m_UseBlastDbs is true)
void OptimizeForWholeLargeSequenceRetrieval(bool value=true)
Configures the BLAST database data loader to optimize the retrieval of *entire* large sequences.
bool m_UseBlastDbs
Use the BLAST database data loaders.
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
Main argument class for TBLASTN application.
#define _ASSERT
else result
Definition: token2.c:20
Modified on Wed Apr 17 13:10:33 2024 by modify_doxy.py rev. 669887