NCBI C++ ToolKit
blast_seq_tool_job.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_seq_tool_job.cpp 46083 2021-01-21 19:55:02Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrey Yazhuk
27  *
28  */
29 
30 #include <ncbi_pch.hpp>
31 
33 #include <corelib/ncbiexec.hpp>
34 
36 
38 
39 #include <gui/objutils/utils.hpp>
40 #include <gui/objutils/label.hpp>
41 
46 
47 #include <objmgr/feat_ci.hpp>
48 #include <objmgr/util/sequence.hpp>
49 
52 
53 #include <wx/filename.h>
54 
57 USING_SCOPE(blast);
58 
59 ///////////////////////////////////////////////////////////////////////////////
60 /// CBLASTSeqToolJob
61 
62 // utility function for translating between plugin arg loc-lists
63 // and BLAST loc vectors
/// Appends one SSeqLoc to @a vec for every entry of @a locs.
///
/// @param locs             scoped objects; each object is dynamic_cast to CSeq_loc
/// @param mask_lc_regions  when true, add locations of features whose region
///                         text equals "lowercase in FASTA file" to the mask
/// @param mask_rep_feats   when true, add locations of every feature returned
///                         by the second CFeat_CI pass to the mask
/// @param vec              output vector; entries are appended, never cleared
///
/// NOTE(review): the selector variable 'sel' used by both CFeat_CI passes is
/// declared on lines that are not present in this extract, so which feature
/// subtypes each pass visits cannot be confirmed from here.
64 static void x_ToBlastVec(const TConstScopedObjects& locs, bool mask_lc_regions,
65  bool mask_rep_feats, TSeqLocVector& vec)
66 {
67  vec.reserve(locs.size());
68 
69  ITERATE(TConstScopedObjects, it, locs) {
70  CObject* obj = const_cast<CObject*>(it->object.GetPointer());
71 
72  SSeqLoc loc;
    // NOTE(review): dynamic_cast yields null when the object is not a
    // CSeq_loc. The result is not checked here, yet it is dereferenced as
    // *loc.seqloc below whenever either masking flag is set.
73  loc.seqloc = dynamic_cast<CSeq_loc*>(obj);
74  loc.scope = it->scope;
75 
76  if (mask_lc_regions || mask_rep_feats) {
78 
    // Accumulates every location to be masked for this sequence.
79  CRef<CSeq_loc> mask_loc(new CSeq_loc);
80  if (mask_lc_regions) {
82  CFeat_CI feat_iter(*loc.scope, *loc.seqloc, sel);
83  for ( ; feat_iter; ++feat_iter) {
    // Only features whose region text matches this exact string are masked.
84  if (feat_iter->GetData().GetRegion()
85  == "lowercase in FASTA file") {
86  mask_loc->Add(feat_iter->GetLocation());
87  }
88  }
89  }
90  if (mask_rep_feats) {
92  CFeat_CI feat_iter(*loc.scope, *loc.seqloc, sel);
93  for ( ; feat_iter; ++feat_iter) {
94  mask_loc->Add(feat_iter->GetLocation());
95  }
96  }
    // Attach the mask only if at least one location was added; the strand
    // is reset on the mask before it is stored.
97  if (mask_loc->Which() != CSeq_loc::e_not_set) {
98  mask_loc->ResetStrand();
99  loc.mask = mask_loc;
100  }
101  }
    // The entry is appended even when seqloc is null; Run() skips null
    // seqlocs when it exports sequences for the standalone executable.
102  vec.push_back(loc);
103  }
104 }
105 
106 
107 /// static callback for BLAST interruption
108 /// this must match the API in CBl2Seq and BLAST
110 {
    // Returns TRUE (interrupt the search) only when a progress structure with
    // non-null user_data is supplied and the job stored there reports
    // IsCanceled(); every other case returns FALSE.
    // user_data is the 'this' pointer that Run() passes to
    // SetInterruptCallback(), hence the reinterpret_cast back to the job type.
111  if( prog && prog->user_data ){
112  CBLASTSeqToolJob* job =
113  reinterpret_cast<CBLASTSeqToolJob*>(prog->user_data)
114  ;
115  if( job->IsCanceled() ){
116  return TRUE;
117  }
118  }
119  return FALSE;
120 }
121 
// Platform-dependent initializer: (pid_t)-1 on NCBI_OS_UNIX and plain -1 in
// the fallback branch.
// NOTE(review): the declaration line and the NCBI_OS_MSWIN value are on lines
// missing from this extract.
123  #if defined(NCBI_OS_UNIX)
124  (pid_t)-1
125  #elif defined(NCBI_OS_MSWIN)
127  #else
128  -1
129  #endif
130 ;
131 
    // params: BLAST parameters used to initialize m_Params.
133  const CBLASTParams& params
134 )
135  : m_Params( params )
    // No child process exists yet; Run() overwrites this after SpawnV().
136  , m_ProcHandle( kInvalidProcHandle )
137 {
    // Job description string (hard-coded for now).
138  m_Descr = "BLAST Sequences"; // TODO
139 }
140 
142 {
    // Overview: converts the configured query/subject locations to BLAST
    // vectors, then either
    //   (a) runs a standalone BLAST executable through temporary FASTA files
    //       and maps the resulting alignments back to the original ids, or
    //   (b) runs CBl2Seq in-process.
    // Resulting alignments are turned into project items.
    // NOTE(review): several declarations used below (q_locs, s_locs,
    // subjectType, loader, blast_opts, seqType, TSeqTypes) sit on lines that
    // are missing from this extract.
143  x_SetStatusText("Preparing input sequences for BLAST ...");
144 
145  // get locations from parameters and convert them to TSeqLocVector
148 
149  string localDB;
151 
    // Nothing to do when there are no subject sequences or no local DB name.
153  if (subjectType == CBLASTParams::eSequences) {
154  if (s_locs.empty())
155  return;
156  }
157  else if (subjectType == CBLASTParams::eLocalDB) {
158  bool nuc = m_Params.IsSubjNucInput();
159  if (nuc)
160  localDB = m_Params.GetLocalNucDB();
161  else
162  localDB = m_Params.GetLocalProtDB();
163  if (localDB.empty())
164  return;
165 
167  loader = CLBLASTObjectLoader::CreateLoader(localDB, !nuc);
168  }
169 
170  // compose the query and target sequences into BLAST forms
171  TSeqLocVector query_vec, subject_vec;
172 
173  const CBLASTParams::SProgParams& prog_params = m_Params.GetCurrParams();
174  bool mask_lc = prog_params.m_MaskLowercase;
175  bool mask_rp = prog_params.m_MaskRepeats;
176 
177  x_ToBlastVec(q_locs, mask_lc, mask_rp, query_vec);
    // subject_vec stays empty for the local-DB case.
178  if (subjectType == CBLASTParams::eSequences)
179  x_ToBlastVec(s_locs, mask_lc, mask_rp, subject_vec);
180 
181  vector<string> arg_vec;
183 
184  if( IsCanceled() ){
185  return;
186  }
187 
    // When a window-masker tax id is configured for nucleotide input, publish
    // the masker directory both as an environment variable and through
    // WindowMaskerPathInit(). The CAutoEnvironmentVariable object lives until
    // this function returns (presumably restoring the variable then - TODO
    // confirm against CAutoEnvironmentVariable).
188  unique_ptr<CAutoEnvironmentVariable> p_wm_path;
189  if( m_Params.IsNucInput() && prog_params.m_WM_TaxId != 0 ){
190  const string& wm_dir = m_Params.GetWmDir();
191  if( !wm_dir.empty() ){
192  p_wm_path.reset(new CAutoEnvironmentVariable( "WINDOW_MASKER_PATH", wm_dir.c_str() ));
193  blast::WindowMaskerPathInit( wm_dir );
194  }
195  else {
196  ERR_POST(Error << "Directory for WINDOW_MASKER files is not defined wm_taxid=" << prog_params.m_WM_TaxId);
197  }
198  }
199 
200  if (subjectType == CBLASTParams::eLocalDB || m_Params.IsStandaloneRequested()){
201  // run standalone BLAST
202 
203  //- determine proper executable (outside of "try")
    // NOTE(review): front() on an empty vector is undefined behavior. arg_vec
    // is filled on a line not visible in this extract; confirm it always
    // holds at least the program name at this point.
204  string prog_name = arg_vec.front();
205  #ifdef NCBI_OS_MSWIN
206  prog_name += ".exe";
207  #endif
    // The first element was the program name; the rest are its options.
208  arg_vec.erase( arg_vec.begin() );
209 
210  vector< CRef<CSeq_annot> > res_annots;
211  CRef<CMappingRanges> ranges( new CMappingRanges() );
213  TSeqTypes seq_types;
214 
    // All three temp file names share one GetTmpName() stem and differ only
    // by suffix ("-s", "-q", "-out").
215  string subjects_fname = CDirEntry::GetTmpName();
216  string queries_fname = subjects_fname + "-q";
217  string output_fname = subjects_fname + "-out";
218  subjects_fname += "-s";
219  try {
220 
221  //- export subjects as one FASTA file
222 
223  if (!s_locs.empty()) {
224  CNcbiOfstream sub_ostr(subjects_fname.c_str());
225  CFastaOstream fasta_sub_ostr(sub_ostr);
226 
    // si numbers only the subjects actually written (null seqlocs are
    // skipped before the increment) and forms the local id "Subject_<si>".
227  int si = 0;
228  ITERATE(TSeqLocVector, locit, subject_vec){
229  const SSeqLoc& sloc = *locit;
230  const CSeq_loc* loc = sloc.seqloc;
231  if (loc == NULL){
232  continue;
233  }
234 
235  CScope* scope = const_cast<CScope*>(sloc.scope.GetPointer());
236 
237  si++;
    // NOTE(review): CSeq_loc::GetId() is documented to return NULL for
    // locations with multiple or no ids; it is dereferenced unchecked here.
238  CBioseq_Handle handle = scope->GetBioseqHandle(*loc->GetId());
240 
241  if (loc->IsWhole()){
242  fasta_sub_ostr.Write(handle);
243 
244  }
245  else {
246  fasta_sub_ostr.Write(handle, loc);
247  }
248 
249  string seq_name = "Subject_" + NStr::IntToString(si);
250  CRef<CSeq_id> seq_id(new CSeq_id(CSeq_id::e_Local, seq_name));
251  CSeq_id_Handle local_sihd = CSeq_id_Handle::GetHandle(*seq_id);
252  seq_types[local_sihd] = seqType;
253 
254  CSeq_id_Handle base_sihd = CSeq_id_Handle::GetHandle(*loc->GetId());
255  seq_types[base_sihd] = seqType;
256 
    // Coordinate scale passed to AddConversion: 1 for nucleotide
    // sequences, 3 otherwise.
257  int trcf = handle.IsNucleotide() ? 1 : 3;
258 
259  // prepare for final mapping
260  ranges->AddConversion(
261  local_sihd,
262  0 * trcf, sequence::GetLength(*loc, scope) *trcf,
264  base_sihd,
265  sequence::GetStart(*loc, scope) *trcf,
266  sequence::GetStrand(*loc, scope)
267  );
268  }
269  sub_ostr.close();
270  }
271 
    // Queries are exported the same way as subjects, with local ids
    // "Query_<oi>".
272  CNcbiOfstream que_ostr( queries_fname.c_str() );
273  CFastaOstream fasta_que_ostr( que_ostr );
274 
275  int oi = 0;
276  ITERATE( TSeqLocVector, locit, query_vec ){
277  const SSeqLoc& sloc = *locit;
278  const CSeq_loc* loc = sloc.seqloc;
279  if( loc == NULL ){
280  continue;
281  }
282 
283  CScope* scope = const_cast<CScope*>( sloc.scope.GetPointer() );
284 
285  oi++;
286  CBioseq_Handle handle = scope->GetBioseqHandle(*loc->GetId());
288 
289  if( loc->IsWhole() ){
290  fasta_que_ostr.Write( handle );
291 
292  } else {
293  fasta_que_ostr.Write( handle, loc );
294  }
295 
296  string seq_name = "Query_" + NStr::IntToString(oi);
297  CRef<CSeq_id> seq_id( new CSeq_id( CSeq_id::e_Local, seq_name ) );
298  CSeq_id_Handle local_sihd = CSeq_id_Handle::GetHandle( *seq_id );
299  seq_types[local_sihd] = seqType;
300 
301  CSeq_id_Handle base_sihd = CSeq_id_Handle::GetHandle( *loc->GetId() );
302  seq_types[base_sihd] = seqType;
303 
304  int trcf = handle.IsNucleotide() ? 1: 3;
305 
306  // prepare for final mapping
307  ranges->AddConversion(
308  local_sihd,
309  0 *trcf, sequence::GetLength( *loc, scope ) *trcf,
311  base_sihd,
312  sequence::GetStart( *loc, scope ) *trcf,
313  sequence::GetStrand( *loc, scope )
314  );
315  }
316  que_ostr.close();
317 
318 
    // Build the executable path: <std path>[/bin]/<prog_name>. The "bin"
    // directory is skipped only for non-debug NCBI_OS_DARWIN builds.
319  wxString agent_path = CSysPath::GetStdPath();
320 
321  wxFileName fname( agent_path, wxEmptyString );
322  #if !defined( NCBI_OS_DARWIN ) || defined( _DEBUG )
323  fname.AppendDir(wxT("bin"));
324  #endif
325  fname.SetFullName( ToWxString( prog_name ) );
326 
327  agent_path = fname.GetFullPath();
328 
329  //- specify output format as text ASN.1
330  arg_vec.push_back( "-" + string("outfmt") );
331  arg_vec.push_back( "8" );
332 
333  //- specify subjects
334  if (subjectType == CBLASTParams::eLocalDB) {
335  arg_vec.push_back("-" + kArgDb);
336 
    // NOTE(review): the quotes become part of the argv element itself;
    // confirm this is what SpawnV expects on every supported platform.
337  if (localDB.find(' ') != string::npos)
338  arg_vec.push_back('"' + localDB + '"');
339  else
340  arg_vec.push_back(localDB);
341  }
342  else {
343  arg_vec.push_back("-" + kArgSubject);
344  arg_vec.push_back(subjects_fname);
345  }
346 
347  //- specify queries
348  arg_vec.push_back( "-" + kArgQuery );
349  arg_vec.push_back( queries_fname );
350 
351  //- specify output
352  arg_vec.push_back( "-" + kArgOutput );
353  arg_vec.push_back( output_fname );
354 
    // NOTE(review): returning here skips the CDirEntry::Remove() calls at the
    // end of this branch, so the FASTA files written above are left on disk.
355  if( IsCanceled() ){
356  return;
357  }
358 
    // agent_args is used only for logging and the status text.
359  string agent_args;
360  ITERATE( vector<string>, argit, arg_vec ){
361  agent_args += *argit + " ";
362  }
363 
364 
365  LOG_POST( Info << "We are about to start program on path '" << agent_path.ToUTF8() << "'" );
366  LOG_POST( Info << "with parameters '" << agent_args << "'" );
367  ERR_POST( Info << "Window masker path: " << blast::WindowMaskerPathGet() /*CNcbiApplication::Instance()->SetEnvironment().Get("WINDOW_MASKER_PATH")*/ );
368 
369  x_SetStatusText( string("Executing BLAST for: ") + agent_args );
370 
371 
372  // preparing argv for SpawnV
    // Layout: slot 0 is a NULL placeholder, slots 1..size() point into
    // arg_vec's strings, and the last slot is the NULL terminator (hence +2).
    // NOTE(review): raw new[]/delete[] - if SpawnV throws, control jumps to
    // the CException handler below and this array is never released.
373  const char** arg_vec_ptr = new const char*[arg_vec.size() +2];
374  size_t xi = 0;
375  arg_vec_ptr[xi] = NULL; // it will be filled with cmdline inside SpawnV
376  while( xi < arg_vec.size() ){
377  const char* arg_ptr = arg_vec[xi].c_str();
378  arg_vec_ptr[++xi] = arg_ptr;
379  }
380  arg_vec_ptr[++xi] = NULL;
381 
    // Start the child without waiting; the handle is kept in m_ProcHandle.
382  CExec::CResult proc_result = CExec::SpawnV( CExec::eNoWait, agent_path.utf8_str(), arg_vec_ptr );
383  m_ProcHandle = proc_result.GetProcessHandle();
384 
385  delete[] arg_vec_ptr;
386 
    // Block until the child terminates (infinite timeout).
387  CProcess::CExitInfo exit_info;
388  CProcess( m_ProcHandle ).Wait( kInfiniteTimeoutMs, &exit_info );
389 
390  CNcbiIfstream result_istr( output_fname.c_str() );
391 
392  try {
393  //- get output as ASN Seq-annot
    // Reads Seq-annots one after another; a CEofException thrown by the
    // reader is treated as the normal end of input.
394  while( !result_istr.eof() ){
395  CRef<CSeq_annot> next_annot( new CSeq_annot() );
396 
397  result_istr >> MSerial_AsnText >> *next_annot;
398 
399  res_annots.push_back( next_annot );
400  }
401  } catch( CEofException& ){
402  // it's ok
403 
404  } catch( CIOException& ioex ){
406  << "IO error on reading BLAST output: " << ioex.GetErrCodeString()
407  );
408  }
409 
410  int exitcode;
411  //EIO_Status status = ps.GetPipe().Close( &exitcode );
412  exitcode = exit_info.GetExitCode();
413 
    // Human-readable names for exit codes 0..5, used only for logging.
414  static string exitres[] = {
415  "success",
416  "input error",
417  "database error",
418  "engine error",
419  "out of memory",
420  "network error"
421  };
422 
423 
    // NOTE(review): exitcode is a signed int compared with an unsigned size,
    // so a negative exit code converts to a large unsigned value and is
    // reported as "unknown error".
424  ERR_POST(
425  Info << "Program " << prog_name << " completed with "
426  //<< "status " << IO_StatusStr(status) << " and "
427  << "exitcode " << exitcode
428  << " ("
429  << (exitcode < sizeof(exitres)/sizeof(string) ? exitres[exitcode] : "unknown error" )
430  << ")"
431  );
432 
433  } catch( CException& ex ){
434  ERR_POST( "Failed to run standalone program " << prog_name << ": " << ex.GetMsg() );
435  }
437 
    // Second phase: map alignments from the local "Query_N"/"Subject_N" ids
    // and coordinates back to the original sequences, then publish them.
438  try {
439  //CRef<CSeq_annot> annot( new CSeq_annot() );
440  //list< CRef<CSeq_align> >& aligns = annot->SetData().SetAlign();
441  list< CRef<CSeq_align> > aligns;
442 
443  ITERATE(vector< CRef<CSeq_annot> >, annit, res_annots){
444  CRef<CSeq_annot> next_annot( *annit );
445 
446  if( !next_annot.IsNull() && next_annot->IsAlign() ){
447  CSeq_loc_Mapper mapper(ranges);
448 
449  for (const auto &i : seq_types)
450  mapper.SetSeqTypeById(i.first, i.second);
451 
452  if (subjectType == CBLASTParams::eLocalDB) {
455 
    // For local-DB searches, ids found in the annot that were not
    // registered during export get a sequence type as well.
456  CTypeConstIterator<CSeq_id> id_iter(*next_annot);
457  for (; id_iter; ++id_iter) {
459  if (seq_types.find(h) == seq_types.end())
460  mapper.SetSeqTypeById(h, seqType);
461  }
462  }
463 
464  const list< CRef<CSeq_align> > raw_aligns = next_annot->GetData().GetAlign();
465 
466  //- translate Queries_ and Subjects_ to proper ids
467  //- translate coords to proper offsets
468  ITERATE( list< CRef<CSeq_align> >, alnit, raw_aligns ){
469  CRef<CSeq_align> align = mapper.Map( **alnit );
470  aligns.push_back( align );
471  }
472  }
473  }
474 
475  if( !aligns.empty() ){
476  x_SetStatusText( "Processing BLAST results..." );
477 
478  //- add results to project
479 
480  list< CRef<CSeq_align_set> > align_sets;
481 
483 
484  FindCompartments( aligns, align_sets );
485  } else {
486  CRef<CSeq_align_set> al_set( new CSeq_align_set() );
487  al_set->Set() = aligns;
488  align_sets.push_back( al_set );
489  }
490 
491  NON_CONST_ITERATE( list< CRef<CSeq_align_set> >, as_itr, align_sets ){
492  // create Project items from the results
493  CRef<CSeq_annot> annot( new CSeq_annot() );
494  annot->SetData().SetAlign() = (*as_itr)->Set();
495 
496  string label;
    // NOTE(review): front() assumes GetSeqLocs() is non-empty - confirm.
497  CRef<CScope> scope = m_Params.GetSeqLocs().front().scope;
498  CLabel::GetLabel( *annot, &label, CLabel::eDefault, scope );
499 
500  label += " [" + EProgramToTaskName( m_Params.GetCurrProgram() ) + "]";
501 
502  CRef<CProjectItem> item( new CProjectItem() );
503  item->SetItem().SetAnnot( *annot );
504  item->SetLabel( label );
506  annot->SetTitleDesc( label );
507  annot->SetNameDesc( label );
508 
    // When a local-DB loader was created above, it is registered together
    // with the item.
509  if (loader)
510  AddProjectItem(*item, *loader);
511  else
512  AddProjectItem(*item);
513  }
514 
515 # if _DEBUG
516  string dumpname = CDirEntry::GetTmpName() + "-standalone-inproject";
517  ERR_POST( Info << "Standalone in-project dumpname is: '" << dumpname << "'" );
518 
519  CNcbiOfstream dumpstream( dumpname.c_str() );
520  unique_ptr<CObjectOStream> dumpasn(CObjectOStream::Open(eSerial_AsnText, dumpstream));
521 
522  ITERATE( list< CRef<CSeq_align_set> >, av_iter, align_sets ){
523  CRef<objects::CSeq_align_set> aliset = *av_iter;
524 
525  *dumpasn << *aliset;
526  }
527 
528  dumpasn->Close();
529 # endif
530 
531  }
532  } catch( CException& ex ){
533  ERR_POST( "Failed to convert results of program " << prog_name << ": " << ex.GetMsg() );
534  }
535 
    // Remove the temporary files. This cleanup is reached after both try
    // blocks, but not on the early cancel return above.
536  CDirEntry subject_entry( subjects_fname );
537  subject_entry.Remove();
538 
539  CDirEntry query_entry( queries_fname );
540  query_entry.Remove();
541 
542  CDirEntry output_entry( output_fname );
543  output_entry.Remove();
544 
545  } else {
546  // run internal BLAST
547 
548 # ifdef _DEBUG
549  blast_opts->GetOptions().DebugDumpText( cerr, "BlastOptions", 0 );
550  ERR_POST( Info << "Window masker path: " << blast::WindowMaskerPathGet() /*CNcbiApplication::Instance()->SetEnvironment().Get("WINDOW_MASKER_PATH")*/ );
551 # endif
552 
    // 'this' is handed over as user data so that s_BlastInterrupt can poll
    // IsCanceled() on this job while the search runs.
553  CBl2Seq blaster( query_vec, subject_vec, *blast_opts );
554  blaster.SetInterruptCallback( s_BlastInterrupt, this );
555 
556  if( IsCanceled() ){
557  return;
558  }
559  x_SetStatusText( "Aligning sequences..." );
560 
561  TSeqAlignVector aligns = blaster.Run();
562 
563 
564 # if _DEBUG
565  string dumpname = CDirEntry::GetTmpName() + "-allapart";
566  ERR_POST( Info << "All-apart dumpname is: '" << dumpname << "'" );
567 
568  CNcbiOfstream dumpstream( dumpname.c_str() );
569  unique_ptr<CObjectOStream> dumpasn(CObjectOStream::Open(eSerial_AsnText, dumpstream));
570 
571  ITERATE( TSeqAlignVector, av_iter, aligns ){
572  CRef<objects::CSeq_align_set> aliset = *av_iter;
573 
574  *dumpasn << *aliset;
575  }
576 
577  dumpasn->Close();
578 # endif
579 
    // Results are discarded when the job was canceled during the search.
580  if( !IsCanceled() && !aligns.empty() ){
581  x_SetStatusText( "Processing BLAST results..." );
582 
583  x_CreateProjectItems( aligns );
584  }
585  }
586 }
587 
589 {
592  }
593 
595 }
596 
597 
599 {
    // Flattens every alignment in 'aligns' into one list, groups that list
    // into alignment sets, and adds one project item per set.
    // NOTE(review): the signature, the declaration of 'sas' and the condition
    // guarding FindCompartments() are on lines missing from this extract.
600  CRef<CSeq_annot> annot( new CSeq_annot() );
601  list< CRef< CSeq_align > >& align_list = annot->SetData().SetAlign();
602 
603  NON_CONST_ITERATE( TSeqAlignVector, it, aligns ){
604  CRef<CSeq_align_set> results = *it;
605 
606  ITERATE (CSeq_align_set::Tdata, align_it, results->Get()) {
607  CRef<CSeq_align> align = *align_it;
    // A discontinuous alignment is replaced by its component alignments;
    // any other alignment is appended as is.
608  if (align->GetSegs().IsDisc()) {
610  const list<CRef<CSeq_align> >& disc = align->GetSegs().GetDisc().Get();
611 
612  sas->Set().insert(sas->Set().end(), disc.begin(), disc.end());
613  align_list.insert(align_list.end(), sas->Get().begin(), sas->Get().end());
614  } else {
615  align_list.push_back(align);
616  }
617  }
618  }
619 
620  if( !align_list.empty() ){
621  list< CRef<CSeq_align_set> > align_sets;
622 
624 
625  FindCompartments( align_list, align_sets );
626  } else {
    // Otherwise all alignments go into a single set.
627  CRef<CSeq_align_set> al_set( new CSeq_align_set() );
628  al_set->Set() = align_list;
629  align_sets.push_back( al_set );
630  }
631 
632  NON_CONST_ITERATE( list< CRef<CSeq_align_set> >, as_itr, align_sets ){
633  // create Project items from the results
    // This 'annot' shadows the function-level 'annot' declared at the top.
634  CRef<CSeq_annot> annot( new CSeq_annot() );
635  annot->SetData().SetAlign() = (*as_itr)->Set();
636 
637  string label;
    // NOTE(review): front() assumes GetSeqLocs() is non-empty - confirm.
638  CRef<CScope> scope = m_Params.GetSeqLocs().front().scope;
639  CLabel::GetLabel( *annot, &label, CLabel::eDefault, scope );
640 
641  label += ": BLAST Sequences"; // TODO
642 
    // The same label is used for the project item and for the annotation's
    // title and name descriptors; the creation date is the current time.
643  CRef<CProjectItem> item( new CProjectItem() );
644  item->SetItem().SetAnnot( *annot );
645  item->SetLabel( label );
646  annot->SetCreateDate( CTime(CTime::eCurrent) );
647  annot->SetTitleDesc( label );
648  annot->SetNameDesc( label );
649 
650  AddProjectItem(*item);
651  }
652 
    // Debug builds additionally dump every alignment set as ASN.1 text to a
    // temp-named file.
653 # if _DEBUG
654  string dumpname = CDirEntry::GetTmpName() + "-allapart-inproject";
655  ERR_POST( Info << "All-apart in-project dumpname is: '" << dumpname << "'" );
656 
657  CNcbiOfstream dumpstream( dumpname.c_str() );
658  unique_ptr<CObjectOStream> dumpasn(CObjectOStream::Open(eSerial_AsnText, dumpstream));
659 
660  ITERATE( list< CRef<CSeq_align_set> >, av_iter, align_sets ){
661  CRef<objects::CSeq_align_set> aliset = *av_iter;
662 
663  *dumpasn << *aliset;
664  }
665 
666  dumpasn->Close();
667 # endif
668  }
669 }
670 
Declares the CBl2Seq (BLAST 2 Sequences) class.
USING_SCOPE(objects)
static void x_ToBlastVec(const TConstScopedObjects &locs, bool mask_lc_regions, bool mask_rep_feats, TSeqLocVector &vec)
CBLASTSeqToolJob.
static const TProcessHandle kInvalidProcHandle
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
CAutoEnvironmentVariable –.
Definition: ncbienv.hpp:179
CBLASTParams - save user-specified parameters and preferences.
CBLASTSeqToolJob.
CBioseq_Handle –.
Runs the BLAST algorithm between 2 sequences.
Definition: bl2seq.hpp:58
void AddProjectItem(objects::CProjectItem &item)
void DebugDumpText(ostream &out, const string &bundle, unsigned int depth) const
Definition: ddumpable.cpp:56
CDirEntry –.
Definition: ncbifile.hpp:262
The result type for Spawn methods.
Definition: ncbiexec.hpp:120
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
Definition: sequence.hpp:770
CFeat_CI –.
Definition: feat_ci.hpp:64
Storage for multiple mapping ranges.
CObject –.
Definition: ncbiobj.hpp:180
Extended exit information for waited process.
CProcess –.
CScope –.
Definition: scope.hpp:92
bool IsAlign(void) const
Definition: Seq_annot.cpp:182
void SetNameDesc(const string &name)
Definition: Seq_annot.cpp:66
void SetCreateDate(const CTime &dt)
Definition: Seq_annot.cpp:121
void SetTitleDesc(const string &title)
Definition: Seq_annot.cpp:96
CSeq_loc_Mapper –.
static wxString GetStdPath(void)
The application installation directory.
Definition: sys_path.cpp:159
CTime –.
Definition: ncbitime.hpp:296
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
Definition: map.hpp:338
Constant declarations for command line arguments for BLAST programs.
const string kArgOutput
Output file name.
const string kArgQuery
Query sequence(s)
const string kArgDb
BLAST database name.
const string kArgSubject
Subject input file to search.
static const char si[8][64]
Definition: des.c:146
void FindCompartments(const list< CRef< CSeq_align > > &aligns, list< CRef< CSeq_align_set > > &align_sets, TCompartOptions options=fCompart_Defaults, float diff_len_filter=3.0f)
TInterruptFnPtr SetInterruptCallback(TInterruptFnPtr fnptr, void *user_data=NULL)
Set a function callback to be invoked by the CORE of BLAST to allow interrupting a BLAST search in pr...
Definition: bl2seq.hpp:302
virtual TSeqAlignVector Run()
Perform BLAST search Assuming N queries and M subjects, the structure of the returned vector is as fo...
Definition: bl2seq.cpp:173
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
string EProgramToTaskName(EProgram p)
Convert a EProgram enumeration value to a task name (as those used in the BLAST command line binaries...
Definition: blast_aux.cpp:676
string WindowMaskerPathGet()
Retrieves the path to the windowmasker data files.
int WindowMaskerPathInit(const string &window_masker_path)
Initialize the path to the windowmasker data files.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
TProcessHandle GetProcessHandle(void)
Get process handle/pid.
Definition: ncbiexec.cpp:63
static CResult SpawnV(EMode mode, const char *cmdname, const char *const *argv)
Spawn a new process with variable number of command-line arguments.
Definition: ncbiexec.cpp:614
@ eNoWait
Continues to execute calling process concurrently with new process (asynchronous process).
Definition: ncbiexec.hpp:96
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Remove a directory entry.
Definition: ncbifile.cpp:2595
static string GetTmpName(ETmpFileCreationMode mode=eTmpFileGetName)
Get temporary file name.
Definition: ncbifile.cpp:2903
static CRef< objects::CLoaderDescriptor > CreateLoader(const string &db, bool nucleotide)
static objects::SAnnotSelector GetAnnotSelector(TAnnotFlags flags=0)
request an annotation selector for a given type
Definition: utils.cpp:167
CBLASTSeqToolJob(const CBLASTParams &params)
bool GetLocalDBLoader() const
TConstScopedObjects & GetSeqLocs()
const string & GetWmDir() const
TProcessHandle m_ProcHandle
TConstScopedObjects & GetSubjSeqLocs()
CRef< blast::CBlastOptionsHandle > ToArgVecBlastOptions(vector< string > &arg_vec, bool local=true) const
string GetLocalNucDB() const
SProgParams & GetCurrParams()
returns params for the current program
bool IsCompartmentsRequested() const
bool m_MaskLowercase
mask Lowercase features
static Boolean s_BlastInterrupt(SBlastProgress *prog)
Static callback for BLAST interruption; this must match the API in CBl2Seq and BLAST.
blast::EProgram & GetCurrProgram()
returns the current program for the currently selected input type
string GetLocalProtDB() const
virtual void RequestCancel()
RequestCancel() is called to notify the Job that it shall exit Run() function ASAP.
bool m_MaskRepeats
mask Repeat features
virtual void x_CreateProjectItems()
override this function in derived classes and populate m_Items.
bool IsSubjNucInput() const
ESubjectType GetSubjectType() const
bool IsStandaloneRequested() const
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
string m_Descr
mutex to sync our internals
virtual bool IsCanceled() const override
virtual void x_SetStatusText(const string &text)
virtual void RequestCancel() override
RequestCancel() is called to notify the Job that it shall exit Run() function ASAP.
vector< SConstScopedObject > TConstScopedObjects
Definition: objects.hpp:65
@ eDefault
Definition: label.hpp:73
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
Definition: Seq_loc.cpp:3875
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
void ResetStrand(void)
Reset the strand on this location.
Definition: Seq_loc.cpp:5221
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
Definition: objostr.cpp:126
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
Definition: sequence.cpp:2727
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
void SetSeqTypeById(const CSeq_id_Handle &idh, ESeqType seqtype) const
Methods for setting sequence types.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddConversion(CRef< CMappingRange > cvt)
Add new mapping range to the proper place.
bool IsNucleotide(void) const
const CSeqFeatData & GetData(void) const
const CSeq_loc & GetLocation(void) const
SAnnotSelector & SetFeatSubtype(TFeatSubtype subtype)
Set feature subtype (also set annotation and feat type)
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
const unsigned long kInfiniteTimeoutMs
Infinite timeout in milliseconds.
TPid TProcessHandle
bool Kill(unsigned long timeout=kDefaultKillTimeout)
Terminate process.
int Wait(unsigned long timeout=kInfiniteTimeoutMs, CExitInfo *info=0) const
Wait until process terminates.
int GetExitCode(void) const
Get process exit code.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
@ eCurrent
Use current time. See also CCurrentTime.
Definition: ncbitime.hpp:300
virtual const char * GetErrCodeString(void) const override
Get error code interpreted as text.
static const char label[]
#define INVALID_HANDLE_VALUE
A value for an invalid file handle.
Definition: mdb.c:389
void SetLabel(const TLabel &value)
Assign a value to Label data member.
void SetItem(TItem &value)
Assign a value to Item data member.
Tdata & Set(void)
Assign a value to data member.
bool IsDisc(void) const
Check if variant Disc is selected.
Definition: Seq_align_.hpp:772
list< CRef< CSeq_align > > Tdata
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
const TRegion & GetRegion(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ e_Local
local use
Definition: Seq_id_.hpp:95
@ e_not_set
No variant selected.
Definition: Seq_loc_.hpp:97
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
const TAlign & GetAlign(void) const
Get the variant data.
Definition: Seq_annot_.hpp:641
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
int i
static char * prog
Definition: mdb_load.c:33
#define wxT(x)
Definition: muParser.cpp:41
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
#define TRUE
bool replacement for C indicating true.
Definition: ncbi_std.h:97
#define FALSE
bool replacement for C indicating false.
Definition: ncbi_std.h:101
#define NCBI_OS_MSWIN
Definition: ncbiconf_msvc.h:19
Defines a portable execute class.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
describes parameters for a BLAST program
SAnnotSelector –.
Progress monitoring structure.
Definition: blast_def.h:341
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
CConstRef< objects::CSeq_loc > seqloc
Seq-loc describing the sequence to use as query/subject to BLAST The types of Seq-loc currently suppo...
Definition: sseqloc.hpp:50
CRef< objects::CScope > scope
Scope where the sequence referenced can be found by the toolkit's object manager.
Definition: sseqloc.hpp:54
CRef< objects::CSeq_loc > mask
Seq-loc describing regions to mask in the seqloc field Acceptable types of Seq-loc are Seq-interval a...
Definition: sseqloc.hpp:59
Interface to retrieve list of available windowmasker filtering.
wxString ToWxString(const string &s)
Definition: wx_utils.hpp:173
Modified on Tue Apr 23 07:40:40 2024 by modify_doxy.py rev. 669887