53 #include <wx/filename.h>
67 vec.reserve(locs.size());
74 loc.
scope = it->scope;
76 if (mask_lc_regions || mask_rep_feats) {
80 if (mask_lc_regions) {
83 for ( ; feat_iter; ++feat_iter) {
85 ==
"lowercase in FASTA file") {
93 for ( ; feat_iter; ++feat_iter) {
123 #if defined(NCBI_OS_UNIX)
181 vector<string> arg_vec;
188 unique_ptr<CAutoEnvironmentVariable> p_wm_path;
191 if( !wm_dir.empty() ){
204 string prog_name = arg_vec.front();
208 arg_vec.erase( arg_vec.begin() );
210 vector< CRef<CSeq_annot> > res_annots;
216 string queries_fname = subjects_fname +
"-q";
217 string output_fname = subjects_fname +
"-out";
218 subjects_fname +=
"-s";
223 if (!s_locs.empty()) {
242 fasta_sub_ostr.
Write(handle);
246 fasta_sub_ostr.
Write(handle, loc);
252 seq_types[local_sihd] = seqType;
255 seq_types[base_sihd] = seqType;
290 fasta_que_ostr.
Write( handle );
293 fasta_que_ostr.
Write( handle, loc );
299 seq_types[local_sihd] = seqType;
302 seq_types[base_sihd] = seqType;
321 wxFileName fname( agent_path, wxEmptyString );
322 #if !defined( NCBI_OS_DARWIN ) || defined( _DEBUG )
323 fname.AppendDir(
wxT(
"bin"));
327 agent_path = fname.GetFullPath();
330 arg_vec.push_back(
"-" +
string(
"outfmt") );
331 arg_vec.push_back(
"8" );
335 arg_vec.push_back(
"-" +
kArgDb);
337 if (localDB.find(
' ') != string::npos)
338 arg_vec.push_back(
'"' + localDB +
'"');
340 arg_vec.push_back(localDB);
344 arg_vec.push_back(subjects_fname);
349 arg_vec.push_back( queries_fname );
353 arg_vec.push_back( output_fname );
360 ITERATE( vector<string>, argit, arg_vec ){
361 agent_args += *argit +
" ";
365 LOG_POST(
Info <<
"We are about to start program on path '" << agent_path.ToUTF8() <<
"'" );
366 LOG_POST(
Info <<
"with parameters '" << agent_args <<
"'" );
373 const char** arg_vec_ptr =
new const char*[arg_vec.size() +2];
375 arg_vec_ptr[xi] =
NULL;
376 while( xi < arg_vec.size() ){
377 const char* arg_ptr = arg_vec[xi].c_str();
378 arg_vec_ptr[++xi] = arg_ptr;
380 arg_vec_ptr[++xi] =
NULL;
385 delete[] arg_vec_ptr;
394 while( !result_istr.eof() ){
399 res_annots.push_back( next_annot );
414 static string exitres[] = {
425 Info <<
"Program " << prog_name <<
" completed with "
427 <<
"exitcode " << exitcode
429 << (exitcode <
sizeof(exitres)/
sizeof(
string) ? exitres[exitcode] :
"unknown error" )
434 ERR_POST(
"Failed to run standalone program " << prog_name <<
": " << ex.
GetMsg() );
441 list< CRef<CSeq_align> > aligns;
449 for (
const auto &
i : seq_types)
457 for (; id_iter; ++id_iter) {
459 if (seq_types.find(h) == seq_types.end())
464 const list< CRef<CSeq_align> > raw_aligns = next_annot->
GetData().
GetAlign();
470 aligns.push_back( align );
475 if( !aligns.empty() ){
480 list< CRef<CSeq_align_set> > align_sets;
487 al_set->
Set() = aligns;
488 align_sets.push_back( al_set );
494 annot->
SetData().SetAlign() = (*as_itr)->Set();
503 item->
SetItem().SetAnnot( *annot );
517 ERR_POST(
Info <<
"Standalone in-project dumpname is: '" << dumpname <<
"'" );
533 ERR_POST(
"Failed to convert results of program " << prog_name <<
": " << ex.
GetMsg() );
536 CDirEntry subject_entry( subjects_fname );
553 CBl2Seq blaster( query_vec, subject_vec, *blast_opts );
566 ERR_POST(
Info <<
"All-apart dumpname is: '" << dumpname <<
"'" );
601 list< CRef< CSeq_align > >& align_list = annot->SetData().SetAlign();
612 sas->Set().insert(sas->Set().end(), disc.begin(), disc.end());
613 align_list.insert(align_list.end(), sas->Get().begin(), sas->Get().end());
615 align_list.push_back(align);
620 if( !align_list.empty() ){
621 list< CRef<CSeq_align_set> > align_sets;
628 al_set->Set() = align_list;
629 align_sets.push_back( al_set );
635 annot->SetData().SetAlign() = (*as_itr)->Set();
641 label +=
": BLAST Sequences";
644 item->SetItem().SetAnnot( *annot );
645 item->SetLabel(
label );
647 annot->SetTitleDesc(
label );
648 annot->SetNameDesc(
label );
655 ERR_POST(
Info <<
"All-apart in-project dumpname is: '" << dumpname <<
"'" );
Declares the CBl2Seq (BLAST 2 Sequences) class.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
CAutoEnvironmentVariable –.
CBLASTParams - save user-specified parameters and preferences.
Runs the BLAST algorithm between 2 sequences.
void AddProjectItem(objects::CProjectItem &item)
void DebugDumpText(ostream &out, const string &bundle, unsigned int depth) const
The result type for Spawn methods.
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
Storage for multiple mapping ranges.
Extended exit information for waited process.
void SetNameDesc(const string &name)
void SetCreateDate(const CTime &dt)
void SetTitleDesc(const string &title)
static wxString GetStdPath(void)
the application installation directory.
Template class for iteration on objects of class C (non-modifiable version)
Constant declarations for command line arguments for BLAST programs.
const string kArgOutput
Output file name.
const string kArgQuery
Query sequence(s)
const string kArgDb
BLAST database name.
const string kArgSubject
Subject input file to search.
static const char si[8][64]
void FindCompartments(const list< CRef< CSeq_align > > &aligns, list< CRef< CSeq_align_set > > &align_sets, TCompartOptions options=fCompart_Defaults, float diff_len_filter=3.0f)
TInterruptFnPtr SetInterruptCallback(TInterruptFnPtr fnptr, void *user_data=NULL)
Set a function callback to be invoked by the CORE of BLAST to allow interrupting a BLAST search in pr...
virtual TSeqAlignVector Run()
Perform BLAST search. Assuming N queries and M subjects, the structure of the returned vector is as fo...
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
string EProgramToTaskName(EProgram p)
Convert a EProgram enumeration value to a task name (as those used in the BLAST command line binaries...
string WindowMaskerPathGet()
Retrieves the path to the windowmasker data files.
int WindowMaskerPathInit(const string &window_masker_path)
Initialize the path to the windowmasker data files.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void Error(CExceptionArgs_Base &args)
const string & GetMsg(void) const
Get message string.
void Warning(CExceptionArgs_Base &args)
void Info(CExceptionArgs_Base &args)
TProcessHandle GetProcessHandle(void)
Get process handle/pid.
static CResult SpawnV(EMode mode, const char *cmdname, const char *const *argv)
Spawn a new process with variable number of command-line arguments.
@ eNoWait
Continues to execute calling process concurrently with new process (asynchronous process).
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Remove a directory entry.
static string GetTmpName(ETmpFileCreationMode mode=eTmpFileGetName)
Get temporary file name.
static CRef< objects::CLoaderDescriptor > CreateLoader(const string &db, bool nucleotide)
static objects::SAnnotSelector GetAnnotSelector(TAnnotFlags flags=0)
request an annotation selector for a given type
CBLASTSeqToolJob(const CBLASTParams &params)
bool GetLocalDBLoader() const
TConstScopedObjects & GetSeqLocs()
const string & GetWmDir() const
TProcessHandle m_ProcHandle
TConstScopedObjects & GetSubjSeqLocs()
CRef< blast::CBlastOptionsHandle > ToArgVecBlastOptions(vector< string > &arg_vec, bool local=true) const
string GetLocalNucDB() const
SProgParams & GetCurrParams()
returns params for the current program
int m_WM_TaxId
winmask tax id
bool IsCompartmentsRequested() const
bool m_MaskLowercase
mask Lowercase features
static Boolean s_BlastInterrupt(SBlastProgress *prog)
static callback for BLAST interruption; this must match the API in CBl2Seq and BLAST
blast::EProgram & GetCurrProgram()
returns the current program for the currently selected input type
string GetLocalProtDB() const
virtual void RequestCancel()
RequestCancel() is called to notify the Job that it shall exit Run() function ASAP.
bool m_MaskRepeats
mask Repeat features
virtual void x_CreateProjectItems()
override this function in derived classes and populate m_Items.
bool IsSubjNucInput() const
ESubjectType GetSubjectType() const
bool IsStandaloneRequested() const
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
string m_Descr
mutex to sync our internals
virtual bool IsCanceled() const override
virtual void x_SetStatusText(const string &text)
virtual void RequestCancel() override
RequestCancel() is called to notify the Job that it shall exit Run() function ASAP.
vector< SConstScopedObject > TConstScopedObjects
#define MSerial_AsnText
I/O stream manipulators –.
@ eSerial_AsnText
ASN.1 text.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
void ResetStrand(void)
Reset the strand on this location.
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc. Returns eNa_strand_other if multiple strands in...
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
void SetSeqTypeById(const CSeq_id_Handle &idh, ESeqType seqtype) const
Methods for setting sequence types.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddConversion(CRef< CMappingRange > cvt)
Add new mapping range to the proper place.
bool IsNucleotide(void) const
const CSeqFeatData & GetData(void) const
const CSeq_loc & GetLocation(void) const
SAnnotSelector & SetFeatSubtype(TFeatSubtype subtype)
Set feature subtype (also set annotation and feat type)
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
const unsigned long kInfiniteTimeoutMs
Infinite timeout in milliseconds.
bool Kill(unsigned long timeout=kDefaultKillTimeout)
Terminate process.
int Wait(unsigned long timeout=kInfiniteTimeoutMs, CExitInfo *info=0) const
Wait until process terminates.
int GetExitCode(void) const
Get process exit code.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
@ eCurrent
Use current time. See also CCurrentTime.
virtual const char * GetErrCodeString(void) const override
Get error code interpreted as text.
static const char label[]
#define INVALID_HANDLE_VALUE
A value for an invalid file handle.
void SetLabel(const TLabel &value)
Assign a value to Label data member.
void SetItem(TItem &value)
Assign a value to Item data member.
Tdata & Set(void)
Assign a value to data member.
bool IsDisc(void) const
Check if variant Disc is selected.
list< CRef< CSeq_align > > Tdata
const TDisc & GetDisc(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
const TRegion & GetRegion(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsWhole(void) const
Check if variant Whole is selected.
@ e_not_set
No variant selected.
void SetData(TData &value)
Assign a value to Data data member.
const TAlign & GetAlign(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
Uint1 Boolean
bool replacement for C
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
Defines a portable execute class.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
describes parameters for a BLAST program
Progress monitoring structure.
Structure to represent a single sequence to be fed to BLAST.
CConstRef< objects::CSeq_loc > seqloc
Seq-loc describing the sequence to use as query/subject to BLAST. The types of Seq-loc currently suppo...
CRef< objects::CScope > scope
Scope where the sequence referenced can be found by the toolkit's object manager.
CRef< objects::CSeq_loc > mask
Seq-loc describing regions to mask in the seqloc field. Acceptable types of Seq-loc are Seq-interval a...
Interface to retrieve list of available windowmasker filtering.
wxString ToWxString(const string &s)