NCBI C++ ToolKit
bl2seq.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: bl2seq.cpp 76361 2017-02-02 14:59:08Z madden $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  * ===========================================================================
29  */
30 
31 /// @file bl2seq.cpp
32 /// Implementation of CBl2Seq class.
33 
34 #include <ncbi_pch.hpp>
36 #include "blast_objmgr_priv.hpp"
38 
39 /** @addtogroup AlgoBlast
40  *
41  * @{
42  */
43 
46 BEGIN_SCOPE(blast)
47 
49  : m_DbScanMode(false), m_InterruptFnx(0), m_InterruptUserData(0)
50 {
51  TSeqLocVector queries;
52  TSeqLocVector subjects;
53  queries.push_back(query);
54  subjects.push_back(subject);
55 
56  x_Init(queries, subjects);
57  m_OptsHandle.Reset(CBlastOptionsFactory::Create(p));
58 }
59 
// Body of the common CLocalBlast initialization (x_InitCLocalBlast).
// It asserts that both the query and subject vectors are non-empty and then
// replaces m_Blast with a new CLocalBlast built from a query factory, the
// options handle and a db object.
// NOTE(review): this listing is missing the function signature, the
// statements that define `query_factory` and `db`, the body of the
// interrupt-callback branch and the statement that follows the hitlist-size
// comment -- restore them from the repository copy before compiling.
61 {
62  _ASSERT( !m_tQueries.empty() );
63  _ASSERT( !m_tSubjects.empty() );
68  m_Blast.Reset(new CLocalBlast(query_factory, m_OptsHandle, db));
// Only relevant when the caller registered an interrupt callback.
69  if (m_InterruptFnx != NULL) {
71  }
72  // Set the hitlist size to the total number of subject sequences, to
73  // make sure that no hits are discarded (ported from CBl2Seq::SetupSearch)
75 }
76 
78  CBlastOptionsHandle& opts)
79  : m_DbScanMode(false), m_InterruptFnx(0), m_InterruptUserData(0)
80 {
81  TSeqLocVector queries;
82  TSeqLocVector subjects;
83  queries.push_back(query);
84  subjects.push_back(subject);
85 
86  x_Init(queries, subjects);
87  m_OptsHandle.Reset(&opts);
88 }
89 
90 CBl2Seq::CBl2Seq(const SSeqLoc& query, const TSeqLocVector& subjects,
91  EProgram p, bool dbscan_mode)
92  : m_DbScanMode(dbscan_mode), m_InterruptFnx(0), m_InterruptUserData(0)
93 {
94  TSeqLocVector queries;
95  queries.push_back(query);
96 
97  x_Init(queries, subjects);
99 }
100 
101 CBl2Seq::CBl2Seq(const SSeqLoc& query, const TSeqLocVector& subjects,
102  CBlastOptionsHandle& opts, bool dbscan_mode)
103  : m_DbScanMode(dbscan_mode), m_InterruptFnx(0), m_InterruptUserData(0)
104 {
105  TSeqLocVector queries;
106  queries.push_back(query);
107 
108  x_Init(queries, subjects);
109  m_OptsHandle.Reset(&opts);
110 }
111 
112 CBl2Seq::CBl2Seq(const TSeqLocVector& queries, const TSeqLocVector& subjects,
113  EProgram p, bool dbscan_mode)
114  : m_DbScanMode(dbscan_mode), m_InterruptFnx(0), m_InterruptUserData(0)
115 {
116  x_Init(queries, subjects);
118 }
119 
120 CBl2Seq::CBl2Seq(const TSeqLocVector& queries, const TSeqLocVector& subjects,
121  CBlastOptionsHandle& opts, bool dbscan_mode)
122  : m_DbScanMode(dbscan_mode), m_InterruptFnx(0), m_InterruptUserData(0)
123 {
124  x_Init(queries, subjects);
125  m_OptsHandle.Reset(&opts);
126 }
127 
128 void CBl2Seq::x_Init(const TSeqLocVector& queries, const TSeqLocVector& subjs)
129 {
130  m_tQueries = queries;
131  m_tSubjects = subjs;
133 }
134 
136 {
138 }
139 
140 void
142 {
143  // should be changed if derived classes are created
144  m_Messages.clear();
146  m_AncillaryData.clear();
147  m_Results.Reset();
148 }
149 
151 
154 {
155  if (res.Empty()) {
156  return TSeqAlignVector();
157  }
158  TSeqAlignVector retval;
159  retval.reserve(res->GetNumResults());
160  ITERATE(CSearchResultSet, r, *res) {
162  if ((*r)->HasAlignments()) {
163  sa.Reset(const_cast<CSeq_align_set*>(&*(*r)->GetSeqAlign()));
164  } else {
166  }
167  retval.push_back(sa);
168  }
169  return retval;
170 }
171 
174 {
175  if (m_Results.NotEmpty()) {
176  // return cached results from previous run
178  }
179 
180  (void) RunEx();
183 }
184 
185 void
187 {
188  m_AncillaryData.clear();
189  m_AncillaryData.reserve(m_Results->size());
191  m_AncillaryData.push_back((*r)->GetAncillaryData());
192  }
193 }
194 
// Body of RunEx(): performs the same functionality as Run(), but returns the
// CSearchResultSet itself. Cached results in m_Results are returned as-is;
// otherwise m_Results is assigned the outcome of m_Blast->Run().
// NOTE(review): this listing is missing the function signature, one
// statement before the cache check, one statement before the m_Blast->Run()
// call, and the statements that open the block closed just before the final
// return -- restore them from the repository copy before compiling.
197 {
199  if (m_Results.NotEmpty()) {
200  // return cached results from previous run
201  return m_Results;
202  }
203 
204  //m_OptsHandle->GetOptions().DebugDumpText(cerr, "m_OptsHandle", 1);
// Run the search and cache its results for later calls.
206  m_Results = m_Blast->Run();
211  }
212  return m_Results;
213 }
214 
217 {
219 }
220 
221 void
222 CBl2Seq::GetFilteredSubjectRegions(vector<TSeqLocInfoVector>& retval) const
223 {
224  retval.clear();
225  if (m_Results.Empty() || m_Results->empty()) {
226  return;
227  }
229  TSeqLocInfoVector subj_masks;
230  (*res)->GetSubjectMasks(subj_masks);
231  retval.push_back(subj_masks);
232  }
233 }
234 
235 END_SCOPE(blast)
237 
238 /* @} */
Declares the CBl2Seq (BLAST 2 Sequences) class.
BlastDiagnostics * Blast_DiagnosticsFree(BlastDiagnostics *diagnostics)
Free the BlastDiagnostics structure and all substructures.
BlastDiagnostics * Blast_DiagnosticsCopy(const BlastDiagnostics *diagnostics)
Copy the BlastDiagnostics structure and all substructures.
Definitions which are dependent on the NCBI C++ Object Manager.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Definition: blast_types.hpp:56
#define false
Definition: bool.h:36
Runs the BLAST algorithm between 2 sequences.
Definition: bl2seq.hpp:58
Handle to the options to the BLAST algorithm.
Class to perform a BLAST search on local BLAST databases Note that PHI-BLAST can be run using this cl...
Definition: local_blast.hpp:62
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
NCBI C++ Object Manager dependent implementation of IQueryFactory.
Search Results for All Queries.
void x_InitCLocalBlast()
Common initialization of the CLocalBlast object.
Definition: bl2seq.cpp:60
void x_Init(const TSeqLocVector &queries, const TSeqLocVector &subjs)
Common initialization code for all c-tors.
Definition: bl2seq.cpp:128
TSearchMessages m_Messages
Stores any warnings emitted during query setup.
Definition: bl2seq.hpp:198
TInterruptFnPtr m_InterruptFnx
Interrupt callback.
Definition: bl2seq.hpp:211
void x_ResetInternalDs()
Clean up structures and results from any previous search.
Definition: bl2seq.cpp:141
CRef< CBlastOptionsHandle > m_OptsHandle
Blast options.
Definition: bl2seq.hpp:183
virtual ~CBl2Seq()
Destructor.
Definition: bl2seq.cpp:135
CRef< SInternalData > m_InternalData
Internal core data structures which are used in the preliminary and traceback stages of the search.
BlastDiagnostics * mi_pDiagnostics
Return search statistics data.
Definition: bl2seq.hpp:202
bool m_DbScanMode
Scan like a database (as opposed to pairwise)
Definition: bl2seq.hpp:185
void x_BuildAncillaryData()
Populate the internal m_AncillaryData member.
Definition: bl2seq.cpp:186
void GetFilteredSubjectRegions(vector< TSeqLocInfoVector > &retval) const
Retrieves regions filtered on the subject sequence(s)
Definition: bl2seq.cpp:222
void * m_InterruptUserData
User data passed to the interrupt callback.
Definition: bl2seq.hpp:213
CSearchResultSet::TAncillaryVector m_AncillaryData
Ancillary BLAST data.
Definition: bl2seq.hpp:205
CRef< CSearchResultSet > Run()
Executes the search.
TSeqLocInfoVector GetFilteredQueryRegions() const
Retrieves the filtered query regions.
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
TInterruptFnPtr SetInterruptCallback(TInterruptFnPtr fnptr, void *user_data=NULL)
Set a function callback to be invoked by the CORE of BLAST to allow interrupting a BLAST search in pr...
TSearchMessages GetSearchMessages() const
Retrieve any error/warning messages that occurred during the search.
void SetHitlistSize(int s)
Sets HitlistSize.
virtual TSeqAlignVector Run()
Perform BLAST search Assuming N queries and M subjects, the structure of the returned vector is as fo...
Definition: bl2seq.cpp:173
size_type size() const
Identical to GetNumResults, provided to facilitate STL-style iteration.
CRef< CSearchResultSet > m_Results
CLocalBlast results.
Definition: bl2seq.hpp:208
CRef< CSearchResultSet > RunEx()
Performs the same functionality as Run(), but it returns a different data type.
Definition: bl2seq.cpp:196
static TSeqAlignVector CSearchResultSet2TSeqAlignVector(CRef< CSearchResultSet > res)
Converts m_Results data member to a TSeqAlignVector.
Definition: bl2seq.cpp:153
TSeqLocVector m_tQueries
query sequence(s)
Definition: bl2seq.hpp:181
TSeqLocVector m_tSubjects
sequence(s) to BLAST against
Definition: bl2seq.hpp:182
CRef< CSeq_align_set > CreateEmptySeq_align_set()
Constructs an empty Seq-align-set containing an empty discontinuous seq-align, and appends it to a pr...
bool empty() const
Returns whether this container is empty or not.
CRef< TBlastDiagnostics > m_Diagnostics
Diagnostic output from preliminary and traceback stages.
CBl2Seq(const SSeqLoc &query, const SSeqLoc &subject, EProgram p)
Constructor to compare 2 sequences with default options.
Definition: bl2seq.cpp:48
TSeqLocInfoVector GetFilteredQueryRegions() const
Retrieves regions filtered on the query/queries.
Definition: bl2seq.cpp:216
CRef< CLocalBlast > m_Blast
The actual BLAST instance.
Definition: bl2seq.hpp:184
size_type GetNumResults() const
Return the number of results contained by this object.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
Definition: seqlocinfo.hpp:139
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
static string subject
static string query
#define _ASSERT
#define const
Definition: zconf.h:230
Modified on Sat Dec 02 09:22:41 2023 by modify_doxy.py rev. 669887