NCBI C++ ToolKit
split_query_aux_priv.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: split_query_aux_priv.hpp 77034 2017-03-20 13:42:25Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /** @file split_query_aux_priv.hpp
31  * Auxiliary functions and classes to assist in query splitting
32  */
33 
34 #ifndef ALGO_BLAST_API__SPLIT_QUERY_AUX_PRIV_HPP
35 #define ALGO_BLAST_API__SPLIT_QUERY_AUX_PRIV_HPP
36 
37 #include <corelib/ncbiobj.hpp>
39 #include "split_query.hpp"
42 #include <sstream>
43 
44 /** @addtogroup AlgoBlast
45  *
46  * @{
47  */
48 
50 BEGIN_SCOPE(blast)
51 
52 /// Determines if the input query sequence(s) should be split because it
53 /// is supported by the current implementation. The splitting decision
54 /// in this function is not based upon query length.
55 /// @param program BLAST program type [in]
56 /// @param chunk_size size of each of the query chunks [in]
57 /// @param concatenated_query_length length of the concatenated query [in]
58 /// @param num_queries number of queries to split [in]
60 bool
62  size_t chunk_size,
63  size_t concatenated_query_length,
64  size_t num_queries);
65 
66 /// Size of the region that overlaps in between each query chunk
67 /// @param program BLAST program type [in]
69 size_t
71 
72 /// Calculate the number of chunks that a query will be split into
73 /// based upon query length, chunk_size and program.
74 /// @param program BLAST program type [in]
75 /// @param chunk_size size of each of the query chunks, may be adjusted [in|out]
76 /// @param concatenated_query_length length of the concatenated query [in]
77 /// @param num_queries number of queries to split [in]
79 Uint4
81  size_t *chunk_size,
82  size_t concatenated_query_length,
83  size_t num_queries);
84 
85 /// Function used by search class to retrieve a query factory for a given chunk
89  CRef<CBlastOptions> options,
90  CRef<SInternalData> full_data,
91  size_t num_threaded =1);
92 
93 /// this might supersede the function below...
94 void
96  CRef<IQueryFactory> full_query_fact,
97  CRef<SInternalData> full_data);
98 
99 
100 /**
101  * @brief Auxiliary class to provide convenient and efficient access to
102  * conversions between contexts local to query split chunks and the absolute
103  * (full, unsplit) query
104  */
106 public:
107  /// Constructor
108  /// @param sqb Split query block structure [in]
109  /// @param query_chunk_factories query factories corresponding to each of
110  /// the chunks needed to report unit testing data (optional) [in]
111  /// @param options BLAST options, also needed to report unit test data
112  /// (optional) [in]
114  vector< CRef<IQueryFactory> >* query_chunk_factories = NULL,
115  const CBlastOptions* options = NULL);
116 
117  /**
118  * @brief Get the context number in the absolute (i.e.: unsplit) query
119  *
120  * @param chunk_num Chunk number where the context is found in the split
121  * query [in]
122  * @param context_in_chunk Context in the split query [in]
123  *
124  * @return the appropriate context, or if the context is invalid
125  * kInvalidContext
126  */
127  int GetAbsoluteContext(size_t chunk_num, Int4 context_in_chunk) const;
128 
129  /**
130  * @brief Get the context number in the split query chunk. This function is
131  * basically doing the reverse lookup that GetAbsoluteContext does
132  *
133  * @param chunk_num Chunk number to search for this context [in]
134  * @param absolute_context context number in the absolute (i.e.: unsplit)
135  * query [in]
136  *
137  * @return the appropriate context if found, else kInvalidContext
138  *
139  * @sa GetAbsoluteContext
140  */
141  int GetContextInChunk(size_t chunk_num, int absolute_context) const;
142 
143  /**
144  * @brief Get the chunk number where context_in_chunk starts (i.e.:
145  * location of its first chunk).
146  *
147  * @param curr_chunk Chunk where the context_in_chunk is found [in]
148  * @param context_in_chunk Context in the split query [in]
149  *
150  * @return the appropriate chunk number or kInvalidContext if the context
151  * is not valid in the query chunk (i.e.: strand not searched)
152  */
153  int GetStartingChunk(size_t curr_chunk, Int4 context_in_chunk) const;
154 
155  /// Print this object so that its contents can be directly used to update
156  /// split_query.ini (for unit testing)
157  /// @param out stream to print this object [in|out]
158  /// @param rhs object to print [in]
159  friend ostream& operator<<(ostream& out, const CContextTranslator& rhs);
160 
161 private:
162  /// Each element in this vector represents a chunk, and it contains the
163  /// context numbers that correspond to it in the full concatenated query
164  vector< vector<int> > m_ContextsPerChunk;
165 
166  vector< vector<int> > m_StartingChunks;
167  vector< vector<int> > m_AbsoluteContexts;
168 };
169 
170 /// Auxiliary class to determine information about the query that was split
171 /// into chunks.
173 public:
174  /**
175  * @brief Constructor
176  *
177  * @param sqb Split query block structure [in]
178  * @param program BLAST program type [in]
179  * @param local_query_data source of query data [in]
180  */
182  EBlastProgramType program,
183  CRef<ILocalQueryData> local_query_data);
184 
185  /**
186  * @brief Get the length of the query
187  *
188  * @param chunk_num chunk number where query is found [in]
189  * @param context_in_chunk which context within this chunk contains query
190  * [in]
191  *
192  * @return length of query
193  */
194  size_t GetQueryLength(size_t chunk_num, int context_in_chunk) const;
195  /**
196  * @brief Get the length of the query
197  *
198  * @param global_query_index index of the query in the context of the
199  * full, non-split query [in]
200  *
201  * @return length of query
202  */
203  size_t GetQueryLength(int global_query_index) const;
204 
205  /**
206  * @brief get the last chunk where query identified with global_query_index
207  * is found
208  *
209  * @param global_query_index index of the query in the context of the
210  * full, non-split query [in]
211  *
212  * @return chunk number where query is last found
213  */
214  int GetLastChunk(int global_query_index);
215  /**
216  * @brief get the last chunk where the query identified by chunk_num and
217  * context_in_chunk is found
218  *
219  * @param chunk_num chunk number where query is found [in]
220  * @param context_in_chunk which context within this chunk contains query
221  * [in]
222  *
223  * @return chunk number where query is last found
224  */
225  int GetLastChunk(size_t chunk_num, int context_in_chunk);
226 
227 private:
228  /**
229  * @brief Convert a context in a chunk to a query index (within the chunk)
230  *
231  * @param context_in_chunk context number [in]
232  *
233  * @return query index
234  */
235  size_t x_ContextInChunkToQueryIndex(int context_in_chunk) const;
236 
237  /// BLAST program type
239 
240  /// Each element in this vector represents a chunk, and it contains the
241  /// query indices that correspond to it in the full concatenated query
242  vector< vector<size_t> > m_QueryIndicesPerChunk;
243 
244  /// Lengths of the queries
245  vector<size_t> m_QueryLengths;
246 
247  /// Lists the last chunk where the query can be found
249  /// Initial value of all entries in the above cache
250  enum { kUninitialized = -1 };
251 };
252 
/// Auxiliary function to print a vector as a comma-separated list
/// @param data2print vector to print; its elements must be streamable via
/// operator<< [in]
/// @return the elements of data2print joined by ", ", or an empty string if
/// data2print is empty
template <class T>
std::string s_PrintVector(const std::vector<T>& data2print)
{
    // Handle empty input first so no stream is constructed needlessly.
    if (data2print.empty()) {
        return std::string();
    }

    std::ostringstream os;
    typename std::vector<T>::const_iterator it = data2print.begin();

    // The first element is written without a leading separator ...
    os << *it;
    // ... and every subsequent element is preceded by ", ".
    for (++it; it != data2print.end(); ++it) {
        os << ", " << *it;
    }
    return os.str();
}
270 
271 END_SCOPE(BLAST)
273 
274 /* @} */
275 
276 #endif /* ALGO_BLAST_API__SPLIT_QUERY_AUX_PRIV_HPP */
277 
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
Definition: blast_export.h:65
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
Definitions and functions associated with the BlastQueryInfo structure.
Encapsulates ALL the BLAST algorithm's options.
Auxiliary class to provide convenient and efficient access to conversions between contexts local to q...
Auxiliary class to determine information about the query that was split into chunks.
Wrapper class around SSplitQueryBlk structure.
static const int chunk_size
CNcbiOstream & operator<<(CNcbiOstream &out, const CEquivRange &range)
Definition: equiv_range.cpp:96
std::ofstream out("events_result.xml")
main entry point for tests
const auto kUninitialized
Definition: filetrack.cpp:536
vector< vector< int > > m_StartingChunks
void SplitQuery_SetEffectiveSearchSpace(CRef< CBlastOptions > options, CRef< IQueryFactory > full_query_fact, CRef< SInternalData > full_data)
this might supercede the function below...
size_t SplitQuery_GetOverlapChunkSize(EBlastProgramType program)
Size of the region that overlaps in between each query chunk.
vector< vector< int > > m_ContextsPerChunk
Each element in this vector represents a chunk, and it contains the contexts numbers that correspond ...
CRef< SInternalData > SplitQuery_CreateChunkData(CRef< IQueryFactory > qf, CRef< CBlastOptions > options, CRef< SInternalData > full_data, size_t num_threads)
Function used by search class to retrieve a query factory for a given chunk.
vector< size_t > m_QueryLengths
Lengths of the queries.
vector< int > m_LastChunkForQueryCache
Lists the last chunk where the query can be found.
EBlastProgramType m_Program
BLAST program type.
vector< vector< int > > m_AbsoluteContexts
bool SplitQuery_ShouldSplit(EBlastProgramType program, size_t chunk_size, size_t concatenated_query_length, size_t num_queries)
Determines if the input query sequence(s) should be split because it.
Uint4 SplitQuery_CalculateNumChunks(EBlastProgramType program, size_t *chunk_size, size_t concatenated_query_length, size_t num_queries)
Calculate the number of chunks that a query will be split into based upon query length,...
string s_PrintVector(const vector< T > &data2print)
Auxiliary function to print a vector.
vector< vector< size_t > > m_QueryIndicesPerChunk
Each element in this vector represents a chunk, and it contains the query indices that correspond in ...
#define NULL
Definition: ncbistd.hpp:225
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
int i
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares CQuerySplitter, a class to split the query sequence(s)
Modified on Thu Apr 11 15:14:38 2024 by modify_doxy.py rev. 669887