NCBI C++ ToolKit
seqsrc_query_factory.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seqsrc_query_factory.cpp 100101 2023-06-15 14:10:29Z merezhuk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /// @file seqsrc_query_factory.cpp
31 /// Implementation of the BlastSeqSrc interface for a query factory
32 
33 #include <ncbi_pch.hpp>
34 #include "seqsrc_query_factory.hpp"
39 #include "blast_objmgr_priv.hpp" // for SetupSubjects
40 
41 /** @addtogroup AlgoBlast
42  *
43  * @{
44  */
45 
48 BEGIN_SCOPE(blast)
49 
50 /////////////////////////////////////////////////////////////////////////////
51 //
52 // CQueryFactoryInfo
53 //
54 /////////////////////////////////////////////////////////////////////////////
55 
56 /// Contains information about all sequences in a set.
/// Holds the per-sequence BLAST_SequenceBlk structures together with summary
/// values (maximal/minimal/average length, sequence count, molecule type)
/// that the BlastSeqSrc callbacks in this file read through the getters.
57 class CQueryFactoryInfo : public CObject
58 {
59 public:
60  /// Constructor from a vector of sequence location/scope pairs and a
61  /// BLAST program type.
63  CQueryFactoryInfo(const TSeqLocVector& subject_seqs,
64  EBlastProgramType program);
66  /// Setter and getter functions for the private fields
67  Uint4 GetMaxLength();
68  Uint4 GetMinLength();
69  Uint4 GetAvgLength();
70  void SetAvgLength(Uint4 val);
71  bool GetIsProtein();
72  Uint4 GetNumSeqs();
73  BLAST_SequenceBlk* GetSeqBlk(Uint4 index);
74 private:
75  bool m_IsProt; ///< Are these sequences protein or nucleotide?
76  vector<BLAST_SequenceBlk*> m_SeqBlkVector; ///< Vector of sequence blocks
77  unsigned int m_MaxLength; ///< Length of the longest sequence in this set
78  unsigned int m_MinLength; ///< Length of the shortest sequence in this set
79  unsigned int m_AvgLength; ///< Average length of sequences in this set
80  /// local query data obtained from the query factory
82  Uint4 m_NumSeqs; ///< Number of sequences
83 };
84 
85 /// Constructor
87  EBlastProgramType program)
88 : m_IsProt(Blast_SubjectIsProtein(program) ? true : false), m_MaxLength(0),
89  m_MinLength(1), m_AvgLength(0), m_QuerySource(0), m_NumSeqs(0)
90 {
91  CRef<IRemoteQueryData> query_data(query_factory->MakeRemoteQueryData());
92  CRef<CBioseq_set> bss(query_data->GetBioseqSet());
93  _ASSERT(bss.NotEmpty());
95  if ( !m_QuerySource ) {
96  NCBI_THROW(CBlastException, eSeqSrcInit,
97  "Failed to initialize sequences for IQueryFactory");
98  }
99 
100  // TODO support for m_MinLength
102  m_NumSeqs = static_cast<Uint4>(m_QuerySource->Size());
103  _ASSERT(!m_SeqBlkVector.empty());
104 }
105 
107  EBlastProgramType program)
108 : m_IsProt(Blast_SubjectIsProtein(program) ? true : false), m_MaxLength(0),
109  m_MinLength(1), m_AvgLength(0), m_QuerySource(0), m_NumSeqs(static_cast<Uint4>(subj_seqs.size()))
110 {
111  // Fix subject location for tblast[nx].
112  if (Blast_SubjectIsTranslated(program))
113  {
114  TSeqLocVector temp_slv;
115  vector<Int2> strand_v;
116  ITERATE(TSeqLocVector, iter, subj_seqs)
117  {
118  strand_v.push_back((Int2) (*iter).seqloc->GetStrand());
119  CRef<CSeq_loc> sl(new CSeq_loc);
120  sl->Assign(*((*iter).seqloc));
122  if ((*iter).mask)
123  {
124  CRef<CSeq_loc> mask_sl(new CSeq_loc);
125  mask_sl->Assign(*((*iter).mask));
126  SSeqLoc sseq_loc(*sl, *((*iter).scope), *mask_sl);
127  temp_slv.push_back(sseq_loc);
128  }
129  else
130  {
131  SSeqLoc sseq_loc(*sl, *((*iter).scope));
132  temp_slv.push_back(sseq_loc);
133  }
134  }
135 
136  SetupSubjects(temp_slv, program, &m_SeqBlkVector, &m_MaxLength);
137 
138  int index=0;
139  ITERATE(vector<Int2>, s_iter, strand_v)
140  {
141  m_SeqBlkVector[index++]->subject_strand = *s_iter;
142  }
143  }
144  else
145  SetupSubjects(const_cast<TSeqLocVector&>(subj_seqs), program, &m_SeqBlkVector, &m_MaxLength);
146 
147  _ASSERT(!m_SeqBlkVector.empty());
148 }
149 
150 /// Destructor
152 {
153  NON_CONST_ITERATE(vector<BLAST_SequenceBlk*>, itr, m_SeqBlkVector) {
154  *itr = BlastSequenceBlkFree(*itr);
155  }
156  m_SeqBlkVector.clear();
158 }
159 
160 
161 /// Returns maximal length of a set of sequences
163 {
164  return m_MaxLength;
165 }
166 
167 /// Returns minimal length of a set of sequences
169 {
170  return m_MinLength;
171 }
172 
173 /// Returns average length
175 {
176  return m_AvgLength;
177 }
178 
179 /// Sets average length
181 {
182  m_AvgLength = length;
183 }
184 
185 /// Answers whether sequences in this object are protein or nucleotide
187 {
188  return m_IsProt;
189 }
190 
191 /// Returns number of sequences
193 {
194  return m_NumSeqs;
195 }
196 
197 /// Returns sequence block structure for one of the sequences
198 /// @param index Which sequence to retrieve sequence block for? [in]
199 /// @return The sequence block.
201 {
202  // N.B.: we're not using the at() method for compatibility with GCC 2.95
203  if (index >= GetNumSeqs()) {
204  throw std::out_of_range("");
205  }
206  return m_SeqBlkVector[index];
207 }
208 
209 /// The following functions interact with the C API, and have to be
210 /// declared extern "C".
211 
212 extern "C" {
213 
214 /// Retrieves the length of the longest sequence in the BlastSeqSrc.
215 /// @param multiseq_handle Pointer to the structure containing sequences [in]
216 static Int4
217 s_QueryFactoryGetMaxLength(void* multiseq_handle, void*)
218 {
219  CRef<CQueryFactoryInfo>* seq_info =
220  static_cast<CRef<CQueryFactoryInfo>*>(multiseq_handle);
221  _ASSERT(seq_info);
222  return (*seq_info)->GetMaxLength();
223 }
224 
225 /// Retrieves the length of the longest sequence in the BlastSeqSrc.
226 /// @param multiseq_handle Pointer to the structure containing sequences [in]
227 static Int4
228 s_QueryFactoryGetMinLength(void* multiseq_handle, void*)
229 {
230  CRef<CQueryFactoryInfo>* seq_info =
231  static_cast<CRef<CQueryFactoryInfo>*>(multiseq_handle);
232  _ASSERT(seq_info);
233  return (*seq_info)->GetMinLength();
234 }
235 
236 /// Retrieves the average length of the sequence in the BlastSeqSrc.
237 /// @param multiseq_handle Pointer to the structure containing sequences [in]
238 static Int4
239 s_QueryFactoryGetAvgLength(void* multiseq_handle, void*)
240 {
241  CRef<CQueryFactoryInfo>* seq_info =
242  static_cast<CRef<CQueryFactoryInfo>*>(multiseq_handle);
243  _ASSERT(seq_info);
244 
245  if ((*seq_info)->GetAvgLength() == 0) {
246  const Uint4 num_seqs((*seq_info)->GetNumSeqs());
247  _ASSERT(num_seqs > 0);
248 
249  Int8 total_length(0);
250  for (Uint4 index = 0; index < num_seqs; ++index)
251  total_length += (Int8) (*seq_info)->GetSeqBlk(index)->length;
252  (*seq_info)->SetAvgLength((Uint4) (total_length / num_seqs));
253  }
254  return (*seq_info)->GetAvgLength();
255 }
256 
257 /// Retrieves the number of sequences in the BlastSeqSrc.
258 /// @param multiseq_handle Pointer to the structure containing sequences [in]
259 static Int4
260 s_QueryFactoryGetNumSeqs(void* multiseq_handle, void*)
261 {
262  CRef<CQueryFactoryInfo>* seq_info =
263  static_cast<CRef<CQueryFactoryInfo>*>(multiseq_handle);
264  _ASSERT(seq_info);
265  return (*seq_info)->GetNumSeqs();
266 }
267 
268 
269 /// Returns zero as this implementation does not use an alias file.
270 static Int4
271 s_QueryFactoryGetNumSeqsStats(void* /*multiseq_handle*/, void*)
272 {
273  return 0;
274 }
275 
276 /// Returns 0 as total length, indicating that this is NOT a database!
277 static Int8
278 s_QueryFactoryGetTotLen(void* /*multiseq_handle*/, void*)
279 {
280  return 0;
281 }
282 
283 /// Returns 0 as total statistic length, as this implementation does not use alias files.
284 static Int8
285 s_QueryFactoryGetTotLenStats(void* /*multiseq_handle*/, void*)
286 {
287  return 0;
288 }
289 
290 /// Always returns NcbiEmptyCStr
291 static const char*
292 s_QueryFactoryGetName(void* /*multiseq_handle*/, void*)
293 {
294  return NcbiEmptyCStr;
295 }
296 
297 /// Answers whether this object is for protein or nucleotide sequences.
298 /// @param multiseq_handle Pointer to the structure containing sequences [in]
299 static Boolean
300 s_QueryFactoryGetIsProt(void* multiseq_handle, void*)
301 {
302  CRef<CQueryFactoryInfo>* seq_info =
303  static_cast<CRef<CQueryFactoryInfo>*>(multiseq_handle);
304  _ASSERT(seq_info);
305  return (Boolean) (*seq_info)->GetIsProtein();
306 }
307 
308 /// Retrieves the sequence for a given index, in a given encoding.
309 /// @param multiseq_handle Pointer to the structure containing sequences [in]
310 /// @param args Pointer to BlastSeqSrcGetSeqArg structure, containing sequence index and
311 /// encoding; its seq field receives the copied sequence block. [in] [out]
312 /// @return BLAST_SEQSRC_ERROR if the set holds no sequences or args is NULL,
/// BLAST_SEQSRC_EOF if args->oid is rejected by GetSeqBlk() as out of range,
/// BLAST_SEQSRC_SUCCESS otherwise.
313 static Int2
314 s_QueryFactoryGetSequence(void* multiseq_handle, BlastSeqSrcGetSeqArg* args)
315 {
316  CRef<CQueryFactoryInfo>* seq_info =
317  static_cast<CRef<CQueryFactoryInfo>*>(multiseq_handle);
318 
319  _ASSERT(seq_info);
320  _ASSERT(args);
321 
322  if ((*seq_info)->GetNumSeqs() == 0 || !args)
323  return BLAST_SEQSRC_ERROR;
324 
325  BLAST_SequenceBlk* seq_blk(0);
  // GetSeqBlk() signals an invalid index with std::out_of_range; translate
  // it into a status code here instead of letting it leave this callback.
326  try { seq_blk = (*seq_info)->GetSeqBlk(args->oid); }
327  catch (const std::out_of_range&) {
328  return BLAST_SEQSRC_EOF;
329  }
330  _ASSERT(seq_blk);
331 
332  BlastSequenceBlkCopy(&args->seq, seq_blk);
333  /* If this is a nucleotide sequence, and it is the traceback stage,
334  we need the uncompressed buffer, stored in the 'sequence_start'
335  pointer. That buffer has an extra sentinel byte for blastn, but
336  no sentinel byte for translated programs. */
337  if (args->encoding == eBlastEncodingNucleotide) {
338  args->seq->sequence = args->seq->sequence_start + 1;
339  } else if (args->encoding == eBlastEncodingNcbi4na) {
340  args->seq->sequence = args->seq->sequence_start;
341  }
342 
343  // these are not applicable to encode subject masks, instead seq_ranges
344  // should be utilized
345  _ASSERT(args->seq->lcase_mask == NULL);
347 
  // Record which index the returned block corresponds to.
348  args->seq->oid = args->oid;
349  return BLAST_SEQSRC_SUCCESS;
350 }
351 
352 /// Deallocates the uncompressed sequence buffer if necessary.
353 /// @param args Pointer to BlastSeqSrcGetSeqArg structure [in]
354 static void
355 s_QueryFactoryReleaseSequence(void* /*multiseq_handle*/, BlastSeqSrcGetSeqArg* args)
356 {
357  _ASSERT(args);
358  if (args->seq->sequence_start_allocated)
359  sfree(args->seq->sequence_start);
360 }
361 
362 /// Retrieve length of a given sequence.
363 /// @param multiseq_handle Pointer to the structure containing sequences [in]
364 /// @param args Pointer to integer indicating index into the sequences
365 /// vector [in]
366 /// @return Length of the sequence or BLAST_SEQSRC_ERROR.
367 static Int4
368 s_QueryFactoryGetSeqLen(void* multiseq_handle, void* args)
369 {
370  CRef<CQueryFactoryInfo>* seq_info =
371  static_cast<CRef<CQueryFactoryInfo>*>(multiseq_handle);
372  Int4 index;
373 
374  _ASSERT(seq_info);
375  _ASSERT(args);
376 
377  index = *((Int4*) args);
378  return (*seq_info)->GetSeqBlk(index)->length;
379 }
380 
381 /// Mirrors the database iteration interface. Next chunk of indices retrieval
382 /// is really just a check that current index has not reached the end.
383 /// @todo Does this need to be so complicated? Why not simply have all logic in
384 /// s_QueryFactoryIteratorNext? - Answer: as explained in the comments, the
385 /// GetNextChunk functionality is provided as a convenience to provide
386 /// MT-safe iteration over a BlastSeqSrc implementation.
387 /// @param multiseq_handle Pointer to the multiple sequence object [in]
388 /// @param itr Iterator over multiseq_handle [in] [out]
389 /// @return Status.
390 static Int2
392 {
393  CRef<CQueryFactoryInfo>* seq_info =
394  static_cast<CRef<CQueryFactoryInfo>*>(multiseq_handle);
395 
396  _ASSERT(itr);
397 
398  if (itr->current_pos == UINT4_MAX) {
399  itr->current_pos = 0;
400  }
401 
402  if (itr->current_pos >= (*seq_info)->GetNumSeqs())
403  return BLAST_SEQSRC_EOF;
404 
405  return BLAST_SEQSRC_SUCCESS;
406 }
407 
/// Resets the internal bookmark iterator. Not applicable to this
/// implementation, so the function intentionally does nothing.
static void
s_QueryFactoryResetChunkIter(void* /*multiseq_handle*/)
{
    // Nothing to reset.
}
414 
415 /// Gets the next sequence index, given a BlastSeqSrc pointer.
416 /// @param multiseq_handle Handle to access the underlying object over which
417 /// iteration occurs. [in]
418 /// @param itr Iterator over seqsrc [in] [out]
419 /// @return Next index in the sequence set
420 static Int4
422 {
423  Int4 retval = BLAST_SEQSRC_EOF;
424  Int2 status = 0;
425 
426  _ASSERT(multiseq_handle);
427  _ASSERT(itr);
428 
429  if ((status = s_QueryFactoryGetNextChunk(multiseq_handle, itr))
430  == BLAST_SEQSRC_EOF) {
431  return status;
432  }
433  retval = itr->current_pos++;
434 
435  return retval;
436 }
437 
438 /// Encapsulates the arguments needed to initialize multi-sequence source.
440  CRef<IQueryFactory> query_factory; ///< The query factory
441  TSeqLocVector subj_seqs; ///< The subject sequences
442  EBlastProgramType program; ///< BLAST program
443 
444  /// Constructor
446  const TSeqLocVector& subj_seqs,
449 };
450 
451 /// Multi sequence source destructor: frees its internal data structure
452 /// @param seq_src BlastSeqSrc structure to free [in]
453 /// @return NULL
454 static BlastSeqSrc*
456 {
457  if (!seq_src)
458  return NULL;
459  CRef<CQueryFactoryInfo>* seq_info = static_cast<CRef<CQueryFactoryInfo>*>
461  delete seq_info;
462  return NULL;
463 }
464 
465 /// Multi-sequence sequence source copier: creates a new reference to the
466 /// CQueryFactoryInfo object and copies the rest of the BlastSeqSrc structure.
467 /// @param seq_src BlastSeqSrc structure to copy [in]
468 /// @return Pointer to the new BlastSeqSrc.
469 static BlastSeqSrc*
471 {
472  if (!seq_src)
473  return NULL;
474  CRef<CQueryFactoryInfo>* seq_info = static_cast<CRef<CQueryFactoryInfo>*>
476  CRef<CQueryFactoryInfo>* seq_info2 = new CRef<CQueryFactoryInfo>(*seq_info);
477 
478  _BlastSeqSrcImpl_SetDataStructure(seq_src, (void*) seq_info2);
479 
480  return seq_src;
481 }
482 
483 /// Multi-sequence source constructor
484 /// @param retval BlastSeqSrc structure (already allocated) to populate [in] [out]
485 /// @param args Pointer to SQueryFactorySrcNewArgs structure above [in]
486 /// @return Updated bssp structure (with all function pointers initialized);
/// this is the retval pointer that was passed in.
487 static BlastSeqSrc*
488 s_QueryFactorySrcNew(BlastSeqSrc* retval, void* args)
489 {
490  _ASSERT(retval);
491  _ASSERT(args);
492 
493  SQueryFactorySrcNewArgs* seqsrc_args = (SQueryFactorySrcNewArgs*) args;
494 
  // Build the CQueryFactoryInfo from the query factory when one was supplied,
  // otherwise from the subject sequences. Exceptions thrown by either
  // constructor are converted into an initialization error string on retval
  // instead of being propagated to the caller.
496  try {
497  if (seqsrc_args->query_factory) {
498  seq_info->Reset(new CQueryFactoryInfo(seqsrc_args->query_factory,
499  seqsrc_args->program));
500  } else {
501  seq_info->Reset(new CQueryFactoryInfo(seqsrc_args->subj_seqs,
502  seqsrc_args->program));
503  }
504  } catch (const ncbi::CException& e) {
505  _BlastSeqSrcImpl_SetInitErrorStr(retval, strdup(e.ReportAll().c_str()));
506  } catch (const std::exception& e) {
507  _BlastSeqSrcImpl_SetInitErrorStr(retval, strdup(e.what()));
508  } catch (...) {
510  strdup("Caught unknown exception from CQueryFactoryInfo constructor"));
511  }
512 
513  /* Initialize the BlastSeqSrc structure fields with user-defined function
514  * pointers and seq_info */
517  _BlastSeqSrcImpl_SetDataStructure(retval, (void*) seq_info);
533 
534  return retval;
535 }
536 
537 } // extern "C"
538 
539 static BlastSeqSrc*
541  const TSeqLocVector& subj_seqs,
542  EBlastProgramType program)
543 {
544  BlastSeqSrc* seq_src = NULL;
545  BlastSeqSrcNewInfo bssn_info;
546 
547  if (query_factory.Empty() && subj_seqs.empty()) {
548  NCBI_THROW(CBlastException, eInvalidArgument,
549  "Must provide either a query factory or subject sequences");
550  }
551 
552  SQueryFactorySrcNewArgs args(query_factory, subj_seqs, program);
553 
554  bssn_info.constructor = &s_QueryFactorySrcNew;
555  bssn_info.ctor_argument = (void*) &args;
556 
557  seq_src = BlastSeqSrcNew(&bssn_info);
558  return seq_src;
559 }
560 
563  EBlastProgramType program)
564 {
566  return s_QueryFactoryBlastSeqSrcInit(query_factory, empty, program);
567 }
568 
571  EBlastProgramType program)
572 {
574  return s_QueryFactoryBlastSeqSrcInit(empty, subj_seqs, program);
575 }
576 
577 END_SCOPE(blast)
579 
580 
581 /* @} */
Internal auxiliary setup classes/functions for extracting sequence data from Bioseqs.
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
Declares the BLAST exception class.
Definitions which are dependent on the NCBI C++ Object Manager.
Boolean Blast_SubjectIsProtein(EBlastProgramType p)
Returns true if the subject is protein.
Definition: blast_program.c:50
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
Definition: blast_program.c:63
#define BLAST_SEQSRC_ERROR
Error while retrieving sequence.
Definition: blast_seqsrc.h:291
BlastSeqSrc * BlastSeqSrcNew(const BlastSeqSrcNewInfo *bssn_info)
Allocates memory for a BlastSeqSrc structure and then invokes the constructor function defined in its...
Definition: blast_seqsrc.c:90
#define BLAST_SEQSRC_SUCCESS
Successful sequence retrieval.
Definition: blast_seqsrc.h:293
#define BLAST_SEQSRC_EOF
No more sequences available.
Definition: blast_seqsrc.h:292
Definitions needed for implementing the BlastSeqSrc interface and low level details of the implementa...
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetReleaseSequence(BlastSeqSrc *var, ReleaseSeqBlkFnPtr arg)
Definition: blast_seqsrc.c:574
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetResetChunkIterator(BlastSeqSrc *var, ResetChunkIteratorFnPtr arg)
Definition: blast_seqsrc.c:581
NCBI_XBLAST_EXPORT void * _BlastSeqSrcImpl_GetDataStructure(const BlastSeqSrc *var)
Definition: blast_seqsrc.c:555
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetTotLenStats(BlastSeqSrc *var, GetInt8FnPtr arg)
Definition: blast_seqsrc.c:564
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetSequence(BlastSeqSrc *var, GetSeqBlkFnPtr arg)
Definition: blast_seqsrc.c:572
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetNumSeqsStats(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:559
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetTotLen(BlastSeqSrc *var, GetInt8FnPtr arg)
Definition: blast_seqsrc.c:563
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:573
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetIsProt(BlastSeqSrc *var, GetBoolFnPtr arg)
Definition: blast_seqsrc.c:567
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetDataStructure(BlastSeqSrc *var, void *arg)
Definition: blast_seqsrc.c:555
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetCopyFnPtr(BlastSeqSrc *var, BlastSeqSrcCopier arg)
Definition: blast_seqsrc.c:553
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetAvgSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:562
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetIterNext(BlastSeqSrc *var, AdvanceIteratorFnPtr arg)
Definition: blast_seqsrc.c:576
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetMinSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:561
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetInitErrorStr(BlastSeqSrc *var, char *arg)
Definition: blast_seqsrc.c:556
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetDeleteFnPtr(BlastSeqSrc *var, BlastSeqSrcDestructor arg)
Definition: blast_seqsrc.c:552
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetName(BlastSeqSrc *var, GetStrFnPtr arg)
Definition: blast_seqsrc.c:566
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetMaxSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:560
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetNumSeqs(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:558
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
Definition: blast_util.c:245
void BlastSequenceBlkCopy(BLAST_SequenceBlk **copy, BLAST_SequenceBlk *src)
Copies contents of the source sequence block without copying sequence buffers; sets all "field_alloca...
Definition: blast_util.c:259
Defines BLAST error codes (user errors included)
Implements the IBlastQuerySource interface using a CBioseq_set as data source.
CObject –.
Definition: ncbiobj.hpp:180
Contains information about all sequences in a set.
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
TSeqLocVector subj_seqs
The subject sequences.
Uint4 GetAvgLength()
Returns average length.
void SetupSubjects(TSeqLocVector &subjects, EBlastProgramType program, vector< BLAST_SequenceBlk * > *seqblk_vec, unsigned int *max_subjlen)
Sets up internal subject data structure for the BLAST search.
unsigned int m_MaxLength
Length of the longest sequence in this set.
BLAST_SequenceBlk * GetSeqBlk(Uint4 index)
Returns sequence block structure for one of the sequences.
EBlastProgramType program
BLAST program.
SQueryFactorySrcNewArgs(CRef< IQueryFactory > qf, const TSeqLocVector &subj_seqs, EBlastProgramType p)
Constructor.
CQueryFactoryInfo(CRef< IQueryFactory > qf, EBlastProgramType program)
Constructor from a query factory and a BLAST program type.
void SetupSubjects_OMF(IBlastQuerySource &subjects, EBlastProgramType program, vector< BLAST_SequenceBlk * > *seqblk_vec, unsigned int *max_subjlen)
Object manager free version of SetupSubjects.
BlastSeqSrc * QueryFactoryBlastSeqSrcInit(CRef< IQueryFactory > query_factory, EBlastProgramType program)
Initialize the sequence source structure from a query factory.
static Int4 s_QueryFactoryGetSeqLen(void *multiseq_handle, void *args)
Retrieve length of a given sequence.
unsigned int m_AvgLength
Average length of sequences in this set.
bool m_IsProt
Are these sequences protein or nucleotide?
Uint4 m_NumSeqs
Number of sequences.
static void s_QueryFactoryReleaseSequence(void *, BlastSeqSrcGetSeqArg *args)
Deallocates the uncompressed sequence buffer if necessary.
static Int4 s_QueryFactoryGetNumSeqs(void *multiseq_handle, void *)
Retrieves the number of sequences in the BlastSeqSrc.
static Boolean s_QueryFactoryGetIsProt(void *multiseq_handle, void *)
Answers whether this object is for protein or nucleotide sequences.
static BlastSeqSrc * s_QueryFactoryBlastSeqSrcInit(CRef< IQueryFactory > query_factory, const TSeqLocVector &subj_seqs, EBlastProgramType program)
static const char * s_QueryFactoryGetName(void *, void *)
Always returns NcbiEmptyCStr.
static Int8 s_QueryFactoryGetTotLen(void *, void *)
Returns 0 as total length, indicating that this is NOT a database!
CRef< IBlastQuerySource > m_QuerySource
local query data obtained from the query factory
unsigned int m_MinLength
Length of the shortest sequence in this set.
static Int8 s_QueryFactoryGetTotLenStats(void *, void *)
Returns 0 as total statistic length, as this implementation does not use alias files.
static BlastSeqSrc * s_QueryFactorySrcNew(BlastSeqSrc *retval, void *args)
Multi-sequence source constructor.
static void s_QueryFactoryResetChunkIter(void *)
Resets the internal bookmark iterator (N/A in this case)
static Int2 s_QueryFactoryGetSequence(void *multiseq_handle, BlastSeqSrcGetSeqArg *args)
Retrieves the sequence for a given index, in a given encoding.
static Int4 s_QueryFactoryGetMinLength(void *multiseq_handle, void *)
Retrieves the length of the shortest sequence in the BlastSeqSrc.
virtual CRef< objects::CBioseq_set > GetBioseqSet()=0
Accessor for the CBioseq_set.
static BlastSeqSrc * s_QueryFactorySrcFree(BlastSeqSrc *seq_src)
Multi sequence source destructor: frees its internal data structure.
static BlastSeqSrc * s_QueryFactorySrcCopy(BlastSeqSrc *seq_src)
Multi-sequence sequence source copier: creates a new reference to the CQueryFactoryInfo object and co...
CRef< IRemoteQueryData > MakeRemoteQueryData()
Creates and caches an IRemoteQueryData.
Definition: query_data.cpp:61
Uint4 GetMaxLength()
Setter and getter functions for the private fields.
static Int2 s_QueryFactoryGetNextChunk(void *multiseq_handle, BlastSeqSrcIterator *itr)
Mirrors the database iteration interface.
static Int4 s_QueryFactoryGetNumSeqsStats(void *, void *)
Returns zero as this implementation does not use an alias file.
vector< BLAST_SequenceBlk * > m_SeqBlkVector
Vector of sequence blocks.
Uint4 GetNumSeqs()
Returns number of sequences.
virtual TSeqPos Size() const =0
Return the number of elements in the sequence container.
static Int4 s_QueryFactoryIteratorNext(void *multiseq_handle, BlastSeqSrcIterator *itr)
Gets the next sequence index, given a BlastSeqSrc pointer.
void SetAvgLength(Uint4 val)
Sets average length.
CRef< IQueryFactory > query_factory
The query factory.
Uint4 GetMinLength()
Returns minimal length of a set of sequences.
static Int4 s_QueryFactoryGetMaxLength(void *multiseq_handle, void *)
Retrieves the length of the longest sequence in the BlastSeqSrc.
bool GetIsProtein()
Answers whether sequences in this object are protein or nucleotide.
static Int4 s_QueryFactoryGetAvgLength(void *multiseq_handle, void *)
Retrieves the average length of the sequence in the BlastSeqSrc.
@ eBlastEncodingNcbi4na
NCBI4na.
@ eBlastEncodingNucleotide
Special encoding for preliminary stage of BLAST: permutation of NCBI4na.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
Definition: Seq_loc.cpp:5196
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NcbiEmptyCStr
Definition: ncbistr.hpp:59
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
#define strdup
Definition: ncbi_ansi_ext.h:70
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
#define FALSE
bool replacement for C indicating false.
Definition: ncbi_std.h:101
#define UINT4_MAX
largest number represented by unsigned int.
Definition: ncbi_std.h:136
Implementation of the BlastSeqSrc interface for a query factory.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Structure to hold a sequence.
Definition: blast_def.h:242
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Definition: blast_def.h:244
Int4 oid
The ordinal id of the current sequence.
Definition: blast_def.h:250
BlastMaskLoc * lcase_mask
Locations to be masked from operations on this sequence: lookup table for query; scanning for subject...
Definition: blast_def.h:265
Boolean lcase_mask_allocated
TRUE if memory has been allocated for lcase_mask.
Definition: blast_def.h:268
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
Boolean sequence_start_allocated
TRUE if memory has been allocated for sequence_start.
Definition: blast_def.h:253
Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature,...
Definition: blast_seqsrc.h:257
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
Definition: blast_seqsrc.h:259
EBlastEncoding encoding
Encoding of sequence, i.e.
Definition: blast_seqsrc.h:263
BLAST_SequenceBlk * seq
Sequence to return, if NULL, it should allocated by GetSeqBlkFnPtr (using BlastSeqBlkNew or BlastSetU...
Definition: blast_seqsrc.h:284
Complete type definition of Blast Sequence Source Iterator.
unsigned int current_pos
Keep track of this iterator's current position, implementations use UINT4_MAX to indicate this is uni...
Complete type definition of the structure used to create a new BlastSeqSrc.
BlastSeqSrcConstructor constructor
User-defined function to initialize a BlastSeqSrc structure.
void * ctor_argument
Argument to the above function.
Complete type definition of Blast Sequence Source ADT.
Definition: blast_seqsrc.c:43
Encapsulates the arguments needed to initialize multi-sequence source.
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
#define _ASSERT
Modified on Mon Apr 22 04:05:59 2024 by modify_doxy.py rev. 669887