NCBI C++ ToolKit
seqsrc_multiseq.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seqsrc_multiseq.cpp 92028 2020-12-17 15:27:57Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Ilya Dondoshansky
27 *
28 */
29 
30 /// @file seqsrc_multiseq.cpp
31 /// Implementation of the BlastSeqSrc interface for a vector of sequence
32 /// locations.
33 
34 #include <ncbi_pch.hpp>
38 #include "blast_objmgr_priv.hpp"
39 
40 #include <memory>
41 
42 /** @addtogroup AlgoBlast
43  *
44  * @{
45  */
46 
49 BEGIN_SCOPE(blast)
50 
51 /// Contains information about all sequences in a set.
52 class CMultiSeqInfo : public CObject
53 {
54 public:
55  /// Constructor from a vector of sequence location/scope pairs and a
56  /// BLAST program type.
57  CMultiSeqInfo(TSeqLocVector& seq_vector, EBlastProgramType program, bool dbscan_mode);
58  ~CMultiSeqInfo();
59  /// Setter and getter functions for the private fields
60  Uint4 GetMaxLength();
61  void SetMaxLength(Uint4 val);
62  Uint4 GetAvgLength();
63  void SetAvgLength(Uint4 val);
64  Int8 GetTotLength();
65  bool GetIsProtein();
66  Uint4 GetNumSeqs();
67  BLAST_SequenceBlk* GetSeqBlk(int index);
68 private:
69  /// Internal fields
70  bool m_ibIsProt; ///< Are these sequences protein or nucleotide?
71  vector<BLAST_SequenceBlk*> m_ivSeqBlkVec; ///< Vector of sequence blocks
72  unsigned int m_iMaxLength; ///< Length of the longest sequence in this set
73  unsigned int m_iAvgLength; ///< Average length of sequences in this set
74  Int8 m_iTotalLength; ///< Total length of sequences in this set
75  bool m_DbScanMode; ///< Database scanning mode (not pairwise)
76 };
77 
78 /// Returns maximal length of a set of sequences
80 {
81  return m_iMaxLength;
82 }
83 
84 /// Sets maximal length
86 {
87  m_iMaxLength = length;
88 }
89 
90 /// Returns average length
92 {
93  return m_iAvgLength;
94 }
95 
96 /// Sets average length
98 {
99  m_iAvgLength = length;
100 }
101 
102 /// Returns total length
104 {
105  return m_iTotalLength;
106 }
107 
108 /// Answers whether sequences in this object are protein or nucleotide
110 {
111  return m_ibIsProt;
112 }
113 
114 /// Returns number of sequences
116 {
117  return (Uint4) m_ivSeqBlkVec.size();
118 }
119 
120 /// Returns sequence block structure for one of the sequences
121 /// @param index Which sequence to retrieve sequence block for? [in]
122 /// @return The sequence block.
124 {
125  _ASSERT(!m_ivSeqBlkVec.empty());
126  _ASSERT((int)m_ivSeqBlkVec.size() > index);
127  return m_ivSeqBlkVec[index];
128 }
129 
130 /// Constructor
132  EBlastProgramType program,
133  bool dbscan_mode)
134 {
135  m_ibIsProt = Blast_SubjectIsProtein(program) ? true : false;
136  m_DbScanMode = dbscan_mode;
137  m_iTotalLength=0;
138 
139  // Fix subject location for tblast[nx].
140  if (Blast_SubjectIsTranslated(program))
141  {
142  TSeqLocVector temp_slv;
143  vector<Int2> strand_v;
144  ITERATE(TSeqLocVector, iter, seq_vector)
145  {
146  strand_v.push_back((Int2) (*iter).seqloc->GetStrand());
147  CRef<CSeq_loc> sl(new CSeq_loc);
148  sl->Assign(*((*iter).seqloc));
150  if ((*iter).mask)
151  {
152  CRef<CSeq_loc> mask_sl(new CSeq_loc);
153  mask_sl->Assign(*((*iter).mask));
154  SSeqLoc sseq_loc(*sl, *((*iter).scope), *mask_sl);
155  temp_slv.push_back(sseq_loc);
156  }
157  else
158  {
159  SSeqLoc sseq_loc(*sl, *((*iter).scope));
160  temp_slv.push_back(sseq_loc);
161  }
162  }
163 
164  SetupSubjects(temp_slv, program, &m_ivSeqBlkVec, &m_iMaxLength);
165 
166  int index=0;
167  ITERATE(vector<Int2>, s_iter, strand_v)
168  {
169  m_ivSeqBlkVec[index++]->subject_strand = *s_iter;
170  }
171  }
172  else
173  SetupSubjects(seq_vector, program, &m_ivSeqBlkVec, &m_iMaxLength);
174 
175  if(dbscan_mode)
176  {
177  ITERATE(vector<BLAST_SequenceBlk*>, iter, m_ivSeqBlkVec)
178  {
179  m_iTotalLength += (Int8) (*iter)->length;
180  }
181  }
182  // Do not set right away
183  m_iAvgLength = 0;
184 }
185 
186 /// Destructor
188 {
189  NON_CONST_ITERATE(vector<BLAST_SequenceBlk*>, itr, m_ivSeqBlkVec) {
190  *itr = BlastSequenceBlkFree(*itr);
191  }
192  m_ivSeqBlkVec.clear();
193 }
194 
195 /// The following functions interact with the C API, and have to be
196 /// declared extern "C".
197 
198 extern "C" {
199 
200 /// Retrieves the length of the longest sequence in the BlastSeqSrc.
201 /// @param multiseq_handle Pointer to the structure containing sequences [in]
202 static Int4
203 s_MultiSeqGetMaxLength(void* multiseq_handle, void*)
204 {
205  Int4 retval = 0;
206  Uint4 index;
207  CRef<CMultiSeqInfo>* seq_info =
208  static_cast<CRef<CMultiSeqInfo>*>(multiseq_handle);
209 
210  _ASSERT(seq_info);
211  _ASSERT(seq_info->NotEmpty());
212 
213  if ((retval = (*seq_info)->GetMaxLength()) > 0)
214  return retval;
215 
216  for (index=0; index<(*seq_info)->GetNumSeqs(); ++index)
217  retval = MAX(retval, (*seq_info)->GetSeqBlk(index)->length);
218  (*seq_info)->SetMaxLength(retval);
219 
220  return retval;
221 }
222 
223 /// Retrieves the length of the longest sequence in the BlastSeqSrc.
224 /// @param multiseq_handle Pointer to the structure containing sequences [in]
225 static Int4
226 s_MultiSeqGetMinLength(void* multiseq_handle, void*)
227 {
228  Int4 retval = INT4_MAX;
229  Uint4 index;
230  CRef<CMultiSeqInfo>* seq_info =
231  static_cast<CRef<CMultiSeqInfo>*>(multiseq_handle);
232 
233  for (index=0; index<(*seq_info)->GetNumSeqs(); ++index)
234  retval = MIN(retval, (*seq_info)->GetSeqBlk(index)->length);
235 
236  if(retval < BLAST_SEQSRC_MINLENGTH)
237  retval = BLAST_SEQSRC_MINLENGTH;
238 
239  return retval;
240 }
241 
242 /// Retrieves the length of the longest sequence in the BlastSeqSrc.
243 /// @param multiseq_handle Pointer to the structure containing sequences [in]
244 static Int4
245 s_MultiSeqGetAvgLength(void* multiseq_handle, void*)
246 {
247  Int8 total_length = 0;
248  Uint4 num_seqs = 0;
249  Uint4 avg_length;
250  Uint4 index;
251  CRef<CMultiSeqInfo>* seq_info =
252  static_cast<CRef<CMultiSeqInfo>*>(multiseq_handle);
253 
254  _ASSERT(seq_info);
255  _ASSERT(seq_info->NotEmpty());
256 
257  if ((avg_length = (*seq_info)->GetAvgLength()) > 0)
258  return avg_length;
259 
260  if ((num_seqs = (*seq_info)->GetNumSeqs()) == 0)
261  return 0;
262  for (index = 0; index < num_seqs; ++index)
263  total_length += (Int8) (*seq_info)->GetSeqBlk(index)->length;
264  avg_length = (Uint4) (total_length / num_seqs);
265  (*seq_info)->SetAvgLength(avg_length);
266 
267  return avg_length;
268 }
269 
270 /// Retrieves the number of sequences in the BlastSeqSrc.
271 /// @param multiseq_handle Pointer to the structure containing sequences [in]
272 static Int4
273 s_MultiSeqGetNumSeqs(void* multiseq_handle, void*)
274 {
275  CRef<CMultiSeqInfo>* seq_info =
276  static_cast<CRef<CMultiSeqInfo>*>(multiseq_handle);
277 
278  _ASSERT(seq_info);
279  _ASSERT(seq_info->NotEmpty());
280  return (*seq_info)->GetNumSeqs();
281 }
282 
283 /// Returns zero as this implementation does not support alias files.
284 static Int4
285 s_MultiSeqGetNumSeqsStats(void* /*multiseq_handle*/, void*)
286 {
287  return 0;
288 }
289 
290 /// Returns total length in db scan mode.
291 /// Returns 0 as total length, indicating that this is not a database search.
292 static Int8
293 s_MultiSeqGetTotLen(void* multiseq_handle, void*)
294 {
295  CRef<CMultiSeqInfo>* seq_info =
296  static_cast<CRef<CMultiSeqInfo>*>(multiseq_handle);
297  return (*seq_info)->GetTotLength();
298 }
299 
300 /// Returns 0 as this implementation does not use alias files.
301 static Int8
302 s_MultiSeqGetTotLenStats(void* /*multiseq_handle*/, void*)
303 {
304  return 0;
305 }
306 
307 /// Always returns NcbiEmptyCStr
308 static const char*
309 s_MultiSeqGetName(void* /*multiseq_handle*/, void*)
310 {
311  return NcbiEmptyCStr;
312 }
313 
314 /// Answers whether this object is for protein or nucleotide sequences.
315 /// @param multiseq_handle Pointer to the structure containing sequences [in]
316 static Boolean
317 s_MultiSeqGetIsProt(void* multiseq_handle, void*)
318 {
319  CRef<CMultiSeqInfo>* seq_info =
320  static_cast<CRef<CMultiSeqInfo>*>(multiseq_handle);
321 
322  _ASSERT(seq_info);
323  _ASSERT(seq_info->NotEmpty());
324 
325  return (Boolean) (*seq_info)->GetIsProtein();
326 }
327 
328 /// Retrieves the sequence for a given index, in a given encoding.
329 /// @param multiseq_handle Pointer to the structure containing sequences [in]
330 /// @param args Pointer to BlastSeqSrcGetSeqArg structure, containing sequence index and
331 /// encoding. [in]
332 /// @return return codes defined in blast_seqsrc.h
333 static Int2
334 s_MultiSeqGetSequence(void* multiseq_handle, BlastSeqSrcGetSeqArg* args)
335 {
336  CRef<CMultiSeqInfo>* seq_info =
337  static_cast<CRef<CMultiSeqInfo>*>(multiseq_handle);
338  Int4 index;
339 
340  _ASSERT(seq_info);
341  _ASSERT(seq_info->NotEmpty());
342  _ASSERT(args);
343 
344  if ((*seq_info)->GetNumSeqs() == 0 || !args)
345  return BLAST_SEQSRC_ERROR;
346 
347  index = args->oid;
348 
349  if (index >= (Int4) (*seq_info)->GetNumSeqs())
350  return BLAST_SEQSRC_EOF;
351 
352  BlastSequenceBlkCopy(&args->seq, (*seq_info)->GetSeqBlk(index));
353  /* If this is a nucleotide sequence, and it is the traceback stage,
354  we need the uncompressed buffer, stored in the 'sequence_start'
355  pointer. That buffer has an extra sentinel byte for blastn, but
356  no sentinel byte for translated programs. */
357  if (args->encoding == eBlastEncodingNucleotide) {
358  args->seq->sequence = args->seq->sequence_start + 1;
359  } else if (args->encoding == eBlastEncodingNcbi4na) {
360  args->seq->sequence = args->seq->sequence_start;
361  }
362 
363  // these are not applicable to encode subject masks, instead seq_ranges
364  // should be utilized
365  _ASSERT(args->seq->lcase_mask == NULL);
367 
368  args->seq->oid = index;
369  return BLAST_SEQSRC_SUCCESS;
370 }
371 
372 /// Deallocates the uncompressed sequence buffer if necessary.
373 /// @param args Pointer to BlastSeqSrcGetSeqArg structure [in]
374 static void
375 s_MultiSeqReleaseSequence(void* /*multiseq_handle*/, BlastSeqSrcGetSeqArg* args)
376 {
377  _ASSERT(args);
378  if (args->seq->sequence_start_allocated)
379  sfree(args->seq->sequence_start);
380 }
381 
382 /// Retrieve length of a given sequence.
383 /// @param multiseq_handle Pointer to the structure containing sequences [in]
384 /// @param args Pointer to integer indicating index into the sequences
385 /// vector [in]
386 /// @return Length of the sequence or BLAST_SEQSRC_ERROR.
387 static Int4
388 s_MultiSeqGetSeqLen(void* multiseq_handle, void* args)
389 {
390  CRef<CMultiSeqInfo>* seq_info =
391  static_cast<CRef<CMultiSeqInfo>*>(multiseq_handle);
392  Int4 index;
393 
394  _ASSERT(seq_info);
395  _ASSERT(seq_info->NotEmpty());
396  _ASSERT(args);
397 
398  index = *((Int4*) args);
399  return (*seq_info)->GetSeqBlk(index)->length;
400 }
401 
402 /// Mirrors the database iteration interface. Next chunk of indices retrieval
403 /// is really just a check that current index has not reached the end.
404 /// @todo Does this need to be so complicated? Why not simply have all logic in
405 /// s_MultiSeqIteratorNext? - Answer: as explained in the comments, the
406 /// GetNextChunk functionality is provided as a convenience to provide
407 /// MT-safe iteration over a BlastSeqSrc implementation.
408 /// @param multiseq_handle Pointer to the multiple sequence object [in]
409 /// @param itr Iterator over multiseq_handle [in] [out]
410 /// @return Status.
411 static Int2
412 s_MultiSeqGetNextChunk(void* multiseq_handle, BlastSeqSrcIterator* itr)
413 {
414  CRef<CMultiSeqInfo>* seq_info =
415  static_cast<CRef<CMultiSeqInfo>*>(multiseq_handle);
416 
417  _ASSERT(seq_info);
418  _ASSERT(seq_info->NotEmpty());
419  _ASSERT(itr);
420 
421  if (itr->current_pos == UINT4_MAX) {
422  itr->current_pos = 0;
423  }
424 
425  if (itr->current_pos >= (*seq_info)->GetNumSeqs())
426  return BLAST_SEQSRC_EOF;
427 
428  return BLAST_SEQSRC_SUCCESS;
429 }
430 
/// Resets the internal bookmark iterator. Not applicable to this
/// implementation, so the function intentionally does nothing.
static void
s_MultiSeqResetChunkIter(void* /*multiseq_handle*/)
{
    // Intentionally empty.
}
437 
438 /// Gets the next sequence index, given a BlastSeqSrc pointer.
439 /// @param multiseq_handle Handle to access the underlying object over which
440 /// iteration occurs. [in]
441 /// @param itr Iterator over seqsrc [in] [out]
442 /// @return Next index in the sequence set
443 static Int4
444 s_MultiSeqIteratorNext(void* multiseq_handle, BlastSeqSrcIterator* itr)
445 {
446  Int4 retval = BLAST_SEQSRC_EOF;
447  Int2 status = 0;
448 
449  _ASSERT(multiseq_handle);
450  _ASSERT(itr);
451 
452  if ((status = s_MultiSeqGetNextChunk(multiseq_handle, itr))
453  == BLAST_SEQSRC_EOF) {
454  return status;
455  }
456  retval = itr->current_pos++;
457 
458  return retval;
459 }
460 
461 /// Encapsulates the arguments needed to initialize multi-sequence source.
463  TSeqLocVector seq_vector; ///< Vector of sequences
464  EBlastProgramType program; ///< BLAST program
465  bool dbscan_mode; ///< Database mode (not pairwise)
466  /// Constructor
468  : seq_vector(sv), program(p), dbscan_mode(db) {}
469 };
470 
471 /// Multi sequence source destructor: frees its internal data structure
472 /// @param seq_src BlastSeqSrc structure to free [in]
473 /// @return NULL
474 static BlastSeqSrc*
476 {
477  if (!seq_src)
478  return NULL;
479  CRef<CMultiSeqInfo>* seq_info = static_cast<CRef<CMultiSeqInfo>*>
481  delete seq_info;
482  return NULL;
483 }
484 
485 /// Multi-sequence sequence source copier: creates a new reference to the
486 /// CMultiSeqInfo object and copies the rest of the BlastSeqSrc structure.
487 /// @param seq_src BlastSeqSrc structure to copy [in]
488 /// @return Pointer to the new BlastSeqSrc.
489 static BlastSeqSrc*
491 {
492  if (!seq_src)
493  return NULL;
494  CRef<CMultiSeqInfo>* seq_info = static_cast<CRef<CMultiSeqInfo>*>
496  CRef<CMultiSeqInfo>* seq_info2 = new CRef<CMultiSeqInfo>(*seq_info);
497 
498  _BlastSeqSrcImpl_SetDataStructure(seq_src, (void*) seq_info2);
499 
500  return seq_src;
501 }
502 
503 /// Multi-sequence source constructor
504 /// @param retval BlastSeqSrc structure (already allocated) to populate [in]
505 /// @param args Pointer to MultiSeqSrcNewArgs structure above [in]
506 /// @return Updated retval structure (with all function pointers initialized)
507 static BlastSeqSrc*
508 s_MultiSeqSrcNew(BlastSeqSrc* retval, void* args)
509 {
510  _ASSERT(retval);
511  _ASSERT(args);
512 
513  SMultiSeqSrcNewArgs* seqsrc_args = (SMultiSeqSrcNewArgs*) args;
514 
515  CRef<CMultiSeqInfo>* seq_info = new CRef<CMultiSeqInfo>(0);
516  try {
517  seq_info->Reset(new CMultiSeqInfo(seqsrc_args->seq_vector,
518  seqsrc_args->program,
519  seqsrc_args->dbscan_mode));
520  } catch (const ncbi::CException& e) {
521  _BlastSeqSrcImpl_SetInitErrorStr(retval, strdup(e.ReportAll().c_str()));
522  } catch (const std::exception& e) {
523  _BlastSeqSrcImpl_SetInitErrorStr(retval, strdup(e.what()));
524  } catch (...) {
526  strdup("Caught unknown exception from CMultiSeqInfo constructor"));
527  }
528 
529  /* Initialize the BlastSeqSrc structure fields with user-defined function
530  * pointers and seq_info */
533  _BlastSeqSrcImpl_SetDataStructure(retval, (void*) seq_info);
548 
549  return retval;
550 }
551 
552 } // extern "C"
553 
556  EBlastProgramType program,
557  bool dbscan_mode)
558 {
559  BlastSeqSrc* seq_src = NULL;
560  BlastSeqSrcNewInfo bssn_info;
561 
562  unique_ptr<SMultiSeqSrcNewArgs> args
563  (new SMultiSeqSrcNewArgs(const_cast<TSeqLocVector&>(seq_vector),
564  program, dbscan_mode));
565 
566  bssn_info.constructor = &s_MultiSeqSrcNew;
567  bssn_info.ctor_argument = (void*) args.get();
568 
569  seq_src = BlastSeqSrcNew(&bssn_info);
570  return seq_src;
571 }
572 
573 END_SCOPE(blast)
575 
576 
577 /* @} */
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
Definitions which are dependent on the NCBI C++ Object Manager.
Boolean Blast_SubjectIsProtein(EBlastProgramType p)
Returns true if the subject is protein.
Definition: blast_program.c:50
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
Definition: blast_program.c:63
#define BLAST_SEQSRC_ERROR
Error while retrieving sequence.
Definition: blast_seqsrc.h:291
BlastSeqSrc * BlastSeqSrcNew(const BlastSeqSrcNewInfo *bssn_info)
Allocates memory for a BlastSeqSrc structure and then invokes the constructor function defined in its...
Definition: blast_seqsrc.c:90
#define BLAST_SEQSRC_SUCCESS
Successful sequence retrieval.
Definition: blast_seqsrc.h:293
#define BLAST_SEQSRC_MINLENGTH
Default minimal sequence length.
Definition: blast_seqsrc.h:205
#define BLAST_SEQSRC_EOF
No more sequences available.
Definition: blast_seqsrc.h:292
Definitions needed for implementing the BlastSeqSrc interface and low level details of the implementa...
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetReleaseSequence(BlastSeqSrc *var, ReleaseSeqBlkFnPtr arg)
Definition: blast_seqsrc.c:574
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetResetChunkIterator(BlastSeqSrc *var, ResetChunkIteratorFnPtr arg)
Definition: blast_seqsrc.c:581
NCBI_XBLAST_EXPORT void * _BlastSeqSrcImpl_GetDataStructure(const BlastSeqSrc *var)
Definition: blast_seqsrc.c:555
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetTotLenStats(BlastSeqSrc *var, GetInt8FnPtr arg)
Definition: blast_seqsrc.c:564
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetSequence(BlastSeqSrc *var, GetSeqBlkFnPtr arg)
Definition: blast_seqsrc.c:572
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetNumSeqsStats(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:559
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetTotLen(BlastSeqSrc *var, GetInt8FnPtr arg)
Definition: blast_seqsrc.c:563
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:573
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetIsProt(BlastSeqSrc *var, GetBoolFnPtr arg)
Definition: blast_seqsrc.c:567
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetDataStructure(BlastSeqSrc *var, void *arg)
Definition: blast_seqsrc.c:555
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetCopyFnPtr(BlastSeqSrc *var, BlastSeqSrcCopier arg)
Definition: blast_seqsrc.c:553
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetAvgSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:562
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetIterNext(BlastSeqSrc *var, AdvanceIteratorFnPtr arg)
Definition: blast_seqsrc.c:576
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetMinSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:561
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetInitErrorStr(BlastSeqSrc *var, char *arg)
Definition: blast_seqsrc.c:556
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetDeleteFnPtr(BlastSeqSrc *var, BlastSeqSrcDestructor arg)
Definition: blast_seqsrc.c:552
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetName(BlastSeqSrc *var, GetStrFnPtr arg)
Definition: blast_seqsrc.c:566
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetMaxSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:560
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetNumSeqs(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:558
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
Definition: blast_util.c:245
void BlastSequenceBlkCopy(BLAST_SequenceBlk **copy, BLAST_SequenceBlk *src)
Copies contents of the source sequence block without copying sequence buffers; sets all "field_alloca...
Definition: blast_util.c:259
#define true
Definition: bool.h:35
Contains information about all sequences in a set.
CObject –.
Definition: ncbiobj.hpp:180
Int8 GetTotLength()
Returns total length.
void SetupSubjects(TSeqLocVector &subjects, EBlastProgramType program, vector< BLAST_SequenceBlk * > *seqblk_vec, unsigned int *max_subjlen)
Sets up internal subject data structure for the BLAST search.
static BlastSeqSrc * s_MultiSeqSrcNew(BlastSeqSrc *retval, void *args)
Multi-sequence source constructor.
static void s_MultiSeqReleaseSequence(void *, BlastSeqSrcGetSeqArg *args)
Deallocates the uncompressed sequence buffer if necessary.
CMultiSeqInfo(TSeqLocVector &seq_vector, EBlastProgramType program, bool dbscan_mode)
Constructor from a vector of sequence location/scope pairs and a BLAST program type.
bool dbscan_mode
Database mode (not pairwise)
static Int4 s_MultiSeqGetMaxLength(void *multiseq_handle, void *)
The following functions interact with the C API, and have to be declared extern "C".
unsigned int m_iAvgLength
Average length of sequences in this set.
static BlastSeqSrc * s_MultiSeqSrcFree(BlastSeqSrc *seq_src)
Multi sequence source destructor: frees its internal data structure.
Uint4 GetMaxLength()
Setter and getter functions for the private fields.
static BlastSeqSrc * s_MultiSeqSrcCopy(BlastSeqSrc *seq_src)
Multi-sequence sequence source copier: creates a new reference to the CMultiSeqInfo object and copies...
static void s_MultiSeqResetChunkIter(void *)
Resets the internal bookmark iterator (N/A in this case)
static Int4 s_MultiSeqGetMinLength(void *multiseq_handle, void *)
Retrieves the length of the longest sequence in the BlastSeqSrc.
static Int8 s_MultiSeqGetTotLen(void *multiseq_handle, void *)
Returns total length in db scan mode.
bool m_DbScanMode
Database scanning mode (not pairwise)
bool m_ibIsProt
Internal fields.
static Int4 s_MultiSeqGetNumSeqs(void *multiseq_handle, void *)
Retrieves the number of sequences in the BlastSeqSrc.
static Int8 s_MultiSeqGetTotLenStats(void *, void *)
Returns 0 as this implementation does not use alias files.
BlastSeqSrc * MultiSeqBlastSeqSrcInit(TSeqLocVector &seq_vector, EBlastProgramType program, bool dbscan_mode=false)
Initialize the sequence source structure.
~CMultiSeqInfo()
Destructor.
static Int2 s_MultiSeqGetNextChunk(void *multiseq_handle, BlastSeqSrcIterator *itr)
Mirrors the database iteration interface.
void SetAvgLength(Uint4 val)
Sets average length.
SMultiSeqSrcNewArgs(TSeqLocVector sv, EBlastProgramType p, bool db)
Constructor.
static Int4 s_MultiSeqGetSeqLen(void *multiseq_handle, void *args)
Retrieve length of a given sequence.
static Int4 s_MultiSeqGetAvgLength(void *multiseq_handle, void *)
Retrieves the length of the longest sequence in the BlastSeqSrc.
bool GetIsProtein()
Answers whether sequences in this object are protein or nucleotide.
BLAST_SequenceBlk * GetSeqBlk(int index)
Returns sequence block structure for one of the sequences.
static Int4 s_MultiSeqIteratorNext(void *multiseq_handle, BlastSeqSrcIterator *itr)
Gets the next sequence index, given a BlastSeqSrc pointer.
Uint4 GetAvgLength()
Returns average length.
unsigned int m_iMaxLength
Length of the longest sequence in this set.
EBlastProgramType program
BLAST program.
static Boolean s_MultiSeqGetIsProt(void *multiseq_handle, void *)
Answers whether this object is for protein or nucleotide sequences.
vector< BLAST_SequenceBlk * > m_ivSeqBlkVec
Vector of sequence blocks.
Uint4 GetNumSeqs()
Returns number of sequences.
TSeqLocVector seq_vector
Vector of sequences.
static Int2 s_MultiSeqGetSequence(void *multiseq_handle, BlastSeqSrcGetSeqArg *args)
Retrieves the sequence for a given index, in a given encoding.
Int8 m_iTotalLength
Total length of sequences in this set.
void SetMaxLength(Uint4 val)
Sets maximal length.
static const char * s_MultiSeqGetName(void *, void *)
Always returns NcbiEmptyCStr.
static Int4 s_MultiSeqGetNumSeqsStats(void *, void *)
Returns zero as this implementation does not support alias files.
@ eBlastEncodingNcbi4na
NCBI4na.
@ eBlastEncodingNucleotide
Special encoding for preliminary stage of BLAST: permutation of NCBI4na.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
Definition: Seq_loc.cpp:5196
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NcbiEmptyCStr
Definition: ncbistr.hpp:59
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
#define strdup
Definition: ncbi_ansi_ext.h:70
#define MIN(a, b)
returns smaller of a and b.
Definition: ncbi_std.h:112
#define INT4_MAX
largest number represented by signed int
Definition: ncbi_std.h:141
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
#define UINT4_MAX
largest number represented by unsigned int.
Definition: ncbi_std.h:136
#define MAX(a, b)
returns larger of a and b.
Definition: ncbi_std.h:117
Implementation of the BlastSeqSrc interface for a vector of sequence locations.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Structure to hold a sequence.
Definition: blast_def.h:242
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Definition: blast_def.h:244
Int4 oid
The ordinal id of the current sequence.
Definition: blast_def.h:250
BlastMaskLoc * lcase_mask
Locations to be masked from operations on this sequence: lookup table for query; scanning for subject...
Definition: blast_def.h:265
Boolean lcase_mask_allocated
TRUE if memory has been allocated for lcase_mask.
Definition: blast_def.h:268
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
Boolean sequence_start_allocated
TRUE if memory has been allocated for sequence_start.
Definition: blast_def.h:253
Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature,...
Definition: blast_seqsrc.h:257
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
Definition: blast_seqsrc.h:259
EBlastEncoding encoding
Encoding of sequence, i.e.
Definition: blast_seqsrc.h:263
BLAST_SequenceBlk * seq
Sequence to return, if NULL, it should allocated by GetSeqBlkFnPtr (using BlastSeqBlkNew or BlastSetU...
Definition: blast_seqsrc.h:284
Complete type definition of Blast Sequence Source Iterator.
unsigned int current_pos
Keep track of this iterator's current position, implementations use UINT4_MAX to indicate this is uni...
Complete type definition of the structure used to create a new BlastSeqSrc.
BlastSeqSrcConstructor constructor
User-defined function to initialize a BlastSeqSrc structure.
void * ctor_argument
Argument to the above function.
Complete type definition of Blast Sequence Source ADT.
Definition: blast_seqsrc.c:43
Encapsulates the arguments needed to initialize multi-sequence source.
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
#define _ASSERT
@ FALSE
Definition: testodbc.c:27
Modified on Wed Feb 21 09:55:07 2024 by modify_doxy.py rev. 669887