NCBI C++ ToolKit
blast_aux.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_aux.cpp 90312 2020-06-04 16:06:44Z fongah2 $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Christiam Camacho
27 *
28 */
29 
30 /// @file blast_aux.cpp
31 /// Implements C++ wrapper classes for structures in algo/blast/core as well as
32 /// some auxiliary functions to convert CSeq_loc to/from BlastMask structures.
33 
34 #include <ncbi_pch.hpp>
35 
44 
53 #include "blast_setup.hpp"
54 #include "blast_aux_priv.hpp"
55 
56 #include <objects/seq/Seq_ext.hpp>
59 #include <objmgr/seq_map_ci.hpp>
60 
61 #include <algorithm>
62 #include <sstream>
63 
64 /** @addtogroup AlgoBlast
65  *
66  * @{
67  */
68 
70 BEGIN_SCOPE(blast)
72 
73 #ifndef SKIP_DOXYGEN_PROCESSING
74 
75 void
77  const
78 {
79  ddc.SetFrame("CQuerySetUpOptions");
80  if (!m_Ptr)
81  return;
82 
84  {
85  ddc.Log("mask_at_hash", m_Ptr->filtering_options->mask_at_hash);
87  {
89  ddc.Log("dust_level", dustOptions->level);
90  ddc.Log("dust_window", dustOptions->window);
91  ddc.Log("dust_linker", dustOptions->linker);
92  }
94  {
96  ddc.Log("seg_window", segOptions->window);
97  ddc.Log("seg_locut", segOptions->locut);
98  ddc.Log("seg_hicut", segOptions->hicut);
99  }
101  {
102  ddc.Log("repeat_database", m_Ptr->filtering_options->repeatFilterOptions->database);
103  }
104  }
105  else if (m_Ptr->filter_string)
106  ddc.Log("filter_string", m_Ptr->filter_string);
107 
108  ddc.Log("strand_option", m_Ptr->strand_option);
109  ddc.Log("genetic_code", m_Ptr->genetic_code);
110 }
111 
112 void
113 CBLAST_SequenceBlk::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
114 {
115  ddc.SetFrame("CBLAST_SequenceBlk");
116  if (!m_Ptr)
117  return;
118 
119  ddc.Log("sequence", m_Ptr->sequence);
120  ddc.Log("sequence_start", m_Ptr->sequence_start);
121  ddc.Log("sequence_allocated", m_Ptr->sequence_allocated);
122  ddc.Log("sequence_start_allocated", m_Ptr->sequence_start_allocated);
123  ddc.Log("length", m_Ptr->length);
124 
125 }
126 
127 void
128 CBlastQueryInfo::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
129 {
130  ddc.SetFrame("CBlastQueryInfo");
131  if (!m_Ptr)
132  return;
133 
134  ddc.Log("first_context", m_Ptr->first_context);
135  ddc.Log("last_context", m_Ptr->last_context);
136  ddc.Log("num_queries", m_Ptr->num_queries);
137  ddc.Log("max_length", m_Ptr->max_length);
138 
139  for (Int4 i = m_Ptr->first_context; i <= m_Ptr->last_context; i++) {
140  const string prefix = string("context[") + NStr::IntToString(i) +
141  string("].");
142  ddc.Log(prefix+string("query_offset"), m_Ptr->contexts[i].query_offset);
143  ddc.Log(prefix+string("query_length"), m_Ptr->contexts[i].query_length);
144  ddc.Log(prefix+string("eff_searchsp"), m_Ptr->contexts[i].eff_searchsp);
145  ddc.Log(prefix+string("length_adjustment"),
147  ddc.Log(prefix+string("query_index"), m_Ptr->contexts[i].query_index);
148  ddc.Log(prefix+string("frame"), m_Ptr->contexts[i].frame);
149  ddc.Log(prefix+string("is_valid"), m_Ptr->contexts[i].is_valid);
150  }
151 }
152 
153 void
154 CLookupTableOptions::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
155 {
156  ddc.SetFrame("CLookupTableOptions");
157  if (!m_Ptr)
158  return;
159 
160  ddc.Log("threshold", m_Ptr->threshold);
161  ddc.Log("lut_type", m_Ptr->lut_type);
162  ddc.Log("word_size", m_Ptr->word_size);
163  ddc.Log("mb_template_length", m_Ptr->mb_template_length);
164  ddc.Log("mb_template_type", m_Ptr->mb_template_type);
165 }
166 
167 void
168 CLookupTableWrap::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
169 {
170  ddc.SetFrame("CLookupTableWrap");
171  if (!m_Ptr)
172  return;
173 
174 }
175 void
176 CBlastInitialWordOptions::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
177 {
178  ddc.SetFrame("BlastInitialWordOptions");
179  if (!m_Ptr)
180  return;
181 
182  ddc.Log("window_size", m_Ptr->window_size);
183  ddc.Log("x_dropoff", m_Ptr->x_dropoff);
184 }
185 void
187 {
188  ddc.SetFrame("CBlastInitialWordParameters");
189  if (!m_Ptr)
190  return;
191 
192 }
193 void
194 CBlast_ExtendWord::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
195 {
196  ddc.SetFrame("CBlast_ExtendWord");
197  if (!m_Ptr)
198  return;
199 
200 }
201 
202 void
203 CBlastExtensionOptions::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
204 {
205  ddc.SetFrame("CBlastExtensionOptions");
206  if (!m_Ptr)
207  return;
208 
209  ddc.Log("gap_x_dropoff", m_Ptr->gap_x_dropoff);
210  ddc.Log("gap_x_dropoff_final", m_Ptr->gap_x_dropoff_final);
211  ddc.Log("ePrelimGapExt", m_Ptr->ePrelimGapExt);
212  ddc.Log("eTbackExt", m_Ptr->eTbackExt);
213 }
214 
215 void
216 CBlastExtensionParameters::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
217 {
218  ddc.SetFrame("CBlastExtensionParameters");
219  if (!m_Ptr)
220  return;
221 
222  ddc.Log("gap_x_dropoff", m_Ptr->gap_x_dropoff);
223  ddc.Log("gap_x_dropoff_final", m_Ptr->gap_x_dropoff_final);
224 }
225 
226 void
227 CBlastHitSavingOptions::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
228 {
229  ddc.SetFrame("CBlastHitSavingOptions");
230  if (!m_Ptr)
231  return;
232 
233  ddc.Log("hitlist_size", m_Ptr->hitlist_size);
234  ddc.Log("hsp_num_max", m_Ptr->hsp_num_max);
235  ddc.Log("max_hsps_per_subject", m_Ptr->max_hsps_per_subject);
236  ddc.Log("total_hsp_limit", m_Ptr->total_hsp_limit);
237  ddc.Log("culling_limit", m_Ptr->culling_limit);
238  ddc.Log("expect_value", m_Ptr->expect_value);
239  ddc.Log("cutoff_score", m_Ptr->cutoff_score);
240  ddc.Log("percent_identity", m_Ptr->percent_identity);
241  ddc.Log("do_sum_stats", m_Ptr->do_sum_stats);
242  ddc.Log("longest_intron", m_Ptr->longest_intron);
243  ddc.Log("min_hit_length", m_Ptr->min_hit_length);
244  ddc.Log("min_diag_separation", m_Ptr->min_diag_separation);
245  if (m_Ptr->hsp_filt_opt) {
246  ddc.Log("hsp_filt_opt->best_hit_stage",
248  if (m_Ptr->hsp_filt_opt->best_hit) {
249  ddc.Log("hsp_filt_opt->best_hit->overhang",
251  ddc.Log("hsp_filt_opt->best_hit->score_edge",
253  }
254  ddc.Log("hsp_filt_opt->culling_stage",
257  ddc.Log("hsp_filt_opt->culling_opts->max_hits",
259  }
260  }
261 }
262 void
263 CBlastHitSavingParameters::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
264 {
265  ddc.SetFrame("CBlastHitSavingParameters");
266  if (!m_Ptr)
267  return;
268 
269 }
270 void
271 CPSIBlastOptions::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
272 {
273  ddc.SetFrame("CPSIBlastOptions");
274  if (!m_Ptr)
275  return;
276 
277  ddc.Log("pseudo_count", m_Ptr->pseudo_count);
278  ddc.Log("inclusion_ethresh", m_Ptr->inclusion_ethresh);
279  ddc.Log("use_best_alignment", m_Ptr->use_best_alignment);
280  ddc.Log("nsg_compatibility_mode", m_Ptr->nsg_compatibility_mode);
281  ddc.Log("impala_scaling_factor", m_Ptr->impala_scaling_factor);
282 }
283 
284 void
285 CBlastGapAlignStruct::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
286 {
287  ddc.SetFrame("CBlastGapAlignStruct");
288  if (!m_Ptr)
289  return;
290 
291 }
292 
293 void
295 {
296  ddc.SetFrame("CBlastEffectiveLengthsOptions");
297  if (!m_Ptr)
298  return;
299 
300  ddc.Log("db_length", (unsigned long)m_Ptr->db_length); // Int8
301  ddc.Log("dbseq_num", m_Ptr->dbseq_num);
302  for (Int4 i = 0; i < m_Ptr->num_searchspaces; i++) {
303  const string prefix = string("searchsp[") + NStr::IntToString(i) +
304  string("]");
305  ddc.Log(prefix, m_Ptr->searchsp_eff[i]);
306  }
307 }
308 
309 void
311 {
312  ddc.SetFrame("CBlastEffectiveLengthsParameters");
313  if (!m_Ptr)
314  return;
315 
316  ddc.Log("real_db_length", (unsigned long)m_Ptr->real_db_length); // Int8
317  ddc.Log("real_num_seqs", m_Ptr->real_num_seqs);
318 }
319 
320 void
321 CBlastScoreBlk::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
322 {
323  ddc.SetFrame("CBlastScoreBlk");
324  if (!m_Ptr)
325  return;
326 
327  ddc.Log("protein_alphabet", m_Ptr->protein_alphabet);
328  ddc.Log("alphabet_size", m_Ptr->alphabet_size);
329  ddc.Log("alphabet_start", m_Ptr->alphabet_start);
330  ddc.Log("loscore", m_Ptr->loscore);
331  ddc.Log("hiscore", m_Ptr->hiscore);
332  ddc.Log("penalty", m_Ptr->penalty);
333  ddc.Log("reward", m_Ptr->reward);
334  ddc.Log("scale_factor", m_Ptr->scale_factor);
335  ddc.Log("read_in_matrix", m_Ptr->read_in_matrix);
336  ddc.Log("number_of_contexts", m_Ptr->number_of_contexts);
337  ddc.Log("name", m_Ptr->name);
338  ddc.Log("ambig_size", m_Ptr->ambig_size);
339  ddc.Log("ambig_occupy", m_Ptr->ambig_occupy);
340 }
341 
342 void
343 CBlastScoringOptions::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
344 {
345  ddc.SetFrame("CBlastScoringOptions");
346  if (!m_Ptr)
347  return;
348 
349  ddc.Log("matrix", m_Ptr->matrix);
350  ddc.Log("reward", m_Ptr->reward);
351  ddc.Log("penalty", m_Ptr->penalty);
352  ddc.Log("gapped_calculation", m_Ptr->gapped_calculation);
353  ddc.Log("gap_open", m_Ptr->gap_open);
354  ddc.Log("gap_extend", m_Ptr->gap_extend);
355  ddc.Log("shift_pen", m_Ptr->shift_pen);
356  ddc.Log("is_ooframe", m_Ptr->is_ooframe);
357 }
358 
359 void
361  const
362 {
363  ddc.SetFrame("CBlastScoringParameters");
364  if (!m_Ptr)
365  return;
366 
367  ddc.Log("reward", m_Ptr->reward);
368  ddc.Log("penalty", m_Ptr->penalty);
369  ddc.Log("gap_open", m_Ptr->gap_open);
370  ddc.Log("gap_extend", m_Ptr->gap_extend);
371  ddc.Log("shift_pen", m_Ptr->shift_pen);
372  ddc.Log("scale_factor", m_Ptr->scale_factor);
373 }
374 
375 void
376 CBlastDatabaseOptions::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
377 {
378  ddc.SetFrame("CBlastDatabaseOptions");
379  if (!m_Ptr)
380  return;
381 
382 }
383 
384 void
385 CPSIMsa::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
386 {
387  ddc.SetFrame("CPSIMsa");
388  if (!m_Ptr)
389  return;
390 
391  if (m_Ptr->dimensions) {
392  ddc.Log("dimensions::query_length", m_Ptr->dimensions->query_length);
393  ddc.Log("dimensions::num_seqs", m_Ptr->dimensions->num_seqs);
394  }
395 }
396 
397 void
398 CPSIMatrix::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
399 {
400  ddc.SetFrame("CPSIMatrix");
401  if (!m_Ptr)
402  return;
403 
404  ddc.Log("ncols", m_Ptr->ncols);
405  ddc.Log("nrows", m_Ptr->nrows);
406  ddc.Log("lambda", m_Ptr->lambda);
407  ddc.Log("kappa", m_Ptr->kappa);
408  ddc.Log("h", m_Ptr->h);
409  // pssm omitted because it might be too large!
410 }
411 
412 void
414  unsigned int /*depth*/) const
415 {
416  ddc.SetFrame("CPSIDiagnosticsRequest");
417  if (!m_Ptr)
418  return;
419 
420  ddc.Log("information_content", m_Ptr->information_content);
421  ddc.Log("residue_frequencies", m_Ptr->residue_frequencies);
422  ddc.Log("weighted_residue_frequencies",
424  ddc.Log("frequency_ratios", m_Ptr->frequency_ratios);
425  ddc.Log("gapless_column_weights", m_Ptr->gapless_column_weights);
426 }
427 
428 void
430  unsigned int /*depth*/) const
431 {
432  ddc.SetFrame("CPSIDiagnosticsResponse");
433  if (!m_Ptr)
434  return;
435 
436  ddc.Log("alphabet_size", m_Ptr->alphabet_size);
437 }
438 
439 void
440 CBlastSeqSrc::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
441 {
442  ddc.SetFrame("CBlastSeqSrc");
443  if (!m_Ptr)
444  return;
445 
446  /** @todo should the BlastSeqSrc API support names for types of
447  * BlastSeqSrc? Might be useful for debugging */
448 }
449 
450 void
452  unsigned int /*depth*/) const
453 {
454  ddc.SetFrame("CBlastSeqSrcIterator");
455  if (!m_Ptr)
456  return;
457 
458  string iterator_type;
459  switch (m_Ptr->itr_type) {
460  case eOidList: iterator_type = "oid_list"; break;
461  case eOidRange: iterator_type = "oid_range"; break;
462  default: abort();
463  }
464 
465  ddc.Log("itr_type", iterator_type);
466  ddc.Log("current_pos", m_Ptr->current_pos);
467  ddc.Log("chunk_sz", m_Ptr->chunk_sz);
468 }
469 
470 void
471 CBlast_Message::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
472 {
473  ddc.SetFrame("CBlast_Message");
474  if (!m_Ptr)
475  return;
476 
477  ddc.Log("severity", m_Ptr->severity);
478  ddc.Log("message", m_Ptr->message);
479  // code and subcode are unused
480 }
481 
482 void
484  unsigned int /*depth*/) const
485 {
486  ddc.SetFrame("CBlastHSPResults");
487  if (!m_Ptr)
488  return;
489 
490  ddc.Log("num_queries", m_Ptr->num_queries);
491  // hitlist itself is not printed
492 }
493 
494 void
495 CBlastMaskLoc::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
496 {
497  ddc.SetFrame("CBlastMaskLoc");
498  if (!m_Ptr)
499  return;
500 
501  ddc.Log("total_size", m_Ptr->total_size);
502  for (int index = 0; index < m_Ptr->total_size; ++index) {
503  ddc.Log("context", index);
504  for (BlastSeqLoc* seqloc = m_Ptr->seqloc_array[index];
505  seqloc; seqloc = seqloc->next) {
506  ddc.Log("left", seqloc->ssr->left);
507  ddc.Log("right", seqloc->ssr->right);
508  }
509  }
510 }
511 
512 void
513 CBlastSeqLoc::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
514 {
515  ddc.SetFrame("CBlastSeqLoc");
516  if (!m_Ptr)
517  return;
518 
519  for (BlastSeqLoc* tmp = m_Ptr; tmp; tmp = tmp->next) {
520  ddc.Log("left", tmp->ssr->left);
521  ddc.Log("right", tmp->ssr->right);
522  }
523 }
524 
525 void
526 CSBlastProgress::DebugDump(CDebugDumpContext ddc, unsigned int /*depth*/) const
527 {
528  ddc.SetFrame("SBlastProgress");
529  if (!m_Ptr)
530  return;
531 
532  ddc.Log("stage", m_Ptr->stage);
533  ddc.Log("user_data", m_Ptr->user_data);
534 }
535 
536 #endif /* SKIP_DOXYGEN_PROCESSING */
537 
539 CSeqLoc2BlastSeqLoc(const objects::CSeq_loc* slp)
540 {
541  if (!slp ||
542  slp->Which() == CSeq_loc::e_not_set ||
543  slp->IsEmpty() ||
544  slp->IsNull() ) {
545  return NULL;
546  }
547 
548  _ASSERT(slp->IsInt() || slp->IsPacked_int() || slp->IsMix());
549 
550  CBlastSeqLoc retval;
551  BlastSeqLoc* tail = NULL; /* Pointer to the tail of the complement_mask
552  linked list */
553 
554  if (slp->IsInt()) {
555  BlastSeqLocNew(&retval, slp->GetInt().GetFrom(), slp->GetInt().GetTo());
556  } else if (slp->IsPacked_int()) {
557  ITERATE(CPacked_seqint::Tdata, itr, slp->GetPacked_int().Get()) {
558  /* Cache the tail of the list to avoid the overhead of traversing
559  * the list when appending to it */
560  tail = BlastSeqLocNew(tail ? &tail : &retval,
561  (*itr)->GetFrom(), (*itr)->GetTo());
562  }
563  } else if (slp->IsMix()) {
564  ITERATE(CSeq_loc_mix::Tdata, itr, slp->GetMix().Get()) {
565  if ((*itr)->IsInt()) {
566  /* Cache the tail of the list to avoid the overhead of
567  * traversing the list when appending to it */
568  tail = BlastSeqLocNew(tail ? &tail : &retval,
569  (*itr)->GetInt().GetFrom(),
570  (*itr)->GetInt().GetTo());
571  } else if ((*itr)->IsPnt()) {
572  /* Cache the tail of the list to avoid the overhead of
573  * traversing the list when appending to it */
574  tail = BlastSeqLocNew(tail ? &tail : &retval,
575  (*itr)->GetPnt().GetPoint(),
576  (*itr)->GetPnt().GetPoint());
577  }
578  }
579  } else {
580  NCBI_THROW(CBlastException, eNotSupported,
581  "Unsupported CSeq_loc type");
582  }
583 
584  return retval.Release();
585 }
586 
588 FindGeneticCode(int genetic_code)
589 {
590  Uint1* retval = NULL;
591 
592  // handle the sentinel value which indicates that the genetic code is not
593  // applicable
594  if (static_cast<Uint4>(genetic_code) == numeric_limits<Uint4>::max()) {
595  return retval;
596  }
597 
598  const string kGenCode = CGen_code_table::GetNcbieaa(genetic_code);
599  if (kGenCode == kEmptyStr) {
600  return retval;
601  }
602  CSeq_data gc_ncbieaa(kGenCode, CSeq_data::e_Ncbieaa);
603  CSeq_data gc_ncbistdaa;
604 
605  TSeqPos nconv = CSeqportUtil::Convert(gc_ncbieaa, &gc_ncbistdaa,
607  if (nconv == 0) {
608  return retval;
609  }
610 
611  _ASSERT(gc_ncbistdaa.IsNcbistdaa());
612  _ASSERT(nconv == gc_ncbistdaa.GetNcbistdaa().Get().size());
613 
614  try {
615  retval = new Uint1[nconv];
616  } catch (const bad_alloc&) {
617  return NULL;
618  }
619 
620  for (TSeqPos i = 0; i < nconv; i++)
621  retval[i] = gc_ncbistdaa.GetNcbistdaa().Get()[i];
622 
623  return retval;
624 }
625 
626 DEFINE_CLASS_STATIC_FAST_MUTEX(CAutomaticGenCodeSingleton::sm_Mutex);
628 
630 {
631  if ( !genetic_code ) {
632  genetic_code = BLAST_GENETIC_CODE;
633  }
634 
635  CFastMutexGuard LOCK(sm_Mutex);
636  m_RefCounter++;
638  // N.B.: this is added as this is the default value
639  if (GenCodeSingletonFind(genetic_code) == NULL) {
640  TAutoUint1ArrayPtr gc = FindGeneticCode(genetic_code);
641  GenCodeSingletonAdd(genetic_code, gc.get());
642  }
643 }
644 
646 {
647  CFastMutexGuard LOCK(sm_Mutex);
648  if (--m_RefCounter == 0) {
650  }
651 }
652 
654 {
655  CFastMutexGuard LOCK(sm_Mutex);
656  if (GenCodeSingletonFind(genetic_code) == NULL) {
657  TAutoUint1ArrayPtr gc = FindGeneticCode(genetic_code);
658  GenCodeSingletonAdd(genetic_code, gc.get());
659  }
660 }
661 
662 void ThrowIfInvalidTask(const string& task)
663 {
664  set<string> valid_tasks;
665  if (valid_tasks.empty()) {
666  valid_tasks = CBlastOptionsFactory::GetTasks();
667  }
668 
669  if (valid_tasks.find(task) == valid_tasks.end()) {
670  ostringstream os;
671  os << "'" << task << "' is not a supported task";
672  NCBI_THROW(CBlastException, eInvalidArgument, os.str());
673  }
674 }
675 
677 {
678  string retval;
679  switch (p) {
680  case eBlastn: retval.assign("blastn"); break;
681  case eMegablast: retval.assign("megablast"); break;
682  case eDiscMegablast: retval.assign("dc-megablast"); break;
683  case eBlastp: retval.assign("blastp"); break;
684  case eBlastx: retval.assign("blastx"); break;
685  case eTblastn: retval.assign("tblastn"); break;
686  case eTblastx: retval.assign("tblastx"); break;
687  case eRPSBlast: retval.assign("rpsblast"); break;
688  case eRPSTblastn: retval.assign("rpstblastn"); break;
689  case ePSIBlast: retval.assign("psiblast"); break;
690  case ePSITblastn: retval.assign("psitblastn"); break;
691  case ePHIBlastp: retval.assign("phiblastp"); break;
692  case ePHIBlastn: retval.assign("phiblastn"); break;
693  case eDeltaBlast: retval.assign("deltablast"); break;
694  case eVecScreen: retval.assign("vecscreen"); break;
695  case eMapper: retval.assign("mapr2g"); break;
696  default:
697  cerr << "Invalid EProgram value: " << (int)p << endl;
698  abort();
699  }
700 
701 #if _DEBUG
702  ThrowIfInvalidTask(retval);
703 #endif
704 
705  return retval;
706 }
707 
710 {
711  switch (p) {
712  case eBlastn:
713  case eMegablast:
714  case eDiscMegablast:
715  case eVecScreen:
716  return eBlastTypeBlastn;
717 
718  case eMapper:
719  return eBlastTypeMapping;
720 
721  case eBlastp:
722  return eBlastTypeBlastp;
723 
724  case eBlastx:
725  return eBlastTypeBlastx;
726 
727  case eTblastn:
728  return eBlastTypeTblastn;
729 
730  case eTblastx:
731  return eBlastTypeTblastx;
732 
733  case eRPSBlast:
734  return eBlastTypeRpsBlast;
735 
736  case eRPSTblastn:
737  return eBlastTypeRpsTblastn;
738 
739  case ePSIBlast:
740  case eDeltaBlast:
741  return eBlastTypePsiBlast;
742 
743  case ePSITblastn:
744  return eBlastTypePsiTblastn;
745 
746  case ePHIBlastp:
747  return eBlastTypePhiBlastp;
748 
749  case ePHIBlastn:
750  return eBlastTypePhiBlastn;
751 
752  default:
753  return eBlastTypeUndefined;
754  }
755 }
756 
758 {
759  _ASSERT( !program_name.empty() );
760 
761  string lowercase_program_name(program_name);
762  lowercase_program_name = NStr::ToLower(lowercase_program_name);
763 
764 #if _DEBUG
765  ThrowIfInvalidTask(lowercase_program_name);
766 #endif
767 
768  if (NStr::StartsWith(lowercase_program_name, "blastn")) {
769  return eBlastn;
770  // -RMH- support new toolkit program
771  } else if (NStr::StartsWith(lowercase_program_name, "rmblastn")) {
772  return eBlastn;
773  } else if (NStr::StartsWith(lowercase_program_name, "blastp")) {
774  return eBlastp;
775  } else if (lowercase_program_name == "blastx") {
776  return eBlastx;
777  } else if (lowercase_program_name == "tblastn") {
778  return eTblastn;
779  } else if (lowercase_program_name == "tblastx") {
780  return eTblastx;
781  } else if (lowercase_program_name == "rpsblast") {
782  return eRPSBlast;
783  } else if (lowercase_program_name == "rpstblastn") {
784  return eRPSTblastn;
785  } else if (lowercase_program_name == "megablast") {
786  return eMegablast;
787  } else if (lowercase_program_name == "psiblast") {
788  return ePSIBlast;
789  } else if (lowercase_program_name == "psitblastn") {
790  return ePSITblastn;
791  } else if (lowercase_program_name == "dc-megablast") {
792  return eDiscMegablast;
793  } else if (lowercase_program_name == "deltablast") {
794  return eDeltaBlast;
795  } else if (lowercase_program_name == "vecscreen") {
796  return eVecScreen;
797  // FIXME: mapper is used in core as a single program name for all tasks,
798  // we may need a better approach to mapping tasks with fewer program names
799  } else if (lowercase_program_name == "mapper") {
800  return eMapper;
801  } else if (lowercase_program_name == "mapr2g") {
802  return eMapper;
803  } else if (lowercase_program_name == "mapr2r") {
804  return eMapper;
805  } else if (lowercase_program_name == "mapg2g") {
806  return eMapper;
807  } else {
808  NCBI_THROW(CBlastException, eNotSupported,
809  "Program type '" + program_name + "' not supported");
810  }
811 }
812 
814 {
815  char* program_string(0);
816  if (BlastNumber2Program(program, &program_string) == 0) {
817  string retval(program_string);
818  sfree(program_string);
819  return retval;
820  } else {
821  return NcbiEmptyString;
822  }
823 }
824 
825 template <class Position>
827  const CRange<Position>& range)
828 {
829  if (target.Empty()) {
830  throw std::runtime_error("Target range is empty");
831  }
832 
833  if (range.Empty() ||
834  (range.GetFrom() > target.GetTo()) ||
835  ((range.GetFrom() + target.GetFrom()) > target.GetTo())) {
836  return target;
837  }
838 
839  CRange<Position> retval;
840  retval.SetFrom(max(target.GetFrom() + range.GetFrom(), target.GetFrom()));
841  retval.SetTo(min(target.GetFrom() + range.GetTo(), target.GetTo()));
842  return retval;
843 }
844 
845 /// Return the masked locations for a given query as well as whether the
846 /// linked list's elements should be reverted or not (true in the case of
847 /// negative only strand)
848 /// The first element of the returned pair is the linked list of masked
849 /// locations
850 /// The second element of the returned pair is true if the linked list needs to
851 /// be reversed
852 static pair<BlastSeqLoc*, bool>
853 s_GetBlastnMask(const BlastMaskLoc* mask, unsigned int query_index)
854 {
855  const unsigned int kNumContexts = GetNumberOfContexts(eBlastTypeBlastn);
856  _ASSERT(query_index*kNumContexts < (unsigned int)mask->total_size);
857 
858  unsigned int context_index(query_index * kNumContexts);
859 
860  BlastSeqLoc* core_seqloc(0);
861  bool needs_reversing(false);
862  // N.B.: The elements of the seqloc_array corresponding to reverse
863  // strands are all NULL except in a reverse-strand-only search
864  if ( !(core_seqloc = mask->seqloc_array[context_index++])) {
865  core_seqloc = mask->seqloc_array[context_index];
866  needs_reversing = true;
867  }
868  return make_pair(core_seqloc, needs_reversing);
869 }
870 
871 /// Convert EBlastTypeBlastn CORE masks into TSeqLocInfoVector
872 static void s_ConvertBlastnMasks(const CPacked_seqint::Tdata& query_intervals,
873  const BlastMaskLoc* mask,
874  TSeqLocInfoVector& retval)
875 {
876  unsigned int i(0);
877  ITERATE(CPacked_seqint::Tdata, query_interval, query_intervals) {
878  const TSeqRange kTarget((*query_interval)->GetFrom(),
879  (*query_interval)->GetTo());
880 
881  TMaskedQueryRegions query_masks;
882  pair<BlastSeqLoc*, bool> loc_aux = s_GetBlastnMask(mask, i++);
883  for (BlastSeqLoc* loc = loc_aux.first; loc; loc = loc->next) {
884  TSeqRange masked_range(loc->ssr->left, loc->ssr->right);
885  TSeqRange range(Map(kTarget, masked_range));
886  if (range.NotEmpty() && range != kTarget) {
888  seqint->SetId().Assign((*query_interval)->GetId());
889  seqint->SetFrom(range.GetFrom());
890  seqint->SetTo(range.GetTo());
891  CRef<CSeqLocInfo> seqlocinfo
892  (new CSeqLocInfo(seqint, CSeqLocInfo::eFrameNotSet));
893  query_masks.push_back(seqlocinfo);
894  }
895  }
896  if (loc_aux.second) {
897  reverse(query_masks.begin(), query_masks.end());
898  }
899  retval.push_back(query_masks);
900  }
901 }
902 
903 void
905  const objects::CPacked_seqint& queries,
906  const BlastMaskLoc* mask,
907  TSeqLocInfoVector& mask_v)
908 {
909  _ASSERT(mask);
910  const unsigned int kNumContexts = GetNumberOfContexts(program);
911  const CPacked_seqint::Tdata& query_intervals = queries.Get();
912 
913  if (query_intervals.size() != mask->total_size/kNumContexts) {
914  string msg = "Blast_GetSeqLocInfoVector: number of query ids " +
915  NStr::SizetToString(query_intervals.size()) +
916  " not equal to number of queries in mask " +
917  NStr::IntToString(mask->total_size/kNumContexts);
918  NCBI_THROW(CBlastException, eInvalidArgument, msg);
919  }
920 
921  if (program == eBlastTypeBlastn || program == eBlastTypeMapping) {
922  s_ConvertBlastnMasks(query_intervals, mask, mask_v);
923  return;
924  }
925 
926  unsigned int qindex(0);
927  ITERATE(CPacked_seqint::Tdata, query_interval, query_intervals) {
928 
929  const TSeqRange kTarget((*query_interval)->GetFrom(),
930  (*query_interval)->GetTo());
931  TMaskedQueryRegions query_masks;
932  for (unsigned int index = 0; index < kNumContexts; index++) {
933 
934  BlastSeqLoc* loc = mask->seqloc_array[qindex*kNumContexts+index];
935  for ( ; loc; loc = loc->next) {
936  TSeqRange masked_range(loc->ssr->left, loc->ssr->right);
937  TSeqRange range(Map(kTarget, masked_range));
938  if (range.NotEmpty() && range != kTarget) {
939  int frame = BLAST_ContextToFrame(program, index);
940  if (frame == INT1_MAX) {
941  string msg("Conversion from context to frame failed ");
942  msg += "for '" + Blast_ProgramNameFromType(program)
943  + "'";
944  NCBI_THROW(CBlastException, eCoreBlastError, msg);
945  }
947  seqint->SetId().Assign((*query_interval)->GetId());
948  seqint->SetFrom(range.GetFrom());
949  seqint->SetTo(range.GetTo());
950  CRef<CSeqLocInfo> seqloc_info
951  (new CSeqLocInfo(seqint, frame));
952  query_masks.push_back(seqloc_info);
953  }
954  }
955  }
956  mask_v.push_back(query_masks);
957  qindex++;
958  }
959 }
960 
961 //
962 // TQueryMessages
963 //
964 
965 void
966 TQueryMessages::SetQueryId(const string& id)
967 {
968  m_IdString = id;
969 }
970 
971 string
973 {
974  return m_IdString;
975 }
976 
977 void
979 {
980  // Combine the Seq-id's
981  if (m_IdString.empty()) {
982  m_IdString = other.m_IdString;
983  } else {
984  if ( !other.m_IdString.empty() ) {
985  _ASSERT(m_IdString == other.m_IdString);
986  }
987  }
988 
989  if ((*this).empty()) {
990  *this = other;
991  return;
992  }
993 
994  copy(other.begin(), other.end(), back_inserter(*this));
995 }
996 
997 //
998 // TSearchMessages
999 //
1000 
1001 bool
1003 {
1004  ITERATE(vector<TQueryMessages>, qm, *this) {
1005  if ( !qm->empty() ) {
1006  return true;
1007  }
1008  }
1009  return false;
1010 }
1011 
1012 string
1014 {
1015  string retval;
1016  ITERATE(vector<TQueryMessages>, qm, *this) {
1017  if (qm->empty()) {
1018  continue;
1019  }
1020  ITERATE(TQueryMessages, msg, *qm) {
1021  retval += (*msg)->GetMessage() + " ";
1022  }
1023  }
1024  return retval;
1025 }
1026 
1027 void
1029 {
1030  if (empty()) {
1031  *this = other;
1032  return;
1033  }
1034 
1035  for (size_t i = 0; i < other.size(); i++) {
1036  (*this)[i].Combine(other[i]);
1037  }
1038 
1039  RemoveDuplicates();
1040 }
1041 
1042 void
1044 {
1045  NON_CONST_ITERATE(TSearchMessages, sm, (*this)) {
1046  if (sm->empty()) {
1047  continue;
1048  }
1049  sort(sm->begin(), sm->end(), TQueryMessagesLessComparator());
1050  TQueryMessages::iterator new_end =
1051  unique(sm->begin(), sm->end(), TQueryMessagesEqualComparator());
1052  sm->erase(new_end, sm->end());
1053  }
1054 }
1055 
1057  int /*error_id*/,
1058  const string & message)
1059 {
1062  message));
1063 
1064  NON_CONST_ITERATE(TSearchMessages, query_messages, *this) {
1065  query_messages->push_back(sm);
1066  }
1067 }
1068 
1071 {
1073 
1074  if (input.empty()) {
1075  return retval;
1076  }
1077  retval.Reset(new objects::CBioseq_set);
1078 
1079  ITERATE(TSeqLocVector, itr, input) {
1080  if (itr->seqloc->GetId()) {
1081  CBioseq_Handle bh =
1082  itr->scope->GetBioseqHandle(*itr->seqloc->GetId());
1084  CRef<objects::CSeq_entry> seq_entry
1085  (const_cast<objects::CSeq_entry*>(&*seh.GetCompleteSeq_entry()));
1086  retval->SetSeq_set().push_back(seq_entry);
1087  }
1088  }
1089 
1090  return retval;
1091 }
1092 
1093 bool
1094 IsLocalId(const objects::CSeq_id* seqid)
1095 {
1096  bool retval = false;
1097 
1098  if ( !seqid ) {
1099  return retval;
1100  }
1101 
1102  CSeq_id::EAccessionInfo id_info = seqid->IdentifyAccession();
1103  if (seqid->IsLocal() || id_info == CSeq_id::eAcc_local ||
1104  id_info == CSeq_id::eAcc_general) {
1105  retval = true;
1106  }
1107  return retval;
1108 }
1109 
1110 void
1111 LoadSequencesToScope(CScope::TIds& ids, vector<TSeqRange>& ranges, CRef<CScope> & scope)
1112 {
1113  CScope::TBioseqHandles bhs = scope->GetBioseqHandles(ids);
1114 
1115  // Per Eugene Vasilchenko's suggestion, via email on 6/8/10:
1116  // "With the current API you can make artificial delta sequence
1117  // referencing several other sequences and use its CSeqMap to load them
1118  // all in one call. There is no straightforward way to do this, sorry."
1119 
1120  // Create virtual delta sequence
1121  CRef<CBioseq> top_seq(new CBioseq);
1122  CSeq_inst& inst = top_seq->SetInst();
1125  CDelta_ext& delta = inst.SetExt().SetDelta();
1126  int i = 0;
1127  ITERATE(CScope::TBioseqHandles, it, bhs) {
1128  CRef<CDelta_seq> seq(new CDelta_seq);
1129  CSeq_interval& interval = seq->SetLoc().SetInt();
1130  interval.SetId(*SerialClone(*it->GetAccessSeq_id_Handle().GetSeqId()));
1131  if (ranges[i].GetFrom() > ranges[i].GetToOpen()) {
1132  TSeqPos length = it->GetBioseqLength();
1133  interval.SetFrom(length - ranges[i].GetFrom());
1134  interval.SetTo(length - ranges[i].GetTo());
1135  } else {
1136  interval.SetFrom(ranges[i].GetFrom());
1137  interval.SetTo(ranges[i].GetTo());
1138  }
1139  i++;
1140  delta.Set().push_back(seq);
1141  }
1142 
1143  // Add it to the scope
1144  CBioseq_Handle top_bh = scope->AddBioseq(*top_seq);
1145 
1146  // prepare selector. SetLinkUsedTSE() is necessary for batch loading
1148  sel.SetLinkUsedTSE(top_bh.GetTSE_Handle());
1149 
1150  // and get all sequence data in batch mode
1151  _TRACE("Fetching " << ids.size() << " sequences");
1152  top_bh.GetSeqMap().CanResolveRange(&*scope, sel);
1153 }
1154 
1156 {
1157  if(m_handler != NULL) {
1158  m_handler->Post(mess);
1159  }
1160  if(m_save) {
1162  string m;
1163  mess.Write(m);
1164  d->SetMessage(NStr::Sanitize(m));
1165  d->SetCode((int)mess.m_Severity);
1166  {
1167  DEFINE_STATIC_MUTEX(mx);
1168  CMutexGuard guard(mx);
1169  m_messages.push_back(d);
1170  }
1171  }
1172 }
1173 
1175 {
1176  DEFINE_STATIC_MUTEX(mx);
1177  CMutexGuard guard(mx);
1178  m_messages.clear();
1179 }
1180 
1182 {
1183  if(m_handler) {
1185  m_handler = NULL;
1186  }
1187 }
1188 
1190 {
1191  m_save = false;
1192  ResetMessages();
1193 }
1194 
1195 
1196 END_SCOPE(blast)
1198 
1199 /* @} */
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
Auxiliary functions for BLAST.
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)
Create and initialize a new sequence interval.
Definition: blast_filter.c:608
EBlastSeverity
BLAST error message severities.
Definition: blast_message.h:55
@ eBlastSevWarning
Definition: blast_message.h:57
const int kBlastMessageNoContext
Declared in blast_message.h as extern const.
Definition: blast_message.c:36
#define BLAST_GENETIC_CODE
Default genetic code for query and/or database.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
@ eBlastTypeBlastn
Definition: blast_program.h:74
@ eBlastTypeBlastx
Definition: blast_program.h:75
@ eBlastTypePsiTblastn
Definition: blast_program.h:83
@ eBlastTypeRpsTblastn
Definition: blast_program.h:85
@ eBlastTypePhiBlastn
Definition: blast_program.h:87
@ eBlastTypeMapping
Definition: blast_program.h:88
@ eBlastTypeTblastx
Definition: blast_program.h:79
@ eBlastTypePsiBlast
Definition: blast_program.h:82
@ eBlastTypePhiBlastp
Definition: blast_program.h:86
@ eBlastTypeRpsBlast
Definition: blast_program.h:84
@ eBlastTypeUndefined
Definition: blast_program.h:89
@ eBlastTypeTblastn
Definition: blast_program.h:77
@ eBlastTypeBlastp
Definition: blast_program.h:73
Definitions needed for implementing the BlastSeqSrc interface and low level details of the implementa...
@ eOidRange
Data is a range of contiguous ordinal ids (indices)
@ eOidList
Data is a list of discontiguous ordinal ids (indices)
Internal auxiliary setup classes/functions for C++ BLAST APIs.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Definition: blast_types.hpp:56
@ eVecScreen
Vector screening.
Definition: blast_types.hpp:72
@ eTblastx
Translated nucl-Translated nucl.
Definition: blast_types.hpp:62
@ eBlastn
Nucl-Nucl (traditional blastn)
Definition: blast_types.hpp:58
@ eRPSBlast
protein-pssm (reverse-position-specific BLAST)
Definition: blast_types.hpp:63
@ ePHIBlastn
Nucleotide PHI BLAST.
Definition: blast_types.hpp:70
@ eBlastp
Protein-Protein.
Definition: blast_types.hpp:59
@ ePHIBlastp
Protein PHI BLAST.
Definition: blast_types.hpp:69
@ eMapper
Jumper alignment for mapping.
Definition: blast_types.hpp:73
@ ePSIBlast
PSI Blast.
Definition: blast_types.hpp:67
@ eTblastn
Protein-Translated nucl.
Definition: blast_types.hpp:61
@ eMegablast
Nucl-Nucl (traditional megablast)
Definition: blast_types.hpp:65
@ eDeltaBlast
Delta Blast.
Definition: blast_types.hpp:71
@ ePSITblastn
PSI Tblastn.
Definition: blast_types.hpp:68
@ eDiscMegablast
Nucl-Nucl using discontiguous megablast.
Definition: blast_types.hpp:66
@ eRPSTblastn
nucleotide-pssm (RPS blast with translated query)
Definition: blast_types.hpp:64
@ eBlastx
Translated nucl-Protein.
Definition: blast_types.hpp:60
Int2 BlastNumber2Program(EBlastProgramType number, char **program)
Return string name for program given a number.
Definition: blast_util.c:312
Int1 BLAST_ContextToFrame(EBlastProgramType prog_number, Uint4 context_number)
This function translates the context number of a context into the frame of the sequence.
Definition: blast_util.c:839
ncbi::TMaskedQueryRegions mask
CBioseq_Handle –.
CBlast4_error –.
Defines BLAST error codes (user errors included)
Wrapper class for BlastSeqLoc.
Definition: blast_aux.hpp:355
void SetFrame(const string &frame)
Definition: ddumpable.cpp:137
void Log(const string &name, const char *value, CDebugDumpFormatter::EValueType type=CDebugDumpFormatter::eValue, const string &comment=kEmptyStr)
Definition: ddumpable.cpp:151
CDelta_seq –.
Definition: Delta_seq.hpp:66
static const string & GetNcbieaa(int id)
CRange –.
Definition: Range.hpp:68
Error or Warning Message from search.
structure for seqloc info
Definition: seqlocinfo.hpp:48
CSeq_entry_Handle –.
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
Collection of masked regions for a single query sequence.
Definition: seqlocinfo.hpp:113
Class for the messages for an individual query sequence.
string m_IdString
The query identifier.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
bool empty() const
Definition: set.hpp:133
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
static char tmp[3200]
Definition: utf8.c:42
Defines the interface to interact with the genetic code singleton object.
Int2 GenCodeSingletonAdd(Uint4 gen_code_id, const Uint1 *gen_code_str)
Add a genetic code entry to the singleton.
void GenCodeSingletonFini()
Uninitialize the genetic code singleton.
Uint1 * GenCodeSingletonFind(Uint4 gen_code_id)
Returns the genetic code string for the requested genetic code id.
void GenCodeSingletonInit()
Initialize the genetic code singleton.
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:451
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:271
CRange< Position > Map(const CRange< Position > &target, const CRange< Position > &range)
Definition: blast_aux.cpp:826
BlastHitSavingOptions * m_Ptr
Definition: blast_aux.hpp:326
void Blast_GetSeqLocInfoVector(EBlastProgramType program, const objects::CPacked_seqint &queries, const BlastMaskLoc *mask, TSeqLocInfoVector &mask_v)
Converts a BlastMaskLoc internal structure into an object returned by the C++ API.
Definition: blast_aux.cpp:904
bool HasMessages() const
Definition: blast_aux.cpp:1002
QuerySetUpOptions * m_Ptr
Definition: blast_aux.hpp:312
BlastExtensionParameters * m_Ptr
Definition: blast_aux.hpp:324
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:376
BlastSeqLoc * m_Ptr
Definition: blast_aux.hpp:355
PSIDiagnosticsRequest * m_Ptr
Definition: blast_aux.hpp:347
BlastSeqLoc * CSeqLoc2BlastSeqLoc(const objects::CSeq_loc *slp)
Converts a CSeq_loc into a BlastSeqLoc structure used in NewBlast.
Definition: blast_aux.cpp:539
void ResetMessages(void)
Reset the message buffer, erasing all saved messages.
Definition: blast_aux.cpp:1174
BlastGapAlignStruct * m_Ptr
Definition: blast_aux.hpp:342
virtual void Post(const SDiagMessage &mess)
Save and post diag message.
Definition: blast_aux.cpp:1155
BlastInitialWordParameters * m_Ptr
Definition: blast_aux.hpp:320
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:413
static void s_ConvertBlastnMasks(const CPacked_seqint::Tdata &query_intervals, const BlastMaskLoc *mask, TSeqLocInfoVector &retval)
Convert EBlastTypeBlastn CORE masks into TSeqLocInfoVector.
Definition: blast_aux.cpp:872
LookupTableOptions * m_Ptr
Definition: blast_aux.hpp:314
void AddMessageAllQueries(EBlastSeverity severity, int error_id, const string &message)
Add a message for all queries.
Definition: blast_aux.cpp:1056
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:495
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:398
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:471
BlastScoringOptions * m_Ptr
Definition: blast_aux.hpp:334
TAutoUint1ArrayPtr FindGeneticCode(int genetic_code)
Retrieves the requested genetic code in Ncbistdaa format.
Definition: blast_aux.cpp:588
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:263
BlastHitSavingParameters * m_Ptr
Definition: blast_aux.hpp:328
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:310
~CAutomaticGenCodeSingleton()
destructor
Definition: blast_aux.cpp:645
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:128
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:321
Blast_ExtendWord * m_Ptr
Definition: blast_aux.hpp:322
static pair< BlastSeqLoc *, bool > s_GetBlastnMask(const BlastMaskLoc *mask, unsigned int query_index)
Return the masked locations for a given query as well as whether the linked list's elements should be...
Definition: blast_aux.cpp:853
void LoadSequencesToScope(CScope::TIds &ids, vector< TSeqRange > &ranges, CRef< CScope > &scope)
Definition: blast_aux.cpp:1111
BlastHSPResults * m_Ptr
Definition: blast_aux.hpp:343
SBlastProgress * m_Ptr
Definition: blast_aux.hpp:357
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:216
EBlastProgramType EProgramToEBlastProgramType(EProgram p)
Convert EProgram to EBlastProgramType.
Definition: blast_aux.cpp:709
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:440
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:203
BlastSeqSrcIterator * m_Ptr
Definition: blast_aux.hpp:351
PSIDiagnosticsResponse * m_Ptr
Definition: blast_aux.hpp:348
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:113
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:176
CRef< objects::CBioseq_set > TSeqLocVector2Bioseqs(const TSeqLocVector &input)
Convert a TSeqLocVector to a CBioseq_set.
Definition: blast_aux.cpp:1070
string GetQueryId() const
Get the query id as a string.
Definition: blast_aux.cpp:972
void Combine(const TSearchMessages &other_msgs)
Combine another set of search messages with this one.
Definition: blast_aux.cpp:1028
~CBlastAppDiagHandler()
Destructor.
Definition: blast_aux.cpp:1181
bool IsLocalId(const objects::CSeq_id *seqid)
Returns true if the CSeq_id is a local id.
Definition: blast_aux.cpp:1094
BlastExtensionOptions * m_Ptr
Definition: blast_aux.hpp:323
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:385
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:360
string ToString() const
Converts messages to a string, which is returned.
Definition: blast_aux.cpp:1013
PSIMsa * m_Ptr
Definition: blast_aux.hpp:345
BlastSeqLoc * Release()
Definition: blast_aux.hpp:355
static set< string > GetTasks(ETaskSets choice=eAll)
Retrieve the set of supported tasks.
BlastDatabaseOptions * m_Ptr
Definition: blast_aux.hpp:331
BlastQueryInfo * m_Ptr
Definition: blast_aux.hpp:311
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:186
string EProgramToTaskName(EProgram p)
Convert a EProgram enumeration value to a task name (as those used in the BLAST command line binaries...
Definition: blast_aux.cpp:676
BLAST_SequenceBlk * m_Ptr
Definition: blast_aux.hpp:309
void RemoveDuplicates()
Find and remove redundant messages.
Definition: blast_aux.cpp:1043
unsigned int GetNumberOfContexts(EBlastProgramType p)
Returns the number of contexts for a given BLAST program.
void SetQueryId(const string &id)
Set the query id as a string.
Definition: blast_aux.cpp:966
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:483
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:526
BlastScoreBlk * m_Ptr
Definition: blast_aux.hpp:333
BlastMaskLoc * m_Ptr
Definition: blast_aux.hpp:354
BlastEffectiveLengthsOptions * m_Ptr
Definition: blast_aux.hpp:338
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:227
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:294
Blast_Message * m_Ptr
Definition: blast_aux.hpp:352
BlastSeqSrc * m_Ptr
Definition: blast_aux.hpp:350
string Blast_ProgramNameFromType(EBlastProgramType program)
Returns a string program name, given a blast::EBlastProgramType enumeration.
Definition: blast_aux.cpp:813
void DoNotSaveMessages(void)
Call to turn off saving of diag messages and discard all saved messages.
Definition: blast_aux.cpp:1189
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:285
void Combine(const TQueryMessages &other)
Combine other messages with these.
Definition: blast_aux.cpp:978
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:76
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:168
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:429
EProgram ProgramNameToEnum(const std::string &program_name)
Map a string into an element of the ncbi::blast::EProgram enumeration (except eBlastProgramMax).
Definition: blast_aux.cpp:757
CDiagHandler * m_handler
Definition: blast_aux.hpp:264
PSIMatrix * m_Ptr
Definition: blast_aux.hpp:346
DEFINE_CLASS_STATIC_FAST_MUTEX(CAutomaticGenCodeSingleton::sm_Mutex)
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:343
void ThrowIfInvalidTask(const string &task)
Validates that the task provided is indeed a valid task, otherwise throws a CBlastException.
Definition: blast_aux.cpp:662
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:154
list< CRef< objects::CBlast4_error > > m_messages
Definition: blast_aux.hpp:265
BlastScoringParameters * m_Ptr
Definition: blast_aux.hpp:335
BlastEffectiveLengthsParameters * m_Ptr
Definition: blast_aux.hpp:340
LookupTableWrap * m_Ptr
Definition: blast_aux.hpp:315
PSIBlastOptions * m_Ptr
Definition: blast_aux.hpp:330
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:194
void AddGeneticCode(int genetic_code)
Add the genetic code to the genetic code singleton.
Definition: blast_aux.cpp:653
CAutomaticGenCodeSingleton(int genetic_code=0)
Default constructor.
Definition: blast_aux.cpp:629
BlastInitialWordOptions * m_Ptr
Definition: blast_aux.hpp:318
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: blast_aux.cpp:513
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:469
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define _TRACE(message)
Definition: ncbidbg.hpp:122
void Write(string &str, TDiagWriteFlags flags=fNone) const
Binary OR of "EDiagWriteFlags".
Definition: ncbidiag.cpp:5355
EDiagSev m_Severity
Severity level.
Definition: ncbidiag.hpp:1651
virtual void Post(const SDiagMessage &mess)=0
Post message to handler.
void SetDiagHandler(CDiagHandler *handler, bool can_delete=true)
Set the diagnostic handler using the specified diagnostic handler class.
Definition: ncbidiag.cpp:6288
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const TPrim & Get(void) const
Definition: serialbase.hpp:347
C * SerialClone(const C &src)
Create on heap a clone of the source object.
Definition: serialbase.hpp:512
EAccessionInfo
For IdentifyAccession (below)
Definition: Seq_id.hpp:220
@ eAcc_general
Definition: Seq_id.hpp:444
@ eAcc_local
Definition: Seq_id.hpp:337
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
Definition: scope.cpp:530
TBioseqHandles GetBioseqHandles(const TIds &ids)
Get bioseq handles for all ids.
Definition: scope.cpp:143
vector< CBioseq_Handle > TBioseqHandles
Definition: scope.hpp:144
vector< CSeq_id_Handle > TIds
Definition: scope.hpp:143
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
const CSeqMap & GetSeqMap(void) const
Get sequence map.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
SSeqMapSelector & SetLinkUsedTSE(bool link=true)
Definition: seq_map_ci.hpp:157
bool CanResolveRange(CScope *scope, const SSeqMapSelector &sel) const
Definition: seq_map.cpp:986
@ fFindAnyLeaf
Definition: seq_map.hpp:139
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
bool Empty(void) const
Definition: range.hpp:148
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
Definition: ncbistr.cpp:2751
#define kEmptyStr
Definition: ncbistr.hpp:123
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
#define NcbiEmptyString
Definition: ncbistr.hpp:122
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static string Sanitize(CTempString str, TSS_Flags flags=fSS_print)
Sanitize a string, allowing only specified classes of characters.
Definition: ncbistr.hpp:2876
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
#define DEFINE_STATIC_MUTEX(id)
Define static mutex and initialize it.
Definition: ncbimtx.hpp:512
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
void SetTo(TTo value)
Assign a value to To data member.
list< CRef< CSeq_interval > > Tdata
void SetId(TId &value)
Assign a value to Id data member.
list< CRef< CSeq_loc > > Tdata
void SetFrom(TFrom value)
Assign a value to From data member.
@ e_not_set
No variant selected.
Definition: Seq_loc_.hpp:97
bool IsNcbistdaa(void) const
Check if variant Ncbistdaa is selected.
Definition: Seq_data_.hpp:684
void SetExt(TExt &value)
Assign a value to Ext data member.
Definition: Seq_inst_.cpp:147
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
const TNcbistdaa & GetNcbistdaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:690
void SetRepr(TRepr value)
Assign a value to Repr data member.
Definition: Seq_inst_.hpp:574
void SetMol(TMol value)
Assign a value to Mol data member.
Definition: Seq_inst_.hpp:621
@ eRepr_virtual
no seq data
Definition: Seq_inst_.hpp:93
@ e_Ncbieaa
extended ASCII 1 letter aa codes
Definition: Seq_data_.hpp:111
@ e_Ncbistdaa
consecutive codes for std aas
Definition: Seq_data_.hpp:113
@ eMol_not_set
> cdna = rna
Definition: Seq_inst_.hpp:109
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
static int input()
int i
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
#define INT1_MAX
largest number represented by signed short (one byte)
Definition: ncbi_std.h:166
T max(T x_, T y_)
T min(T x_, T y_)
void abort()
Int4 delta(size_t dimension_, const Int4 *score_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
static const char * prefix[]
Definition: pcregrep.c:405
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
Definition: seqlocinfo.hpp:139
Definition of SSeqLoc structure.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Definition: blast_def.h:244
Boolean sequence_allocated
TRUE if memory has been allocated for sequence.
Definition: blast_def.h:251
Int4 length
Length of sequence.
Definition: blast_def.h:246
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
Boolean sequence_start_allocated
TRUE if memory has been allocated for sequence_start.
Definition: blast_def.h:253
Int4 query_length
Length of this query, strand or frame.
Boolean is_valid
Determine if this context is valid or not.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Int4 length_adjustment
Length adjustment for boundary conditions.
Int8 eff_searchsp
Effective search space for this context.
Int4 query_index
Index of query (same for all frames)
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
Int8 * searchsp_eff
Search space to be used for statistical calculations (one such per query context)
Int8 db_length
Database length to be used for statistical calculations.
Int4 dbseq_num
Number of database sequences to be used for statistical calculations.
Int4 num_searchspaces
Number of elements in searchsp_eff, this must be equal to the number of contexts in the search.
Int8 real_db_length
Total database length to use in search space calculations.
Int4 real_num_seqs
Number of subject sequences to use for search space calculations.
EBlastTbackExt eTbackExt
type of traceback extension.
EBlastPrelimGapExt ePrelimGapExt
type of preliminary gapped extension (normally) for calculating score.
double gap_x_dropoff_final
X-dropoff value for the final gapped extension (in bits)
double gap_x_dropoff
X-dropoff value for gapped extension (in bits)
Int4 gap_x_dropoff_final
X-dropoff value for the final gapped extension (raw)
Int4 gap_x_dropoff
X-dropoff value for gapped extension (raw)
int max_hits
Maximum number of hits per area of query.
BlastHSPBestHitOptions * best_hit
Best Hit algorithm.
BlastHSPCullingOptions * culling_opts
culling algorithm
Int4 num_queries
Number of query sequences.
Definition: blast_hits.h:184
Int4 culling_limit
If the query range of an HSP is contained in at least this many higher-scoring HSPs,...
Int4 longest_intron
The longest distance between HSPs allowed for combining via sum statistics with uneven gaps.
Int4 max_hsps_per_subject
Queries are paired reads, for mapping.
Int4 total_hsp_limit
Maximal total number of HSPs to keep.
double expect_value
The expect value cut-off threshold for an HSP, or a combined hit if sum statistics is used.
Int4 cutoff_score
The (raw) score cut-off threshold.
Int4 hsp_num_max
Maximal number of HSPs to save for one database sequence.
Boolean do_sum_stats
Force sum statistics to be used to combine HSPs, TRUE by default for all ungapped searches and transl...
Int4 hitlist_size
Maximal number of database sequences to return results for.
Int4 min_diag_separation
How many diagonals separate a hit from a substantial alignment before it's not blocked out.
Int4 min_hit_length
optional minimum alignment length; alignments not at least this long are discarded
BlastHSPFilteringOptions * hsp_filt_opt
Contains options to configure the HSP filtering/writing structures. If not set, the default HSP filt...
double percent_identity
The percent identity cut-off threshold.
double x_dropoff
X-dropoff value (in bits) for the ungapped extension.
Int4 window_size
Maximal allowed distance between 2 hits in case 2 hits are required to trigger the extension.
Structure for keeping the query masking information.
Definition: blast_def.h:210
Int4 total_size
Total size of the BlastSeqLoc array below.
Definition: blast_def.h:218
BlastSeqLoc ** seqloc_array
Array of masked locations.
Definition: blast_def.h:231
Int4 first_context
Index of the first element of the context array.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
Int4 last_context
Index of the last element of the context array.
Uint4 max_length
Length of the longest among the concatenated queries.
Boolean protein_alphabet
TRUE if alphabet_code is for a protein alphabet (e.g., ncbistdaa etc.), FALSE for nt.
Definition: blast_stat.h:178
Int4 loscore
Min.
Definition: blast_stat.h:197
double scale_factor
multiplier for all cutoff and dropoff scores
Definition: blast_stat.h:201
Int2 ambig_occupy
How many occupied?
Definition: blast_stat.h:220
Int2 ambig_size
size of array above.
Definition: blast_stat.h:219
char * name
name of scoring matrix.
Definition: blast_stat.h:183
Int2 alphabet_start
numerical value of 1st letter.
Definition: blast_stat.h:182
Int2 alphabet_size
size of alphabet.
Definition: blast_stat.h:181
Int4 penalty
penalty for mismatch in blastn.
Definition: blast_stat.h:199
Int4 number_of_contexts
Used by sfp and kbp, how large are these.
Definition: blast_stat.h:217
Boolean read_in_matrix
If TRUE, matrix is read in, otherwise produce one from penalty and reward above.
Definition: blast_stat.h:202
Int4 hiscore
Max.
Definition: blast_stat.h:198
Int4 reward
reward for match in blastn.
Definition: blast_stat.h:200
Int2 penalty
Penalty for a mismatch.
Int4 gap_open
Extra penalty for starting a gap.
Int4 gap_extend
Penalty for each gap residue.
Int2 reward
Reward for a match.
Boolean gapped_calculation
gap-free search if FALSE
Int4 shift_pen
Penalty for shifting a frame in out-of-frame gapping.
char * matrix
Name of the matrix containing all scores: needed for finding neighboring words.
Boolean is_ooframe
Should out-of-frame gapping be used in a translated search?
double scale_factor
multiplier for all cutoff scores
Int4 gap_extend
Penalty for each gap residue (scaled version)
Int2 penalty
Penalty for a mismatch.
Int4 shift_pen
Penalty for shifting a frame in out-of-frame gapping (scaled version)
Int4 gap_open
Extra penalty for starting a gap (scaled version)
Int2 reward
Reward for a match.
Used to hold a set of positions, mostly used for filtering.
Definition: blast_def.h:204
SSeqRange * ssr
location data on the sequence.
Definition: blast_def.h:206
struct BlastSeqLoc * next
next in linked list
Definition: blast_def.h:205
unsigned int chunk_sz
Size of the chunks to advance over the BlastSeqSrc, also size of oid_list member, this is provided to...
BlastSeqSrcItrType itr_type
Indicates which member to access: oid_list or oid_range.
unsigned int current_pos
Keep track of this iterator's current position, implementations use UINT4_MAX to indicate this is uni...
EBlastSeverity severity
severity code
Definition: blast_message.h:72
char * message
User message to be saved.
Definition: blast_message.h:73
Int4 word_size
Determines the size of the lookup table.
double threshold
Score threshold for putting words in a lookup table (fractional values are allowed,...
Int4 mb_template_type
Type of a discontiguous word template.
ELookupTableType lut_type
What kind of lookup table to construct?
Int4 mb_template_length
Length of the discontiguous words.
Boolean nsg_compatibility_mode
Compatibility option for the NCBI's structure group (note nsg_ prefix, stands for NCBI's structure gr...
double impala_scaling_factor
Scaling factor as used in IMPALA to do the matrix rescaling.
double inclusion_ethresh
Minimum evalue for inclusion in PSSM calculation.
Int4 pseudo_count
Pseudocount constant.
Boolean use_best_alignment
If set to TRUE, use the best alignment when multiple HSPs are found in a query-subject alignment (i....
Boolean information_content
request information content
Definition: blast_psi.h:182
Boolean frequency_ratios
request frequency ratios
Definition: blast_psi.h:187
Boolean weighted_residue_frequencies
request observed weighted residue frequencies
Definition: blast_psi.h:185
Boolean gapless_column_weights
request gapless column weights
Definition: blast_psi.h:188
Boolean residue_frequencies
request observed residue frequencies
Definition: blast_psi.h:183
Uint4 alphabet_size
Specifies length of alphabet.
Definition: blast_psi.h:225
double kappa
Kappa Karlin-Altschul parameter.
Definition: blast_psi.h:155
Uint4 ncols
Number of columns in PSSM (query_length)
Definition: blast_psi.h:151
double lambda
Lambda Karlin-Altschul parameter.
Definition: blast_psi.h:154
Uint4 nrows
Number of rows in PSSM (alphabet_size)
Definition: blast_psi.h:152
double h
H Karlin-Altschul parameter.
Definition: blast_psi.h:156
Uint4 num_seqs
Number of distinct sequences aligned with the query (does not include the query)
Definition: blast_psi.h:59
Uint4 query_length
Length of the query.
Definition: blast_psi.h:58
PSIMsaDimensions * dimensions
dimensions of the msa
Definition: blast_psi.h:76
Uint1 strand_option
In blastn: which strand to search: 1 = forward; 2 = reverse; 3 = both.
char * filter_string
DEPRECATED, filtering options above.
SBlastFilterOptions * filtering_options
structured options for all filtering offered from algo/blast/core for BLAST.
Int4 genetic_code
Genetic code to use for translation, [t]blastx only.
SRepeatFilterOptions * repeatFilterOptions
for organism specific repeat filtering.
SSegOptions * segOptions
low-complexity filtering for proteins sequences (includes translated nucleotides).
Boolean mask_at_hash
mask query only for lookup table creation
SDustOptions * dustOptions
low-complexity filtering for nucleotides.
EBlastStage stage
Stage of the BLAST search currently in progress.
Definition: blast_def.h:342
void * user_data
Pointer to user-provided data.
Definition: blast_def.h:344
SDiagMessage –.
Definition: ncbidiag.hpp:1599
Options for dust algorithm, applies only to nucl.
int linker
min distance to link segments.
char * database
Nucleotide database for mini BLAST search.
Options for SEG algorithm, applies only to protein-protein comparisons.
int window
initial window to trigger further work.
Selector used in CSeqMap methods returning iterators.
Definition: seq_map_ci.hpp:113
Int4 left
left endpoint of range (zero based)
Definition: blast_def.h:156
Int4 right
right endpoint of range (zero based)
Definition: blast_def.h:157
#define _ASSERT
Modified on Wed Apr 17 13:09:36 2024 by modify_doxy.py rev. 669887