NCBI C++ ToolKit
magicblast.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: magicblast.cpp 100101 2023-06-15 14:10:29Z merezhuk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Greg Boratyn
27  *
28  */
29 
30 /** @file magicblast.cpp
31  * Implementation of CMagicBlast.
32  */
33 
34 #include <ncbi_pch.hpp>
40 
41 #include "blast_seqalign.hpp"
42 #include "blast_aux_priv.hpp"
43 #include "../core/jumper.h"
44 
45 /** @addtogroup AlgoBlast
46  *
47  * @{
48  */
49 
52 BEGIN_SCOPE(blast);
53 
55  CRef<CLocalDbAdapter> blastdb,
57  : m_Queries(query_factory),
58  m_LocalDbAdapter(blastdb),
59  m_Options(&options->SetOptions()),
60  m_BtopSpliceSignals(true)
61 {
63  if (!env.Get("BTOP_NO_SPLICE_SIGNALS").empty()) {
64  m_BtopSpliceSignals = false;
65  }
66  x_Validate();
67 }
68 
69 
71 {
72  x_Run();
73 
74  // close HSP stream and create internal results structure
77  wrapped_results.Reset(WrapStruct(results, Blast_MappingResultsFree));
78 
80  results);
81 
82  // build and return results
83  return x_BuildSeqAlignSet(results);
84 }
85 
86 
88 {
89  x_Run();
90 
91  // close HSP stream and create internal results structure
94  wrapped_results.Reset(WrapStruct(results, Blast_MappingResultsFree));
95 
97  results);
98 
99  // build and return results
100  return x_BuildResultSet(results);
101 }
102 
103 
105 {
107  m_Options,
109  GetNumberOfThreads()));
110 
111  int status = m_PrelimSearch->CheckInternalData();
112  if (status != 0)
113  {
114  // Search was not run, but we send back an empty CSearchResultSet.
115  CRef<ILocalQueryData> local_query_data =
117 
118  vector< CConstRef<objects::CSeq_id> > seqid_vec;
119  vector< CRef<CBlastAncillaryData> > ancill_vec;
120  TSeqAlignVector sa_vec;
121  size_t index;
122  unsigned int num_subjects = 0;
125 
126  IBlastSeqInfoSrc * subject_infosrc =
128 
129  if(subject_infosrc != NULL) {
130  num_subjects = subject_infosrc->Size();
131  }
132  }
133  TSearchMessages msg_vec;
134  for (index=0; index<local_query_data->GetNumQueries(); index++)
135  {
137  local_query_data->GetSeq_loc(index)->GetId());
138 
139  TQueryMessages q_msg;
140  local_query_data->GetQueryMessages(index, q_msg);
141  msg_vec.push_back(q_msg);
142  seqid_vec.push_back(query_id);
144  sa_vec.push_back(tmp_align);
145  pair<double, double> tmp_pair(-1.0, -1.0);
146  CRef<CBlastAncillaryData> tmp_ancillary_data(
147  new CBlastAncillaryData(tmp_pair, tmp_pair,
148  tmp_pair, 0));
149 
150  ancill_vec.push_back(tmp_ancillary_data);
151 
152  for(unsigned int i=1; i < num_subjects; i++) {
153  TQueryMessages msg;
154  msg_vec.push_back(msg);
155  seqid_vec.push_back(query_id);
157  sa_vec.push_back(tmp_align);
158  CRef<CBlastAncillaryData> tmp_ancillary_data(
159  new CBlastAncillaryData(tmp_pair, tmp_pair, tmp_pair, 0));
160  ancill_vec.push_back(tmp_ancillary_data);
161  }
162  }
164 
165  // FIXME: Report search messages
166  }
167 
168  try {
170  // do mapping
172 
173  }
174  catch( CIndexedDbException & ) {
175  throw;
176  }
177  catch (CBlastException & e) {
179  throw;
180  }
181  }
182  catch (...) {
183 
184  }
185 
186  return 0;
187 }
188 
189 
191 {
192  if (m_Options.Empty()) {
193  NCBI_THROW(CBlastException, eInvalidArgument, "Missing options");
194  }
195 
196  if (m_Queries.Empty()) {
197  NCBI_THROW(CBlastException, eInvalidArgument, "Missing query");
198  }
199 
200  if (m_LocalDbAdapter.Empty()) {
201  NCBI_THROW(CBlastException, eInvalidArgument,
202  "Missing database or subject sequences");
203  }
204 }
205 
206 // Compute BTOP string and percent identity from JumperEdits structure that
207 // contains base mismatch information
// The SAM MD tag is assembled in the same pass over the edits.
//
// @param chain Chain of HSPs to describe; must be non-NULL and its first
//        HSP must be set (both asserted below)
// @param btop [out] BTOP string; text is appended to the caller's value
// @param md_tag [out] SAM MD tag; text is appended to the caller's value
// @param perc_id [out] Percent identity, computed as
//        100 * identical positions / alignment length
// @param btop_splice_signals If true, an intron entry in BTOP is written as
//        two bases, a length reduced by 4, and two more bases; otherwise
//        only the full subject gap length is written
208 static void s_ComputeBtopAndIdentity(const HSPChain* chain,
209  string& btop,
210  string& md_tag,
211  double& perc_id,
212  bool btop_splice_signals)
213 {
214  _ASSERT(chain);
215  _ASSERT(chain->hsps->hsp);
// value of a base code that is treated as a gap in the query below
// (presumably the BLASTNA gap code -- confirm)
216  const Uint1 kGap = 15;
217 
// identical positions over the whole chain
218  int num_identical = 0;
// non-identical alignment columns; num_identical is added at the end
219  int len = 0;
// matches seen since the last entry written to md_tag
220  int md_matches = 0;
221  HSPContainer* h = chain->hsps;
// preceding HSP in the chain, NULL while processing the first one
222  BlastHSP* prev = NULL;
223  for (; h; prev = h->hsp, h = h->next) {
224  const BlastHSP* hsp = h->hsp;
225  const JumperEditsBlock* hsp_edits = hsp->map_info->edits;
226 
// describe what lies between the previous HSP and this one
227  if (prev) {
228  int subject_gap = hsp->subject.offset - prev->subject.end;
// NOTE(review): the last part of this condition (source line 231) is absent
// from this listing; confirm against the original file
229  if (subject_gap > 0 &&
230  (prev->map_info->right_edge & MAPPER_SPLICE_SIGNAL) &&
232 
233  // intron
234  btop += (string)"^";
235 
236  if (btop_splice_signals) {
// the two bases are unpacked from the low four bits of the edge byte,
// two bits per base, and printed in lower case
237  string acceptor(2u, ' ');
238  acceptor[0] = (char)tolower(BLASTNA_TO_IUPACNA[
239  (int)((prev->map_info->right_edge >> 2) & 3)]);
240  acceptor[1] = (char)tolower(BLASTNA_TO_IUPACNA[
241  (int)(prev->map_info->right_edge & 3)]);
242  btop += acceptor;
243 
// presumably 4 is subtracted because the 2 + 2 signal bases are printed
// separately -- confirm
244  btop += NStr::IntToString(subject_gap - 4);
245 
246  string donor(2u, ' ');
247  donor[0] = (char)tolower(BLASTNA_TO_IUPACNA[
248  (int)((hsp->map_info->left_edge >> 2) & 3)]);
249  donor[1] = (char)tolower(BLASTNA_TO_IUPACNA[
250  (int)(hsp->map_info->left_edge & 3)]);
251  btop += donor;
252  }
253  else {
254  btop += NStr::IntToString(subject_gap);
255  }
256  btop += "^";
257  }
258  else if (subject_gap > 0) {
259  // gap in subject/reference
260  btop += (string)"%" + NStr::IntToString(subject_gap) + "%";
261 
262  md_tag += (string)"!" + NStr::IntToString(subject_gap) + "!";
263  }
264 
// query bases between the two HSPs: a positive gap is written as _N_ and
// added to the alignment length; a negative value means the HSPs overlap
// on the query and the overlap length is written as (N)
265  int query_gap = hsp->query.offset - prev->query.end;
266  if (query_gap > 0) {
267  btop += (string)"_" + NStr::IntToString(query_gap) + "_";
268  len += hsp->query.offset - prev->query.end;
269  }
270  else if (query_gap < 0) {
271  btop += (string)"(" + NStr::IntToString(-query_gap) + ")";
272  }
273  }
274 
// walk the edits of this HSP; query_pos tracks the query position already
// described
275  int query_pos = hsp->query.offset;
276  int num_matches = 0;
277  for (int i=0;i < hsp_edits->num_edits;i++) {
// run of matching bases between the previous edit (or HSP start) and
// this edit
278  num_matches = hsp_edits->edits[i].query_pos - query_pos;
279  query_pos += num_matches;
280 
281  _ASSERT(num_matches >= 0);
282  num_identical += num_matches;
283  if (num_matches > 0) {
284  btop += NStr::IntToString(num_matches);
285  }
286 
// two-letter BTOP entry: query base followed by subject base
287  char buff[3];
288  buff[0] = BLASTNA_TO_IUPACNA[(int)hsp_edits->edits[i].query_base];
289  buff[1] = BLASTNA_TO_IUPACNA[(int)hsp_edits->edits[i].subject_base];
290  buff[2] = 0;
291  btop += (string)buff;
292 
293  // assemble SAM MD tag
// entries are written only when the subject has a base at this edit
294  if (buff[1] != '-') {
// continuation of a run of query gaps (previous edit was also a query gap
// and no matches in between): append the subject base only, without a
// new count or '^'
295  if (i > 0 && hsp_edits->edits[i - 1].query_base == kGap &&
296  buff[0] == '-' && num_matches == 0) {
297 
298  md_tag += (string)(&buff[1]);
299  }
300  else {
// write the pending match count, then the subject base, preceded by '^'
// when the query has a gap here
301  md_tag += NStr::IntToString(num_matches + md_matches);
302  if (buff[0] == '-') {
303  md_tag += "^";
304  }
305  md_tag += (string)(&buff[1]);
306  md_matches = 0;
307  }
308  }
309  else {
// subject gap: nothing is written to md_tag, the matches are carried
// forward to the next entry
310  md_matches += num_matches;
311  }
312 
// every edit adds one alignment column; the query position advances
// unless the edit is a gap in the query
313  len++;
314  if (hsp_edits->edits[i].query_base != kGap) {
315  query_pos++;
316  }
317  }
// matches after the last edit, up to the end of the HSP
318  num_matches = hsp->query.end - query_pos;
319  num_identical += num_matches;
320  if (num_matches > 0) {
321  btop += NStr::IntToString(num_matches);
322  md_matches += num_matches;
323  }
324  }
// flush matches that were not yet written to md_tag
325  if (md_matches > 0) {
326  md_tag += NStr::IntToString(md_matches);
327  }
328  len += num_identical;
329 
// NOTE(review): len is not checked for zero before the division
330  perc_id = (double)(num_identical * 100) / (double)len;
331 }
332 
333 
335  CRef<ILocalQueryData>& qdata,
336  CRef<IBlastSeqInfoSrc>& seqinfo_src,
337  const BlastQueryInfo* query_info,
338  bool btop_splice_signals)
339 {
340  CRef<CSeq_align> align(new CSeq_align);
342  align->SetDim(2);
343 
344  int query_index = chain->context / NUM_STRANDS;
345  CConstRef<CSeq_loc> query_loc = qdata->GetSeq_loc(query_index);
346  CRef<CSeq_id> query_id(new CSeq_id);
347  SerialAssign(*query_id, CSeq_loc_CI(*query_loc).GetSeq_id());
348  _ASSERT(query_id);
349  TSeqPos query_length = static_cast<TSeqPos>(qdata->GetSeqLength(query_index));
350 
351  CRef<CSeq_id> subject_id;
352  TSeqPos subj_length;
353  GetSequenceLengthAndId(seqinfo_src, chain->oid, CSeq_id::BlastRank,
354  subject_id, &subj_length);
355 
356 
357  MakeSplicedSeg(align->SetSegs().SetSpliced(), query_id, subject_id,
358  query_length, chain);
359 
360  // alignment score
362 
363  // user object stores auxiliary information needed for various output
364  // formats
365  CRef<CUser_object> user_object(new CUser_object);
366  user_object->SetType().SetStr("Mapper Info");
367  align->SetExt().push_back(user_object);
368  // for SAM
369  // context is needed mostly for printing query sequences, mostly for
370  // convenience and fast lookup
371  user_object->AddField("context", chain->context);
372  user_object->AddField("num_hits", chain->count);
373 
374  // for tabular
375  string btop;
376  string md_tag;
377  double perc_id;
378  s_ComputeBtopAndIdentity(chain, btop, md_tag, perc_id,
379  btop_splice_signals);
380  user_object->AddField("btop", btop);
381  user_object->AddField("md_tag", md_tag);
383  perc_id);
384 
385  // to differentiate between the first and last segment of a paired read
386  // if sequence ids cannot be trusted
387  user_object->AddField("segment",
388  query_info->contexts[chain->context].segment_flags);
389 
390  return align;
391 }
392 
394  const HSPChain* chains,
395  CRef<ILocalQueryData> qdata,
396  CRef<IBlastSeqInfoSrc> seqinfo_src,
397  const BlastQueryInfo* query_info,
398  bool btop_splice_signals)
399 {
401 
402  // single spliced alignment
403  for (const HSPChain* chain = chains; chain; chain = chain->next) {
404 
405  // mate pairs are processed together when the first one is
406  // encountered, so skip the second of the pair
407  if (chain->pair && chain->context > chain->pair->context) {
408  continue;
409  }
410 
411  CRef<CSeq_align> align;
412 
413  // pairs are reported as disc seg alignment composed of two
414  // spliced segs
415  if (chain->pair) {
416  align.Reset(new CSeq_align);
418  align->SetDim(2);
419 
420  CSeq_align::TSegs::TDisc& disc = align->SetSegs().SetDisc();
421  disc.Set().push_back(s_CreateSeqAlign(chain, qdata, seqinfo_src,
422  query_info,
423  btop_splice_signals));
424  disc.Set().push_back(s_CreateSeqAlign(chain->pair, qdata,
425  seqinfo_src, query_info,
426  btop_splice_signals));
427  }
428  else {
429  align = s_CreateSeqAlign(chain, qdata, seqinfo_src, query_info,
430  btop_splice_signals);
431  }
432 
433  seq_aligns->Set().push_back(align);
434  }
435 
436  return seq_aligns;
437 }
438 
439 
441  const BlastMappingResults* results)
442 {
443  TSeqAlignVector aligns;
444  aligns.reserve(results->num_queries);
445 
447  query_ids.reserve(results->num_queries);
448 
451  CRef<IBlastSeqInfoSrc> seqinfo_src;
452  seqinfo_src.Reset(m_LocalDbAdapter->MakeSeqInfoSrc());
453  _ASSERT(seqinfo_src);
454 
455  _ASSERT(results->num_queries == (int)query_data->GetNumQueries());
456 
458  for (int index=0;index < results->num_queries;index++) {
459  HSPChain* chains = results->chain_array[index];
460  CRef<CSeq_align_set> seq_aligns(x_CreateSeqAlignSet(chains, query_data,
461  seqinfo_src,
462  query_info,
464 
465  for (auto it: seq_aligns->Get()) {
466  retval->Set().push_back(it);
467  }
468  }
469 
470  return retval;
471 }
472 
473 // paired alignments go first
476  return a->GetSegs().IsDisc() && !b->GetSegs().IsDisc();
477  }
478 };
479 
480 
482  const BlastMappingResults* results)
483 {
485  CRef<IBlastSeqInfoSrc> seqinfo_src;
486  seqinfo_src.Reset(m_LocalDbAdapter->MakeSeqInfoSrc());
487  _ASSERT(seqinfo_src);
488 
490  _ASSERT(results->num_queries == (int)query_data->GetNumQueries());
491 
492  const TSeqLocInfoVector& query_masks = m_PrelimSearch->GetQueryMasks();
493 
495  retval->reserve(results->num_queries);
496 
497  for (int index=0;index < results->num_queries;index++) {
498  HSPChain* chains = results->chain_array[index];
499  CConstRef<CSeq_id> query_id(query_data->GetSeq_loc(index)->GetId());
500  CRef<CSeq_align_set> aligns(x_CreateSeqAlignSet(chains, query_data,
501  seqinfo_src,
502  query_info,
504 
505  int query_length =
506  query_info->contexts[index * NUM_STRANDS].query_length;
508 
509  if (query_info->contexts[index * NUM_STRANDS].segment_flags ==
510  eFirstSegment) {
511 
512  _ASSERT(query_info->contexts[(index + 1) * NUM_STRANDS]
514 
515  CConstRef<CSeq_id> mate_id(
516  query_data->GetSeq_loc(index + 1)->GetId());
517 
518  int mate_length =
519  query_info->contexts[(index + 1) * NUM_STRANDS].query_length;
520 
521  chains = results->chain_array[index + 1];
522  CRef<CSeq_align_set> mate_aligns(x_CreateSeqAlignSet(chains,
523  query_data,
524  seqinfo_src,
525  query_info,
527 
528  for (auto it: mate_aligns->Get()) {
529  aligns->Set().push_back(it);
530  }
531 
532  // sort results so that pairs go first
533  aligns->Set().sort(seq_align_pairs_first());
534 
535  res.Reset(new CMagicBlastResults(query_id, mate_id, aligns,
536  &query_masks[index],
537  &query_masks[index + 1],
538  query_length,
539  mate_length));
540  index++;
541  }
542  else {
543  res.Reset(new CMagicBlastResults(query_id, aligns,
544  &query_masks[index],
545  query_length));
546  }
547 
548  retval->push_back(res);
549  }
550 
551  return retval;
552 }
553 
554 
556  CConstRef<CSeq_id> mate_id,
557  CRef<CSeq_align_set> aligns,
558  const TMaskedQueryRegions* query_mask /* = NULL */,
559  const TMaskedQueryRegions* mate_mask /* = NULL */,
560  int query_length /* = 0 */,
561  int mate_length /* = 0 */)
562  : m_QueryId(query_id),
563  m_MateId(mate_id),
564  m_Aligns(aligns),
565  m_Paired(true)
566 {
567  x_SetInfo(query_length, query_mask, mate_length, mate_mask);
568 }
569 
570 
572  CRef<CSeq_align_set> aligns,
573  const TMaskedQueryRegions* query_mask /* = NULL */,
574  int query_length /* = 0 */)
575  : m_QueryId(query_id),
576  m_Aligns(aligns),
577  m_Paired(false)
578 {
579  x_SetInfo(query_length, query_mask);
580 }
581 
582 
583 // Sort alignments so that paired, forward-reversed alignments are first
585 {
587  {
588  if (a->GetSegs().IsDisc() && b->GetSegs().IsDisc()) {
589  const CSeq_align& a_first = *a->GetSegs().GetDisc().Get().front();
590  const CSeq_align& a_second = *a->GetSegs().GetDisc().Get().back();
591  const CSeq_align& b_first = *b->GetSegs().GetDisc().Get().front();
592  const CSeq_align& b_second = *b->GetSegs().GetDisc().Get().back();
593 
594  if (a_first.GetSeqStrand(0) == eNa_strand_plus &&
595  a_second.GetSeqStrand(0) == eNa_strand_minus &&
596  a_first.GetSeqStart(1) <= a_second.GetSeqStart(1) &&
597  (b_first.GetSeqStrand(0) != eNa_strand_plus ||
598  b_second.GetSeqStrand(0) != eNa_strand_minus ||
599  b_first.GetSeqStart(1) > b_second.GetSeqStart(1))) {
600 
601  return true;
602  }
603 
604  return false;
605  }
606 
607  return (a->GetSegs().IsDisc() && !b->GetSegs().IsDisc());
608  }
609 };
610 
611 
612 // Sort alignments so that paired, reversed-forward alignments are first
614 {
616  {
617  if (a->GetSegs().IsDisc() && b->GetSegs().IsDisc()) {
618  const CSeq_align& a_first = *a->GetSegs().GetDisc().Get().front();
619  const CSeq_align& a_second = *a->GetSegs().GetDisc().Get().back();
620  const CSeq_align& b_first = *b->GetSegs().GetDisc().Get().front();
621  const CSeq_align& b_second = *b->GetSegs().GetDisc().Get().back();
622 
623  if (a_first.GetSeqStrand(0) == eNa_strand_minus &&
624  a_second.GetSeqStrand(0) == eNa_strand_plus &&
625  a_second.GetSeqStart(1) <= a_first.GetSeqStart(1) &&
626  (b_first.GetSeqStrand(0) != eNa_strand_minus ||
627  b_second.GetSeqStrand(0) != eNa_strand_plus ||
628  b_second.GetSeqStart(1) > b_first.GetSeqStart(1))) {
629 
630  return true;
631  }
632 
633  return false;
634  }
635 
636  return (a->GetSegs().IsDisc() && !b->GetSegs().IsDisc());
637  }
638 };
639 
641 {
642  if (order == eFwRevFirst) {
644  }
645  else {
647  }
648 }
649 
650 
// Resets m_FirstInfo, m_LastInfo and m_Concordant, then derives the
// aligned/concordant status of the read (or read pair) from m_Aligns and
// examines the query masks.
//
// @param first_length Length of the first read, compared with the length of
//        its first masked region
// @param first_mask Masked regions of the first read; may be NULL
// @param last_length Length of the mate, used the same way as first_length
// @param last_mask Masked regions of the mate; may be NULL
//
// NOTE(review): the statements inside the four final if-blocks (source lines
// 710, 714, 720 and 727) are absent from this listing; presumably they set
// the unaligned / filtered flags in m_FirstInfo and m_LastInfo -- confirm
// against the original file
651 void CMagicBlastResults::x_SetInfo(int first_length,
652  const TMaskedQueryRegions* first_mask,
653  int last_length /* = 0 */,
654  const TMaskedQueryRegions* last_mask /* = NULL */)
655 {
656  m_FirstInfo = 0;
657  m_LastInfo = 0;
658  m_Concordant = false;
659 
660  bool first_aligned = false;
661  bool last_aligned = false;
662 
// single read: aligned if there is any alignment; always marked concordant
663  if (!m_Paired) {
664  first_aligned = !m_Aligns->Get().empty();
665  m_Concordant = true;
666  }
667  else {
668 
669  for (auto it: m_Aligns->Get()) {
// a disc alignment holds both reads of a pair, so both count as aligned;
// only the first disc alignment found is examined (see break below)
670  if (it->GetSegs().IsDisc()) {
671  first_aligned = true;
672  last_aligned = true;
673 
674  const CSeq_align_set::Tdata& sasd =
675  it->GetSegs().GetDisc().Get();
676  ASSERT(sasd.size() == 2);
677 
// strand is read from row 0 and start position from row 1 of each
// alignment (row 1 is presumably the subject -- confirm)
678  CRef<CSeq_align> sa_q = sasd.front();
679  ENa_strand str_q = sa_q->GetSeqStrand(0);
680  TSeqPos sp_q = sa_q->GetSeqStart(1);
681 
682  CRef<CSeq_align> sa_m = sasd.back();
683  ENa_strand str_m = sa_m->GetSeqStrand(0);
684  TSeqPos sp_m = sa_m->GetSeqStart(1);
685 
// concordant when the reads are on opposite strands and the plus-strand
// read does not start after the minus-strand read
686  if (str_q == eNa_strand_plus
687  && str_m == eNa_strand_minus) {
688  if (sp_q <= sp_m) {
689  m_Concordant = true;
690  }
691  } else if (str_q == eNa_strand_minus
692  && str_m == eNa_strand_plus) {
693  if (sp_q >= sp_m) {
694  m_Concordant = true;
695  }
696  }
697 
698  break;
699  }
// non-disc alignment: attribute it to the query or the mate by the id in
// row 0
700  else if (it->GetSeq_id(0).Match(*m_QueryId)) {
701  first_aligned = true;
702  }
703  else if (it->GetSeq_id(0).Match(*m_MateId)) {
704  last_aligned = true;
705  }
706  }
707  }
708 
709  if (!first_aligned) {
711  }
712 
// last_aligned is never set for an unpaired result, so this branch is also
// taken for single reads
713  if (!last_aligned) {
715  }
716 
// only the first masked region is inspected; the test passes when its
// length plus one reaches the read length
717  if (first_mask && !first_mask->empty()) {
718  TSeqRange first_range(*first_mask->front());
719  if (first_range.GetLength() + 1 >= (TSeqPos)first_length) {
721  }
722  }
723 
724  if (last_mask && !last_mask->empty()) {
725  TSeqRange last_range(*last_mask->front());
726  if (last_range.GetLength() + 1 >= (TSeqPos)last_length) {
728  }
729  }
730 }
731 
733 {
735 
736  for (auto result: *this) {
737 
738  if (no_discordant && result->IsPaired() && !result->IsConcordant()) {
739  continue;
740  }
741 
742  for (auto it: result->GetSeqAlign()->Get()) {
743  retval->Set().push_back(it);
744  }
745  }
746 
747  return retval;
748 }
749 
750 
751 END_SCOPE(blast)
753 
754 /* @} */
Auxiliary functions for BLAST.
#define NUM_STRANDS
Number of frames in a nucleotide sequence.
Definition: blast_def.h:93
void BlastHSPStreamMappingClose(BlastHSPStream *hsp_stream, BlastMappingResults *results)
Closes BlastHSPStream structure for mapping and produces BlastMappingResults.
@ eFirstSegment
The first sequence of a pair with both sequences read and accepted.
@ eLastSegment
Definition of classes which constitute the results of running a BLAST search.
Utility function to convert internal BLAST result structures into objects::CSeq_align_set objects.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
Class used to return ancillary data from a blast search, i.e.
Defines BLAST error codes (user errors included)
Search class to perform the preliminary stage of the BLAST search.
Index wrapper exceptions.
Results of Magic-BLAST mapping.
Definition: magicblast.hpp:241
Magic-BLAST results for a single query/read or a pair of reads.
Definition: magicblast.hpp:137
CNcbiEnvironment –.
Definition: ncbienv.hpp:104
@ eScore_PercentIdentity_Gapped
Definition: Seq_align.hpp:163
void SetNamedScore(const string &id, int score)
Definition: Seq_align.cpp:636
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
Abstract base class to encapsulate retrieval of sequence identifiers.
Collection of masked regions for a single query sequence.
Definition: seqlocinfo.hpp:113
Class for the messages for an individual query sequence.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
virtual CConstRef< objects::CSeq_loc > GetSeq_loc(size_t index)=0
Get the Seq_loc for the sequence indicated by index.
int CheckInternalData()
Checks that internal data is valid.
size_t GetNumberOfThreads(void) const
Accessor for the number of threads to use.
bool operator()(const CRef< CSeq_align > &a, const CRef< CSeq_align > &b)
Definition: magicblast.cpp:615
CRef< CLocalDbAdapter > m_LocalDbAdapter
Reference to a BLAST subject/database object.
Definition: magicblast.hpp:116
bool m_Concordant
True if results are concordant pair.
Definition: magicblast.hpp:229
CRef< IQueryFactory > m_Queries
Queries.
Definition: magicblast.hpp:113
bool m_Paired
True if results are for paired reads.
Definition: magicblast.hpp:226
CRef< SInternalData > m_InternalData
Internal data structures.
Definition: magicblast.hpp:125
bool operator()(const CRef< CSeq_align > &a, const CRef< CSeq_align > &b)
Definition: magicblast.cpp:586
CRef< SInternalData > Run()
Borrow the internal data and results.
CRef< CMagicBlastResultSet > RunEx(void)
Definition: magicblast.cpp:87
void x_SetInfo(int first_length, const TMaskedQueryRegions *first_masks, int last_length=0, const TMaskedQueryRegions *last_masks=NULL)
Definition: magicblast.cpp:651
CStructWrapper< TData > * WrapStruct(TData *obj, TData *(*del)(TData *))
Auxiliary function to create a CStructWrapper for a pointer to an object.
void SortAlignments(EOrdering order)
Sort alignments by selected criteria (pair configuration)
Definition: magicblast.cpp:640
CRef< CSeq_align_set > x_BuildSeqAlignSet(const BlastMappingResults *results)
Definition: magicblast.cpp:440
const char BLASTNA_TO_IUPACNA[]
Translates between blastna and iupacna.
vector< CConstRef< objects::CSeq_id > > TQueryIdVector
List of query ids.
CRef< ILocalQueryData > MakeLocalQueryData(const CBlastOptions *opts)
Creates and caches an ILocalQueryData.
Definition: query_data.cpp:52
bool IsBlastDb() const
Returns true if this object represents a BLAST database.
virtual void SetNumberOfThreads(size_t nthreads)
@inheritDoc
void x_Validate(void)
Perform sanity checks on input arguments.
Definition: magicblast.cpp:190
static CRef< CSeq_align > s_CreateSeqAlign(const HSPChain *chain, CRef< ILocalQueryData > &qdata, CRef< IBlastSeqInfoSrc > &seqinfo_src, const BlastQueryInfo *query_info, bool btop_splice_signals)
Definition: magicblast.cpp:334
static CRef< CSeq_align_set > x_CreateSeqAlignSet(const HSPChain *results, CRef< ILocalQueryData > qdata, CRef< IBlastSeqInfoSrc > seqinfo_src, const BlastQueryInfo *query_info, bool btop_splice_signals)
Create results.
Definition: magicblast.cpp:393
CRef< CSeq_align_set > GetFlatResults(bool no_discordant=false)
Get all results as a single Seq-align-set object.
Definition: magicblast.cpp:732
CConstRef< CSeq_id > m_QueryId
Query id.
Definition: magicblast.hpp:217
void Combine(const TSearchMessages &other_msgs)
Combine another set of search messages with this one.
Definition: blast_aux.cpp:1028
bool m_BtopSpliceSignals
Should BTOP strings be formatted with splice signals.
Definition: magicblast.hpp:131
EOrdering
Ordering of alignments.
Definition: magicblast.hpp:152
IBlastSeqInfoSrc * MakeSeqInfoSrc()
Retrieves or constructs the IBlastSeqInfoSrc.
BlastQueryInfo * m_QueryInfo
The query information structure.
bool operator()(const CRef< CSeq_align > &a, const CRef< CSeq_align > &b)
Definition: magicblast.cpp:475
CConstRef< CSeq_id > m_MateId
Mate id if results are for paired reads.
Definition: magicblast.hpp:220
virtual size_t GetNumQueries()=0
Get the number of queries.
int x_Run(void)
Definition: magicblast.cpp:104
CRef< CSeq_align_set > CreateEmptySeq_align_set()
Constructs an empty Seq-align-set containing an empty discontinuous seq-align, and appends it to a pr...
TResultsInfo m_FirstInfo
Alignment flags for the query.
Definition: magicblast.hpp:232
virtual size_t Size() const =0
Returns the size of the underlying container of sequences.
void GetQueryMessages(size_t index, TQueryMessages &qmsgs)
Retrieve error/warning messages for a specific query.
Definition: query_data.cpp:135
CRef< CSeq_align_set > m_Aligns
Alignments for a single or a pair of reads.
Definition: magicblast.hpp:223
void MakeSplicedSeg(CSpliced_seg &spliced_seg, CRef< CSeq_id > product_id, CRef< CSeq_id > genomic_id, int product_length, const HSPChain *chain)
Convert a spliced alignment in BlastHSPChain into Spliced_seg.
CMagicBlast(CRef< IQueryFactory > query_factory, CRef< CLocalDbAdapter > blastdb, CRef< CMagicBlastOptionsHandle > options)
Constructor to map short reads as queries to a genome as BLAST database.
Definition: magicblast.cpp:54
static void s_ComputeBtopAndIdentity(const HSPChain *chain, string &btop, string &md_tag, double &perc_id, bool btop_splice_signals)
Definition: magicblast.cpp:208
CRef< CBlastPrelimSearch > m_PrelimSearch
Object that runs BLAST search.
Definition: magicblast.hpp:122
virtual size_t GetSeqLength(size_t index)=0
Get the length of the sequence indicated by index.
CRef< CBlastOptions > m_Options
Options to configure the search.
Definition: magicblast.hpp:119
TSearchMessages GetSearchMessages() const
Retrieve any error/warning messages that occurred during the search.
const TSeqLocInfoVector & GetQueryMasks(void) const
Return query masks.
CRef< CSeq_align_set > Run(void)
Run the RNA-Seq mapping.
Definition: magicblast.cpp:70
CRef< TBlastHSPStream > m_HspStream
HSP output of the preliminary stage goes here.
CMagicBlastResults(CConstRef< CSeq_id > query_id, CConstRef< CSeq_id > mate_id, CRef< CSeq_align_set > aligns, const TMaskedQueryRegions *query_mask=NULL, const TMaskedQueryRegions *mate_mask=NULL, int query_length=0, int mate_length=0)
Constructor for a pair.
Definition: magicblast.cpp:555
void GetSequenceLengthAndId(const IBlastSeqInfoSrc *seqinfo_src, int oid, CRef< objects::CSeq_id > &seqid, TSeqPos *length)
Retrieves subject sequence Seq-id and length.
CRef< CMagicBlastResultSet > x_BuildResultSet(const BlastMappingResults *results)
Definition: magicblast.cpp:481
TResultsInfo m_LastInfo
Alignment flags for the mate.
Definition: magicblast.hpp:235
bool IsDbScanMode() const
Returns true if this is not a database but is database scanning mode.
@ eCoreBlastError
FIXME: need to interpret CORE errors.
@ fUnaligned
Read is unaligned.
Definition: magicblast.hpp:143
@ fFiltered
Read did not pass quality filtering.
Definition: magicblast.hpp:146
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbiexpt.cpp:453
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
C & SerialAssign(C &dest, const C &src, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
Definition: serialbase.hpp:482
static int BlastRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:750
const CSeq_id & GetSeq_id(void) const
Get seq_id of the current location.
Definition: Seq_loc.hpp:1028
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
position_type GetLength(void) const
Definition: range.hpp:158
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5083
void SetType(TType &value)
Assign a value to Type data member.
Tdata & Set(void)
Assign a value to data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Seq_align_.hpp:865
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
TExt & SetExt(void)
Assign a value to Ext data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
@ eType_partial
mapping pieces together
Definition: Seq_align_.hpp:103
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
#define MAPPER_SPLICE_SIGNAL
Definition: jumper.h:233
int i
int len
Declares CMagicBlast, the C++ API for the BLAST RNA-Seq mapping engine.
unsigned int a
Definition: ncbi_localip.c:102
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
int tolower(Uchar c)
Definition: ncbictype.hpp:72
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
Definition: seqlocinfo.hpp:139
BlastMappingResults * Blast_MappingResultsFree(BlastMappingResults *results)
Free BlastMappingResults structure.
Definition: spliced_hits.c:170
BlastMappingResults * Blast_MappingResultsNew(void)
Initialize BlastMappingResults structure.
Definition: spliced_hits.c:164
Int4 query_length
Length of this query, strand or frame.
Int4 segment_flags
Flags describing segments for paired reads.
Uint1 left_edge
Two subject bases before the alignment in the four least significant bits and flags in most significa...
Definition: blast_hits.h:116
JumperEditsBlock * edits
Information about mismatches and gaps, used for mapping short reads.
Definition: blast_hits.h:114
Structure holding all information about an HSP.
Definition: blast_hits.h:126
BlastSeg query
Query sequence info.
Definition: blast_hits.h:131
BlastSeg subject
Subject sequence info.
Definition: blast_hits.h:132
BlastHSPMappingInfo * map_info
Definition: blast_hits.h:146
Structure that contains BLAST mapping results.
Definition: spliced_hits.h:91
HSPChain ** chain_array
Definition: spliced_hits.h:93
The query related information.
BlastContextInfo * contexts
Information per context.
Int4 end
End of hsp.
Definition: blast_hits.h:99
Int4 offset
Start of hsp.
Definition: blast_hits.h:98
A chain of HSPs: spliced alignment.
Definition: spliced_hits.h:60
Int4 score
Alignment score for the chain.
Definition: spliced_hits.h:63
Int4 count
Number of placements for the read.
Definition: spliced_hits.h:66
HSPContainer * hsps
A list of HSPs that belong to this chain.
Definition: spliced_hits.h:64
Int4 oid
Subject oid.
Definition: spliced_hits.h:62
Int4 context
Context number of query sequence.
Definition: spliced_hits.h:61
struct HSPChain * next
Pointer to the next chain in a list.
Definition: spliced_hits.h:73
struct HSPContainer * next
Definition: spliced_hits.h:45
BlastHSP * hsp
Definition: spliced_hits.h:44
Uint1 query_base
Query base at this position.
Definition: jumper.h:89
Uint1 subject_base
Subject base at this position.
Definition: jumper.h:90
Int4 query_pos
Query position.
Definition: jumper.h:88
Alignment edit script for gapped alignment.
Definition: jumper.h:96
JumperEdit * edits
Definition: jumper.h:97
Int4 num_edits
Definition: jumper.h:98
#define _ASSERT
else result
Definition: token2.c:20
static HENV env
Definition: transaction2.c:38
Modified on Tue Dec 05 02:02:37 2023 by modify_doxy.py rev. 669887