NCBI C++ ToolKit
blast_seqalign.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_seqalign.cpp 100101 2023-06-15 14:10:29Z merezhuk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Christiam Camacho
27 *
28 * ===========================================================================
29 */
30 
31 /// @file blast_seqalign.cpp
32 /// Utility function to convert internal BLAST result structures into
33 /// CSeq_align_set objects.
34 
35 #include <ncbi_pch.hpp>
36 #include "blast_seqalign.hpp"
37 
45 #include <serial/iterator.hpp>
47 #include "../core/jumper.h"
48 
49 #include <algorithm>
50 
51 /** @addtogroup AlgoBlast
52  *
53  * @{
54  */
55 
58 BEGIN_SCOPE(blast)
59 
60 #ifndef SMALLEST_EVALUE
61 /// Threshold below which e-values are saved as 0
62 #define SMALLEST_EVALUE 1.0e-180
63 #endif
64 #ifndef GAP_VALUE
65 /// Value in the Dense-seg indicating a gap
66 #define GAP_VALUE -1
67 #endif
68 
69 /// BLAST alignments have always 2 dimensions (i.e.: Pairwise alignment is 2
70 /// dimensional)
71 static const TSeqPos kBlastAlignmentDim = 2;
72 
73 /// Converts a frame into the appropriate strand
74 static ENa_strand
75 s_Frame2Strand(short frame)
76 {
77  if (frame > 0)
78  return eNa_strand_plus;
79  else if (frame < 0)
80  return eNa_strand_minus;
81  else
82  return eNa_strand_unknown;
83 }
84 
/// Reports the position an edit script instruction starts at and moves the
/// running position past that instruction.
/// @param pos Running position; advanced by pos2advance on return [in] [out]
/// @param pos2advance Number of positions the instruction consumes [in]
/// @return The position before advancing when it is non-negative; for a
/// negative position, -(pos + pos2advance - 1) computed from the value on
/// entry.
/// @todo FIXME: it is unclear whether a negative position can occur.
static int
s_GetCurrPos(int& pos, int pos2advance)
{
    const int before = pos;

    pos = before + pos2advance;

    return (before < 0) ? -(before + pos2advance - 1) : before;
}
101 
102 /// Finds the starting position of a sequence segment in an alignment, given an
103 /// editing script.
104 /// @param curr_pos Current position on input, modified to next position on
105 /// output [in] [out]
106 /// @param num number of letters specified by traceback edit [in]
107 /// @param strand Sequence strand [in]
108 /// @param translate Is sequence translated? [in]
109 /// @param length Sequence length [in]
110 /// @param original_length Original (nucleotide) sequence length, if it is
111 /// translated [in]
112 /// @param frame Translating frame [in]
113 /// @return Start position of the current alignment segment.
114 static TSeqPos
115 s_GetAlignmentStart(int& curr_pos, int num,
116  ENa_strand strand, bool translate, int length, int original_length,
117  short frame)
118 {
119  TSeqPos retval;
120 
121  if (strand == eNa_strand_minus) {
122 
123  if (translate)
124  retval = original_length -
125  CODON_LENGTH*(s_GetCurrPos(curr_pos, num) + num)
126  + frame + 1;
127  else
128  retval = length - s_GetCurrPos(curr_pos, num) - num;
129 
130  } else {
131 
132  if (translate)
133  retval = frame - 1 + CODON_LENGTH*s_GetCurrPos(curr_pos, num);
134  else
135  retval = s_GetCurrPos(curr_pos, num);
136 
137  }
138 
139  return retval;
140 }
141 
142 /// Finds length of a protein frame given a nucleotide length and a frame
143 /// number.
144 /// @param nuc_length Nucleotide sequence length [in]
145 /// @param frame Translation frame [in]
146 /// @return Length of the translated sequence.
// NOTE(review): the declarator line (function name and parameter list) is
// absent from this listing; the body reads `nuc_length` and `frame`.
147 static Int4
149 {
// (ABS(frame)-1)%CODON_LENGTH is subtracted before the integer division, so
// the result counts whole codons only. For |frame| >= 1 that term lies in
// 0..CODON_LENGTH-1.
150  return (nuc_length - (ABS(frame)-1)%CODON_LENGTH) / CODON_LENGTH;
151 }
152 
153 /// Fills vectors of start positions, lengths and strands for all alignment segments.
154 /// Note that even though the edit_block is passed in, data for seqalign is
155 /// collected from the esp_head argument for nsegs segments. This editing script may
156 /// not be the full editing script if a discontinuous alignment is being built.
157 /// @param hsp HSP structure containing traceback information. [in]
158 /// @param esp Traceback edit script [in]
159 /// @param first first element of GapEditScript to use [in]
160 /// @param nsegs Number of alignment segments [in]
161 /// @param starts Vector of starting positions to fill [out]
162 /// @param lengths Vector of segment lengths to fill [out]
163 /// @param strands Vector of segment strands to fill [out]
164 /// @param query_length Length of query sequence [in]
165 /// @param subject_length Length of subject sequence [in]
166 /// @param translate1 Is query translated? [in]
167 /// @param translate2 Is subject translated? [in]
// NOTE(review): the line carrying the function name and its leading
// parameters is absent from this listing; the body uses `hsp` and `esp`.
168 static void
170  unsigned int first, unsigned int nsegs,
171  CDense_seg::TStarts& starts,
172  CDense_seg::TLens& lengths,
173  CDense_seg::TStrands& strands,
174  Int4 query_length, Int4 subject_length,
175  bool translate1, bool translate2)
176 {
177  _ASSERT(hsp != NULL);
178
179  ENa_strand m_strand, s_strand; // strands of alignment
180  TSignedSeqPos m_start, s_start; // running starts of alignment
181  int start1 = hsp->query.offset; // start of alignment on master sequence
182  int start2 = hsp->subject.offset; // start of alignment on slave sequence
183  int length1 = query_length;
184  int length2 = subject_length;
185
  // One length per segment; one start and one strand per segment per row.
186  lengths.reserve(nsegs);
187  starts.reserve(nsegs*kBlastAlignmentDim);
188  strands.reserve(nsegs*kBlastAlignmentDim);
189
  // For translated sequences, length1/length2 become protein-frame lengths;
  // query_length/subject_length are still passed below as the original
  // lengths.
190  if (translate1)
191  length1 = s_GetProteinFrameLength(length1, hsp->query.frame);
192  if (translate2)
193  length2 = s_GetProteinFrameLength(length2, hsp->subject.frame);
194
195  m_strand = s_Frame2Strand(hsp->query.frame);
196  s_strand = s_Frame2Strand(hsp->subject.frame);
197
  // Visit edit-script entries [first, first+nsegs), never past esp->size.
198  for (unsigned int esp_index = first; esp_index< (unsigned int)esp->size && esp_index < (unsigned int)(first+nsegs); esp_index++) {
199  switch (esp->op_type[esp_index]) {
  // Both sequences advance: a start is computed on each row.
200  case eGapAlignDecline:
201  case eGapAlignSub:
202  m_start =
203  s_GetAlignmentStart(start1, esp->num[esp_index], m_strand, translate1, length1,
204  query_length, hsp->query.frame);
205
206  s_start =
207  s_GetAlignmentStart(start2, esp->num[esp_index], s_strand, translate2, length2,
208  subject_length, hsp->subject.frame);
209
210  strands.push_back(m_strand);
211  strands.push_back(s_strand);
212  starts.push_back(m_start);
213  starts.push_back(s_start);
214  break;
215
216  // Insertion on the master sequence (gap on slave)
217  case eGapAlignIns:
218  m_start =
219  s_GetAlignmentStart(start1, esp->num[esp_index], m_strand, translate1, length1,
220  query_length, hsp->query.frame);
221
222  s_start = GAP_VALUE;
223
224  strands.push_back(m_strand);
  // The gapped row gets eNa_strand_unknown only when this is script entry 0.
225  strands.push_back(esp_index == 0 ? eNa_strand_unknown : s_strand);
226  starts.push_back(m_start);
227  starts.push_back(s_start);
228  break;
229
230  // Deletion on master sequence (gap; insertion on slave)
231  case eGapAlignDel:
232  m_start = GAP_VALUE;
233
234  s_start =
235  s_GetAlignmentStart(start2, esp->num[esp_index], s_strand, translate2, length2,
236  subject_length, hsp->subject.frame);
237
  // The gapped row gets eNa_strand_unknown only when this is script entry 0.
238  strands.push_back(esp_index == 0 ? eNa_strand_unknown : m_strand);
239  strands.push_back(s_strand);
240  starts.push_back(m_start);
241  starts.push_back(s_start);
242  break;
243
  // NOTE(review): any other op type adds nothing to starts/strands, yet its
  // length is still appended after the switch, so the vectors can fall out
  // of step until the resize calls at the end -- confirm this is intended.
244  default:
245  break;
246  }
247
248  lengths.push_back(esp->num[esp_index]);
249  }
250
251  // Make sure the vectors have the right size
252  if (lengths.size() != nsegs)
253  lengths.resize(nsegs);
254
255  if (starts.size() != nsegs*2)
256  starts.resize(nsegs*2);
257
258  if (strands.size() != nsegs*2)
259  strands.resize(nsegs*2);
260 }
261 
262 /// Creates a Dense-seg object from the starts, lengths and strands vectors and two
263 /// Seq-ids.
264 /// @param dense_seg the object to populate [in|out]
265 /// @param master Query Seq-id [in]
266 /// @param slave Subject Seq-ids [in]
267 /// @param starts Vector of start positions for alignment segments [in]
268 /// @param lengths Vector of alignment segments lengths [in]
269 /// @param strands Vector of alignment segments strands [in]
270 /// @return Nothing; dense_seg is populated in place.
// NOTE(review): the line carrying the function name and the `dense_seg`
// parameter is absent from this listing.
271 static void
273  CRef<CSeq_id> master, CRef<CSeq_id> slave,
274  CDense_seg::TStarts& starts,
275  CDense_seg::TLens& lengths,
276  CDense_seg::TStrands& strands)
277 {
278  _ASSERT(master);
279  _ASSERT(slave);
280
281  dense_seg.SetDim(kBlastAlignmentDim);
282
283  // Set the sequence ids
284  CDense_seg::TIds & ids = dense_seg.SetIds();
285  ids.reserve(kBlastAlignmentDim);
286
287  ids.push_back(master);
288  ids.push_back(slave);
289
  // The segment count is read before `lengths` is swapped away.
290  dense_seg.SetNumseg((int) lengths.size());
  // swap() exchanges contents: the three argument vectors end up holding
  // whatever the Dense-seg fields held before the call.
291  dense_seg.SetLens().swap(lengths);
292  dense_seg.SetStrands().swap(strands);
293  dense_seg.SetStarts().swap(starts);
294 }
295 
296 /// Creates a Std-seg object from the starts, lengths and strands vectors and two
297 /// Seq-ids for a translated search.
298 /// @param master Query Seq-id [in]
299 /// @param slave Subject Seq-ids [in]
300 /// @param starts Vector of start positions for alignment segments [in]
301 /// @param lengths Vector of alignment segments lengths [in]
302 /// @param strands Vector of alignment segments strands [in]
303 /// @param translate_master Is query sequence translated? [in]
304 /// @param translate_slave Is subject sequence translated? [in]
305 /// @return The Std-seg object.
// NOTE(review): the return type, function name, the `master`/`slave`
// parameters and the declaration of `retval` are absent from this listing.
308  CDense_seg::TStarts& starts,
309  CDense_seg::TLens& lengths,
310  CDense_seg::TStrands& strands,
311  bool translate_master,
312  bool translate_slave)
313 {
314  _ASSERT(master);
315  _ASSERT(slave);
316
318  int nsegs = (int) lengths.size(); // number of segments in alignment
319  TSignedSeqPos m_start, m_stop; // start and stop for master sequence
320  TSignedSeqPos s_start, s_stop; // start and stop for slave sequence
321
  // One Std-seg per segment; starts/strands hold master at index 2*i and
  // slave at index 2*i+1.
322  for (int i = 0; i < nsegs; i++) {
323  CRef<CStd_seg> std_seg(new CStd_seg());
324  CRef<CSeq_loc> master_loc(new CSeq_loc());
325  CRef<CSeq_loc> slave_loc(new CSeq_loc());
326
327  std_seg->SetDim(kBlastAlignmentDim);
328
329  // Set master seqloc
  // A start equal to GAP_VALUE yields an empty Seq-loc instead of an interval.
330  if ( (m_start = starts[2*i]) != GAP_VALUE) {
331  master_loc->SetInt().SetId(*master);
332  master_loc->SetInt().SetFrom(m_start);
  // On a translated sequence the segment length is scaled by CODON_LENGTH.
333  if (translate_master)
334  m_stop = m_start + CODON_LENGTH*lengths[i] - 1;
335  else
336  m_stop = m_start + lengths[i] - 1;
337  master_loc->SetInt().SetTo(m_stop);
338  master_loc->SetInt().SetStrand(strands[2*i]);
339  } else {
340  master_loc->SetEmpty(*master);
341  }
342
343  // Set slave seqloc
344  if ( (s_start = starts[2*i+1]) != GAP_VALUE) {
345  slave_loc->SetInt().SetId(*slave);
346  slave_loc->SetInt().SetFrom(s_start);
347  if (translate_slave)
348  s_stop = s_start + CODON_LENGTH*lengths[i] - 1;
349  else
350  s_stop = s_start + lengths[i] - 1;
351  slave_loc->SetInt().SetTo(s_stop);
352  slave_loc->SetInt().SetStrand(strands[2*i+1]);
353  } else {
354  slave_loc->SetEmpty(*slave);
355  }
356
  // Ids and locs are stored master first, slave second.
357  std_seg->SetIds().reserve(kBlastAlignmentDim);
358  std_seg->SetLoc().reserve(kBlastAlignmentDim);
359  std_seg->SetIds().push_back(master);
360  std_seg->SetIds().push_back(slave);
361  std_seg->SetLoc().push_back(master_loc);
362  std_seg->SetLoc().push_back(slave_loc);
363
364  retval.push_back(std_seg);
365  }
366
367  return retval;
368 }
369 
370 /// Checks if any decline-to-align segments immediately follow an insertion or
371 /// deletion, and swaps any such segments so indels are always to the right of
372 /// the decline-to-align segments.
373 /// @param hsp HSP structure, containing traceback [in] [out]
// NOTE(review): the declarator line (function name and the `hsp` parameter)
// is absent from this listing.
374 static void
376 {
  // The edit script is modified in place through hsp->gap_info.
377  GapEditScript* esp = hsp->gap_info;
  // Single forward pass; each swap exchanges entry index with entry index-1.
378  for (int index=0; index<esp->size; index++)
379  {
380  // if GAPALIGN_DECLINE immediately follows an insertion or deletion
381  if (index > 0 && esp->op_type[index] == eGapAlignDecline &&
382  (esp->op_type[index-1] == eGapAlignIns || esp->op_type[index-1] == eGapAlignDel))
383  {
384  /* This is invalid condition and regions should be
385  exchanged */
386  int temp_num = esp->num[index];
387  EGapAlignOpType temp_op = esp->op_type[index];
388
389  esp->num[index] = esp->num[index-1];
390  esp->op_type[index] = esp->op_type[index-1];
391  esp->num[index-1] = temp_num;
392  esp->op_type[index-1] = temp_op;
393  }
394  }
395  return;
396 }
397 
398 #if _DEBUG
/// Debug-only consistency check for one spliced exon: sums the lengths of the
/// exon parts and compares the totals with the exon's product and genomic
/// extents. Discrepancies are reported on cerr; nothing is returned or thrown
/// by this function itself.
/// @param exon Exon to check [in]
/// @param product_id Product Seq-id, used only in the diagnostics [in]
/// @param genomic_id Genomic Seq-id, used only in the diagnostics [in]
// NOTE(review): the `case` labels of the switch below are absent from this
// listing.
399 static void
400 s_ValidateExon(const CSpliced_exon& exon, const CSeq_id& product_id,
401  const CSeq_id& genomic_id)
402 {
403  int product_start = exon.GetProduct_start().GetNucpos();
404  int product_end = exon.GetProduct_end().GetNucpos();
405  int genomic_start = exon.GetGenomic_start();
406  int genomic_end = exon.GetGenomic_end();
407
  // Start and end are both inclusive, hence the +1.
408  int product_length = product_end - product_start + 1;
409  int genomic_length = genomic_end - genomic_start + 1;
410
  // p and g accumulate the product and genomic lengths covered by the parts.
411  int p = 0, g = 0;
412  for (const auto& it : exon.GetParts()) {
413  switch (it->Which()) {
  // Match: counts towards both sequences.
415  p += it->GetMatch();
416  g += it->GetMatch();
417  break;
418
  // Mismatch: counts towards both sequences.
420  p += it->GetMismatch();
421  g += it->GetMismatch();
422  break;
423
  // Product insertion: counts towards the product only.
425  p += it->GetProduct_ins();
426  break;
427
  // Genomic insertion: counts towards the genomic sequence only.
429  g += it->GetGenomic_ins();
430  break;
431
432  default:
433  cerr << "Urecognized exon part\t" << product_id.AsFastaString()
434  << "\t" << genomic_id.AsFastaString() << endl;
435  }
436  }
437
438  if (p != product_length) {
439  cerr << "Product\t" << product_id.AsFastaString() << "\t"
440  << exon.GetProduct_start().GetNucpos() << "\t"
441  << product_length << "\t" << p << endl;
442  }
443
444  if (g != genomic_length) {
445  cerr << "Genomic\t" << genomic_id.AsFastaString() << "\t"
446  << exon.GetGenomic_start() << "\t"
447  << genomic_length << "\t" << g << endl;
448  }
449
450 }
451 #endif
452 
/// Fills a Spliced-seg from a chain of HSPs. Consecutive HSPs are merged into
/// one exon until an HSP whose right edge carries MAPPER_SPLICE_SIGNAL (or the
/// end of the chain) is reached; each exon records its extents, strands,
/// optional splice-signal bases and a list of match / mismatch / insertion
/// parts built from the HSPs' edit lists.
/// @param spliced_seg Object to populate [in|out]
/// @param product_id Product (query) Seq-id [in]
/// @param genomic_id Genomic (subject) Seq-id [in]
/// @param product_length Value stored as the product length [in]
/// @param chain HSP chain to convert; chain->hsps must be non-NULL [in]
// NOTE(review): several lines are absent from this listing (among them the
// declarations of `exon`, `chunk`, `l_bases` and `r_bases`, and the opening
// of the acceptor `if` block).
453 void MakeSplicedSeg(CSpliced_seg& spliced_seg,
454  CRef<CSeq_id> product_id,
455  CRef<CSeq_id> genomic_id,
456  int product_length,
457  const HSPChain* chain)
458 {
459  spliced_seg.SetProduct_id(*product_id);
460  spliced_seg.SetGenomic_id(*genomic_id);
461  _ASSERT(chain->hsps);
  // num_hsps is only used by the assertion below.
462  int num_hsps = 0;
463  for (HSPContainer* h = chain->hsps; h; h = h->next) {
464  num_hsps++;
465  }
466  _ASSERT(num_hsps > 0);
  // Strands are taken from the first HSP and applied to every exon.
467  ENa_strand product_strand = s_Frame2Strand(chain->hsps->hsp->query.frame);
468  ENa_strand genomic_strand = s_Frame2Strand(
469  chain->hsps->hsp->subject.frame);
470
472  spliced_seg.SetProduct_length(product_length);
473
474  CSpliced_seg::TExons& exons = spliced_seg.SetExons();
475  const Uint1 kGap = 15; // Gap in BLASTNA
476
  // Each iteration emits one exon; h jumps to last_h at the end of the body
  // so HSPs already merged into the exon are not revisited.
477  for (HSPContainer* h = chain->hsps; h; h = h->next) {
478  BlastHSP* hsp = h->hsp;
479  HSPContainer* last_h = h;
480  _ASSERT(hsp && last_h);
481
482
  // Advance last_h until its HSP has a splice signal on the right edge or
  // there is no next HSP.
483  while (last_h->next &&
484  (last_h->hsp->map_info->right_edge & MAPPER_SPLICE_SIGNAL) == 0) {
485
486  last_h = last_h->next;
487  }
488
489  BlastHSP* last_hsp = last_h->hsp;
490
491  _ASSERT(hsp->gap_info->size > 1 ||
492  hsp->query.end - hsp->query.offset ==
493  hsp->subject.end - hsp->subject.offset);
494
  // The exon spans from the first HSP's offsets to the last HSP's ends; the
  // stored end positions are inclusive, hence the -1.
496  exon->SetProduct_start().SetNucpos(hsp->query.offset);
497  exon->SetProduct_end().SetNucpos(last_hsp->query.end - 1);
498  exon->SetGenomic_start(hsp->subject.offset);
499  exon->SetGenomic_end(last_hsp->subject.end - 1);
500
501  exon->SetProduct_strand(product_strand);
502  exon->SetGenomic_strand(genomic_strand);
503
504
505  // save splice signal before next exon
  // Two 2-bit codes are read from left_edge (bits 3..2, then bits 1..0) and
  // mapped through BLASTNA_TO_IUPACNA.
508  l_bases[0] = BLASTNA_TO_IUPACNA[
509  (int)((hsp->map_info->left_edge >> 2) & 3)];
510  l_bases[1] = BLASTNA_TO_IUPACNA[
511  (int)(hsp->map_info->left_edge & 3)];
512  exon->SetAcceptor_before_exon().SetBases(l_bases);
513  }
514
515  // save splice signal after exon
516  if (last_hsp->map_info->right_edge & MAPPER_SPLICE_SIGNAL) {
518  r_bases[0] = BLASTNA_TO_IUPACNA[
519  (int)((last_hsp->map_info->right_edge >> 2) & 3)];
520  r_bases[1] = BLASTNA_TO_IUPACNA[
521  (int)(last_hsp->map_info->right_edge & 3)];
522  exon->SetDonor_after_exon().SetBases(r_bases);
523  }
524
525
  // Walk the HSPs h..last_h (inclusive) that make up this exon; prev is the
  // HSP handled in the previous iteration (NULL for the first one).
526  for (HSPContainer* hh=h,*prev=NULL;hh != last_h->next;
527  prev = hh, hh = hh->next) {
528
529  int query_pos = hh->hsp->query.offset;
530  int subject_pos = hh->hsp->subject.offset;
531  int num_matches = 0;
532
533  // record gaps between HSPs
534  if (prev) {
535
536  _ASSERT(hh->hsp->query.offset >= prev->hsp->query.end);
537  _ASSERT(hh->hsp->subject.offset >= prev->hsp->subject.end);
538  if (hh->hsp->query.offset > prev->hsp->query.end) {
540  chunk->SetProduct_ins(hh->hsp->query.offset -
541  prev->hsp->query.end);
542
543  exon->SetParts().push_back(chunk);
544  }
545
546  if (hh->hsp->subject.offset > prev->hsp->subject.end) {
548  chunk->SetGenomic_ins(hh->hsp->subject.offset -
549  prev->hsp->subject.end);
550
551  exon->SetParts().push_back(chunk);
552  }
553  }
554
555  const JumperEditsBlock* hsp_edits = hh->hsp->map_info->edits;
556  for (int i=0;i < hsp_edits->num_edits;i++) {
  // Everything between the current position and the next edit is a match run.
557  num_matches = hsp_edits->edits[i].query_pos - query_pos;
558  query_pos += num_matches;
559  subject_pos += num_matches;
560  _ASSERT(num_matches >= 0);
561  if (num_matches > 0) {
562  // record number of matches
564  chunk->SetMatch(num_matches);
565  exon->SetParts().push_back(chunk);
566  }
567
568  // record mismatch or gap
570  _ASSERT(hsp_edits->edits[i].query_base != kGap ||
571  hsp_edits->edits[i].subject_base != kGap);
572
  // Gap in the query => one extra genomic base; gap in the subject => one
  // extra product base; otherwise a single mismatch.
573  if (hsp_edits->edits[i].query_base == kGap) {
574  chunk->SetGenomic_ins(1);
575  subject_pos++;
576  }
577  else if (hsp_edits->edits[i].subject_base == kGap) {
578  chunk->SetProduct_ins(1);
579  query_pos++;
580  }
581  else {
582  chunk->SetMismatch(1);
583  query_pos++;
584  subject_pos++;
585  }
586
587  exon->SetParts().push_back(chunk);
588  }
589
  // Trailing match run after the last edit, clamped at zero.
590  num_matches = MAX(hh->hsp->query.end - query_pos, 0);
591  _ASSERT(hh->hsp->query.end - query_pos >= -1);
592  // an HSP may end with a mismatch or a gap, if a splice signal was
593  // found and HSP extent was updated (mapping reads to a genome)
594  _ASSERT(num_matches >= 0);
595  if (num_matches > 0) {
597  chunk->SetMatch(num_matches);
598  exon->SetParts().push_back(chunk);
599  }
600  }
601
602 #if _DEBUG
603  s_ValidateExon(*exon, *product_id, *genomic_id);
604 #endif
605
606
607  exons.push_back(exon);
  // Resume the outer loop after the last HSP merged into this exon.
608  h = last_h;
609  }
610
611 #if _DEBUG
612  spliced_seg.Validate(true);
613 #endif
614 }
615 
616 /// Creates a Seq-align for a single HSP from precalculated vectors of start
617 /// positions, lengths and strands of segments, sequence identifiers and other
618 /// information.
619 /// @param master Query sequence identifier [in]
620 /// @param slave Subject sequence identifier [in]
621 /// @param starts Start positions of alignment segments [in]
622 /// @param lengths Lengths of alignment segments [in]
623 /// @param strands Strands of alignment segments [in]
624 /// @param translate_master Is query translated? [in]
625 /// @param translate_slave Is subject translated? [in]
626 /// @return Resulting Seq-align object.
// NOTE(review): the line carrying the function name and the `master`/`slave`
// parameters, and two lines after the construction of `sar`, are absent from
// this listing.
627 static CRef<CSeq_align>
  // The three vectors are taken by value, so the callee works on copies of
  // the caller's vectors.
629  CDense_seg::TStarts starts,
630  CDense_seg::TLens lengths,
631  CDense_seg::TStrands strands,
632  bool translate_master, bool translate_slave)
633 {
634  CRef<CSeq_align> sar(new CSeq_align());
637
  // Any translated sequence => list of Std-segs; otherwise a single Dense-seg.
638  if (translate_master || translate_slave) {
639  sar->SetSegs().SetStd() =
640  s_CreateStdSegs(master, slave, starts, lengths, strands,
641  translate_master, translate_slave);
642  } else {
643  s_CreateDenseg(sar->SetSegs().SetDenseg(), master, slave, starts,
644  lengths, strands);
645  }
646
647  return sar;
648 }
649 
650 /// Converts a traceback editing block to a Seq-align, provided the 2 sequence
651 /// identifiers.
652 /// @param program Type of BLAST program [in]
653 /// @param hsp Internal HSP structure [in]
654 /// @param id1 Query sequence identifier [in]
655 /// @param id2 Subject sequence identifier [in]
656 /// @param query_length Length of query sequence [in]
657 /// @param subject_length Length of subject sequence [in]
658 /// @return Resulting Seq-align object.
// NOTE(review): the line carrying the function name and the `program`/`hsp`
// parameters, and one line after the construction of `seqalign`, are absent
// from this listing.
659 static CRef<CSeq_align>
661  CRef<CSeq_id> id1, CRef<CSeq_id> id2,
662  Int4 query_length, Int4 subject_length)
663 {
664  _ASSERT(hsp != NULL);
665
666  CDense_seg::TStarts starts;
667  CDense_seg::TLens lengths;
668  CDense_seg::TStrands strands;
669  bool translate1, translate2;
670  bool is_disc_align = false;
671
  // An HSP with a zero score produces a null CRef.
672  if (hsp->score == 0) {
673  return CRef<CSeq_align>();
674  }
675
  // The alignment is discontinuous if the script has any decline-to-align op.
676  GapEditScript* t = hsp->gap_info;
677  for (int i=0; i<t->size; i++) {
678  if (t->op_type[i] == eGapAlignDecline)
679  {
680  is_disc_align = true;
681  break;
682  }
683  }
684
  // translate1 applies to the query, translate2 to the subject.
685  translate1 = (program == eBlastTypeBlastx || program == eBlastTypeTblastx ||
686  program == eBlastTypeRpsTblastn);
687  translate2 = (program == eBlastTypeTblastn || program == eBlastTypePsiTblastn ||
688  program == eBlastTypeTblastx);
689
690  if (is_disc_align) {
691
692  /* By request of Steven Altschul - we need to have
693  the unaligned part being to the left if it is adjacent to the
694  gap (insertion or deletion) - so this function will do
695  shuffling */
696  s_CorrectUASequence(hsp);
697
698  CRef<CSeq_align> seqalign(new CSeq_align());
700  seqalign->SetDim(kBlastAlignmentDim);
701
702  bool skip_region;
703  GapEditScript* esp=hsp->gap_info;
704  int nsegs = 0;
705
  // NOTE(review): in this loop `first` is computed but the literal 0 is
  // passed to s_CollectSeqAlignData; `nsegs` is never reset between regions;
  // `index` advances by one regardless of `index2`; and starts/lengths/strands
  // are reused across iterations without being cleared. Verify against the
  // intended region-by-region behaviour before relying on this branch.
706  for (int index=0; index< esp->size; index++)
707  {
708  skip_region = false;
709  int index2 = index;
710  int first = index;
711  for (index2=first; index2<esp->size; index2++, nsegs++){
712  if (esp->op_type[index2] == eGapAlignDecline) {
713  if (nsegs != 0) { // end of aligned region
714  break;
715  } else {
  // Region begins with decline-to-align ops: consume the whole run and
  // mark the region as one to skip.
716  while (index2<esp->size && esp->op_type[index2] == eGapAlignDecline) {
717  nsegs++;
718  index2++;
719  }
720  skip_region = true;
721  break;
722  }
723  }
724  }
725
726  // build seqalign for required regions only
727  if (!skip_region) {
728
729  s_CollectSeqAlignData(hsp, esp, 0, nsegs, starts, lengths,
730  strands, query_length, subject_length,
731  translate1, translate2);
732
733  CRef<CSeq_align> sa_tmp =
734  s_CreateSeqAlign(id1, id2, starts, lengths, strands,
735  translate1, translate2);
736
737  // Add this seqalign to the list
738  if (sa_tmp)
739  seqalign->SetSegs().SetDisc().Set().push_back(sa_tmp);
740  }
741  }
742
743  return seqalign;
744
745  } else {
746
  // Continuous alignment: convert the whole edit script in one go.
747  s_CollectSeqAlignData(hsp, hsp->gap_info, 0, hsp->gap_info->size, starts, lengths,
748  strands, query_length, subject_length,
749  translate1, translate2);
750
751  CRef<CSeq_align> retval = s_CreateSeqAlign(id1, id2, starts, lengths,
752  strands, translate1,
753  translate2);
754
755  return retval;
756  }
757 }
758 
759 /// This function is used for out-of-frame traceback conversion
760 /// Converts an OOF editing script chain to a Seq-align of type Std-seg.
761 /// @param program BLAST program: blastx or tblastn.
762 /// @param hsp HSP structure containing traceback produced by an out-of-frame
763 /// gapped extension [in]
764 /// @param query_id Query sequence identifier [in]
765 /// @param subject_id Subject sequence identifier [in]
766 /// @param query_length Length of query sequence [in]
767 /// @param subject_length Length of subject sequence [in]
// NOTE(review): the line carrying the function name and the `program`/`hsp`
// parameters is absent from this listing.
/// @return Seq-align holding one Std-seg for each edit-script entry that
/// produces a segment.
768 static CRef<CSeq_align>
770  CRef<CSeq_id> query_id, CRef<CSeq_id> subject_id,
771  Int4 query_length, Int4 subject_length)
772 {
773  _ASSERT(hsp != NULL);
774
775  CRef<CSeq_align> seqalign(new CSeq_align());
776
777  Boolean reverse = FALSE;
778  Int2 frame1, frame2;
779  Int4 start1, start2;
780  Int4 original_length1, original_length2;
781  CRef<CSeq_interval> seq_int1_last;
782  CRef<CSeq_interval> seq_int2_last;
783  CRef<CSeq_id> id1;
784  CRef<CSeq_id> id2;
785  CRef<CSeq_loc> slp1, slp2;
786  ENa_strand strand1, strand2;
787  bool first_shift;
788  Int4 from1, from2, to1, to2;
789
  // For blastx the roles are swapped: sequence 1 is the subject and
  // sequence 2 the query. `reverse` is consulted below so that the query
  // loc/id are stored first in each Std-seg.
790  if (program == eBlastTypeBlastx) {
791  reverse = TRUE;
792  start1 = hsp->subject.offset;
793  start2 = hsp->query.offset;
794  frame1 = hsp->subject.frame;
795  frame2 = hsp->query.frame;
796  original_length1 = subject_length;
797  original_length2 = query_length;
798  id1.Reset(subject_id);
799  id2.Reset(query_id);
800  } else {
801  start1 = hsp->query.offset;
802  start2 = hsp->subject.offset;
803  frame1 = hsp->query.frame;
804  frame2 = hsp->subject.frame;
805  original_length1 = query_length;
806  original_length2 = subject_length;
807  id1.Reset(query_id);
808  id2.Reset(subject_id);
809  }
810
811  strand1 = s_Frame2Strand(frame1);
812  strand2 = s_Frame2Strand(frame2);
813
814  seqalign->SetDim(kBlastAlignmentDim);
815
816  seqalign->SetType(CSeq_align::eType_partial); /**partial for gapped translating search. */
817
818  first_shift = false;
819
820  GapEditScript* esp = hsp->gap_info;
821
  // One pass over the edit script. A case that leaves the switch with
  // `break` has its slp1/slp2 pair appended as a new Std-seg after the
  // switch; a case that ends with `continue` adds no segment of its own.
822  for (int index=0; index<esp->size; index++)
823  {
824  slp1.Reset(new CSeq_loc());
825  slp2.Reset(new CSeq_loc());
826
827  switch (esp->op_type[index]) {
828  case eGapAlignDel: /* deletion of three nucleotides. */
829
830  first_shift = false;
831
832  slp1->SetInt().SetFrom(s_GetCurrPos(start1, esp->num[index]));
833  slp1->SetInt().SetTo(MIN(start1,original_length1) - 1);
834  slp1->SetInt().SetId(*id1);
835  slp1->SetInt().SetStrand(strand1);
836
837  /* Empty nucleotide piece */
838  slp2->SetEmpty(*id2);
839
840  seq_int1_last.Reset(&slp1->SetInt());
841  /* Keep previous seq_int2_last, in case there is a frame shift
842  immediately after this gap */
843
844  break;
845
846  case eGapAlignIns: /* insertion of three nucleotides. */
847  /* If gap is followed after frameshift - we have to
848  add this element for the alignment to be correct */
849
850  if(first_shift) { /* Second frameshift in a row */
851  /* Protein coordinates */
852  slp1->SetInt().SetFrom(s_GetCurrPos(start1, 1));
853  to1 = MIN(start1,original_length1) - 1;
854  slp1->SetInt().SetTo(to1);
855  slp1->SetInt().SetId(*id1);
856  slp1->SetInt().SetStrand(strand1);
857
858  /* Nucleotide scale shifted by op_type */
859  from2 = s_GetCurrPos(start2, 3);
860  to2 = MIN(start2,original_length2) - 1;
861  slp2->SetInt().SetFrom(from2);
862  slp2->SetInt().SetTo(to2);
863  if (start2 > original_length2)
864  slp1->SetInt().SetTo(to1 - 1);
865
866  /* Transfer to DNA minus strand coordinates */
867  if(strand2 == eNa_strand_minus) {
868  slp2->SetInt().SetTo(original_length2 - from2 - 1);
869  slp2->SetInt().SetFrom(original_length2 - to2 - 1);
870  }
871
872  slp2->SetInt().SetId(*id2);
873  slp2->SetInt().SetStrand(strand2);
874
  // This extra segment is appended here, inside the case, before the
  // insertion itself is processed below.
875  CRef<CStd_seg> seg(new CStd_seg());
876  seg->SetDim(kBlastAlignmentDim);
877
878  CStd_seg::TIds& ids = seg->SetIds();
879  ids.reserve(kBlastAlignmentDim);
880  seg->SetLoc().reserve(kBlastAlignmentDim);
881
882  if (reverse) {
883  seg->SetLoc().push_back(slp2);
884  seg->SetLoc().push_back(slp1);
885  ids.push_back(id2);
886  ids.push_back(id1);
887  } else {
888  seg->SetLoc().push_back(slp1);
889  seg->SetLoc().push_back(slp2);
890  ids.push_back(id1);
891  ids.push_back(id2);
892  }
893  ids.resize(seg->GetDim());
894
895  seqalign->SetSegs().SetStd().push_back(seg);
896  }
897
898  first_shift = false;
899
  // NOTE(review): when the first_shift branch above ran, slp1/slp2 are the
  // same CSeq_loc objects already stored in that segment and are modified
  // again below -- confirm this aliasing is intended.
900  /* Protein piece is empty */
901  slp1->SetEmpty(*id1);
902
903  /* Nucleotide scale shifted by 3, protein gapped */
904  from2 = s_GetCurrPos(start2, esp->num[index]*3);
905  to2 = MIN(start2,original_length2) - 1;
906  slp2->SetInt().SetFrom(from2);
907  slp2->SetInt().SetTo(to2);
908
909  /* Transfer to DNA minus strand coordinates */
910  if(strand2 == eNa_strand_minus) {
911  slp2->SetInt().SetTo(original_length2 - from2 - 1);
912  slp2->SetInt().SetFrom(original_length2 - to2 - 1);
913  }
914  slp2->SetInt().SetId(*id2);
915  slp2->SetInt().SetStrand(strand2);
916
917  seq_int1_last.Reset(NULL);
918  seq_int2_last.Reset(&slp2->SetInt()); /* Will be used to adjust "to" value */
919
920  break;
921
922  case eGapAlignSub: /* Substitution. */
923
924  first_shift = false;
925
926  /* Protein coordinates */
927  from1 = s_GetCurrPos(start1, esp->num[index]);
928  to1 = MIN(start1, original_length1) - 1;
929  /* Adjusting last segment and new start point in
930  nucleotide coordinates */
  // The numeric value of the op type is used as the per-unit advance on the
  // nucleotide sequence.
931  from2 = s_GetCurrPos(start2, esp->num[index]*((Uint1)esp->op_type[index]));
932  to2 = start2 - 1;
933  /* Chop off three bases and one residue at a time.
934  Why does this happen, seems like a bug?
935  */
936  while (to2 >= original_length2) {
937  to2 -= 3;
938  to1--;
939  }
940  /* Transfer to DNA minus strand coordinates */
941  if(strand2 == eNa_strand_minus) {
942  int tmp_int;
943  tmp_int = to2;
944  to2 = original_length2 - from2 - 1;
945  from2 = original_length2 - tmp_int - 1;
946  }
947
948  slp1->SetInt().SetFrom(from1);
949  slp1->SetInt().SetTo(to1);
950  slp1->SetInt().SetId(*id1);
951  slp1->SetInt().SetStrand(strand1);
952  slp2->SetInt().SetFrom(from2);
953  slp2->SetInt().SetTo(to2);
954  slp2->SetInt().SetId(*id2);
955  slp2->SetInt().SetStrand(strand2);
956
957
958  seq_int1_last.Reset(&slp1->SetInt()); /* Will be used to adjust "to" value */
959  seq_int2_last.Reset(&slp2->SetInt()); /* Will be used to adjust "to" value */
960
961  break;
962  case eGapAlignDel2: /* gap of two nucleotides. */
963  case eGapAlignDel1: /* Gap of one nucleotide. */
964  case eGapAlignIns1: /* Insertion of one nucleotide. */
965  case eGapAlignIns2: /* Insertion of two nucleotides. */
966
967  if(first_shift) { /* Second frameshift in a row */
968  /* Protein coordinates */
969  from1 = s_GetCurrPos(start1, 1);
970  to1 = MIN(start1,original_length1) - 1;
971
972  /* Nucleotide scale shifted by op_type */
973  from2 = s_GetCurrPos(start2, (Uint1)esp->op_type[index]);
974  to2 = start2 - 1;
975  if (to2 >= original_length2) {
976  to2 = original_length2 -1;
977  to1--;
978  }
979  /* Transfer to DNA minus strand coordinates */
980  if(strand2 == eNa_strand_minus) {
981  int tmp_int;
982  tmp_int = to2;
983  to2 = original_length2 - from2 - 1;
984  from2 = original_length2 - tmp_int - 1;
985  }
986
987  slp1->SetInt().SetFrom(from1);
988  slp1->SetInt().SetTo(to1);
989  slp1->SetInt().SetId(*id1);
990  slp1->SetInt().SetStrand(strand1);
991  slp2->SetInt().SetFrom(from2);
992  slp2->SetInt().SetTo(to2);
993  slp2->SetInt().SetId(*id2);
994  slp2->SetInt().SetStrand(strand2);
995
996  seq_int1_last.Reset(&slp1->SetInt());
997  seq_int2_last.Reset(&slp2->SetInt());
998
999  break;
1000  }
1001
1002  first_shift = true;
1003
1004  /* If this substitution is following simple frameshift
1005  we do not need to start new segment, but may continue
1006  old one */
1007
  // With a previous nucleotide interval available, that interval is
  // adjusted in place and no new segment is emitted (falls to `continue`).
1008  if(seq_int2_last) {
1009  s_GetCurrPos(start2, esp->num[index]*((Uint1)esp->op_type[index]-3));
1010  if(strand2 != eNa_strand_minus) {
1011  seq_int2_last->SetTo(start2 - 1);
1012  } else {
1013  /* Transfer to DNA minus strand coordinates */
1014  seq_int2_last->SetFrom(original_length2 - start2);
1015  }
1016
1017  /* Adjustment for multiple shifts - theoretically possible,
1018  but very improbable */
1019  if(seq_int2_last->GetFrom() > seq_int2_last->GetTo()) {
1020
1021  if(strand2 != eNa_strand_minus) {
1022  seq_int2_last->SetTo(seq_int2_last->GetTo() + 3);
1023  } else {
1024  seq_int2_last->SetFrom(seq_int2_last->GetFrom() - 3);
1025  }
1026
1027  if (seq_int1_last.GetPointer() &&
1028  seq_int1_last->GetTo() != 0)
1029  seq_int1_last->SetTo(seq_int1_last->GetTo() + 1);
1030  }
1031
1032  } else if ((Uint1)esp->op_type[index] > 3) {
1033  /* Protein piece is empty */
1034  slp1->SetEmpty(*id1);
1035  /* Simulating insertion of nucleotides */
1036  from2 = s_GetCurrPos(start2,
1037  esp->num[index]*((Uint1)esp->op_type[index]-3));
1038  to2 = MIN(start2,original_length2) - 1;
1039
1040  /* Transfer to DNA minus strand coordinates */
1041  if(strand2 == eNa_strand_minus) {
1042  int tmp_int;
1043  tmp_int = to2;
1044  to2 = original_length2 - from2 - 1;
1045  from2 = original_length2 - tmp_int - 1;
1046  }
1047  slp2->SetInt().SetFrom(from2);
1048  slp2->SetInt().SetTo(to2);
1049
  // NOTE(review): unlike the other branches, no SetStrand(strand2) call is
  // made on slp2 here -- confirm whether that is deliberate.
1050  slp2->SetInt().SetId(*id2);
1051
1052  seq_int1_last.Reset(NULL);
1053  seq_int2_last.Reset(&slp2->SetInt()); /* Will be used to adjust "to" value */
1054  break;
1055  } else {
1056  continue; /* Main loop */
1057  }
1058  continue; /* Main loop */
1059  /* break; */
1060  default:
1061  continue; /* Main loop */
1062  /* break; */
1063  }
1064
  // Wrap the two locations in a Std-seg; with `reverse` set, sequence 2's
  // loc and id are stored first.
1065  CRef<CStd_seg> seg(new CStd_seg());
1066  seg->SetDim(kBlastAlignmentDim);
1067
1068  CStd_seg::TIds& ids = seg->SetIds();
1069  ids.reserve(kBlastAlignmentDim);
1070  seg->SetLoc().reserve(kBlastAlignmentDim);
1071
1072  if (reverse) {
1073  seg->SetLoc().push_back(slp2);
1074  seg->SetLoc().push_back(slp1);
1075  ids.push_back(id2);
1076  ids.push_back(id1);
1077  } else {
1078  seg->SetLoc().push_back(slp1);
1079  seg->SetLoc().push_back(slp2);
1080  ids.push_back(id1);
1081  ids.push_back(id2);
1082  }
1083  ids.resize(seg->GetDim());
1084
1085  seqalign->SetSegs().SetStd().push_back(seg);
1086  }
1087
1088  return seqalign;
1089 }
1090 
1091 /// Creates and initializes CScore with a given name, and with integer or
1092 /// double value. Integer value is used if it is not zero, otherwise
1093 /// double value is assigned.
1094 /// @param ident_string Score type name [in]
1095 /// @param d Real value of the score [in]
1096 /// @param i Integer value of the score. [in]
1097 /// @return Resulting CScore object.
1098 static CRef<CScore>
1099 s_MakeScore(const string& ident_string, double d, int i, bool is_integer)
1100 {
1101  CRef<CScore> retval(new CScore());
1102  retval->SetId().SetStr(ident_string);
1103 
1104  if (is_integer)
1105  retval->SetValue().SetInt(i);
1106  else
1107  retval->SetValue().SetReal(d);
1108 
1109  return retval;
1110 }
1111 
1112 /// Computes the exact size of a CSeq_align::TScore for a given HSP
1113 /// @param hsp HSP for which the score objects will be built, must be non-NULL
1114 /// [in]
1115 /// @param seqid_list list of IDs associated with this HSP [in]
1116 static size_t
1117 s_CalculateScoreVectorSize(const BlastHSP* hsp, const vector<string> & seqid_list)
1118 {
1119  _ASSERT(hsp);
1120  // query coverage hsp
1121  size_t retval = 1;
1122 
1123  if (hsp->score) {
1124  retval+=2;
1125  }
1126 
1127  if (hsp->num > 1) {
1128  retval++;
1129  }
1130 
1131  double evalue = (hsp->evalue < SMALLEST_EVALUE) ? 0.0 : hsp->evalue;
1132  if (evalue >= 0.0) {
1133  retval++;
1134  }
1135 
1136  if (hsp->bit_score >= 0.0) {
1137  retval++;
1138  }
1139 
1140  if (hsp->num_ident >= 0) {
1141  retval++;
1142  }
1143 
1144  if (hsp->comp_adjustment_method > 0) {
1145  retval++;
1146  }
1147 
1148  if(hsp->num_positives > 0){
1149  retval++;
1150  }
1151 
1152  if ( !seqid_list.empty() ) {
1153  retval += seqid_list.size();
1154  }
1155  return retval;
1156 }
1157 
1158 /// Creates a list of score objects for a Seq-align, given an HSP structure.
     /// Returns without touching scores when hsp is NULL.
1159 /// @param hsp Structure containing HSP information [in]
1160 /// @param scores Container of score objects to put into a Seq-align [out]
1161 /// @param seqid_list Subject sequence ids; each one is appended as an
     /// integer score of value 0 whose name is the id string [in]
     /// @param query_length Length of the query; when positive, the HSP query
     /// coverage score is appended as well [in]
1162 static void
1164  CSeq_align::TScore & scores,
1165  const vector<string> & seqid_list,
1166  Int4 query_length)
1167 {
1168  if (!hsp)
1169  return;
1170 
      // Reserve up front so the push_back calls below do not reallocate
1171  scores.reserve(s_CalculateScoreVectorSize(hsp, seqid_list));
1172 
      // The raw score is stored twice, under "score" and "blast_score"
1173  if (hsp->score) {
1174  static const string kScore("score");
1175  scores.push_back(s_MakeScore(kScore, 0.0, hsp->score, true));
1176  static const string kBlastScore("blast_score");
1177  scores.push_back(s_MakeScore(kBlastScore, 0.0, hsp->score, true));
1178  }
1179 
1180  if (hsp->num > 1) {
1181  static const string kSumN("sum_n");
1182  scores.push_back(s_MakeScore(kSumN, 0.0, hsp->num, true));
1183  }
1184 
1185  // Set the E-Value (values below SMALLEST_EVALUE are saved as 0); the score
      // is named "e_value" when hsp->num <= 1 and "sum_e" otherwise
1186  double evalue = (hsp->evalue < SMALLEST_EVALUE) ? 0.0 : hsp->evalue;
1187  if (evalue >= 0.0) {
1188  string score_type = (hsp->num <= 1) ? "e_value" : "sum_e";
1189  scores.push_back(s_MakeScore(score_type, evalue, 0, false));
1190  }
1191 
1192  // Add the bit score stored in the HSP (skipped when it is negative)
1193 
1194  if (hsp->bit_score >= 0.0) {
1195  static const string kBitScore("bit_score");
1196  scores.push_back(s_MakeScore(kBitScore, hsp->bit_score, 0, false));
1197  }
1198 
1199  // Set the identity score
1200  if (hsp->num_ident >= 0) {
1201  static const string kNumIdent("num_ident");
1202  scores.push_back(s_MakeScore(kNumIdent, 0.0, hsp->num_ident, true));
1203  }
1204 
1205  if (hsp->comp_adjustment_method > 0) {
1206  static const string kCompAdj("comp_adjustment_method");
1207  scores.push_back(s_MakeScore(kCompAdj, 0.0,
1208  hsp->comp_adjustment_method, true));
1209  }
1210 
      // One integer score of value 0 per sequence id, named after the id
1211  if ( !seqid_list.empty() ) {
1212  ITERATE(vector<string>, sid, seqid_list) {
1213  scores.push_back(s_MakeScore(*sid, 0.0, 0, true));
1214  }
1215  }
1216 
1217  if (hsp->num_positives > 0) {
1218  static const string kNumPositives("num_positives");
1219  scores.push_back(s_MakeScore(kNumPositives, 0.0, hsp->num_positives, true));
1220  }
1221 
      // Query coverage of this HSP, only when the query length is known
1222  if(query_length > 0) {
1223  static const string kQueryCovHsp("hsp_percent_coverage");
1224  double hsp_coverage = Blast_HSPGetQueryCoverage( hsp, query_length);
1225  scores.push_back(s_MakeScore(kQueryCovHsp, hsp_coverage, 0, false));
1226  }
1227  return;
1228 }
1229 
1230 /// Produce UserObject with Seq-ids to limit formatting to ("use_this_seqid")
     /// and append it to the Seq-align extensions. Nothing is added when
     /// seqid_list is empty.
1231 /// @param seqalign Seq-align object to fill in [in][out]
1232 /// @param seqid_list list of strings with seqids [in]
1233 static void
1235  const vector<string> & seqid_list)
1236 {
1237  if (seqid_list.empty())
1238  return;
1239 
      // User object of type "use_this_seqid" with one field, "SEQIDS",
      // holding the whole list
1240  CRef<CUser_object> userObject(new CUser_object());
1241  userObject->SetType().SetStr("use_this_seqid");
1242  userObject->AddField("SEQIDS", seqid_list);
1243  seqalign->SetExt().push_back(userObject);
1244 }
1245 
1246 
1247 /// Given an HSP structure, creates a list of scores and inserts them into
1248 /// a Seq-align.
1249 /// @param seqalign Seq-align object to fill [in] [out]
1250 /// @param hsp An HSP structure [in]
1251 /// @param seqid_list List of sequence ids for the subject sequence,
     /// forwarded to s_BuildScoreList [in]
     /// @param query_length Length of the query, forwarded to
     /// s_BuildScoreList [in]
1252 static void
1254  const BlastHSP * hsp,
1255  const vector<string> & seqid_list,
1256  Int4 query_length)
1257 {
1258  // Add the scores for this HSP
1259  CSeq_align::TScore& score_list = seqalign->SetScore();
1260  s_BuildScoreList(hsp, score_list, seqid_list, query_length);
1261 }
1262 
1263 
1264 /// Creates a Dense-diag object from HSP information and sequence identifiers
1265 /// for a non-translated ungapped search.
1266 /// @param hsp An HSP structure [in]
1267 /// @param query_id Query sequence identifier [in]
1268 /// @param subject_id Subject sequence identifier [in]
1269 /// @param query_length Length of the query [in]
1270 /// @param subject_length Length of the subject [in]
1271 /// @param seqid_list List of sequence ids for the subject sequence; passed
     /// on to s_BuildScoreList [in]
1272 /// @return Resulting Dense-diag object.
1275  CRef<CSeq_id> subject_id,
1276  Int4 query_length, Int4 subject_length,
1277  const vector<string> & seqid_list)
1278 {
1279  CRef<CDense_diag> retval(new CDense_diag());
1280 
1281  retval->SetDim(kBlastAlignmentDim);
1282 
1283  // Set the sequence ids
1284  CDense_diag::TIds& ids = retval->SetIds();
1285  ids.reserve(kBlastAlignmentDim);
1286  ids.push_back(query_id);
1287  ids.push_back(subject_id);
1288 
      // The segment length is taken from the query span of the HSP
1289  retval->SetLen(hsp->query.end - hsp->query.offset);
1290 
1291  CDense_diag::TStrands& strands = retval->SetStrands();
1292  strands.reserve(kBlastAlignmentDim);
1293  strands.push_back(s_Frame2Strand(hsp->query.frame));
1294  strands.push_back(s_Frame2Strand(hsp->subject.frame));
1295  CDense_diag::TStarts& starts = retval->SetStarts();
1296  starts.reserve(kBlastAlignmentDim);
      // Query start first, subject start second. For a negative frame the
      // start is computed as (sequence length - HSP end).
1297  if (hsp->query.frame >= 0) {
1298  starts.push_back(hsp->query.offset);
1299  } else {
1300  starts.push_back(query_length - hsp->query.end);
1301  }
1302  if (hsp->subject.frame >= 0) {
1303  starts.push_back(hsp->subject.offset);
1304  } else {
1305  starts.push_back(subject_length - hsp->subject.end);
1306  }
1307 
1308  CSeq_align::TScore& score_list = retval->SetScores();
1309  s_BuildScoreList(hsp, score_list, seqid_list, query_length);
1310 
1311  return retval;
1312 }
1313 
1314 /// Creates a Std-seg object from HSP information and sequence identifiers
1315 /// for a translated ungapped search.
1316 /// @param hsp An HSP structure [in]
1317 /// @param query_id Query sequence identifier [in]
1318 /// @param subject_id Subject sequence identifier [in]
1319 /// @param query_length Length of the query [in]
1320 /// @param subject_length Length of the subject [in]
1321 /// @param seqid_list List of sequence ids for the subject sequence; passed
     /// on to s_BuildScoreList [in]
1322 /// @return Resulting Std-seg object.
1325  CRef<CSeq_id> subject_id,
1326  Int4 query_length, Int4 subject_length,
1327  const vector<string> & seqid_list)
1328 {
1329  CRef<CStd_seg> retval(new CStd_seg());
1330 
1331  retval->SetDim(kBlastAlignmentDim);
1332  retval->SetLoc().reserve(kBlastAlignmentDim);
1333 
1334  CRef<CSeq_loc> query_loc(new CSeq_loc());
1335  CRef<CSeq_loc> subject_loc(new CSeq_loc());
1336  query_loc->SetInt().SetId(*query_id);
1337  subject_loc->SetInt().SetId(*subject_id);
1338 
1339  // Set the sequence ids
1340  CStd_seg::TIds& ids = retval->SetIds();
1341  ids.reserve(kBlastAlignmentDim);
1342  ids.push_back(query_id);
1343  ids.push_back(subject_id);
1344 
1345  query_loc->SetInt().SetStrand(s_Frame2Strand(hsp->query.frame));
1346  subject_loc->SetInt().SetStrand(s_Frame2Strand(hsp->subject.frame));
1347 
      // Query interval. Frame 0: HSP offsets are used as they are (closed
      // interval, hence end - 1). Non-zero frame: offsets are multiplied by
      // CODON_LENGTH and shifted by the frame; for a negative frame the
      // result is measured back from query_length.
1348  if (hsp->query.frame == 0) {
1349  query_loc->SetInt().SetFrom(hsp->query.offset);
1350  query_loc->SetInt().SetTo(hsp->query.end - 1);
1351  } else if (hsp->query.frame > 0) {
1352  query_loc->SetInt().SetFrom(CODON_LENGTH*(hsp->query.offset) +
1353  hsp->query.frame - 1);
1354  query_loc->SetInt().SetTo(CODON_LENGTH*(hsp->query.end) +
1355  hsp->query.frame - 2);
1356  } else {
1357  query_loc->SetInt().SetFrom(query_length -
1358  CODON_LENGTH*(hsp->query.end) + hsp->query.frame + 1);
1359  query_loc->SetInt().SetTo(query_length - CODON_LENGTH*hsp->query.offset
1360  + hsp->query.frame);
1361  }
1362 
      // Subject interval: same three cases as for the query
1363  if (hsp->subject.frame == 0) {
1364  subject_loc->SetInt().SetFrom(hsp->subject.offset);
1365  subject_loc->SetInt().SetTo(hsp->subject.end - 1);
1366  } else if (hsp->subject.frame > 0) {
1367  subject_loc->SetInt().SetFrom(CODON_LENGTH*(hsp->subject.offset) +
1368  hsp->subject.frame - 1);
1369  subject_loc->SetInt().SetTo(CODON_LENGTH*(hsp->subject.end) +
1370  hsp->subject.frame - 2);
1371 
1372  } else {
1373  subject_loc->SetInt().SetFrom(subject_length -
1374  CODON_LENGTH*(hsp->subject.end) + hsp->subject.frame + 1);
1375  subject_loc->SetInt().SetTo(subject_length -
1376  CODON_LENGTH*hsp->subject.offset + hsp->subject.frame);
1377  }
1378 
      // Locations are stored query first, subject second
1379  retval->SetLoc().push_back(query_loc);
1380  retval->SetLoc().push_back(subject_loc);
1381 
1382  CSeq_align::TScore& score_list = retval->SetScores();
1383  s_BuildScoreList(hsp, score_list, seqid_list, query_length);
1384 
1385  return retval;
1386 }
1387 
1388 /// Creates a Seq-align from an HSP list for an ungapped search.
     /// All HSPs of the list end up in a single Seq-align of type "diags".
1389 /// @param program BLAST program [in]
1390 /// @param hsp_list HSP list structure [in]
1391 /// @param query_id Query sequence identifier [in]
1392 /// @param subject_id Subject sequence identifier [in]
1393 /// @param query_length Length of the query [in]
1394 /// @param subject_length Length of the subject [in]
1395 /// @param seqid_list List of sequence ids for the subject sequence; attached
     /// to the Seq-align as a user object, not as per-segment scores [in]
     /// @param sa_vector Cleared, then filled with the one resulting
     /// Seq-align [out]
1396 void
1398  BlastHSPList* hsp_list,
1399  CRef<CSeq_id> query_id,
1400  CRef<CSeq_id> subject_id,
1401  Int4 query_length,
1402  Int4 subject_length,
1403  const vector<string> & seqid_list,
1404  vector<CRef<CSeq_align > > & sa_vector)
1405 {
1406  CRef<CSeq_align> seqalign(new CSeq_align());
1407  BlastHSP** hsp_array;
1408  int index;
1409 
1410  seqalign->SetType(CSeq_align::eType_diags);
1411 
1412  sa_vector.clear();
1413 
1414  hsp_array = hsp_list->hsp_array;
1415 
      // The per-HSP converters receive this empty list instead of seqid_list
1416  vector<string> emptyList; // FIXME: change prototypes below.
1417  /* All HSPs are put in one seqalign, containing a list of
1418  * DenseDiag for same molecule search, or StdSeg for translated searches.
1419  */
1420  if (program == eBlastTypeBlastn ||
1421  program == eBlastTypeBlastp ||
1422  program == eBlastTypeRpsBlast) {
1423  for (index=0; index<hsp_list->hspcnt; index++) {
1424  BlastHSP* hsp = hsp_array[index];
1425  seqalign->SetSegs().SetDendiag().push_back(
1427  query_id,
1428  subject_id,
1429  query_length,
1430  subject_length,
1431  emptyList));
1432  }
1433  } else { // Translated search
1434  for (index=0; index<hsp_list->hspcnt; index++) {
1435  BlastHSP* hsp = hsp_array[index];
1436  seqalign->SetSegs().SetStd().push_back(
1438  query_id,
1439  subject_id,
1440  query_length,
1441  subject_length,
1442  emptyList));
1443  }
1444  }
1445  s_AddUserObjectToSeqAlign(seqalign, seqid_list);
1446  sa_vector.push_back(seqalign);
1447  return;
1448 }
1449 
1450 /// This is called for each query and each subject in a BLAST search.
     /// Builds one Seq-align per HSP of the list. HSPs for which no Seq-align
     /// could be built are skipped.
1451 /// @param program BLAST program [in]
1452 /// @param hsp_list HSP list structure; for PSI-BLAST, a num_ident of 0 in
     /// an HSP is overwritten with -1 [in|out]
1453 /// @param query_id Query sequence identifier [in]
1454 /// @param subject_id Subject sequence identifier [in]
1455 /// @param query_length Length of query sequence [in]
1456 /// @param subject_length Length of subject sequence [in]
1457 /// @param is_ooframe Was this a search with out-of-frame gapping? [in]
1458 /// @param seqid_list List of sequence ids for the subject sequence; attached
     /// to every Seq-align as a user object [in]
1459 /// @param sa_vector Cleared, then filled with the resulting Seq-align
     /// objects [out]
1460 void
1462  CRef<CSeq_id> query_id, CRef<CSeq_id> subject_id,
1463  Int4 query_length, Int4 subject_length, bool is_ooframe,
1464  const vector<string> & seqid_list,
1465  vector<CRef<CSeq_align > > & sa_vector)
1466 {
1467  // Process the list of HSPs corresponding to one subject sequence and
1468  // create one seq-align for each HSP; the results are collected in
1469  // sa_vector.
1470  BlastHSP** hsp_array = hsp_list->hsp_array;
1471 
1472  sa_vector.clear();
1473  sa_vector.reserve(hsp_list->hspcnt);
1474  vector<string> emptyList;
1475 
1476  for (int index = 0; index < hsp_list->hspcnt; index++) {
1477  BlastHSP* hsp = hsp_array[index];
1478  CRef<CSeq_align> seqalign;
1479 
1480  if (is_ooframe) {
1481  seqalign =
1482  s_OOFBlastHSP2SeqAlign(program, hsp, query_id, subject_id,
1483  query_length, subject_length);
1484  } else {
1485  seqalign =
1486  s_BlastHSP2SeqAlign(program, hsp, query_id, subject_id,
1487  query_length, subject_length);
1488  }
1489 
1490  if (seqalign.Empty()) continue;
1491 
      // s_BuildScoreList only writes "num_ident" when the value is >= 0, so
      // changing 0 to -1 suppresses that score for PSI-BLAST
1492  if(eBlastTypePsiBlast == program)
1493  {
1494  if(hsp->num_ident == 0)
1495  hsp->num_ident = -1;
1496  }
1497  // Pass in empty list until removed.
1498  s_AddScoresToSeqAlign(seqalign, hsp, emptyList, query_length);
1499 
1500  s_AddUserObjectToSeqAlign(seqalign, seqid_list);
1501  sa_vector.push_back(seqalign);
1502  }
1503 
1504  return;
1505 }
1506 
1508 {
      // Make sure the returned set holds no alignments: the list is cleared
      // explicitly and the result is checked in debug builds
1510  retval->Set().clear();
1511  _ASSERT(retval->Get().empty());
1512  return retval;
1513 }
1514 
1516 {
      // Shifts the query row (row 0) of the alignment by the start position
      // of the query location. Only interval locations contribute a shift;
      // for any other location type q_shift stays 0 and nothing is done.
1517  _ASSERT(sar);
1518  const int query_row = 0;
1519 
1520  TSeqPos q_shift = 0;
1521 
1522  if (query.IsInt()) {
1523  q_shift = query.GetInt().GetFrom();
1524  }
1525 
      // A zero shift would leave the alignment unchanged, so skip the call
1526  if (q_shift > 0) {
1527  sar->OffsetRow(query_row, q_shift);
1528  }
1529 }
1530 
1531 /// Remap subject alignment if its location specified the reverse strand or
1532 /// a starting location other than the beginning of the sequence.
1533 /// @param subj_aligns Seq-align containing HSPs for a given
1534 /// query-subject alignment [in|out]
1535 /// @param subj_loc Location of the subject sequence searched. [in]
1536 static void
1537 s_RemapToSubjectLoc(CRef<CSeq_align> & subj_aligns, const CSeq_loc& subj_loc)
1538 {
1539  const int kSubjDimension = 1;
1540  _ASSERT(subj_loc.IsInt() || subj_loc.IsWhole());
1541  subj_aligns.Reset(RemapAlignToLoc(*subj_aligns, kSubjDimension, subj_loc));
1542 }
1543 
1547  const CSeq_loc & query_loc,
1548  TSeqPos query_length,
1549  const IBlastSeqInfoSrc * seqinfo_src,
1550  bool is_gapped,
1551  bool is_ooframe,
1552  TSeqLocInfoVector & subj_masks)
1553 {
      // Converts every HSP list of one hit list into Seq-aligns and collects
      // them in seq_aligns. Subject masks found along the way are appended
      // to subj_masks.
1555 
      // No hit list: return the set as it is at this point
1556  if (!hit_list) {
1557  return seq_aligns;
1558  }
1559 
      // Work on a private copy of the query id
1560  CRef<CSeq_id> query_id(new CSeq_id);
1561  SerialAssign(*query_id, CSeq_loc_CI(query_loc).GetSeq_id());
1562  _ASSERT(query_id);
1563 
1564  for (int index = 0; index < hit_list->hsplist_count; index++) {
1565  BlastHSPList* hsp_list = hit_list->hsplist_array[index];
1566  if (!hsp_list)
1567  continue;
1568 
1569  // Sort HSPs with e-values as first priority and scores as
1570  // tie-breakers, since that is the order we want to see them in
1571  // in Seq-aligns.
1572  Blast_HSPListSortByEvalue(hsp_list);
1573 
1574  const Uint4 kOid = hsp_list->oid;
1575  TSeqPos subj_length = 0;
1576  CRef<CSeq_id> subject_id;
1577  GetSequenceLengthAndId(seqinfo_src, kOid, subject_id, &subj_length);
      // Subjects for which no id came back are logged and skipped
1578  if(subject_id.Empty()){
1579  LOG_POST(Info << "No unfiltered subject id for oid " + NStr::UIntToString(kOid));
1580  continue;
1581  }
1582 
1583  // Collect one subject range per HSP (offset..end) for the mask lookup
1584  vector <TSeqRange> ranges;
1585  for (int i=0; i<hsp_list->hspcnt; i++) {
1586  const BlastHSP* hsp = hsp_list->hsp_array[i];
1587  TSeqRange rg;
1588  rg.SetFrom(hsp->subject.offset);
1589  rg.SetTo(hsp->subject.end);
1590  ranges.push_back(rg);
1591  }
1592 
1593  // Extract subject masks; they are kept only when GetMasks reports success
1594  TMaskedSubjRegions masks;
1595  if (!ranges.empty() && seqinfo_src->GetMasks(kOid, ranges, masks)) {
1596  subj_masks.push_back(masks);
1597  }
1598 
1599  // Get SeqIds for entrez query restriction.
1600  vector<string> seqid_list;
1601  GetFilteredRedundantSeqids(*seqinfo_src, hsp_list->oid, seqid_list, subject_id->IsGi());
1602  // stores a CSeq_align for each matching sequence
1603  vector<CRef<CSeq_align > > hit_align;
1604  if (is_gapped) {
1606  hsp_list,
1607  query_id,
1608  subject_id,
1609  query_length,
1610  subj_length,
1611  is_ooframe,
1612  seqid_list,
1613  hit_align);
1614  } else {
1616  hsp_list,
1617  query_id,
1618  subject_id,
1619  query_length,
1620  subj_length,
1621  seqid_list,
1622  hit_align);
1623  }
1624 
      // Every alignment is remapped to the query location. When the sequence
      // source can return partial sequences, in-frame alignments are also
      // remapped to the subject location.
1625  if (seqinfo_src->CanReturnPartialSequence() == true)
1626  {
1627  CConstRef<CSeq_loc> subj_loc = seqinfo_src->GetSeqLoc(kOid);
1628  NON_CONST_ITERATE(vector<CRef<CSeq_align > >, iter, hit_align) {
1629  RemapToQueryLoc(*iter, query_loc);
1630  if ( !is_ooframe )
1631  s_RemapToSubjectLoc(*iter, *subj_loc);
1632  seq_aligns->Set().push_back(*iter);
1633  }
1634  }
1635  else
1636  {
1637  NON_CONST_ITERATE(vector<CRef<CSeq_align > >, iter, hit_align) {
1638  RemapToQueryLoc(*iter, query_loc);
1639  seq_aligns->Set().push_back(*iter);
1640  }
1641  }
1642  }
1643  return seq_aligns;
1644 }
1645 
1649  class ILocalQueryData & query,
1650  const IBlastSeqInfoSrc * seqinfo_src,
1651  const SPHIQueryInfo * pattern_info,
1652  vector<TSeqLocInfoVector>& subj_masks)
1653 {
      // Produces one Seq-align set per pattern occurrence. subj_masks is
      // cleared and resized to one entry per pattern.
1654  TSeqAlignVector retval;
1655 
1656  /* Split results into an array of BlastHSPResults structures corresponding
1657  to different pattern occurrences. */
1658  BlastHSPResults* *phi_results =
1660 
1661  subj_masks.clear();
1662  subj_masks.resize(pattern_info->num_patterns);
1663  retval.reserve(pattern_info->num_patterns);
1664 
      // When the split yields no array, retval is returned empty
1665  if (phi_results) {
1666  for (int pattern_index = 0; pattern_index < pattern_info->num_patterns;
1667  ++pattern_index) {
      // NOTE(review): the wrapper presumably takes ownership of this
      // element, since only the outer array is freed below - confirm
1668  CBlastHSPResults one_phi_results(phi_results[pattern_index]);
1669 
1670  if (one_phi_results) {
1671  // PHI BLAST does not work with multiple queries, so we only
1672  // need to look at the first hit list.
1673  BlastHitList* hit_list = one_phi_results->hitlist_array[0];
1674 
1675  // PHI BLAST is always gapped, and never out-of-frame, hence
1676  // true and false values for the respective booleans in the next
1677  // call.
1678  CRef<CSeq_align_set> seq_aligns(
1679  BlastHitList2SeqAlign_OMF(hit_list,
1680  prog,
1681  *query.GetSeq_loc(0),
1682  static_cast<TSeqPos>(query.GetSeqLength(0)),
1683  seqinfo_src,
1684  true,
1685  false,
1686  subj_masks[pattern_index]));
1687 
1688  retval.push_back(seq_aligns);
1689 
1690  }
1691  else
1692  { // Makes an empty SeqAlign set as this pattern had no hits.
1693  CRef<CSeq_align_set> seq_aligns(
1695  prog,
1696  *query.GetSeq_loc(0),
1697  static_cast<TSeqPos>(query.GetSeqLength(0)),
1698  seqinfo_src,
1699  true,
1700  false,
1701  subj_masks[pattern_index]));
1702  retval.push_back(seq_aligns);
1703 
1704  }
1705  }
      // Release the array returned by the split
1706  sfree(phi_results);
1707  }
1708 
1709  return retval;
1710 }
1711 
1712 /**
1713  * @brief This function changes the subject frame for HSPs if the program is
1714  * blastn or mapping and the subject was specified with a negative strand.
1715  * This is necessary because the engine doesn't handle negative strands in
1716  * subjects for blastn (it does for translated searches).
      * Every HSP in the list gets its subject frame set to -1; in all other
      * cases the list is left untouched.
1717  *
1718  * @param subj_strand Strand for the subject sequence [in]
1719  * @param program BLAST program [in]
1720  * @param hsp_list list of HSPs to possibly adjust, must be non-NULL [in|out]
1721  */
1723  EBlastProgramType program,
1724  BlastHSPList* hsp_list)
1725 {
1726  _ASSERT(hsp_list);
      // Nothing to do unless the subject is on the minus strand and the
      // program is blastn or mapping
1727  if (subj_strand != eNa_strand_minus ||
1728  (program != eBlastTypeBlastn && program != eBlastTypeMapping))
1729  return;
1730 
1731  for (int index = 0; index < hsp_list->hspcnt; index++) {
1732  BlastHSP* hsp = hsp_list->hsp_array[index];
1733  hsp->subject.frame = -1;
1734  }
1735 }
1736 
1737 /** Extracts results from the BlastHSPResults structure for only one subject
1738  * sequence, identified by its index, and converts them into a vector of
1739  * CSeq_align_set objects. Returns one vector element per query sequence;
1740  * The CSeq_align_set consists of as many CSeq_align-s as there are HSPs in the
1741  * BlastHSPList for each query-subject pair
      * Queries without an HSP list for this subject get an empty set.
1742  * @param results results from running the BLAST algorithm [in]
1743  * @param query_data All query sequences [in]
1744  * @param seqinfo_src Source of subject sequences information [in]
1745  * @param prog type of BLAST program [in]
1746  * @param subj_idx Index of this subject sequence in a set [in]
1747  * @param is_gapped Is this a gapped search? [in]
1748  * @param is_ooframe Is it a search with out-of-frame gapping? [in]
      * @param subj_masks Indexed by query; masks found for this subject are
      * appended to the entry of the corresponding query [in|out]
1749  * @return Vector of seqalign sets (one set per query sequence).
1750  */
1751 static TSeqAlignVector
1753  ILocalQueryData& query_data,
1754  const IBlastSeqInfoSrc& seqinfo_src,
1756  Uint4 subj_idx,
1757  bool is_gapped,
1758  bool is_ooframe,
1759  vector<TSeqLocInfoVector>& subj_masks)
1760 {
1761  _ASSERT(results->num_queries == (int)query_data.GetNumQueries());
1762 
1763  TSeqAlignVector retval;
1764  CRef<CSeq_id> subject_id;
1765  TSeqPos subj_length = 0;
1766 
1767  // Subject is the same for all queries, so retrieve its id right away
1768  GetSequenceLengthAndId(&seqinfo_src, subj_idx, subject_id, &subj_length);
1769  // For blastn, we may need to fix the strand in the HSPs, as the engine
1770  // doesn't expect negative subject strands
1771  const ENa_strand kSubjStrand = seqinfo_src.GetSeqLoc(subj_idx)->GetStrand();
1772 
1773  vector<CRef<CSeq_align > > hit_align;
1774  retval.reserve(results->num_queries);
1775 
1776  // Process each query's hit list
1777  for (int qindex = 0; qindex < results->num_queries; qindex++) {
1778  CRef<CSeq_align_set> seq_aligns;
1779  BlastHitList* hit_list = results->hitlist_array[qindex];
1780  BlastHSPList* hsp_list = NULL;
1781 
1782  // Find the HSP list corresponding to this subject, if it exists
      // NOTE(review): entries of hsplist_array are dereferenced here
      // without a NULL check - confirm they cannot be NULL
1783  if (hit_list) {
1784  int sindex;
1785  for (sindex = 0; sindex < hit_list->hsplist_count; ++sindex) {
1786  hsp_list = hit_list->hsplist_array[sindex];
1787  if (hsp_list->oid == static_cast<Int4>(subj_idx))
1788  break;
1789  }
1790  /* If hsp_list for this subject is not found, set it to NULL */
1791  if (sindex == hit_list->hsplist_count)
1792  hsp_list = NULL;
1793  }
1794 
1795  if (hsp_list) {
1796  // Sort HSPs with e-values as first priority and scores as
1797  // tie-breakers, since that is the order we want to see them in
1798  // in Seq-aligns.
1799  Blast_HSPListSortByEvalue(hsp_list);
1800 
      // Work on a private copy of the query id
1801  CConstRef<CSeq_loc> seqloc = query_data.GetSeq_loc(qindex);
1802  CRef<CSeq_id> query_id(new CSeq_id);
1803  SerialAssign(*query_id, *seqloc->GetId());
1804  TSeqPos query_length = static_cast<TSeqPos>(query_data.GetSeqLength(qindex));
1805  s_AdjustNegativeSubjFrameInBlastn(kSubjStrand, prog, hsp_list);
1806 
      // NOTE(review): subject_id is dereferenced without an Empty() check
      // here - confirm the lookup above always yields an id
1807  vector<string> seqid_list;
1808  GetFilteredRedundantSeqids(seqinfo_src, hsp_list->oid, seqid_list, subject_id->IsGi());
1809 
1810 
1811  // Collect one subject range per HSP (offset..end) for the mask lookup
1812  vector <TSeqRange> ranges;
1813  for (int i=0; i<hsp_list->hspcnt; i++) {
1814  const BlastHSP* hsp = hsp_list->hsp_array[i];
1815  TSeqRange rg;
1816  rg.SetFrom(hsp->subject.offset);
1817  rg.SetTo(hsp->subject.end);
1818  ranges.push_back(rg);
1819  }
1820 
1821  // Extract subject masks; they are kept only when GetMasks reports success
1822  TMaskedSubjRegions masks;
1823  if (!ranges.empty() &&
1824  seqinfo_src.GetMasks(subj_idx, ranges, masks)) {
1825  subj_masks[qindex].push_back(masks);
1826  }
1827 
      // hit_align is reused across queries, so empty it first
1828  hit_align.clear();
1829  if (is_gapped) {
1831  hsp_list,
1832  query_id,
1833  subject_id,
1834  query_length,
1835  subj_length,
1836  is_ooframe,
1837  seqid_list,
1838  hit_align);
1839  } else {
1841  hsp_list,
1842  query_id,
1843  subject_id,
1844  query_length,
1845  subj_length,
1846  seqid_list,
1847  hit_align);
1848  }
      // Remap to the query location and, for in-frame alignments, to the
      // subject location as well
1849  seq_aligns.Reset(new CSeq_align_set());
1850  CConstRef<CSeq_loc> subj_loc = seqinfo_src.GetSeqLoc(subj_idx);
1851  NON_CONST_ITERATE(vector<CRef<CSeq_align > >, iter, hit_align) {
1852  RemapToQueryLoc(*iter, *seqloc);
1853  if ( !is_ooframe )
1854  s_RemapToSubjectLoc(*iter, *subj_loc);
1855  seq_aligns->Set().push_back(*iter);
1856  }
1857  } else {
1858  seq_aligns = CreateEmptySeq_align_set();
1859  }
1860  retval.push_back(seq_aligns);
1861  }
1862 
1863  return retval;
1864 }
1865 
1866 /// Transpose the (linearly organized) seqalign set matrix from
1867 /// (q1 s1 q2 s1 ... qN s1, ..., q1 sM q2 sM ... qN sM) to
1868 /// (q1 s1 q1 s2 ... q1 sM, ..., qN s1 qN s2 ... qN sM)
1869 /// this method only reorganizes the seqalign sets, does not copy them.
1870 /// @param alnvec data structure to reorganize [in]
1871 /// @param num_queries number of queries [in]
1872 /// @param num_subjects number of subjects [in]
1873 /// @retval transposed data structure
1874 static TSeqAlignVector
1876  const size_t num_queries,
1877  const size_t num_subjects)
1878 {
1879  TSeqAlignVector result_alnvec;
1880  result_alnvec.reserve(alnvec.size());
1881 
      // Walk the output in query-major order, reading each element from its
      // subject-major position in the input.
      // NOTE(review): assumes alnvec holds num_queries * num_subjects
      // entries; the index is not bounds-checked.
1882  for (size_t iQuery = 0; iQuery < num_queries; iQuery++)
1883  {
1884  for (size_t iSubject = 0; iSubject < num_subjects; iSubject++)
1885  {
1886  size_t iLinearIndex = iSubject * num_queries + iQuery;
1887  CRef<CSeq_align_set> aln_set = alnvec[iLinearIndex];
1888  result_alnvec.push_back(aln_set);
1889  }
1890  }
1891 
1892  _ASSERT(result_alnvec.size() == alnvec.size());
1893  return result_alnvec;
1894 }
1895 
1896 static TSeqAlignVector
1899  class ILocalQueryData& query_data,
1900  const IBlastSeqInfoSrc* seqinfo_src,
1901  bool is_gapped,
1902  bool is_ooframe,
1903  vector<TSeqLocInfoVector>& subj_masks)
1904 {
      // Builds one Seq-align set per (query, subject) pair by converting the
      // results subject by subject, then transposes the result so that all
      // subjects of the first query come first.
1905  TSeqAlignVector retval;
1906  size_t seqinfo_size = seqinfo_src->Size();
1907  retval.reserve(query_data.GetNumQueries() * seqinfo_size);
1908 
1909  int num_of_queries = results->num_queries;
1910  _ASSERT(num_of_queries == (int)query_data.GetNumQueries());
1911 
1912  // Compute the subject masks
1913  subj_masks.clear();
1914  subj_masks.resize(num_of_queries *seqinfo_size);
1915 
1916  for (Uint4 index = 0; index < seqinfo_size; index++) {
      // Masks for this subject only, one entry per query
1917  vector<TSeqLocInfoVector> tmp_subj_masks(num_of_queries);
1918  TSeqAlignVector seqalign =
1920  *seqinfo_src, prog, index,
1921  is_gapped, is_ooframe,
1922  tmp_subj_masks);
1923 
1924  /* Merge the new vector with the current. Assume that both vectors
1925  contain CSeq_align_sets for all queries, i.e. have the same
1926  size. */
1927  _ASSERT(seqalign.size() == query_data.GetNumQueries());
1928 
1929  for (TSeqAlignVector::size_type i = 0; i < seqalign.size(); ++i) {
1930  retval.push_back(seqalign[i]);
1931  //seqalign size = num of queries
      // Masks are written directly in query-major order (i is the query
      // index), i.e. the layout the alignments have after the transpose
1932  subj_masks[ seqinfo_size * i + index] = tmp_subj_masks[i];
1933  }
1934  }
1935  _ASSERT(retval.size() == query_data.GetNumQueries() * seqinfo_size);
1936 
1937  return s_TransposeSeqAlignVector(retval, query_data.GetNumQueries(),
1938  seqinfo_size);
1939 }
1940 
1941 static TSeqAlignVector
1944  class ILocalQueryData & query,
1945  const IBlastSeqInfoSrc * seqinfo_src,
1946  bool is_gapped,
1947  bool is_ooframe,
1948  vector<TSeqLocInfoVector>& subj_masks)
1949 {
      // Builds one Seq-align set per query from that query's hit list.
      // subj_masks is cleared and resized to one entry per query.
1950  _ASSERT(results->num_queries == (int)query.GetNumQueries());
1951 
1952  TSeqAlignVector retval;
1953  CConstRef<CSeq_id> query_id;
1954 
1955  subj_masks.clear();
1956  subj_masks.resize(results->num_queries);
1957  retval.reserve(results->num_queries);
1958 
1959  // Process each query's hit list
1960  for (int index = 0; index < results->num_queries; index++) {
1961  BlastHitList* hit_list = results->hitlist_array[index];
1962 
1964  seq_aligns(BlastHitList2SeqAlign_OMF(hit_list,
1965  prog,
1966  *query.GetSeq_loc(index),
1967  static_cast<TSeqPos>(query.GetSeqLength(index)),
1968  seqinfo_src,
1969  is_gapped,
1970  is_ooframe,
1971  subj_masks[index]));
1972 
1973  retval.push_back(seq_aligns);
1974  _TRACE("Query " << index << ": " << seq_aligns->Get().size()
1975  << " seqaligns");
1976 
1977  }
1978 
1979  return retval;
1980 }
1981 
1984  ILocalQueryData & local_data,
1985  const IBlastSeqInfoSrc& seqinfo_src,
1986  EBlastProgramType program,
1987  bool gapped,
1988  bool oof_mode,
1989  vector<TSeqLocInfoVector>& subj_masks,
1990  EResultType result_type /* = eDatabaseSearch*/)
1991 {
      // Dispatches to one of three converters: PHI BLAST, sequence
      // comparison, or database search (any other result_type).
1992  TSeqAlignVector retval;
1993 
      // Without results an empty vector is returned
1994  if (! hsp_results)
1995  return retval;
1996 
1997  // For PHI BLAST, results need to be split by query pattern
1998  // occurrence, which is done in a separate function. Results for
1999  // different pattern occurrences are put in separate discontinuous
2000  // Seq_aligns, and linked in a Seq_align_set.
2001 
2002  BlastQueryInfo * query_info = local_data.GetQueryInfo();
2003 
2004  if (Blast_ProgramIsPhiBlast(program)) {
2005  retval = PhiBlastResults2SeqAlign_OMF(hsp_results,
2006  program,
2007  local_data,
2008  & seqinfo_src,
2009  query_info->pattern_info,
2010  subj_masks);
2011  } else {
2012  if (result_type == eSequenceComparison) {
2013  retval =
2014  s_BlastResults2SeqAlignSequenceCmp_OMF(hsp_results, program,
2015  local_data, &seqinfo_src,
2016  gapped, oof_mode,
2017  subj_masks);
2018  } else {
2019  retval =
2020  s_BlastResults2SeqAlignDatabaseSearch_OMF(hsp_results, program,
2021  local_data,
2022  &seqinfo_src, gapped,
2023  oof_mode, subj_masks);
2024  }
2025  }
2026 
2027  return retval;
2028 }
2029 
2030 /// Creates a Std-seg object from HSP information and sequence identifiers
2031 /// for a non-translated ungapped search.
2032 /// @param hsp An HSP structure [in]
2033 /// @param query_id Query sequence identifier [in]
2034 /// @param subject_id Subject sequence identifier [in]
2035 /// @param query_length Length of the query [in]
2036 /// @param subject_length Length of the subject [in]
2037 /// @param seqid_list List of sequence ids for the subject sequence; passed
     /// on to s_BuildScoreList [in]
2038 /// @return Resulting Std-seg object.
2041  CRef<CSeq_id> subject_id,
2042  Int4 query_length, Int4 subject_length,
2043  const vector<string> & seqid_list)
2044 {
2045  CRef<CStd_seg> retval(new CStd_seg());
2046 
2047  retval->SetDim(kBlastAlignmentDim);
2048  retval->SetLoc().reserve(kBlastAlignmentDim);
2049 
2050  CRef<CSeq_loc> query_loc(new CSeq_loc());
2051  CRef<CSeq_loc> subject_loc(new CSeq_loc());
2052  query_loc->SetInt().SetId(*query_id);
2053  subject_loc->SetInt().SetId(*subject_id);
2054 
2055  // Set the sequence ids
2056  CStd_seg::TIds& ids = retval->SetIds();
2057  ids.reserve(kBlastAlignmentDim);
2058  ids.push_back(query_id);
2059  ids.push_back(subject_id);
2060 
2061  query_loc->SetInt().SetStrand(s_Frame2Strand(hsp->query.frame));
2062  subject_loc->SetInt().SetStrand(s_Frame2Strand(hsp->subject.frame));
2063 
      // Query interval (closed, hence the -1 on the upper bound). For a
      // negative frame the coordinates are mirrored against query_length.
2064  if (hsp->query.frame >= 0) {
2065  query_loc->SetInt().SetFrom(hsp->query.offset);
2066  query_loc->SetInt().SetTo(hsp->query.end -1);
2067  } else {
2068  query_loc->SetInt().SetFrom(query_length - hsp->query.end);
2069  query_loc->SetInt().SetTo(query_length - hsp->query.offset -1);
2070  }
2071 
      // Subject interval: same two cases as for the query
2072  if (hsp->subject.frame >= 0) {
2073  subject_loc->SetInt().SetFrom(hsp->subject.offset);
2074  subject_loc->SetInt().SetTo(hsp->subject.end-1);
2075  } else {
2076  subject_loc->SetInt().SetFrom(subject_length - hsp->subject.end);
2077  subject_loc->SetInt().SetTo(subject_length - hsp->subject.offset -1);
2078  }
2079 
      // Locations are stored query first, subject second
2080  retval->SetLoc().push_back(query_loc);
2081  retval->SetLoc().push_back(subject_loc);
2082 
2083  CSeq_align::TScore& score_list = retval->SetScores();
2084  s_BuildScoreList(hsp, score_list, seqid_list, query_length);
2085 
2086  return retval;
2087 }
2088 
2089 
2090 void
2092  BlastHitList* hit_list,
2093  const CSeq_loc & query_loc,
2094  TSeqPos query_length,
2095  const IBlastSeqInfoSrc * subject_seqinfo,
2096  list<CRef<CStd_seg > > & seg_list)
2097 {
      // Converts every HSP of the hit list into a Std-seg; seg_list is
      // cleared first and then filled with the results.
      // NOTE(review): hit_list is dereferenced below without a NULL check.
2098  seg_list.clear();
2099 
      // Work on a private copy of the query id
2100  CRef<CSeq_id> query_id(new CSeq_id);
2101  SerialAssign(*query_id, CSeq_loc_CI(query_loc).GetSeq_id());
2102  _ASSERT(query_id);
2103 
      // Pointer to the per-HSP converter, chosen once before the loop
2105  (*fun_ptr) (BlastHSP* , CRef<CSeq_id> , CRef<CSeq_id> , Int4 , Int4 ,
2106  const vector<string> & ) = NULL;
2107 
2109  fun_ptr = x_UngappedHSPToStdSeg;
2110  else
2111  fun_ptr = x_NonTranslatedHSPToStdSeg;
2112 
2113  for (int i = 0; i < hit_list->hsplist_count; i++)
2114  {
2115  BlastHSPList* hsp_list = hit_list->hsplist_array[i];
2116  if (!hsp_list)
2117  continue;
2118 
2119  BlastHSP ** hsp_array = hsp_list->hsp_array;
2120 
2121  if(hsp_list->hspcnt > 0)
2122  {
      // Subject id, length and id list are looked up once per HSP list.
      // NOTE(review): subject_id is dereferenced without an Empty() check -
      // confirm the lookup always yields an id.
2123  TSeqPos subject_length = 0;
2124  CRef<CSeq_id> subject_id;
2125  vector<string> seqid_list;
2126  GetSequenceLengthAndId(subject_seqinfo, hsp_list->oid, subject_id, &subject_length);
2127  GetFilteredRedundantSeqids(*subject_seqinfo, hsp_list->oid, seqid_list, subject_id->IsGi());
2128 
2129  for (int j = 0; j < hsp_list->hspcnt; j++)
2130  {
2131  BlastHSP* hsp = hsp_array[j];
2132 
      // NULL entries in the HSP array are skipped
2133  if(!hsp)
2134  continue;
2135 
2136  seg_list.push_back((*fun_ptr) (hsp, query_id, subject_id,
2137  query_length, subject_length, seqid_list));
2138  }
2139  }
2140  }
2141 }
2142 
2143 END_SCOPE(blast)
2145 
2146 /* @} */
CRef< CSeq_align > RemapAlignToLoc(const CSeq_align &align, CSeq_align::TDim row, const CSeq_loc &loc)
Remap seq-align row to the seq-loc.
Definition: Seq_align.cpp:1401
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
#define CODON_LENGTH
Codons are always of length 3.
Definition: blast_def.h:63
void Blast_HSPListSortByEvalue(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by e-value, with scores and other criteria used to resolve ties.
Definition: blast_hits.c:1437
BlastHSPResults ** PHIBlast_HSPResultsSplit(const BlastHSPResults *results, const SPHIQueryInfo *pattern_info)
Splits the BlastHSPResults structure for a PHI BLAST search into an array of BlastHSPResults structur...
Definition: blast_hits.c:3570
double Blast_HSPGetQueryCoverage(const BlastHSP *hsp, Int4 query_length)
Calculate query coverage percentage of an hsp.
Definition: blast_hits.c:1034
Boolean Blast_ProgramIsPhiBlast(EBlastProgramType p)
Returns true if program is PHI-BLAST (i.e.
Definition: blast_program.c:70
#define TRANSLATED_SUBJECT_MASK
This bit is on if the subject is translated.
Definition: blast_program.h:58
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
@ eBlastTypeBlastn
Definition: blast_program.h:74
@ eBlastTypeBlastx
Definition: blast_program.h:75
@ eBlastTypePsiTblastn
Definition: blast_program.h:83
@ eBlastTypeRpsTblastn
Definition: blast_program.h:85
@ eBlastTypeMapping
Definition: blast_program.h:88
@ eBlastTypeTblastx
Definition: blast_program.h:79
@ eBlastTypePsiBlast
Definition: blast_program.h:82
@ eBlastTypeRpsBlast
Definition: blast_program.h:84
@ eBlastTypeTblastn
Definition: blast_program.h:77
@ eBlastTypeBlastp
Definition: blast_program.h:73
#define TRANSLATED_QUERY_MASK
This bit is on if the query is translated.
Definition: blast_program.h:56
Utility function to convert internal BLAST result structures into objects::CSeq_align_set objects.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
EResultType
Specifies the style of Seq-aligns that should be built from the internal BLAST data structures.
@ eSequenceComparison
Seq-aligns in the BLAST 2 Sequence style (one alignment per query-subject pair)
Wrapper class for BlastHSPResults .
Definition: blast_aux.hpp:343
Definition: Score.hpp:57
void OffsetRow(TDim row, TSignedSeqPos offset)
Offset row's coords.
Definition: Seq_align.cpp:1332
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSpliced_exon_chunk –.
void Validate(bool full_test=false) const
Validators.
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
Abstract base class to encapsulate retrieval of sequence identifiers.
Provides access (not ownership) to the C structures used to configure local BLAST search class implem...
Definition: query_data.hpp:55
Collection of masked regions for a single query sequence.
Definition: seqlocinfo.hpp:113
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
EGapAlignOpType
Operation types within the edit script.
Definition: gapinfo.h:44
@ eGapAlignDel2
Frame shift deletion of two nucleotides.
Definition: gapinfo.h:46
@ eGapAlignIns2
Frame shift insertion of two nucleotides.
Definition: gapinfo.h:50
@ eGapAlignIns1
Frame shift insertion of one nucleotide.
Definition: gapinfo.h:49
@ eGapAlignIns
Insertion: a gap in subject.
Definition: gapinfo.h:51
@ eGapAlignDel1
Frame shift deletion of one nucleotide.
Definition: gapinfo.h:47
@ eGapAlignDecline
Non-aligned region.
Definition: gapinfo.h:52
@ eGapAlignSub
Substitution.
Definition: gapinfo.h:48
@ eGapAlignDel
Deletion: a gap in query.
Definition: gapinfo.h:45
virtual CConstRef< objects::CSeq_loc > GetSeq_loc(size_t index)=0
Get the Seq_loc for the sequence indicated by index.
void BLASTPrelminSearchHitListToStdSeg(EBlastProgramType program, BlastHitList *hit_list, const CSeq_loc &query_loc, TSeqPos query_length, const IBlastSeqInfoSrc *subject_seqinfo, list< CRef< CStd_seg > > &seg_list)
CRef< CDense_diag > x_UngappedHSPToDenseDiag(BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< string > &seqid_list)
Creates a Dense-diag object from HSP information and sequence identifiers for a non-translated ungapp...
static CSeq_align::C_Segs::TStd s_CreateStdSegs(CRef< CSeq_id > master, CRef< CSeq_id > slave, CDense_seg::TStarts &starts, CDense_seg::TLens &lengths, CDense_seg::TStrands &strands, bool translate_master, bool translate_slave)
Creates a Std-seg object from the starts, lengths and strands vectors and two Seq-ids for a translate...
virtual CConstRef< objects::CSeq_loc > GetSeqLoc(Uint4 index) const =0
Method to retrieve the sequence location given its ordinal number.
static int s_GetCurrPos(int &pos, int pos2advance)
Advances position in a sequence, according to an edit script instruction.
static const TSeqPos kBlastAlignmentDim
BLAST alignments have always 2 dimensions (i.e.
static void s_CollectSeqAlignData(const BlastHSP *hsp, const GapEditScript *esp, unsigned int first, unsigned int nsegs, CDense_seg::TStarts &starts, CDense_seg::TLens &lengths, CDense_seg::TStrands &strands, Int4 query_length, Int4 subject_length, bool translate1, bool translate2)
Fills vectors of start positions, lengths and strands for all alignment segments.
static void s_CorrectUASequence(BlastHSP *hsp)
Checks if any decline-to-align segments immediately follow an insertion or deletion,...
#define SMALLEST_EVALUE
Threshold below which e-values are saved as 0.
static CRef< CSeq_align > s_BlastHSP2SeqAlign(EBlastProgramType program, BlastHSP *hsp, CRef< CSeq_id > id1, CRef< CSeq_id > id2, Int4 query_length, Int4 subject_length)
Converts a traceback editing block to a Seq-align, provided the 2 sequence identifiers.
static void s_CreateDenseg(CDense_seg &dense_seg, CRef< CSeq_id > master, CRef< CSeq_id > slave, CDense_seg::TStarts &starts, CDense_seg::TLens &lengths, CDense_seg::TStrands &strands)
Creates a Dense-seg object from the starts, lengths and strands vectors and two Seq-ids.
const char BLASTNA_TO_IUPACNA[]
Translates between blastna and iupacna.
void GetFilteredRedundantSeqids(const IBlastSeqInfoSrc &sisrc, int oid, vector< string > &seqids, bool use_gis=true)
Get Seqids for a sequence in a redundant database.
static size_t s_CalculateScoreVectorSize(const BlastHSP *hsp, const vector< string > &seqid_list)
Computes the exact size of a CSeq_align::TScore for a given HSP.
static TSeqPos s_GetAlignmentStart(int &curr_pos, int num, ENa_strand strand, bool translate, int length, int original_length, short frame)
Finds the starting position of a sequence segment in an alignment, given an editing script.
static CRef< CScore > s_MakeScore(const string &ident_string, double d, int i, bool is_integer)
Creates and initializes CScore with a given name, and with integer or double value.
CRef< CSeq_align_set > BlastHitList2SeqAlign_OMF(const BlastHitList *hit_list, EBlastProgramType prog, const CSeq_loc &query_loc, TSeqPos query_length, const IBlastSeqInfoSrc *seqinfo_src, bool is_gapped, bool is_ooframe, TSeqLocInfoVector &subj_masks)
static Int4 s_GetProteinFrameLength(Int4 nuc_length, Int2 frame)
Finds length of a protein frame given a nucleotide length and a frame number.
static TSeqAlignVector s_BLAST_OneSubjectResults2CSeqAlign(const BlastHSPResults *results, ILocalQueryData &query_data, const IBlastSeqInfoSrc &seqinfo_src, EBlastProgramType prog, Uint4 subj_idx, bool is_gapped, bool is_ooframe, vector< TSeqLocInfoVector > &subj_masks)
Extracts results from the BlastHSPResults structure for only one subject sequence,...
CRef< CStd_seg > x_NonTranslatedHSPToStdSeg(BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< string > &seqid_list)
Creates a Std-seg object from HSP information and sequence identifiers for a non-translated ungapped ...
static void s_AddUserObjectToSeqAlign(CRef< CSeq_align > &seqalign, const vector< string > &seqid_list)
Produce UserObject with Seq-ids to limit formatting to ("use_this_gi")
virtual BlastQueryInfo * GetQueryInfo()=0
Accessor for the BlastQueryInfo structure.
static ENa_strand s_Frame2Strand(short frame)
Converts a frame into the appropriate strand.
TSeqAlignVector PhiBlastResults2SeqAlign_OMF(const BlastHSPResults *results, EBlastProgramType prog, class ILocalQueryData &query, const IBlastSeqInfoSrc *seqinfo_src, const SPHIQueryInfo *pattern_info, vector< TSeqLocInfoVector > &subj_masks)
static TSeqAlignVector s_BlastResults2SeqAlignDatabaseSearch_OMF(const BlastHSPResults *results, EBlastProgramType prog, class ILocalQueryData &query, const IBlastSeqInfoSrc *seqinfo_src, bool is_gapped, bool is_ooframe, vector< TSeqLocInfoVector > &subj_masks)
#define GAP_VALUE
Value in the Dense-seg indicating a gap.
static CRef< CSeq_align > s_CreateSeqAlign(CRef< CSeq_id > master, CRef< CSeq_id > slave, CDense_seg::TStarts starts, CDense_seg::TLens lengths, CDense_seg::TStrands strands, bool translate_master, bool translate_slave)
Creates a Seq-align for a single HSP from precalculated vectors of start positions,...
static void s_ValidateExon(const CSpliced_exon &exon, const CSeq_id &product_id, const CSeq_id &genomic_id)
virtual size_t GetNumQueries()=0
Get the number of queries.
static void s_RemapToSubjectLoc(CRef< CSeq_align > &subj_aligns, const CSeq_loc &subj_loc)
Remap subject alignment if its location specified the reverse strand or a starting location other tha...
TSeqAlignVector LocalBlastResults2SeqAlign(BlastHSPResults *hsp_results, ILocalQueryData &local_data, const IBlastSeqInfoSrc &seqinfo_src, EBlastProgramType program, bool gapped, bool oof_mode, vector< TSeqLocInfoVector > &subj_masks, EResultType result_type)
Convert traceback output into Seq-align format.
CRef< CSeq_align_set > CreateEmptySeq_align_set()
Constructs an empty Seq-align-set containing an empty discontinuous seq-align, and appends it to a pr...
void BLASTHspListToSeqAlign(EBlastProgramType program, BlastHSPList *hsp_list, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, bool is_ooframe, const vector< string > &seqid_list, vector< CRef< CSeq_align > > &sa_vector)
This is called for each query and each subject in a BLAST search.
virtual size_t Size() const =0
Returns the size of the underlying container of sequences.
void MakeSplicedSeg(CSpliced_seg &spliced_seg, CRef< CSeq_id > product_id, CRef< CSeq_id > genomic_id, int product_length, const HSPChain *chain)
Convert a spliced alignment in BlastHSPChain into Spliced_seg.
virtual size_t GetSeqLength(size_t index)=0
Get the length of the sequence indicated by index.
void BLASTUngappedHspListToSeqAlign(EBlastProgramType program, BlastHSPList *hsp_list, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< string > &seqid_list, vector< CRef< CSeq_align > > &sa_vector)
Creates a Seq-align from an HSP list for an ungapped search.
CRef< CStd_seg > x_UngappedHSPToStdSeg(BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< string > &seqid_list)
Creates a Std-seg object from HSP information and sequence identifiers for a translated ungapped sear...
void RemapToQueryLoc(CRef< CSeq_align > sar, const CSeq_loc &query)
Remaps Seq-align offsets relative to the query Seq-loc.
virtual bool CanReturnPartialSequence() const =0
Return true if the implementation can return anything besides a seq-loc for the entire sequence.
static TSeqAlignVector s_BlastResults2SeqAlignSequenceCmp_OMF(const BlastHSPResults *results, EBlastProgramType prog, class ILocalQueryData &query_data, const IBlastSeqInfoSrc *seqinfo_src, bool is_gapped, bool is_ooframe, vector< TSeqLocInfoVector > &subj_masks)
static void s_AddScoresToSeqAlign(CRef< CSeq_align > &seqalign, const BlastHSP *hsp, const vector< string > &seqid_list, Int4 query_length)
Given an HSP structure, creates a list of scores and inserts them into a Seq-align.
static void s_BuildScoreList(const BlastHSP *hsp, CSeq_align::TScore &scores, const vector< string > &seqid_list, Int4 query_length)
Creates a list of score objects for a Seq-align, given an HSP structure.
void GetSequenceLengthAndId(const IBlastSeqInfoSrc *seqinfo_src, int oid, CRef< objects::CSeq_id > &seqid, TSeqPos *length)
Retrieves subject sequence Seq-id and length.
static void s_AdjustNegativeSubjFrameInBlastn(ENa_strand subj_strand, EBlastProgramType program, BlastHSPList *hsp_list)
This function changes the subject frame for HSPs if the program is blastn and the subject was specifi...
virtual bool GetMasks(Uint4 index, const TSeqRange &target_range, TMaskedSubjRegions &retval) const =0
Retrieves the subject masks for the corresponding index.
static TSeqAlignVector s_TransposeSeqAlignVector(const TSeqAlignVector &alnvec, const size_t num_queries, const size_t num_subjects)
Transpose the (linearly organized) seqalign set matrix from (q1 s1 q2 s1 ...
static CRef< CSeq_align > s_OOFBlastHSP2SeqAlign(EBlastProgramType program, BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length)
This function is used for out-of-frame traceback conversion Converts an OOF editing script chain to a...
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
#define _TRACE(message)
Definition: ncbidbg.hpp:122
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
C & SerialAssign(C &dest, const C &src, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
Definition: serialbase.hpp:482
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
void SetEmpty(TEmpty &v)
Definition: Seq_loc.hpp:981
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:5103
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
void SetType(TType &value)
Assign a value to Type data member.
Tdata & Set(void)
Assign a value to data member.
TScore & SetScore(void)
Assign a value to Score data member.
Definition: Seq_align_.hpp:902
TLens & SetLens(void)
Assign a value to Lens data member.
Definition: Dense_seg_.hpp:561
vector< CRef< CScore > > TScore
Definition: Seq_align_.hpp:398
list< CRef< CStd_seg > > TStd
Definition: Seq_align_.hpp:196
void SetProduct_id(TProduct_id &value)
Assign a value to Product_id data member.
vector< TSeqPos > TLens
Definition: Dense_seg_.hpp:108
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
vector< ENa_strand > TStrands
Definition: Dense_seg_.hpp:109
TExons & SetExons(void)
Assign a value to Exons data member.
void SetProduct_length(TProduct_length value)
Assign a value to Product_length data member.
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Seq_align_.hpp:865
vector< TSignedSeqPos > TStarts
Definition: Dense_seg_.hpp:107
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Dense_seg_.hpp:427
vector< TSeqPos > TStarts
Definition: Dense_diag_.hpp:94
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
vector< CRef< CSeq_id > > TIds
Definition: Dense_seg_.hpp:106
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
vector< CRef< CSeq_id > > TIds
Definition: Dense_diag_.hpp:93
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
vector< CRef< CSeq_id > > TIds
Definition: Std_seg_.hpp:92
TExt & SetExt(void)
Assign a value to Ext data member.
TStarts & SetStarts(void)
Assign a value to Starts data member.
Definition: Dense_seg_.hpp:536
void SetProduct_type(TProduct_type value)
Assign a value to Product_type data member.
TStrands & SetStrands(void)
Assign a value to Strands data member.
Definition: Dense_seg_.hpp:586
list< CRef< CSpliced_exon > > TExons
vector< ENa_strand > TStrands
Definition: Dense_diag_.hpp:96
void SetGenomic_id(TGenomic_id &value)
Assign a value to Genomic_id data member.
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
Definition: Dense_seg_.hpp:474
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
TNucpos GetNucpos(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
@ eType_partial
mapping pieces together
Definition: Seq_align_.hpp:103
@ eType_diags
unbroken, but not ordered, diagonals
Definition: Seq_align_.hpp:102
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
TFrom GetFrom(void) const
Get the From member data.
void SetFrom(TFrom value)
Assign a value to From data member.
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
#define MAPPER_SPLICE_SIGNAL
Definition: jumper.h:233
int i
if(yy_accept[yy_current_state])
static char * prog
Definition: mdb_load.c:33
const struct ncbi::grid::netcache::search::fields::SIZE size
EIPRangeType t
Definition: ncbi_localip.c:101
#define MIN(a, b)
returns smaller of a and b.
Definition: ncbi_std.h:112
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
#define TRUE
bool replacement for C indicating true.
Definition: ncbi_std.h:97
#define FALSE
bool replacement for C indicating false.
Definition: ncbi_std.h:101
#define ABS(a)
returns absolute value of a (|a|)
Definition: ncbi_std.h:122
#define MAX(a, b)
returns larger of a and b.
Definition: ncbi_std.h:117
static int * results[]
static int pattern_info(int what, void *where, BOOL unsetok)
Definition: pcre2test.c:4156
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
Definition: seqlocinfo.hpp:139
static const char * kScore
Definition: showdefline.cpp:77
The structure to hold all HSPs for a given sequence after the gapped alignment.
Definition: blast_hits.h:153
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Definition: blast_hits.h:154
Int4 hspcnt
Number of HSPs saved.
Definition: blast_hits.h:158
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Definition: blast_hits.h:157
Uint1 left_edge
Two subject bases before the alignment in the four least significant bits and flags in most significa...
Definition: blast_hits.h:116
The structure to contain all BLAST results, for multiple queries.
Definition: blast_hits.h:183
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Definition: blast_hits.h:185
Structure holding all information about an HSP.
Definition: blast_hits.h:126
double evalue
This HSP's e-value.
Definition: blast_hits.h:130
Int4 num_ident
Number of identical base pairs in this HSP.
Definition: blast_hits.h:128
BlastSeg query
Query sequence info.
Definition: blast_hits.h:131
double bit_score
Bit score, calculated from score.
Definition: blast_hits.h:129
Int4 num
How many HSP's are linked together for sum statistics evaluation? If unset (0), this HSP is not part ...
Definition: blast_hits.h:135
BlastSeg subject
Subject sequence info.
Definition: blast_hits.h:132
GapEditScript * gap_info
ALL gapped alignment is here.
Definition: blast_hits.h:134
Int4 num_positives
Definition: blast_hits.h:144
Int2 comp_adjustment_method
which mode of composition adjustment was used; relevant only for blastp and tblastn
Definition: blast_hits.h:139
Int4 score
This HSP's raw score.
Definition: blast_hits.h:127
BlastHSPMappingInfo * map_info
Definition: blast_hits.h:146
The structure to contain all BLAST results for one query sequence.
Definition: blast_hits.h:169
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Definition: blast_hits.h:176
Int4 hsplist_count
Filled size of the HSP lists array.
Definition: blast_hits.h:170
The query related information.
struct SPHIQueryInfo * pattern_info
Counts of PHI BLAST pattern occurrences, used in PHI BLAST only.
Int4 end
End of hsp.
Definition: blast_hits.h:99
Int2 frame
Translation frame.
Definition: blast_hits.h:97
Int4 offset
Start of hsp.
Definition: blast_hits.h:98
Edit script: linked list of correspondences between two sequences.
Definition: gapinfo.h:57
Int4 * num
Array of number of operations.
Definition: gapinfo.h:59
Int4 size
Size of above arrays.
Definition: gapinfo.h:60
EGapAlignOpType * op_type
Array of type of operation.
Definition: gapinfo.h:58
A chain of HSPs: spliced alignment.
Definition: spliced_hits.h:60
HSPContainer * hsps
A list of HSPs that belong to this chain.
Definition: spliced_hits.h:64
struct HSPContainer * next
Definition: spliced_hits.h:45
BlastHSP * hsp
Definition: spliced_hits.h:44
Uint1 query_base
Query base at this position.
Definition: jumper.h:89
Uint1 subject_base
Subject base at this position.
Definition: jumper.h:90
Int4 query_pos
Query position.
Definition: jumper.h:88
Alignment edit script for gapped alignment.
Definition: jumper.h:96
JumperEdit * edits
Definition: jumper.h:97
Int4 num_edits
Definition: jumper.h:98
In PHI BLAST, structure containing information about all pattern occurrences in query.
Definition: blast_def.h:300
static string query
#define _ASSERT
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
Modified on Fri Sep 20 14:57:35 2024 by modify_doxy.py rev. 669887