NCBI C++ ToolKit
Seq_align.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: Seq_align.cpp 100703 2023-08-31 19:30:37Z lavr $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: .......
27  *
28  * File Description:
29  * .......
30  *
31  * Remark:
32  * This code was originally generated by application DATATOOL
33  * using specifications from the data definition file
34  * 'seqalign.asn'.
35  */
36 
37 // standard includes
38 #include <ncbi_pch.hpp>
55 #include <serial/iterator.hpp>
56 #include <util/static_set.hpp>
57 
58 // generated includes
60 
61 // generated classes
62 
64 
65 BEGIN_objects_SCOPE // namespace ncbi::objects::
66 
67 // destructor
69 {
70 }
71 
72 
74 {
75  switch (GetSegs().Which()) {
76  case C_Segs::e_Denseg:
77  return GetSegs().GetDenseg().CheckNumRows();
78 
79  //case C_Segs::e_Packed:
80  //return GetSegs().GetPacked().CheckNumRows();
81 
82  case C_Segs::e_Sparse:
83  return GetSegs().GetSparse().CheckNumRows();
84 
85  case C_Segs::e_Spliced:
86  {{
87  // spliced seg always has two rows: genomic and protein/transcript
88  return 2;
89  }}
90 
91  case C_Segs::e_Disc:
92  {{
93  TDim numrows = 0;
94  ITERATE (C_Segs::TDisc::Tdata, std_i, GetSegs().GetDisc().Get()) {
95  TDim seg_numrows = (*std_i)->CheckNumRows();
96  if (numrows) {
97  if ( seg_numrows != numrows ) {
98  NCBI_THROW(CSeqalignException, eInvalidAlignment,
99  "CSeq_align::CheckNumRows(): Number of rows "
100  "is not the same for each disc seg.");
101  }
102  } else {
103  numrows = seg_numrows;
104  }
105  }
106  return numrows;
107  }}
108 
109  case C_Segs::e_Std:
110  {{
111  TDim numrows = 0;
112  ITERATE (C_Segs::TStd, std_i, GetSegs().GetStd()) {
113  TDim seg_numrows = (*std_i)->CheckNumRows();
114  if (numrows) {
115  if (seg_numrows != numrows) {
116  NCBI_THROW(CSeqalignException, eInvalidAlignment,
117  "CSeq_align::CheckNumRows(): Number of rows "
118  "is not the same for each std seg.");
119  }
120  } else {
121  numrows = seg_numrows;
122  }
123  }
124  return numrows;
125  }}
126 
127  case C_Segs::e_Dendiag:
128  {
129  TDim numrows = 0;
130  ITERATE (C_Segs::TDendiag, it, GetSegs().GetDendiag()) {
131  TDim seg_numrows = (*it)->CheckNumRows();
132  if (numrows) {
133  if ( seg_numrows != numrows ) {
134  NCBI_THROW(CSeqalignException, eInvalidAlignment,
135  "CSeq_align::CheckNumRows(): Number of rows "
136  "is not the same for each dendiag seg.");
137  }
138  } else {
139  numrows = seg_numrows;
140  }
141  }
142  return numrows;
143  }
144 
145  default:
146  NCBI_THROW(CSeqalignException, eUnsupported,
147  "CSeq_align::CheckNumRows() currently does not handle "
148  "this type of alignment");
149  }
150 }
151 
152 
154 {
155  switch (GetSegs().Which()) {
156  case C_Segs::e_Denseg:
157  return GetSegs().GetDenseg().GetSeqRange(row);
158 
159  case C_Segs::e_Sparse:
160  return GetSegs().GetSparse().GetSeqRange(row);
161 
162  case C_Segs::e_Dendiag:
163  {
164  CRange<TSeqPos> rng;
165  ITERATE (C_Segs::TDendiag, dendiag_i, GetSegs().GetDendiag()) {
166  rng.CombineWith((*dendiag_i)->GetSeqRange(row));
167  }
168  return rng;
169  }
170 
171  case C_Segs::e_Std:
172  {
173  CRange<TSeqPos> rng;
174  CSeq_id seq_id;
175  size_t seg_i = 0;
176  ITERATE (C_Segs::TStd, std_i, GetSegs().GetStd()) {
177  TDim row_i = 0;
178  ITERATE (CStd_seg::TLoc, i, (*std_i)->GetLoc()) {
179  const CSeq_loc& loc = **i;
180  if (row_i++ == row) {
181  if (loc.IsInt()) {
182  if ( !seg_i ) {
183  seq_id.Assign(loc.GetInt().GetId());
184  } else if (seq_id.Compare(loc.GetInt().GetId())
185  != CSeq_id::e_YES) {
187  eInvalidRowNumber,
188  "CSeq_align::GetSeqRange():"
189  " Row seqids not consistent."
190  " Cannot determine range.");
191  }
192  rng.CombineWith
194  (loc.GetInt().GetFrom(),
195  loc.GetInt().GetTo()));
196  } else if (loc.IsPnt()) {
197  if ( !seg_i ) {
198  seq_id.Assign(loc.GetPnt().GetId());
199  } else if (seq_id.Compare(loc.GetPnt().GetId())
200  != CSeq_id::e_YES) {
202  eInvalidRowNumber,
203  "CSeq_align::GetSeqRange():"
204  " Row seqids not consistent."
205  " Cannot determine range.");
206  }
207  rng.CombineWith
209  (loc.GetPnt().GetPoint(),
210  loc.GetPnt().GetPoint()));
211  }
212  }
213  }
214  if (row < 0 || row >= row_i) {
215  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
216  "CSeq_align::GetSeqRange():"
217  " Invalid row number");
218  }
219  if (CanGetDim() && row_i != GetDim()) {
220  NCBI_THROW(CSeqalignException, eInvalidAlignment,
221  "CSeq_align::GetSeqRange():"
222  " loc.size is inconsistent with dim");
223  }
224  seg_i++;
225  }
226  if (rng.Empty()) {
227  NCBI_THROW(CSeqalignException, eInvalidAlignment,
228  "CSeq_align::GetSeqRange(): Row is empty");
229  }
230  return rng;
231  }
232 
233  case C_Segs::e_Disc:
234  {
235  CRange<TSeqPos> rng;
236  ITERATE (C_Segs::TDisc::Tdata, disc_i, GetSegs().GetDisc().Get()) {
237  rng.CombineWith((*disc_i)->GetSeqRange(row));
238  }
239  return rng;
240  }
241 
242  case C_Segs::e_Spliced:
243  return GetSegs().GetSpliced().GetSeqRange(row);
244 
245  default:
246  NCBI_THROW(CSeqalignException, eUnsupported,
247  "CSeq_align::GetSeqRange() currently does not handle "
248  "this type of alignment.");
249  }
250 }
251 
253 {
254  switch (GetSegs().Which()) {
255  case C_Segs::e_Denseg:
256  return GetSegs().GetDenseg().GetSeqStart(row);
257  case C_Segs::e_Sparse:
258  return GetSegs().GetSparse().GetSeqStart(row);
259  case C_Segs::e_Spliced:
260  return GetSegs().GetSpliced().GetSeqStart(row);
261  case C_Segs::e_Dendiag:
262  case C_Segs::e_Std:
263  case C_Segs::e_Disc:
264  return GetSeqRange(row).GetFrom();
265  default:
266  NCBI_THROW(CSeqalignException, eUnsupported,
267  "CSeq_align::GetSeqStart() currently does not handle "
268  "this type of alignment.");
269  }
270 }
271 
272 
274 {
275  switch (GetSegs().Which()) {
276  case C_Segs::e_Denseg:
277  return GetSegs().GetDenseg().GetSeqStop(row);
278  case C_Segs::e_Sparse:
279  return GetSegs().GetSparse().GetSeqStop(row);
280  case C_Segs::e_Spliced:
281  return GetSegs().GetSpliced().GetSeqStop(row);
282  case C_Segs::e_Dendiag:
283  case C_Segs::e_Std:
284  case C_Segs::e_Disc:
285  return GetSeqRange(row).GetTo();
286  default:
287  NCBI_THROW(CSeqalignException, eUnsupported,
288  "CSeq_align::GetSeqStop() currently does not handle "
289  "this type of alignment.");
290  }
291 }
292 
293 
295 {
296  switch (GetSegs().Which()) {
297  case C_Segs::e_Denseg:
298  return GetSegs().GetDenseg().GetSeqStrand(row);
299  case C_Segs::e_Sparse:
300  return GetSegs().GetSparse().GetSeqStrand(row);
301  case C_Segs::e_Spliced:
302  return GetSegs().GetSpliced().GetSeqStrand(row);
303  case C_Segs::e_Disc:
304  return GetSegs().GetDisc().Get().front()->GetSeqStrand(row);
305  case C_Segs::e_Std:
306  return GetSegs().GetStd().front()->GetLoc()[row]->GetStrand();
307  case C_Segs::e_Dendiag:
308  return GetSegs().GetDendiag().front()->GetSeqStrand(row);
309  default:
310  NCBI_THROW(CSeqalignException, eUnsupported,
311  "CSeq_align::GetSeqStrand() currently does not handle "
312  "this type of alignment.");
313  }
314 }
315 
316 
318 {
319  switch (GetSegs().Which()) {
320  case C_Segs::e_Denseg:
321  return GetSegs().GetDenseg().GetSeq_id(row);
322 
323  case C_Segs::e_Sparse:
324  return GetSegs().GetSparse().GetSeq_id(row);
325 
326  case C_Segs::e_Dendiag:
327  {{
328  // If segments have different number of rows, this will try
329  // to find the segment with enough rows to get the id.
330  ITERATE(CSeq_align::C_Segs::TDendiag, seg, GetSegs().GetDendiag()) {
331  if( (*seg)->IsSetIds() &&
332  (size_t)row < (*seg)->GetIds().size() ) {
333  return *(*seg)->GetIds()[row];
334  }
335  }
336  break;
337  }}
338 
339  case C_Segs::e_Std:
340  {{
341  // If segments have different number of rows, this will try
342  // to find the segment with enough rows to get the id.
343  ITERATE(CSeq_align::C_Segs::TStd, seg, GetSegs().GetStd()) {
344  if ( (*seg)->IsSetIds() &&
345  (size_t)row < (*seg)->GetIds().size()) {
346  return *((*seg)->GetIds()[row]);
347  }
348  else if ( (*seg)->IsSetLoc() &&
349  (size_t)row < (*seg)->GetLoc().size() ) {
350  const CSeq_loc& loc = *(*seg)->GetLoc()[row];
351  CConstRef<CSeq_id> id(loc.GetId());
352  if (id) {
353  return *id;
354  }
355  }
356  }
357  }}
358  break;
359 
360  case C_Segs::e_Disc:
361  {{
362  // Try to find a sub-alignment for which we can get a
363  // Seq-id for this row.
364  ITERATE (CSeq_align_set::Tdata, sub_aln, GetSegs().GetDisc().Get()) {
365  try {
366  const CSeq_id& rv = (*sub_aln)->GetSeq_id(row);
367  return rv;
368  }
369  catch (const CSeqalignException&) {
370  }
371  }
372  }}
373  break;
374 
375  case C_Segs::e_Spliced:
376  {{
377  // Technically, there is no row order for product and product.
378  // However, in spliced seg, since product comes first, we assign it
379  // as row 0.
380  // Hence, the genomic is assigned to row 1.
381  const C_Segs::TSpliced& spliced_seg = GetSegs().GetSpliced();
382  if (row == 0 && spliced_seg.IsSetProduct_id()) {
383  return spliced_seg.GetProduct_id();
384  } else if (row == 1 && spliced_seg.IsSetGenomic_id()) {
385  return spliced_seg.GetGenomic_id();
386  }
387  }}
388  break;
389 
390  default:
391  NCBI_THROW(CSeqalignException, eUnsupported,
392  "CSeq_align::GetSeq_id() currently does not handle "
393  "this type of alignment.");
394  }
395 
396  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
397  "CSeq_align::GetSeq_id(): "
398  "can not get seq-id for the row requested.");
399  // return CSeq_id();
400 }
401 
402 
404 static const TScoreNamePair sc_ScoreNames[] = {
405  { CSeq_align::eScore_Score, "score" },
406  { CSeq_align::eScore_Blast, "score" },
407  { CSeq_align::eScore_BitScore, "bit_score" },
408  { CSeq_align::eScore_EValue, "e_value" },
409  { CSeq_align::eScore_AlignLength, "align_length" },
410  { CSeq_align::eScore_IdentityCount, "num_ident" },
411  { CSeq_align::eScore_PositiveCount, "num_positives" },
412  { CSeq_align::eScore_NegativeCount, "num_negatives" },
413  { CSeq_align::eScore_MismatchCount, "num_mismatch" },
414  { CSeq_align::eScore_GapCount, "gap_count" },
415  { CSeq_align::eScore_PercentIdentity_Gapped, "pct_identity_gap" },
416  { CSeq_align::eScore_PercentIdentity_Ungapped, "pct_identity_ungap" },
417  { CSeq_align::eScore_PercentIdentity_GapOpeningOnly, "pct_identity_gapopen_only" },
418  { CSeq_align::eScore_PercentCoverage, "pct_coverage" },
419  { CSeq_align::eScore_SumEValue, "sum_e" },
420  { CSeq_align::eScore_CompAdjMethod, "comp_adjustment_method" },
421  { CSeq_align::eScore_HighQualityPercentCoverage, "pct_coverage_hiqual" },
422  { CSeq_align::eScore_Matches, "matches"},
423  { CSeq_align::eScore_OverallIdentity, "identity"},
424  { CSeq_align::eScore_Splices, "splices"},
425  { CSeq_align::eScore_ConsensusSplices, "consensus_splices"},
426  { CSeq_align::eScore_ProductCoverage, "product_coverage"},
427  { CSeq_align::eScore_ExonIdentity, "exon_identity"}
428 };
429 
430 static const char* const sc_ScoreHelpText[] = {
431  "Blast score",
432  "Blast score",
433  "Blast-style bit score",
434  "Blast-style e-value",
435  "Length of the aligned segments, including the length of all gap segments",
436  "Count of identities",
437  "Count of positives; protein-to-DNA score",
438  "Count of negatives; protein-to-DNA score",
439  "Count of mismatches",
440  "Number of gaps in the alignment",
441  "Percent identity (0.0-100.0); count each base in a gap in any row as a mismatch",
442  "Percent identity (0.0-100.0); don't count gaps",
443  "Percent identity (0.0-100.0); count a gap of any length in any row as a mismatch of length 1",
444  "Percentage of query sequence aligned to subject (0.0-100.0)",
445  "Blast-style sum_e",
446  "Composition-adjustment method from BLAST",
447  "Percent coverage (0.0-100.0) of high quality region",
448  "Count of identities",
449  "Percent identity; count gaps within exons and introns on product",
450  "Number of splices; 2 x number of introns that have no gap on product",
451  "Number of splices with consensus splice sequence",
452  "Percentage of query sequence aligned to subject (0.0-100.0)",
453  "Percent identity; count gaps within exons only"
454 };
455 
456 static const bool sc_IsInteger[] = {
457  true, // eScore_Score
458  true, // eScore_Blast
459  false, // eScore_BitScore
460  false, // eScore_EValue
461  true, // eScore_AlignLength
462  true, // eScore_IdentityCount
463  true, // eScore_PositiveCount
464  true, // eScore_NegativeCount
465  true, // eScore_MismatchCount
466  true, // eScore_GapCount
467  false, // eScore_PercentIdentity_Gapped
468  false, // eScore_PercentIdentity_Ungapped
469  false, // eScore_PercentIdentity_GapOpeningOnly
470  false, // eScore_PercentCoverage
471  false, // eScore_SumEValue
472  false, // eScore_CompAdjMethod
473  false, // eScore_HighQualityPercentCoverage
474  true, // eScore_Matches
475  false, // eScore_OverallIdentity
476  true, // eScore_Splices
477  true, // eScore_ConsensusSplices
478  false, // eScore_ProductCoverage
479  false // eScore_ExonIdentity
480 };
481 
482 
484 {
485  DEFINE_STATIC_FAST_MUTEX(s_ScoreNameMap_mutex);
486  CFastMutexGuard guard(s_ScoreNameMap_mutex);
487  {
488  static TScoreNameMap m_ScoreNameMap;
489  if (m_ScoreNameMap.empty()) {
490  /// initialize map
491  for(unsigned score = eScore_Blast;
492  score <= eScore_ExonIdentity; ++score)
493  {
494  m_ScoreNameMap[sc_ScoreNames[score].second] =
495  sc_ScoreNames[score].first;
496  }
497  }
498 
499  return m_ScoreNameMap;
500  }
501 }
502 
504 {
505  return sc_ScoreNames[score].second;
506 }
507 
509 {
510  return sc_ScoreHelpText[score];
511 }
512 
514 {
515  return sc_IsInteger[score];
516 }
517 
518 
519 /// retrieve a named score object
521 {
522  CConstRef<CScore> score;
523  if (IsSetScore()) {
524  ITERATE (TScore, iter, GetScore()) {
525  if ((*iter)->IsSetId() && (*iter)->GetId().IsStr() &&
526  (*iter)->GetId().GetStr() == name) {
527  score = *iter;
528  break;
529  }
530  }
531  }
532 
533  return score;
534 }
535 
536 
538 {
539  CRef<CScore> score;
540  if (IsSetScore()) {
541  NON_CONST_ITERATE (TScore, iter, SetScore()) {
542  if ((*iter)->IsSetId() && (*iter)->GetId().IsStr() &&
543  (*iter)->GetId().GetStr() == name) {
544  score = *iter;
545  break;
546  }
547  }
548  }
549 
550  if ( !score ) {
551  score.Reset(new CScore);
552  score->SetId().SetStr(name);
553  SetScore().push_back(score);
554  }
555 
556  return score;
557 }
558 
559 
560 ///---------------------------------------------------------------------------
561 /// PRE : name of score to return
562 /// POST: whether or not we found that score; score converted to int
563 bool CSeq_align::GetNamedScore(const string &id, int &score) const
564 {
566  if (ref) {
567  if (ref->GetValue().IsInt()) {
568  score = ref->GetValue().GetInt();
569  } else {
570  score = (int)ref->GetValue().GetReal();
571  }
572  return true;
573  }
574  return false;
575 }
576 
577 ///---------------------------------------------------------------------------
578 /// PRE : name of score to return
579 /// POST: whether or not we found that score; score converted to double
580 bool CSeq_align::GetNamedScore(const string &id, double &score) const
581 {
583  if (ref) {
584  if (ref->GetValue().IsInt()) {
585  score = (double)ref->GetValue().GetInt();
586  } else {
587  score = ref->GetValue().GetReal();
588  }
589  return true;
590  }
591  return false;
592 }
593 
594 
596 {
597  return GetNamedScore(sc_ScoreNames[type].second, score);
598 }
599 
600 bool CSeq_align::GetNamedScore(EScoreType type, double& score) const
601 {
602  return GetNamedScore(sc_ScoreNames[type].second, score);
603 }
604 
605 
606 void CSeq_align::ResetNamedScore(const string& name)
607 {
608  if (IsSetScore()) {
609  NON_CONST_ITERATE (TScore, iter, SetScore()) {
610  if ((*iter)->IsSetId() && (*iter)->GetId().IsStr() &&
611  (*iter)->GetId().GetStr() == name) {
612  SetScore().erase(iter);
613  break;
614  }
615  }
616  }
617 }
618 
620 {
622 }
623 
625 {
627  ref->SetValue().SetInt(score);
628 }
629 
631 {
633  ref->SetValue().SetReal(score);
634 }
635 
636 void CSeq_align::SetNamedScore(const string& id, int score)
637 {
638  CRef<CScore> ref = x_SetNamedScore(id);
639  ref->SetValue().SetInt(score);
640 }
641 
642 void CSeq_align::SetNamedScore(const string& id, double score)
643 {
644  CRef<CScore> ref = x_SetNamedScore(id);
645  ref->SetValue().SetReal(score);
646 }
647 
648 
649 void CSeq_align::Validate(bool full_test) const
650 {
651  switch (GetSegs().Which()) {
652  case TSegs::e_Dendiag:
653  ITERATE(TSegs::TDendiag, dendiag_it, GetSegs().GetDendiag()) {
654  (*dendiag_it)->Validate();
655  }
656  break;
657  case C_Segs::e_Denseg:
658  GetSegs().GetDenseg().Validate(full_test);
659  break;
660  case C_Segs::e_Disc:
661  ITERATE(CSeq_align_set::Tdata, seq_align_it, GetSegs().GetDisc().Get()) {
662  (*seq_align_it)->Validate(full_test);
663  }
664  break;
665  case C_Segs::e_Std:
666  CheckNumRows();
667  break;
668  case C_Segs::e_Spliced:
669  GetSegs().GetSpliced().Validate(full_test);
670  break;
671  case C_Segs::e_Sparse:
672  GetSegs().GetSparse().Validate(full_test);
673  break;
674  default:
675  NCBI_THROW(CSeqalignException, eUnsupported,
676  "CSeq_align::Validate() currently does not handle "
677  "this type of alignment");
678  }
679 }
680 
681 
682 ///---------------------------------------------------------------------------
683 /// PRE : currently only implemented for dense-seg segments
684 /// POST: same alignment, opposite orientation
686 {
687  switch (GetSegs().Which()) {
688  case C_Segs::e_Denseg:
689  SetSegs().SetDenseg().Reverse();
690  break;
691  default:
692  NCBI_THROW(CSeqalignException, eUnsupported,
693  "CSeq_align::Reverse() currently only handles dense-seg "
694  "alignments");
695  }
696 }
697 
698 ///---------------------------------------------------------------------------
699 /// PRE : currently only implemented for dense-seg segments; two row numbers
700 /// POST: same alignment, position of the two rows has been swapped
702 {
703  switch (GetSegs().Which()) {
704  case C_Segs::e_Denseg:
705  SetSegs().SetDenseg().SwapRows(row1, row2);
706  break;
707 
708  case C_Segs::e_Std:
709  NON_CONST_ITERATE (C_Segs::TStd, it, SetSegs().SetStd()) {
710  (*it)->SwapRows(row1, row2);
711  }
712  break;
713 
714  case C_Segs::e_Disc:
715  SetSegs().SetDisc().SwapRows(row1, row2);
716  break;
717  default:
718  NCBI_THROW(CSeqalignException, eUnsupported,
719  "CSeq_align::SwapRows currently only handles dense-seg "
720  "alignments");
721  }
722 }
723 
724 ///----------------------------------------------------------------------------
725 /// PRE : the Seq-align has StdSeg segs
726 /// POST: Seq_align of type Dense-seg is created with m_Widths if necessary
729 {
730  if ( !GetSegs().IsStd() ) {
731  NCBI_THROW(CSeqalignException, eInvalidInputAlignment,
732  "CSeq_align::CreateDensegFromStdseg(): "
733  "Input Seq-align should have segs of type StdSeg!");
734  }
735 
737  sa->SetType(eType_not_set);
738  if (IsSetScore()) {
739  sa->SetScore() = GetScore();
740  }
741  CDense_seg& ds = sa->SetSegs().SetDenseg();
742 
743  typedef CDense_seg::TDim TNumrow;
744  typedef CDense_seg::TNumseg TNumseg;
745 
746  vector<TSeqPos> row_lens;
747  CDense_seg::TLens& seg_lens = ds.SetLens();
748  CDense_seg::TStarts& starts = ds.SetStarts();
749  CDense_seg::TStrands& strands = ds.SetStrands();
750  CDense_seg::TWidths& widths = ds.SetWidths();
751  vector<bool> widths_determined;
752 
753  TSeqPos row_len;
754  TSeqPos from, to;
755  ENa_strand strand;
756 
757 
758  TNumseg seg = 0;
759  TNumrow dim = 0, row = 0;
760  ITERATE (C_Segs::TStd, std_i, GetSegs().GetStd()) {
761 
762  const CStd_seg& ss = **std_i;
763 
764  seg_lens.push_back(0);
765  TSeqPos& seg_len = seg_lens.back();
766  row_lens.clear();
767  widths_determined.push_back(false);
768 
769  row = 0;
770  ITERATE (CStd_seg::TLoc, i, ss.GetLoc()) {
771 
772  const CSeq_id* seq_id = 0;
773  // push back initialization values
774  if (seg == 0) {
775  widths.push_back(0);
776  strands.push_back(eNa_strand_unknown);
777  }
778 
779  if ((*i)->IsInt()) {
780  const CSeq_interval& interval = (*i)->GetInt();
781 
782  // determine start and len
783  from = interval.GetFrom();
784  to = interval.GetTo();
785  starts.push_back(from);
786  row_len = to - from + 1;
787  row_lens.push_back(row_len);
788 
789  int width = 0;
790  // try to determine/check the seg_len and width
791  if (!seg_len) {
792  width = 0;
793  seg_len = row_len;
794  } else {
795  if (row_len * 3 == seg_len) {
796  seg_len /= 3;
797  width = 1;
798  } else if (row_len / 3 == seg_len) {
799  width = 3;
800  } else if (row_len != seg_len) {
801  NCBI_THROW(CSeqalignException, eInvalidInputAlignment,
802  "CreateDensegFromStdseg(): "
803  "Std-seg segment lengths not accurate!");
804  }
805  }
806  if (width > 0) {
807  widths_determined[seg] = true;
808  if (widths[row] > 0 && widths[row] != width) {
809  NCBI_THROW(CSeqalignException, eInvalidInputAlignment,
810  "CreateDensegFromStdseg(): "
811  "Std-seg segment lengths not accurate!");
812  } else {
813  widths[row] = width;
814  }
815  }
816 
817  // get the id
818  seq_id = &(*i)->GetInt().GetId();
819 
820  // determine/check the strand
821  if (interval.CanGetStrand()) {
822  strand = interval.GetStrand();
823  if (seg == 0 || strands[row] == eNa_strand_unknown) {
824  strands[row] = strand;
825  } else {
826  if (strands[row] != strand) {
828  eInvalidInputAlignment,
829  "CreateDensegFromStdseg(): "
830  "Inconsistent strands!");
831  }
832  }
833  }
834  } else if ((*i)->IsEmpty()) {
835  starts.push_back(-1);
836  if (seg == 0) {
837  strands[row] = eNa_strand_unknown;
838  }
839  seq_id = &(*i)->GetEmpty();
840  row_lens.push_back(0);
841  }
842 
843  // determine/check the id
844  if (seg == 0) {
845  CRef<CSeq_id> id(new CSeq_id);
846  SerialAssign(*id, *seq_id);
847  ds.SetIds().push_back(id);
848  } else {
849  CSeq_id& id = *ds.SetIds()[row];
850  if (!SerialEquals(id, *seq_id)) {
851  if (SeqIdChooser) {
852  SeqIdChooser->ChooseSeqId(id, *seq_id);
853  } else {
854  string errstr =
855  string("CreateDensegFromStdseg(): Seq-ids: ") +
856  id.AsFastaString() + " and " +
857  seq_id->AsFastaString() + " are not identical!" +
858  " Without the OM it cannot be determined if they belong to"
859  " the same sequence."
860  " Define and pass ChooseSeqId to resolve seq-ids.";
861  NCBI_THROW(CSeqalignException, eInvalidInputAlignment, errstr);
862  }
863  }
864  }
865 
866  // next row
867  row++;
868  if (seg == 0) {
869  dim++;
870  }
871  }
872  if (dim != ss.GetDim() || row != dim) {
873  NCBI_THROW(CSeqalignException, eInvalidInputAlignment,
874  "CreateDensegFromStdseg(): "
875  "Inconsistent dimentions!");
876  }
877 
878  if (widths_determined[seg]) {
879  // go back and determine/check widths
880  for (row = 0; row < dim; row++) {
881  if ((row_len = row_lens[row]) > 0) {
882  int width = 0;
883  if (row_len == seg_len * 3) {
884  width = 3;
885  } else if (row_len == seg_len) {
886  width = 1;
887  }
888  if (widths[row] > 0 && widths[row] != width) {
889  NCBI_THROW(CSeqalignException, eInvalidInputAlignment,
890  "CreateDensegFromStdseg(): "
891  "Std-seg segment lengths not accurate!");
892  } else {
893  widths[row] = width;
894  }
895  }
896  }
897  }
898 
899  // next seg
900  seg++;
901  }
902 
903  ds.SetDim(dim);
904  ds.SetNumseg(seg);
905 
906  // go back and finish lens determination
907  bool widths_failure = false;
908  bool widths_success = false;
909  for (seg = 0; seg < ds.GetNumseg(); seg++) {
910  if (!widths_determined[seg]) {
911  for(row = 0; row < dim; row++) {
912  if (starts[seg * dim + row] >= 0) {
913  int width = widths[row];
914  if (width == 3) {
915  seg_lens[seg] /= 3;
916  } else if (width == 0) {
917  widths_failure = true;
918  }
919  break;
920  }
921  }
922  } else {
923  widths_success = true;
924  }
925  }
926 
927  if (widths_failure) {
928  if (widths_success) {
929  NCBI_THROW(CSeqalignException, eInvalidInputAlignment,
930  "CreateDensegFromStdseg(): "
931  "Some widths cannot be determined!");
932  } else {
933  ds.ResetWidths();
934  }
935  }
936 
937  // finish the strands
938  for (seg = 1; seg < ds.GetNumseg(); seg++) {
939  for (row = 0; row < dim; row++) {
940  strands.push_back(strands[row]);
941  }
942  }
943 
944  return sa;
945 }
946 
947 
948 ///----------------------------------------------------------------------------
949 /// PRE : the Seq-align is a Dense-seg of aligned nucleotide sequences
950 /// POST: Seq_align of type Dense-seg is created with each of the m_Widths
951 /// equal to 3 and m_Lengths devided by 3.
954 {
955  if ( !GetSegs().IsDenseg() ) {
956  NCBI_THROW(CSeqalignException, eInvalidInputAlignment,
957  "CSeq_align::CreateTranslatedDensegFromNADenseg(): "
958  "Input Seq-align should have segs of type Dense-seg!");
959  }
960 
962  sa->SetType(eType_not_set);
963 
964  if (GetSegs().GetDenseg().IsSetWidths()) {
965  NCBI_THROW(CSeqalignException, eInvalidInputAlignment,
966  "CSeq_align::CreateTranslatedDensegFromNADenseg(): "
967  "Widths already exist for the original alignment");
968  }
969 
970  // copy from the original
971  sa->Assign(*this);
972 
973  CDense_seg& ds = sa->SetSegs().SetDenseg();
974 
975  // fix the lengths
976  const CDense_seg::TLens& orig_lens = GetSegs().GetDenseg().GetLens();
977  CDense_seg::TLens& lens = ds.SetLens();
978 
979  for (CDense_seg::TNumseg numseg = 0; numseg < ds.GetNumseg(); numseg++) {
980  if (orig_lens[numseg] % 3) {
981  string errstr =
982  string("CSeq_align::CreateTranslatedDensegFromNADenseg(): ") +
983  "Length of segment " + NStr::IntToString(numseg) +
984  " is not divisible by 3.";
985  NCBI_THROW(CSeqalignException, eInvalidInputAlignment, errstr);
986  } else {
987  lens[numseg] = orig_lens[numseg] / 3;
988  }
989  }
990 
991  // add the widths
992  ds.SetWidths().resize(ds.GetDim(), 3);
993 
994 #if _DEBUG
995  ds.Validate(true);
996 #endif
997 
998  return sa;
999 }
1000 
1001 
1002 /// Strict weak ordering for pairs (by first)
1003 /// Used by CreateDensegFromDisc
1004 template <typename T, typename Pred = less<TSeqPos> >
1005 struct ds_cmp {
1006  bool operator()(const T& x, const T& y) {
1007  return m_Pred(x.first, y.first);
1008  }
1009 private:
1010  Pred m_Pred;
1011 };
1012 
1013 
1016 {
1017  if ( !GetSegs().IsDisc() ) {
1018  NCBI_THROW(CSeqalignException, eInvalidInputAlignment,
1019  "CSeq_align::CreateDensegFromDisc(): "
1020  "Input Seq-align should have segs of type StdSeg!");
1021  }
1022 
1023  CRef<CSeq_align> new_sa(new CSeq_align);
1024  new_sa->SetType(eType_not_set);
1025  if (IsSetScore()) {
1026  new_sa->SetScore() = GetScore();
1027  }
1028 
1029  CDense_seg& new_ds = new_sa->SetSegs().SetDenseg();
1030 
1031  new_ds.SetDim(0);
1032  new_ds.SetNumseg(0);
1033 
1034 
1035  /// Order the discontinuous densegs
1036  typedef pair<TSeqPos, const CDense_seg *> TPosDsPair;
1037  typedef vector<TPosDsPair> TDsVec;
1038  TDsVec ds_vec;
1039  ds_vec.reserve(GetSegs().GetDisc().Get().size());
1040  int strand = -1;
1041  ITERATE (CSeq_align_set::Tdata, sa_i, GetSegs().GetDisc().Get()) {
1042  const CDense_seg& ds = (*sa_i)->GetSegs().GetDenseg();
1043  ds_vec.push_back(make_pair<TSeqPos, const CDense_seg *>(ds.GetSeqStart(0), &ds));
1044  if (ds.IsSetStrands() &&
1045  !ds.GetStrands().empty()) {
1046  if (strand < 0) {
1047  strand = ds.GetStrands()[0];
1048  } else {
1049  if (strand != ds.GetStrands()[0]) {
1050  NCBI_THROW(CSeqalignException, eInvalidInputAlignment,
1051  "CreateDensegFromDisc(): "
1052  "Inconsistent strands!");
1053  }
1054  }
1055  }
1056  }
1057  if ( !IsReverse(ENa_strand(strand)) ) {
1058  sort(ds_vec.begin(), ds_vec.end(),
1059  ds_cmp<TPosDsPair>());
1060  } else {
1061  sort(ds_vec.begin(), ds_vec.end(),
1062  ds_cmp<TPosDsPair, greater<TSeqPos> >());
1063  }
1064 
1065 
1066  /// First pass: determine dim & numseg
1067  CDense_seg::TStrands single_segment_strands;
1068  ITERATE(TDsVec, ds_i, ds_vec) {
1069  const CDense_seg& ds = *ds_i->second;
1070 
1071  /// Numseg
1072  new_ds.SetNumseg() += ds.GetNumseg();
1073 
1074  /// Dim
1075  if ( !new_ds.GetDim() ) {
1076  new_ds.SetDim(ds.GetDim());
1077  } else if (new_ds.GetDim() != ds.GetDim() ) {
1078  NCBI_THROW(CSeqalignException, eInvalidInputAlignment,
1079  "CreateDensegFromDisc(): "
1080  "All disc dense-segs need to have the same dimension!");
1081  }
1082 
1083  /// Strands?
1084  if ( !ds.GetStrands().empty() ) {
1085  if (single_segment_strands.empty()) {
1086  single_segment_strands.assign(ds.GetStrands().begin(),
1087  ds.GetStrands().begin() + ds.GetDim());
1088  } else {
1089  if ( !equal(single_segment_strands.begin(),
1090  single_segment_strands.end(),
1091  ds.GetStrands().begin()) ) {
1092  NCBI_THROW(CSeqalignException, eInvalidInputAlignment,
1093  "CreateDensegFromDisc(): "
1094  "All disc dense-segs need to have the same strands!");
1095  }
1096  }
1097  }
1098  }
1099 
1100  new_ds.SetStarts().resize(new_ds.GetDim() * new_ds.GetNumseg());
1101  new_ds.SetLens().resize(new_ds.GetNumseg());
1102  if ( !single_segment_strands.empty() ) {
1103  /// Multiply the strands by the number of segments.
1104  new_ds.SetStrands().reserve(new_ds.GetDim() * new_ds.GetNumseg());
1105  for (CDense_seg::TNumseg seg = 0;
1106  seg < new_ds.GetNumseg();
1107  ++seg) {
1108  new_ds.SetStrands().insert(new_ds.SetStrands().end(),
1109  single_segment_strands.begin(),
1110  single_segment_strands.begin() + new_ds.GetDim());
1111  }
1112  }
1113 
1114 
1115  /// Second pass: validate and set ids and starts
1116 
1117  CDense_seg::TNumseg new_seg = 0;
1118  int new_starts_i = 0;
1119 
1120  ITERATE(TDsVec, ds_i, ds_vec) {
1121  const CDense_seg& ds = *ds_i->second;
1122 
1123  _ASSERT(ds.GetStarts().size() == (size_t)ds.GetNumseg() * ds.GetDim());
1124  _ASSERT(new_ds.GetDim() == ds.GetDim());
1125 
1126  /// Ids
1127  if (new_ds.GetIds().empty()) {
1128  new_ds.SetIds().resize(new_ds.GetDim());
1129  for (CDense_seg::TDim row = 0; row < ds.GetDim(); ++row) {
1130  CRef<CSeq_id> id(new CSeq_id);
1131  SerialAssign(*id, *ds.GetIds()[row]);
1132  new_ds.SetIds()[row] = id;
1133  }
1134  } else {
1135  if (SeqIdChooser) {
1136  for (CDense_seg::TDim row = 0; row < ds.GetDim(); ++row) {
1137  SeqIdChooser->ChooseSeqId(*new_ds.SetIds()[row], *ds.GetIds()[row]);
1138  }
1139  } else {
1140 #if _DEBUG
1141  for (CDense_seg::TDim row = 0; row < ds.GetDim(); ++row) {
1142  const CSeq_id& new_id = *new_ds.GetIds()[row];
1143  const CSeq_id& id = *ds.GetIds()[row];
1144  if ( !SerialEquals(new_id, id) ) {
1145  string errstr =
1146  string("CreateDensegFromDisc(): Seq-ids: ") +
1147  new_id.AsFastaString() + " and " +
1148  id.AsFastaString() + " are not identical!" +
1149  " Without the OM it cannot be determined if they belong to"
1150  " the same sequence."
1151  " Define and pass ChooseSeqId to resolve seq-ids.";
1152  NCBI_THROW(CSeqalignException, eInvalidInputAlignment, errstr);
1153  }
1154  }
1155 #endif
1156  }
1157  }
1158 
1159  /// Starts
1160  int starts_i = 0;
1161  for (CDense_seg::TNumseg seg = 0;
1162  seg < ds.GetNumseg();
1163  ++new_seg, ++seg) {
1164 
1165  new_ds.SetLens()[new_seg] = ds.GetLens()[seg];
1166 
1167  for (CDense_seg::TDim row = 0;
1168  row < ds.GetDim();
1169  ++starts_i, ++new_starts_i, ++row) {
1170  new_ds.SetStarts()[new_starts_i] = ds.GetStarts()[starts_i];
1171  }
1172  }
1173  }
1174 
1175 
1176  /// Perform full test (including segment starts consistency check)
1177  new_sa->Validate(true);
1178 
1179 
1180  return new_sa;
1181 }
1182 
1183 static TSeqPos s_Distance(const TSeqRange &range1,
1184  const TSeqRange &range2)
1185 {
1186  if (range1.IntersectingWith(range2)) {
1187  return 0;
1188  }
1189  return max(range1,range2).GetFrom() - min(range1,range2).GetTo();
1190 }
1191 
1192 /// Join the alignments in the set into one alignment.
1194 {
1195  if (aligns.Get().size() == 1) {
1196  return aligns.Get().front();
1197  }
1198 
1199  /// There's more than one, so create a disc alignment
1200  CRef<CSeq_align> align(new CSeq_align);
1201  align->SetDim(2);
1203  align->SetSegs().SetDisc().Set() = aligns.Get();
1204 
1205  try {
1206  align->Validate(true);
1207  }
1208  catch (CException& e) {
1209  ERR_POST(Error << e);
1210  cerr << MSerial_AsnText << aligns;
1211  cerr << MSerial_AsnText << *align;
1212  throw;
1213  }
1214  return align;
1215 }
1216 
1218  TSeqPos discontinuity_threshold) const
1219 {
1220  if (IsSetDim() && GetDim() > 2) {
1222  "SplitOnLongDiscontinuity() only implemented for pairwise alignments");
1223  }
1224 
1225  size_t num_splits = 0;
1226  switch (GetSegs().Which()) {
1227  case TSegs::e_Denseg:
1228  {{
1229  // determine if we can extract any slice based on a discontinuity
1230  const CDense_seg& seg = GetSegs().GetDenseg();
1231  CDense_seg::TNumseg last_seg = 0;
1232  CDense_seg::TNumseg curr_seg = 0;
1233  TSeqRange curr_range0;
1234  TSeqRange curr_range1;
1235  for ( ; curr_seg < seg.GetNumseg(); ++curr_seg) {
1236  TSignedSeqPos p0_curr = seg.GetStarts()[ curr_seg * 2 ];
1237  TSignedSeqPos p1_curr = seg.GetStarts()[ curr_seg * 2 + 1];
1238  if (p0_curr < 0 || p1_curr < 0) {
1239  continue;
1240  }
1241 
1242  TSeqPos curr_seg_len = seg.GetLens()[curr_seg];
1243 
1244  TSeqRange prev_range0 = curr_range0;
1245  TSeqRange prev_range1 = curr_range1;
1246  curr_range0 = TSeqRange(p0_curr, p0_curr+curr_seg_len);
1247  curr_range1 = TSeqRange(p1_curr, p1_curr+curr_seg_len);
1248 
1249  if (prev_range0.Empty()) {
1250  /// We're at start of alignment, so there's no gap to check for
1251  continue;
1252  }
1253 
1254  if (prev_range0.NotEmpty() &&
1255  (s_Distance(curr_range0, prev_range0) > discontinuity_threshold ||
1256  s_Distance(curr_range1, prev_range1) > discontinuity_threshold))
1257  {
1258  aligns.push_back(x_CreateSubsegAlignment(last_seg, curr_seg - 1));
1259  ++num_splits;
1260  last_seg = curr_seg;
1261  }
1262  }
1263  if (num_splits) {
1264  /// We had to split the alignment; create a final subseg alignment
1265  aligns.push_back(x_CreateSubsegAlignment(last_seg, curr_seg - 1));
1266  ++num_splits;
1267  }
1268  }}
1269  break;
1270 
1271  case TSegs::e_Disc:
1272  {{
1273  const CSeq_align_set &segs = GetSegs().GetDisc();
1274  CSeq_align_set curr_disc;
1275  TSeqRange curr_range0;
1276  TSeqRange curr_range1;
1277  ITERATE (list< CRef<CSeq_align> >, seg_it, segs.Get()) {
1278  TSeqRange prev_range0 = curr_range0;
1279  TSeqRange prev_range1 = curr_range1;
1280  curr_range0 = (*seg_it)->GetSeqRange(0);
1281  curr_range1 = (*seg_it)->GetSeqRange(1);
1282  list< CRef<CSeq_align> > seg_splits;
1283  (*seg_it)->SplitOnLongDiscontinuity(seg_splits,
1284  discontinuity_threshold);
1285 
1286  if (prev_range0.Empty() ||
1287  (s_Distance(curr_range0, prev_range0) <= discontinuity_threshold &&
1288  s_Distance(curr_range1, prev_range1) <= discontinuity_threshold))
1289  {
1290  /// distance between this alignment and previous one is less than
1291  /// threshold; first split section of this alignment should be joined
1292  /// to previous one
1293  curr_disc.Set().push_back(seg_splits.front());
1294  seg_splits.pop_front();
1295  if (seg_splits.empty()) {
1296  continue;
1297  }
1298  }
1299 
1300  /// Put current contents of curr_disc in aligns
1301  aligns.push_back(s_GetJoinedAlignment(curr_disc));
1302  ++num_splits;
1303 
1304  /// Put results of current section's split in aligns except for last
1305  /// section; place last section in curr_disc, since it may have to be
1306  /// joined with next one
1307  num_splits += seg_splits.size();
1308  curr_disc.Set().clear();
1309  curr_disc.Set().push_back(seg_splits.back());
1310  seg_splits.pop_back();
1311 
1312  aligns.splice(aligns.end(), seg_splits);
1313  }
1314  if (num_splits) {
1315  /// We had to split the alignment; create a final alignment from
1316  /// the contents of curr_disc
1317  aligns.push_back(s_GetJoinedAlignment(curr_disc));
1318  ++num_splits;
1319  }
1320  }}
1321  break;
1322 
1323  default:
1324  break;
1325  }
1326 
1327  if (!num_splits) {
1328  aligns.push_back(CRef<CSeq_align>(const_cast<CSeq_align *>(this)));
1329  }
1330 }
1331 
1334 {
1335  if (offset == 0) return;
1336 
1337  switch (SetSegs().Which()) {
1338  case TSegs::e_Dendiag:
1339  NON_CONST_ITERATE(TSegs::TDendiag, dendiag_it, SetSegs().SetDendiag()) {
1340  (*dendiag_it)->OffsetRow(row, offset);
1341  }
1342  break;
1343  case TSegs::e_Denseg:
1345  break;
1346  case TSegs::e_Std:
1347  NON_CONST_ITERATE(TSegs::TStd, std_it, SetSegs().SetStd()) {
1348  (*std_it)->OffsetRow(row, offset);
1349  }
1350  break;
1351  case TSegs::e_Disc:
1352  NON_CONST_ITERATE(CSeq_align_set::Tdata, seq_align_it, SetSegs().SetDisc().Set()) {
1353  (*seq_align_it)->OffsetRow(row, offset);
1354  }
1355  break;
1356  default:
1357  NCBI_THROW(CSeqalignException, eUnsupported,
1358  "CSeq_align::OffsetRow() currently does not handle "
1359  "this type of alignment");
1360  }
1361 }
1362 
1363 
1364 /// @deprecated
1366  const CSeq_loc& dst_loc,
1367  bool ignore_strand)
1368 {
1369  // Limit to certain types of locs:
1370  switch (dst_loc.Which()) {
1371  case CSeq_loc::e_Whole:
1372  return;
1373  case CSeq_loc::e_Int:
1374  break;
1375  default:
1376  NCBI_THROW(CSeqalignException, eUnsupported,
1377  "CSeq_align::RemapToLoc only supports int target seq-locs");
1378  }
1379 
1380  switch (SetSegs().Which()) {
1381  case TSegs::e_Denseg:
1382  SetSegs().SetDenseg().RemapToLoc(row, dst_loc, ignore_strand);
1383  break;
1384  case TSegs::e_Std:
1385  NON_CONST_ITERATE(TSegs::TStd, std_it, SetSegs().SetStd()) {
1386  (*std_it)->RemapToLoc(row, dst_loc, ignore_strand);
1387  }
1388  break;
1389  case TSegs::e_Disc:
1390  NON_CONST_ITERATE(CSeq_align_set::Tdata, seq_align_it, SetSegs().SetDisc().Set()) {
1391  (*seq_align_it)->RemapToLoc(row, dst_loc, ignore_strand);
1392  }
1393  break;
1394  default:
1395  NCBI_THROW(CSeqalignException, eUnsupported,
1396  "CSeq_align::RemapToLoc only supports Dense-seg and Std-seg alignments.");
1397  }
1398 }
1399 
1400 
1403  const CSeq_loc& loc)
1404 {
1405  if ( loc.IsWhole() ) {
1407  copy->Assign(align);
1408  return copy;
1409  }
1410  const CSeq_id* orig_id = loc.GetId();
1411  if ( !orig_id ) {
1412  NCBI_THROW(CAnnotMapperException, eBadLocation,
1413  "Location with multiple ids can not be used to "
1414  "remap seq-aligns.");
1415  }
1416  CRef<CSeq_id> id(new CSeq_id);
1417  id->Assign(*orig_id);
1418 
1419  // Create source seq-loc
1420  TSeqPos len = 0;
1421  for (CSeq_loc_CI it(loc); it; ++it) {
1422  if ( it.IsWhole() ) {
1423  NCBI_THROW(CAnnotMapperException, eBadLocation,
1424  "Whole seq-loc can not be used to "
1425  "remap seq-aligns.");
1426  }
1427  len += it.GetRange().GetLength();
1428  }
1429  CSeq_loc src_loc(*id, 0, len - 1);
1430  ENa_strand strand = loc.GetStrand();
1431  if (strand != eNa_strand_unknown) {
1432  src_loc.SetStrand(strand);
1433  }
1434  CSeq_loc_Mapper_Base mapper(src_loc, loc);
1435  return mapper.Map(align, row);
1436 }
1437 
1438 /// Get length of intersection between a range and a range collection
1440  const TSeqRange &range)
1441 {
1442  TSeqPos length = 0;
1443  ITERATE (CRangeCollection<TSeqPos>, it, ranges) {
1444  length += it->IntersectionWith(range).GetLength();
1445  }
1446  return length;
1447 }
1448 
1449 /// Retrieves the number of gaps in an alignment
1450 /// @param align Alignment to examine [in]
1451 /// @param get_total_count if true, the total number of gaps is retrived,
1452 /// otherwise only the number of gap openings is retrieved
1453 /// @throws CSeqalignException if alignment type is not supported
1454 static TSeqPos
1456  const CRangeCollection<TSeqPos> &ranges,
1457  bool get_total_count)
1458 {
1459  if (ranges.empty()) {
1460  return 0;
1461  }
1462 
1463  TSeqPos retval = 0;
1464  switch (align.GetSegs().Which()) {
1466  {{
1467  const CDense_seg& ds = align.GetSegs().GetDenseg();
1468  for (CDense_seg::TNumseg i = 0; i < ds.GetNumseg(); ++i) {
1469  bool is_gapped = false;
1470  for (CDense_seg::TDim j = 0; j < ds.GetDim(); ++j) {
1471  if (ds.GetStarts()[i * ds.GetDim() + j] == -1 &&
1472  (row < 0 || row == j))
1473  {
1474  is_gapped = true;
1475  break;
1476  }
1477  }
1478  if (is_gapped) {
1479  TSeqPos gap_len = ds.GetLens()[i];
1480  if (!ranges.begin()->IsWhole()) {
1481  TSignedSeqPos gap_start = ds.GetStarts()[i * ds.GetDim()];
1482  if (gap_start >= 0) {
1483  gap_len = s_IntersectionLength(ranges,
1484  TSeqRange(gap_start, gap_start + gap_len - 1));
1485  } else {
1486  gap_start = ds.GetStarts()[(i-1) * ds.GetDim()]
1487  + ds.GetLens()[i-1];
1488  if (s_IntersectionLength(ranges,
1489  TSeqRange(gap_start, gap_start)) == 0)
1490  {
1491  gap_len = 0;
1492  }
1493  }
1494  }
1495  retval += (get_total_count ? gap_len : (gap_len ? 1 : 0));
1496  }
1497  }
1498  }}
1499  break;
1500 
1502  {{
1504  align.GetSegs().GetDisc().Get()) {
1505  retval += s_GetGapCount(**iter, row, ranges, get_total_count);
1506  }
1507  }}
1508  break;
1509 
1511  {{
1512  ITERATE (CSpliced_seg::TExons, iter, align.GetSegs().GetSpliced().GetExons()) {
1513  for (CSeq_align::TDim r = 0; r < 2; ++r) {
1514  if (row != r) {
1515  CRangeCollection<TSeqPos> insertions =
1516  (*iter)->GetRowSeq_insertions(
1517  r, align.GetSegs().GetSpliced(), ranges);
1518  retval += get_total_count ? insertions.GetCoveredLength()
1519  : insertions.size();
1520  }
1521  }
1522  }
1523  }}
1524  break;
1525 
1527  if (row < 0 && !get_total_count && ranges.begin()->IsWhole()) {
1528  ITERATE (CSeq_align::TSegs::TStd, iter, align.GetSegs().GetStd()) {
1529  const CStd_seg& seg = **iter;
1530  ITERATE (CStd_seg::TLoc, it, seg.GetLoc()) {
1531  const CSeq_loc& loc = **it;
1532  if (loc.IsEmpty()) {
1533  ++retval;
1534  break;
1535  }
1536  }
1537  }
1538  break;
1539  }
1540 
1541  default:
1542  NCBI_THROW(CSeqalignException, eUnsupported,
1543  "CSeq_align::GetGapCount() currently does not handle "
1544  "this type of alignment.");
1545  }
1546 
1547  return retval;
1548 }
1549 
1551 {
1552  return s_GetGapCount(*this, row,
1554  true);
1555 }
1556 
1558 {
1559  return s_GetGapCount(*this, row,
1561  false);
1562 }
1563 
1565  const TSeqRange &range, TDim row) const
1566 {
1567  return s_GetGapCount(*this, row, CRangeCollection<TSeqPos>(range), true);
1568 }
1569 
1571  const TSeqRange &range, TDim row) const
1572 {
1573  return s_GetGapCount(*this, row, CRangeCollection<TSeqPos>(range), false);
1574 }
1575 
1577  const CRangeCollection<TSeqPos> &ranges, TDim row) const
1578 {
1579  return s_GetGapCount(*this, row, ranges, true);
1580 }
1581 
1583  const CRangeCollection<TSeqPos> &ranges, TDim row) const
1584 {
1585  return s_GetGapCount(*this, row, ranges, false);
1586 }
1587 
1588 string CSeq_align::SIndel::AsString(int row_pos) const
1589 {
1590  return NStr::NumericToString(row_pos == 0 ? product_pos : genomic_pos)
1591  + (row == 0 ? 'I' : 'D')
1593 }
1594 
1595 static vector<CSeq_align::SIndel>
1597  const CRangeCollection<TSeqPos> &ranges,
1598  bool include_frameshifts, bool include_non_frameshifts)
1599 {
1600  vector<CSeq_align::SIndel> results;
1601  if (ranges.empty()) {
1602  return results;;
1603  }
1604 
1605  CRef<CSeq_align> generated_denseg;
1606  switch (align.GetSegs().Which()) {
1608  break;
1609 
1611  generated_denseg = align.CreateDensegFromDisc();
1612  break;
1613 
1615  {{
1616  CRef<CSeq_align> as_disc_seg =
1617  align.GetSegs().GetSpliced().AsDiscSeg();
1618  if (align.GetSeqStart(1) > align.GetSeqStop(1)) {
1619  /// genomic sequence is circular; won't work on the entire
1620  /// alignment, need to check separately on the parts before and
1621  /// after the origin
1622  CSeq_align before_origin, after_origin;
1623  before_origin.SetType(CSeq_align::eType_disc);
1624  after_origin.SetType(CSeq_align::eType_disc);
1625  for (const CRef<CSeq_align> &segment
1626  : as_disc_seg->GetSegs().GetDisc().Get())
1627  {
1628  (segment->GetSeqStart(1) < align.GetSeqStart(1)
1629  ? after_origin : before_origin)
1630  . SetSegs().SetDisc().Set().push_back(segment);
1631  }
1632  vector<CSeq_align::SIndel> before_origin_frameshifts =
1633  s_GetIndels(before_origin, row, ranges, include_frameshifts, include_non_frameshifts),
1634  after_origin_frameshifts =
1635  s_GetIndels(after_origin, row, ranges, include_frameshifts, include_non_frameshifts);
1636  results = before_origin_frameshifts;
1637  results.insert(results.end(), after_origin_frameshifts.begin(),
1638  after_origin_frameshifts.end());
1639  return results;
1640  }
1641  generated_denseg = as_disc_seg->CreateDensegFromDisc();
1642  }}
1643  break;
1644 
1645  default:
1646  NCBI_THROW(CSeqalignException, eUnsupported,
1647  "CSeq_align::GetNumFrameshifts() currently does not handle "
1648  "this type of alignment.");
1649  }
1650 
1651  if (generated_denseg) {
1652  generated_denseg->SetSegs().SetDenseg().OrderAdjacentGaps();
1653  generated_denseg->SetSegs().SetDenseg().Compact();
1654  generated_denseg->SetSegs().SetDenseg().RemovePureGapSegs();
1655  }
1656 
1657  const CDense_seg &ds = (generated_denseg ? *generated_denseg : align)
1658  . GetSegs().GetDenseg();
1659 
1660  for (CDense_seg::TNumseg i = 0; i < ds.GetNumseg(); ++i) {
1661  bool is_gapped = false;
1662  for (CDense_seg::TDim j = 0; j < ds.GetDim(); ++j) {
1663  if (ds.GetStarts()[i * ds.GetDim() + j] == -1 && row != j)
1664  {
1665  is_gapped = true;
1666  break;
1667  }
1668  }
1669  if (is_gapped) {
1670  TSeqPos gap_len = ds.GetLens()[i];
1671  if (!ranges.begin()->IsWhole()) {
1672  TSignedSeqPos gap_start = ds.GetStarts()[i * ds.GetDim()];
1673  if (gap_start >= 0) {
1674  gap_len = s_IntersectionLength(ranges,
1675  TSeqRange(gap_start, gap_start + gap_len - 1));
1676  } else {
1677  gap_start = ds.GetStarts()[(i-1) * ds.GetDim()]
1678  + ds.GetLens()[i-1];
1679  if (s_IntersectionLength(ranges,
1680  TSeqRange(gap_start, gap_start)) == 0)
1681  {
1682  gap_len = 0;
1683  }
1684  }
1685  }
1686  if (gap_len > 0 &&
1687  ((include_frameshifts && gap_len % 3 != 0) ||
1688  (include_non_frameshifts && gap_len % 3 == 0)))
1689  {
1690  TSignedSeqPos product_gap_start = ds.GetStarts()[i*ds.GetDim()];
1691  TSignedSeqPos genomic_gap_start = ds.GetStarts()[i*ds.GetDim() + 1];
1692  CSeq_align::TDim inserted_row = product_gap_start < 0 ? 1 : 0;
1693  if (product_gap_start < 0) {
1694  ENa_strand product_strand = ds.IsSetStrands() ? ds.GetStrands()[(i-1) * ds.GetDim()] : eNa_strand_plus;
1695  product_gap_start = product_strand == eNa_strand_minus
1696  ? ds.GetStarts()[(i+1) * ds.GetDim()]
1697  + ds.GetLens()[i+1]
1698  : ds.GetStarts()[(i-1) * ds.GetDim()]
1699  + ds.GetLens()[i-1];
1700  } else {
1701  ENa_strand genomic_strand = ds.IsSetStrands() ? ds.GetStrands()[(i-1) * ds.GetDim() + 1] : eNa_strand_plus;
1702  genomic_gap_start = genomic_strand == eNa_strand_minus
1703  ? ds.GetStarts()[(i+1) * ds.GetDim() + 1]
1704  + ds.GetLens()[i+1]
1705  : ds.GetStarts()[(i-1) * ds.GetDim() + 1]
1706  + ds.GetLens()[i-1];
1707  }
1708  results.push_back(CSeq_align::SIndel(product_gap_start, genomic_gap_start, inserted_row, gap_len));
1709  }
1710  }
1711  }
1712 
1713  return results;
1714 }
1715 
1717 {
1718  return static_cast<TSeqPos>(
1719  s_GetIndels(*this, row,
1721  true, false)
1722  .size());
1723 }
1724 
1726  const TSeqRange &range, TDim row) const
1727 {
1728  return static_cast<TSeqPos>(
1729  s_GetIndels(*this, row, CRangeCollection<TSeqPos>(range), true, false)
1730  .size());
1731 }
1732 
1734  const CRangeCollection<TSeqPos> &ranges, TDim row) const
1735 {
1736  return static_cast<TSeqPos>(
1737  s_GetIndels(*this, row, ranges, true, false)
1738  .size());
1739 }
1740 
1741 vector<CSeq_align::SIndel> CSeq_align::GetFrameshifts(TDim row) const
1742 {
1743  return s_GetIndels(*this, row,
1745 }
1746 
1747 vector<CSeq_align::SIndel> CSeq_align::GetFrameshiftsWithinRange(
1748  const TSeqRange &range, TDim row) const
1749 {
1750  return s_GetIndels(*this, row, CRangeCollection<TSeqPos>(range), true, false);
1751 }
1752 
1753 vector<CSeq_align::SIndel> CSeq_align::GetFrameshiftsWithinRanges(
1754  const CRangeCollection<TSeqPos> &ranges, TDim row) const
1755 {
1756  return s_GetIndels(*this, row, ranges, true, false);
1757 }
1758 
1759 vector<CSeq_align::SIndel> CSeq_align::GetNonFrameshifts(TDim row) const
1760 {
1761  return s_GetIndels(*this, row,
1763 }
1764 
1765 vector<CSeq_align::SIndel> CSeq_align::GetNonFrameshiftsWithinRange(
1766  const TSeqRange &range, TDim row) const
1767 {
1768  return s_GetIndels(*this, row, CRangeCollection<TSeqPos>(range), false, true);
1769 }
1770 
1772  const CRangeCollection<TSeqPos> &ranges, TDim row) const
1773 {
1774  return s_GetIndels(*this, row, ranges, false, true);
1775 }
1776 
1777 vector<CSeq_align::SIndel> CSeq_align::GetIndels(TDim row) const
1778 {
1779  return s_GetIndels(*this, row,
1781 }
1782 
1783 vector<CSeq_align::SIndel> CSeq_align::GetIndelsWithinRange(
1784  const TSeqRange &range, TDim row) const
1785 {
1786  return s_GetIndels(*this, row, CRangeCollection<TSeqPos>(range), true, true);
1787 }
1788 
1789 vector<CSeq_align::SIndel> CSeq_align::GetIndelsWithinRanges(
1790  const CRangeCollection<TSeqPos> &ranges, TDim row) const
1791 {
1792  return s_GetIndels(*this, row, ranges, true, true);
1793 }
1794 
1795 
1797 {
1799  switch (GetSegs().Which()) {
1801  {{
1802  const CDense_seg& ds = GetSegs().GetDenseg();
1803  for (CDense_seg::TNumseg i = 0; i < ds.GetNumseg(); ++i) {
1804  bool is_gapped = false;
1805  for (CDense_seg::TDim j = 0; j < ds.GetDim(); ++j) {
1806  if (ds.GetStarts()[i * ds.GetDim() + j] == -1)
1807  {
1808  is_gapped = true;
1809  break;
1810  }
1811  }
1812  if (!is_gapped) {
1813  TSignedSeqPos start = ds.GetStarts()[i * ds.GetDim() + row];
1814  ranges += TSeqRange(start, start + ds.GetLens()[i] - 1);
1815  }
1816  }
1817  }}
1818  break;
1819 
1821  {{
1823  GetSegs().GetDisc().Get()) {
1824  ranges += (*iter)->GetAlignedBases(row);
1825  }
1826  }}
1827  break;
1828 
1830  {{
1831  ITERATE (CSpliced_seg::TExons, iter, GetSegs().GetSpliced().GetExons()) {
1832  ranges += (*iter)->GetRowSeq_range(row, true);
1833  ranges -= (*iter)->GetRowSeq_insertions(row, GetSegs().GetSpliced());
1834  }
1835  }}
1836  break;
1837 
1838  default:
1839  NCBI_THROW(CSeqalignException, eUnsupported,
1840  "CSeq_align::GetInsertedRanges() currently does not handle "
1841  "this type of alignment.");
1842  }
1843 
1844  return ranges;
1845 }
1846 
1847 
1848 /// Get length of dense-seg alignment within range
1849 static inline TSeqPos s_DenseSegLength(const CDense_seg& ds,
1850  CDense_seg::TNumseg seg,
1851  const CRangeCollection<TSeqPos> &ranges)
1852 {
1853  if(ranges.begin()->IsWhole())
1854  return ds.GetLens()[seg];
1855 
1856  TSignedSeqPos start = ds.GetStarts()[seg * ds.GetDim()];
1857  TSeqRange seg_range(start, start + ds.GetLens()[seg] - 1);
1858  return s_IntersectionLength(ranges, seg_range);
1859 }
1860 
1861 //////////////////////////////////////////////////////////////////////////////
1862 ///
1863 /// calculate the length of our alignment within given range
1864 ///
1866  const CRangeCollection<TSeqPos> &ranges,
1867  bool ungapped)
1868 {
1869  if (ranges.empty()) {
1870  return 0;
1871  }
1872 
1873  TSeqPos len = 0;
1874  switch (align.GetSegs().Which()) {
1876  {{
1877  const CDense_seg& ds = align.GetSegs().GetDenseg();
1878  if (ungapped) {
1879  for (CDense_seg::TNumseg i = 0; i < ds.GetNumseg(); ++i) {
1880  bool is_gap_seg = false;
1881  for (CDense_seg::TDim j = 0;
1882  !is_gap_seg && j < ds.GetDim(); ++j) {
1883  //int start = ds.GetStarts()[i * ds.GetDim() + j];
1884  if (ds.GetStarts()[i * ds.GetDim() + j] == -1) {
1885  is_gap_seg = true;
1886  }
1887  }
1888  if ( !is_gap_seg ) {
1889  len += s_DenseSegLength(ds, i, ranges);
1890  }
1891  }
1892  } else {
1893  for (CDense_seg::TNumseg i = 0; size_t(i) < ds.GetLens().size();
1894  ++i) {
1895  len += s_DenseSegLength(ds, i, ranges);
1896  }
1897  }
1898  }}
1899  break;
1900 
1903  align.GetSegs().GetDisc().Get()) {
1904  len += s_GetAlignmentLength(**iter, ranges, ungapped);
1905  }
1906  break;
1907 
1909  {{
1910  if(!ranges.begin()->IsWhole())
1911  NCBI_THROW(CSeqalignException, eUnsupported,
1912  "Can't calculate alignment length within a range "
1913  "for standard seg representation");
1914 
1915  /// pass 1:
1916  /// find total ranges
1917  vector<TSeqPos> sizes;
1918 
1919  ITERATE (CSeq_align::TSegs::TStd, iter, align.GetSegs().GetStd()) {
1920  const CStd_seg& seg = **iter;
1921 
1922  bool is_gap = false;
1923  ITERATE (CStd_seg::TLoc, it, seg.GetLoc()) {
1924  if ((*it)->IsEmpty()) {
1925  is_gap = true;
1926  break;
1927  }
1928  }
1929 
1930  if ( !ungapped || !is_gap) {
1931  size_t i = 0;
1932  ITERATE (CStd_seg::TLoc, it, seg.GetLoc()) {
1933  if ( !(*it)->IsEmpty() ) {
1934  if (sizes.empty()) {
1935  sizes.resize(seg.GetDim(), 0);
1936  } else if (sizes.size() != (size_t)seg.GetDim()) {
1938  "invalid std-seg: "
1939  "inconsistent number of locs");
1940  }
1941 
1942  for (CSeq_loc_CI loc_it(**it); loc_it; ++loc_it) {
1943  sizes[i] += loc_it.GetRange().GetLength();
1944  }
1945  }
1946 
1947  ++i;
1948  }
1949  }
1950  }
1951 
1952  /// pass 2: determine shortest length
1953  if ( sizes.empty() ) return 0;
1954  vector<TSeqPos>::iterator iter = sizes.begin();
1955  vector<TSeqPos>::iterator smallest = iter;
1956  for (++iter; iter != sizes.end(); ++iter) {
1957  if (*iter < *smallest) {
1958  smallest = iter;
1959  }
1960  }
1961  return *smallest;
1962  }}
1963  break;
1964 
1967  align.GetSegs().GetSpliced().GetExons()) {
1968  CRangeCollection<TSeqPos> exon_ranges = ranges;
1969  exon_ranges &= (*iter)->GetRowSeq_range(0, true);
1970  len += exon_ranges.GetCoveredLength();
1971  if (ungapped) {
1972  len -= (*iter)->GetRowSeq_insertions(
1973  0, align.GetSegs().GetSpliced(), ranges).GetCoveredLength();
1974  } else {
1975  /// When computing length without gaps, genomic inserts
1976  /// are ignored; when computing with gaps, we need to add them
1977  /// to length if their starting point is within range
1978  len += (*iter)->GetRowSeq_insertions(
1979  1, align.GetSegs().GetSpliced(), ranges).GetCoveredLength();
1980  }
1981  }
1982  break;
1983 
1984  default:
1985  _ASSERT(false);
1986  break;
1987  }
1988 
1989  return len;
1990 }
1991 
1992 
1993 TSeqPos CSeq_align::GetAlignLength(bool include_gaps) const
1994 {
1995  return s_GetAlignmentLength(*this,
1997  !include_gaps );
1998 }
1999 
2000 
2002  bool include_gaps) const
2003 {
2004  return s_GetAlignmentLength(*this,
2006  !include_gaps );
2007 }
2008 
2009 
2011  bool include_gaps) const
2012 {
2013  return s_GetAlignmentLength(*this, ranges, !include_gaps );
2014 }
2015 
2016 
2018  TSeqRange r0 = GetSeqRange(0);
2019  TSeqRange r1 = GetSeqRange(1);
2020  double r = 0;
2021  if (r0.GetLength()) {
2022  r = double(r1.GetLength()) / double(r0.GetLength());
2023  }
2024  return r;
2025 }
2026 
2027 
2029 {
2030  CRef<CSeq_loc> loc(new CSeq_loc);
2031  switch (GetSegs().Which()) {
2033  {
2034  ITERATE(TSegs::TDendiag, it, GetSegs().GetDendiag()) {
2035  loc->SetPacked_int().Set().push_back(
2036  (*it)->CreateRowSeq_interval(row));
2037  }
2038  break;
2039  }
2041  {
2042  loc->SetInt(*GetSegs().GetDenseg().CreateRowSeq_interval(row));
2043  break;
2044  }
2046  {
2047  ITERATE(TSegs::TStd, it, GetSegs().GetStd()) {
2048  // Std-seg may contain empty locations, so
2049  // we have to use mix rather than packed-int.
2050  loc->SetMix().Set().push_back(
2051  (*it)->CreateRowSeq_loc(row));
2052  }
2053  break;
2054  }
2056  {
2057  ITERATE(TSegs::TDisc::Tdata, it, GetSegs().GetDisc().Get()) {
2058  loc->SetMix().Set().push_back((*it)->CreateRowSeq_loc(row));
2059  }
2060  break;
2061  }
2063  {
2064  if (row > 1) {
2065  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
2066  "CSeq_align::CreateRowSeq_loc() - row number must "
2067  "be 0 or 1 for spliced-segs.");
2068  }
2069  const CSpliced_seg& spl = GetSegs().GetSpliced();
2071  // Spliced-seg may be required to get seq-id if
2072  // it's not set in the exon.
2073  loc->SetPacked_int().Set().push_back(
2074  (*ex)->CreateRowSeq_interval(row, spl));
2075  }
2076  break;
2077  }
2080  default:
2081  NCBI_THROW(CSeqalignException, eUnsupported,
2082  "CSeq_align::CreateRowSeq_loc() currently does not handle "
2083  "this type of alignment.");
2084  break;
2085  }
2086  return loc;
2087 }
2088 
2090 {
2092  void AddLength(TSeqPos length)
2093  {
2094  if (range.first > length) {
2095  range.first = length;
2096  }
2097  if (range.second < length) {
2098  range.second = length;
2099  }
2100  }
2102  {
2103  if (range.first > new_range.first) {
2104  range.first = new_range.first;
2105  }
2106  if (range.second < new_range.second) {
2107  range.second = new_range.second;
2108  }
2109  }
2110 
2112 };
2113 
2115 {
2116  SLengthRange length_range;
2117  switch (GetSegs().Which()) {
2119  {{
2120  const CDense_seg& ds = GetSegs().GetDenseg();
2121  for (CDense_seg::TNumseg i = 0; i < ds.GetNumseg(); ++i) {
2122  bool is_gapped = false;
2123  for (CDense_seg::TDim j = 0; j < ds.GetDim(); ++j) {
2124  if (ds.GetStarts()[i * ds.GetDim() + j] == -1) {
2125  is_gapped = true;
2126  break;
2127  }
2128  }
2129  if (is_gapped) {
2130  length_range.AddLength(ds.GetLens()[i]);
2131  }
2132  }
2133  }}
2134  break;
2135 
2137  {{
2138  int num_rows = CheckNumRows();
2139  vector<TSeqRange> last_seg_ranges;
2141  GetSegs().GetDisc().Get()) {
2142  length_range.AddRange((*iter)->GapLengthRange());
2143  vector<TSeqRange> seg_ranges;
2144  for (int i=0; i < num_rows; i++) {
2145  seg_ranges.push_back((*iter)->GetSeqRange(i));
2146  /// If this is not first segment, include gaps between last
2147  /// segment and this one
2148  if (!last_seg_ranges.empty()) {
2149  TSeqPos gap = s_Distance(seg_ranges[i], last_seg_ranges[i]);
2150  if (gap > 0) {
2151  length_range.AddLength(gap);
2152  }
2153  }
2154  }
2155  last_seg_ranges = seg_ranges;
2156  }
2157  }}
2158  break;
2159 
2161  {{
2163  GetSegs().GetSpliced().GetExons()) {
2164  const CSpliced_exon& exon = **iter;
2165  for (unsigned r = 0; r < 2; ++r) {
2166  CRangeCollection<TSeqPos> insertions =
2167  exon.GetRowSeq_insertions(
2168  r, GetSegs().GetSpliced());
2169  ITERATE (CRangeCollection<TSeqPos>, ins_it, insertions)
2170  {
2171  length_range.AddLength(ins_it->GetLength());
2172  }
2173  }
2174  }
2175  }}
2176 
2177  break;
2178 
2179  default:
2180  NCBI_THROW(CSeqalignException, eUnsupported,
2181  "Can't get gap lengths for this type of alignment.");
2182  }
2183  return length_range.range;
2184 }
2185 
2187 {
2188  if (!GetSegs().IsSpliced()) {
2189  NCBI_THROW(CSeqalignException, eUnsupported,
2190  "Requested exon lengths for a non-spliced alignment.");
2191  }
2192  SLengthRange length_range;
2193  const CSpliced_exon* previous_exon = NULL;
2194  bool minus_strand = GetSeqStrand(1) == eNa_strand_minus;
2196  GetSegs().GetSpliced().GetExons()) {
2197  const CSpliced_exon* exon = &**iter;
2198  if (previous_exon) {
2199  bool descending_pos = exon->GetGenomic_end() <
2200  previous_exon->GetGenomic_start();
2201  if (descending_pos != minus_strand) {
2202  /// Crossed origins; don't count as intron size
2203  continue;
2204  }
2205  TSeqRange intron((minus_strand ? exon : previous_exon)->GetGenomic_end() + 1,
2206  (minus_strand ? previous_exon : exon)->GetGenomic_start() - 1);
2207  length_range.AddLength(intron.GetLength());
2208  }
2209  previous_exon = exon;
2210  }
2211  return length_range.range;
2212 }
2213 
2215 {
2216  if (!GetSegs().IsSpliced()) {
2217  NCBI_THROW(CSeqalignException, eUnsupported,
2218  "Requested exon lengths for a non-spliced alignment.");
2219  }
2220  SLengthRange length_range;
2222  GetSegs().GetSpliced().GetExons()) {
2223  const CSpliced_exon* exon = &**iter;
2224  length_range.AddLength(
2225  exon->GetGenomic_end() - exon->GetGenomic_start() + 1);
2226  }
2227  return length_range.range;
2228 }
2229 
2231 {
2232  // break at this segment
2233  CRef<CSeq_align> align(new CSeq_align);
2234  align->SetDim(2);
2236  const CDense_seg& seg = GetSegs().GetDenseg();
2237  CDense_seg& subseg = align->SetSegs().SetDenseg();
2238 
2239  subseg.SetIds() = seg.GetIds();
2240  subseg.SetDim(2);
2241  subseg.SetNumseg(to - from + 1);
2242  subseg.SetStarts().reserve(subseg.GetNumseg()*2);
2243  subseg.SetLens().reserve(subseg.GetNumseg());
2244  if (seg.IsSetStrands()) {
2245  subseg.SetStrands().reserve(subseg.GetNumseg()*2);
2246  }
2247 
2248  int i;
2249 
2250  for (i = from; i <= to; ++i) {
2251  subseg.SetLens().push_back(seg.GetLens()[i]);
2252  subseg.SetStarts().push_back(seg.GetStarts()[i * 2]);
2253  subseg.SetStarts().push_back(seg.GetStarts()[i * 2 + 1]);
2254  if (seg.IsSetStrands()) {
2255  subseg.SetStrands().push_back(seg.GetStrands()[i * 2]);
2256  subseg.SetStrands().push_back(seg.GetStrands()[i * 2 + 1]);
2257  }
2258  }
2259 
2260  subseg.TrimEndGaps();
2261  try {
2262  align->Validate(true);
2263  }
2264  catch (CException& e) {
2265  ERR_POST(Error << e);
2266  cerr << MSerial_AsnText << seg;
2267  cerr << MSerial_AsnText << *align;
2268  throw;
2269  }
2270  return align;
2271 }
2272 
2273 
2274 CConstRef<CUser_object> CSeq_align::FindExt(const string& ext_type) const
2275 {
2277  if ( IsSetExt() ) {
2278  ITERATE(TExt, it, GetExt()) {
2279  const CObject_id& obj_type = (*it)->GetType();
2280  if ( obj_type.IsStr() && obj_type.GetStr() == ext_type ) {
2281  ret.Reset(it->GetPointer());
2282  break;
2283  }
2284  }
2285  }
2286  return ret;
2287 }
2288 
2290 {
2291  CRef<CUser_object> ret;
2292  if ( IsSetExt() ) {
2293  NON_CONST_ITERATE(TExt, it, SetExt()) {
2294  const CObject_id& obj_type = (*it)->GetType();
2295  if ( obj_type.IsStr() && obj_type.GetStr() == ext_type ) {
2296  ret.Reset(it->GetPointer());
2297  break;
2298  }
2299  }
2300  }
2301  return ret;
2302 }
2303 
2304 END_objects_SCOPE // namespace ncbi::objects::
2305 
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
Definition: Na_strand.hpp:75
User-defined methods of the data storage class.
static const bool sc_IsInteger[]
Definition: Seq_align.cpp:456
static TSeqPos s_Distance(const TSeqRange &range1, const TSeqRange &range2)
Definition: Seq_align.cpp:1183
static vector< CSeq_align::SIndel > s_GetIndels(const CSeq_align &align, CSeq_align::TDim row, const CRangeCollection< TSeqPos > &ranges, bool include_frameshifts, bool include_non_frameshifts)
Definition: Seq_align.cpp:1596
static const TScoreNamePair sc_ScoreNames[]
Definition: Seq_align.cpp:404
static TSeqPos s_GetGapCount(const CSeq_align &align, CSeq_align::TDim row, const CRangeCollection< TSeqPos > &ranges, bool get_total_count)
Retrieves the number of gaps in an alignment.
Definition: Seq_align.cpp:1455
CRef< CSeq_align > RemapAlignToLoc(const CSeq_align &align, CSeq_align::TDim row, const CSeq_loc &loc)
Remap seq-align row to the seq-loc.
Definition: Seq_align.cpp:1401
static const char *const sc_ScoreHelpText[]
Definition: Seq_align.cpp:430
static CRef< CSeq_align > s_GetJoinedAlignment(const CSeq_align_set &aligns)
Join the alignments in the set into one alignment.
Definition: Seq_align.cpp:1193
static TSeqPos s_IntersectionLength(const CRangeCollection< TSeqPos > &ranges, const TSeqRange &range)
Get length of intersection between a range and a range collection.
Definition: Seq_align.cpp:1439
static TSeqPos s_DenseSegLength(const CDense_seg &ds, CDense_seg::TNumseg seg, const CRangeCollection< TSeqPos > &ranges)
Get length of dense-seg alignment within range.
Definition: Seq_align.cpp:1849
static TSeqPos s_GetAlignmentLength(const CSeq_align &align, const CRangeCollection< TSeqPos > &ranges, bool ungapped)
calculate the length of our alignment within given range
Definition: Seq_align.cpp:1865
SStaticPair< CSeq_align::EScoreType, const char * > TScoreNamePair
Definition: Seq_align.cpp:403
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Seq-loc and seq-align mapper exceptions.
void TrimEndGaps()
Trim leading/training gaps if possible.
Definition: Dense_seg.cpp:344
const CSeq_id & GetSeq_id(TDim row) const
Definition: Dense_seg.cpp:154
ENa_strand GetSeqStrand(TDim row) const
Definition: Dense_seg.cpp:241
void ResetWidths(void)
Definition: Dense_seg.hpp:225
TSeqPos GetSeqStop(TDim row) const
Definition: Dense_seg.cpp:203
void Reverse(void)
Reverse the segments' orientation.
Definition: Dense_seg.cpp:644
void OffsetRow(TDim row, TSignedSeqPos offset)
Offset row's coords.
Definition: Dense_seg.cpp:890
TWidths & SetWidths(void)
Definition: Dense_seg.hpp:217
void SwapRows(TDim row1, TDim row2)
Swap two rows (changing *order*, not content)
Definition: Dense_seg.cpp:686
TSeqPos GetSeqStart(TDim row) const
Definition: Dense_seg.cpp:165
TDim CheckNumRows(void) const
Definition: Dense_seg.cpp:92
vector< int > TWidths
Definition: Dense_seg.hpp:73
CRange< TSeqPos > GetSeqRange(TDim row) const
Definition: Dense_seg.hpp:234
void Validate(bool full_test=false) const
Definition: Dense_seg.cpp:274
void RemapToLoc(TDim row, const CSeq_loc &loc, bool ignore_strand=false)
Definition: Dense_seg.cpp:922
size_type size() const
Definition: range_coll.hpp:98
bool empty() const
Definition: range_coll.hpp:102
const_iterator begin() const
Definition: range_coll.hpp:82
position_type GetCoveredLength(void) const
Returns total length covered by ranges in this collection, i.e.
Definition: range_coll.hpp:157
CRef –.
Definition: ncbiobj.hpp:618
Definition: Score.hpp:57
void SwapRows(TDim row1, TDim row2)
Swap two rows (changing *order*, not content)
CRef< CSeq_align > CreateTranslatedDensegFromNADenseg(void) const
Create a Dense-seg with widths from Dense-seg of nucleotides Used by AlnMgr to handle translated nucl...
Definition: Seq_align.cpp:953
EScoreType
enum controlling known named scores
Definition: Seq_align.hpp:128
@ eScore_PercentIdentity_GapOpeningOnly
Definition: Seq_align.hpp:165
@ eScore_PercentIdentity_Gapped
Definition: Seq_align.hpp:163
@ eScore_ConsensusSplices
Definition: Seq_align.hpp:183
@ eScore_OverallIdentity
Definition: Seq_align.hpp:181
@ eScore_SumEValue
Definition: Seq_align.hpp:171
@ eScore_AlignLength
Definition: Seq_align.hpp:142
@ eScore_PercentIdentity_Ungapped
Definition: Seq_align.hpp:164
@ eScore_NegativeCount
Definition: Seq_align.hpp:151
@ eScore_ExonIdentity
Definition: Seq_align.hpp:185
@ eScore_PositiveCount
Definition: Seq_align.hpp:148
@ eScore_PercentCoverage
Definition: Seq_align.hpp:168
@ eScore_HighQualityPercentCoverage
Definition: Seq_align.hpp:177
@ eScore_ProductCoverage
Definition: Seq_align.hpp:184
@ eScore_CompAdjMethod
Definition: Seq_align.hpp:174
@ eScore_IdentityCount
Definition: Seq_align.hpp:145
@ eScore_MismatchCount
Definition: Seq_align.hpp:154
CRef< CSeq_align > CreateDensegFromDisc(SSeqIdChooser *SeqIdChooser=0) const
Definition: Seq_align.cpp:1015
CRef< CScore > x_SetNamedScore(const string &name)
Definition: Seq_align.cpp:537
TSeqPos GetNumGapOpeningsWithinRanges(const CRangeCollection< TSeqPos > &ranges, TDim row=-1) const
Definition: Seq_align.cpp:1582
CRef< CSeq_align > CreateDensegFromStdseg(SSeqIdChooser *SeqIdChooser=0) const
---------------------------------------------------------------------------- PRE : the Seq-align has ...
Definition: Seq_align.cpp:728
CRef< CSeq_align > x_CreateSubsegAlignment(int from, int to) const
Create a partial alignment containing the specified range of segments.
Definition: Seq_align.cpp:2230
TSeqPos GetTotalGapCount(TDim row=-1) const
Retrieves the total number of gaps in the given row an alignment; all gaps by default.
Definition: Seq_align.cpp:1550
TLengthRange ExonLengthRange() const
Definition: Seq_align.cpp:2214
vector< SIndel > GetIndelsWithinRanges(const CRangeCollection< TSeqPos > &ranges, TDim row=-1) const
Definition: Seq_align.cpp:1789
void OffsetRow(TDim row, TSignedSeqPos offset)
Offset row's coords.
Definition: Seq_align.cpp:1332
CRangeCollection< TSeqPos > GetAlignedBases(TDim row) const
Retrieves the locations of aligned bases in the given row, excluding gaps and incontinuities.
Definition: Seq_align.cpp:1796
vector< SIndel > GetNonFrameshiftsWithinRange(const TSeqRange &range, TDim row=-1) const
Definition: Seq_align.cpp:1765
void Reverse(void)
Reverse the segments' orientation NOTE: currently *only* works for dense-seg.
Definition: Seq_align.cpp:685
TSeqPos GetNumGapOpeningsWithinRange(const TSeqRange &range, TDim row=-1) const
Definition: Seq_align.cpp:1570
TLengthRange IntronLengthRange() const
Definition: Seq_align.cpp:2186
static string ScoreName(EScoreType score)
Definition: Seq_align.cpp:503
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
Definition: Seq_align.cpp:153
static string HelpText(EScoreType score)
Definition: Seq_align.cpp:508
CConstRef< CScore > x_GetNamedScore(const string &name) const
retrieve a named score object
Definition: Seq_align.cpp:520
vector< SIndel > GetNonFrameshifts(TDim row=-1) const
Retrieves descriptions of all non-frameshift indels on a given row; i.e.
Definition: Seq_align.cpp:1759
TSeqPos GetNumFrameshiftsWithinRanges(const CRangeCollection< TSeqPos > &ranges, TDim row=-1) const
Definition: Seq_align.cpp:1733
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
CRef< CSeq_loc > CreateRowSeq_loc(TDim row) const
Definition: Seq_align.cpp:2028
void SetNamedScore(const string &id, int score)
Definition: Seq_align.cpp:636
TDim CheckNumRows(void) const
Validatiors.
Definition: Seq_align.cpp:73
void SwapRows(TDim row1, TDim row2)
Swap the position of two rows in the alignment NOTE: currently *only* works for dense-seg & disc.
Definition: Seq_align.cpp:701
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
bool GetNamedScore(const string &id, int &score) const
Get score.
Definition: Seq_align.cpp:563
TSeqPos GetTotalGapCountWithinRanges(const CRangeCollection< TSeqPos > &ranges, TDim row=-1) const
Definition: Seq_align.cpp:1576
void ResetNamedScore(const string &name)
Definition: Seq_align.cpp:606
void RemapToLoc(TDim row, const CSeq_loc &dst_loc, bool ignore_strand=false)
Definition: Seq_align.cpp:1365
TSeqPos GetAlignLengthWithinRange(const TSeqRange &range, bool include_gaps=true) const
Get the length of this alignment within a specified range By default, this function computes an align...
Definition: Seq_align.cpp:2001
void SplitOnLongDiscontinuity(list< CRef< CSeq_align > > &aligns, TSeqPos discontinuity_threshold) const
Split the alignment at any discontinuity greater than threshold; populate aligns list with new alignm...
Definition: Seq_align.cpp:1217
vector< SIndel > GetFrameshiftsWithinRange(const TSeqRange &range, TDim row=-1) const
Definition: Seq_align.cpp:1747
double AlignLengthRatio() const
Definition: Seq_align.cpp:2017
vector< SIndel > GetIndelsWithinRange(const TSeqRange &range, TDim row=-1) const
Definition: Seq_align.cpp:1783
TSeqPos GetTotalGapCountWithinRange(const TSeqRange &range, TDim row=-1) const
Definition: Seq_align.cpp:1564
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
Definition: Seq_align.cpp:1993
vector< SIndel > GetFrameshifts(TDim row=-1) const
Retrieves descriptions of all frameshifts on a given row; i.e.
Definition: Seq_align.cpp:1741
vector< SIndel > GetNonFrameshiftsWithinRanges(const CRangeCollection< TSeqPos > &ranges, TDim row=-1) const
Definition: Seq_align.cpp:1771
TSeqPos GetNumFrameshiftsWithinRange(const TSeqRange &range, TDim row=-1) const
Definition: Seq_align.cpp:1725
vector< SIndel > GetIndels(TDim row=-1) const
Retrieves descriptions of all indels on a given row.
Definition: Seq_align.cpp:1777
TSeqPos GetNumFrameshifts(TDim row=-1) const
Retrieves the number of times a given row shifts frames; i.e.
Definition: Seq_align.cpp:1716
TSeqPos GetAlignLengthWithinRanges(const CRangeCollection< TSeqPos > &ranges, bool include_gaps=true) const
Get the length of this alignment within a specified range By default, this function computes an align...
Definition: Seq_align.cpp:2010
~CSeq_align(void)
destructor
Definition: Seq_align.cpp:68
static const TScoreNameMap & ScoreNameMap()
Definition: Seq_align.cpp:483
vector< SIndel > GetFrameshiftsWithinRanges(const CRangeCollection< TSeqPos > &ranges, TDim row=-1) const
Definition: Seq_align.cpp:1753
void Validate(bool full_test=false) const
Definition: Seq_align.cpp:649
TLengthRange GapLengthRange() const
Definition: Seq_align.cpp:2114
CConstRef< CUser_object > FindExt(const string &ext_type) const
Find extension by type in ext container.
Definition: Seq_align.cpp:2274
static bool IsIntegerScore(EScoreType score)
Definition: Seq_align.cpp:513
pair< TSeqPos, TSeqPos > TLengthRange
Definition: Seq_align.hpp:192
TSeqPos GetNumGapOpenings(TDim row=-1) const
Retrieves the number of gap openings in a given row in an alignment (ignoring how many gaps are in th...
Definition: Seq_align.cpp:1557
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeq_loc_Mapper_Base –.
ENa_strand GetSeqStrand(TDim row) const
Definition: Sparse_seg.cpp:93
CRange< TSeqPos > GetSeqRange(TDim row) const
Definition: Sparse_seg.cpp:201
TDim CheckNumRows(void) const
Definition: Sparse_seg.cpp:63
void Validate(bool full_test=false) const
Validators.
Definition: Sparse_seg.cpp:55
TSeqPos GetSeqStart(TDim row) const
Definition: Sparse_seg.cpp:118
const CSeq_id & GetSeq_id(TDim row) const
Definition: Sparse_seg.cpp:72
TSeqPos GetSeqStop(TDim row) const
Definition: Sparse_seg.cpp:157
CRangeCollection< TSeqPos > GetRowSeq_insertions(CSeq_align::TDim row, const CSpliced_seg &seg) const
Return insertions within exon.
TSeqPos GetSeqStop(TDim row) const
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: In case the product-type is protein, these only return the amin part of Prot-pos.
TSeqPos GetSeqStart(TDim row) const
void Validate(bool full_test=false) const
Validators.
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Spliced_seg.cpp:56
CRef< CSeq_align > AsDiscSeg() const
Convert this alignment to a discontinuous segment.
bool empty() const
Definition: map.hpp:149
Definition: map.hpp:338
#define T(s)
Definition: common.h:230
int offset
Definition: replacements.h:160
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
bool SerialEquals(const C &object1, const C &object2, ESerialRecursionMode how=eRecursive)
Compare serial objects.
Definition: serialbase.hpp:497
C & SerialAssign(C &dest, const C &src, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
Definition: serialbase.hpp:482
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
Definition: Seq_id.cpp:411
@ e_YES
SeqIds compared, but are different.
Definition: Seq_id.hpp:583
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:984
void SetMix(TMix &v)
Definition: Seq_loc.hpp:987
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
Definition: Seq_loc.cpp:5196
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define numeric_limits
Pre-declaration of the "numeric_limits<>" template Forcibly overrides (using preprocessor) the origin...
Definition: ncbi_limits.hpp:92
position_type GetLength(void) const
Definition: range.hpp:158
bool NotEmpty(void) const
Definition: range.hpp:152
bool IntersectingWith(const TThisType &r) const
Definition: range.hpp:331
TThisType & CombineWith(const TThisType &r)
Definition: range.hpp:345
bool Empty(void) const
Definition: range.hpp:148
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
Definition: range.hpp:419
static TThisType GetWhole(void)
Definition: range.hpp:272
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
#define DEFINE_STATIC_FAST_MUTEX(id)
Define static fast mutex and initialize it.
Definition: ncbimtx.hpp:496
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
Tdata & Set(void)
Assign a value to data member.
TScore & SetScore(void)
Assign a value to Score data member.
Definition: Seq_align_.hpp:902
vector< CRef< CSeq_loc > > TLoc
Definition: Std_seg_.hpp:93
TSegs & SetSegs(void)
Assign a value to Segs data member.
Definition: Seq_align_.hpp:930
TLens & SetLens(void)
Assign a value to Lens data member.
Definition: Dense_seg_.hpp:561
TDenseg & SetDenseg(void)
Select the variant.
Definition: Seq_align_.cpp:159
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
vector< CRef< CScore > > TScore
Definition: Seq_align_.hpp:398
const TLoc & GetLoc(void) const
Get the Loc member data.
Definition: Std_seg_.hpp:357
list< CRef< CStd_seg > > TStd
Definition: Seq_align_.hpp:196
bool IsSetStrands(void) const
Check if a value has been assigned to Strands data member.
Definition: Dense_seg_.hpp:568
bool IsSetExt(void) const
extra info Check if a value has been assigned to Ext data member.
Definition: Seq_align_.hpp:989
vector< TSeqPos > TLens
Definition: Dense_seg_.hpp:108
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
TDim GetDim(void) const
Get the Dim member data.
Definition: Seq_align_.hpp:856
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
vector< ENa_strand > TStrands
Definition: Dense_seg_.hpp:109
const TLens & GetLens(void) const
Get the Lens member data.
Definition: Dense_seg_.hpp:555
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Seq_align_.hpp:865
vector< TSignedSeqPos > TStarts
Definition: Dense_seg_.hpp:107
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Dense_seg_.hpp:427
list< CRef< CUser_object > > TExt
Definition: Seq_align_.hpp:402
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
const TSpliced & GetSpliced(void) const
Get the variant data.
Definition: Seq_align_.cpp:219
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
TExt & SetExt(void)
Assign a value to Ext data member.
const TStd & GetStd(void) const
Get the variant data.
Definition: Seq_align_.hpp:752
TStarts & SetStarts(void)
Assign a value to Starts data member.
Definition: Dense_seg_.hpp:536
TStrands & SetStrands(void)
Assign a value to Strands data member.
Definition: Dense_seg_.hpp:586
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
bool CanGetDim(void) const
Check if it is safe to call GetDim method.
Definition: Seq_align_.hpp:843
TDim GetDim(void) const
Get the Dim member data.
Definition: Std_seg_.hpp:295
const TDendiag & GetDendiag(void) const
Get the variant data.
Definition: Seq_align_.hpp:726
TDisc & SetDisc(void)
Select the variant.
Definition: Seq_align_.cpp:203
bool IsSetDim(void) const
dimensionality Check if a value has been assigned to Dim data member.
Definition: Seq_align_.hpp:837
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
Definition: Dense_seg_.hpp:474
const TIds & GetIds(void) const
Get the Ids member data.
Definition: Dense_seg_.hpp:505
const TExt & GetExt(void) const
Get the Ext member data.
bool IsSetScore(void) const
for whole alignment Check if a value has been assigned to Score data member.
Definition: Seq_align_.hpp:884
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
TNumseg GetNumseg(void) const
Get the Numseg member data.
Definition: Dense_seg_.hpp:465
list< CRef< CSeq_align > > Tdata
const TSparse & GetSparse(void) const
Get the variant data.
Definition: Seq_align_.cpp:241
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
const TScore & GetScore(void) const
Get the Score member data.
Definition: Seq_align_.hpp:896
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
list< CRef< CDense_diag > > TDendiag
Definition: Seq_align_.hpp:194
const TStrands & GetStrands(void) const
Get the Strands member data.
Definition: Dense_seg_.hpp:580
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
@ eType_partial
mapping pieces together
Definition: Seq_align_.hpp:103
@ eType_disc
discontinuous alignment
Definition: Seq_align_.hpp:104
bool IsEmpty(void) const
Check if variant Empty is selected.
Definition: Seq_loc_.hpp:516
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
bool CanGetStrand(void) const
Check if it is safe to call GetStrand method.
const TId & GetId(void) const
Get the Id member data.
const TPnt & GetPnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:238
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
TFrom GetFrom(void) const
Get the From member data.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_point_.hpp:390
TStrand GetStrand(void) const
Get the Strand member data.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
bool IsPnt(void) const
Check if variant Pnt is selected.
Definition: Seq_loc_.hpp:540
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ e_Int
from to
Definition: Seq_loc_.hpp:101
@ e_Whole
whole sequence
Definition: Seq_loc_.hpp:100
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
int len
const TYPE & Get(const CNamedParameterList *param)
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
T max(T x_, T y_)
T min(T x_, T y_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
#define row(bind, expected)
Definition: string_bind.c:73
string AsString(int row_pos) const
Definition: Seq_align.cpp:1588
Create a Dense-seg from a Std-seg Used by AlnMgr to handle nucl2prot alignments.
Definition: Seq_align.hpp:348
virtual void ChooseSeqId(CSeq_id &id1, const CSeq_id &id2)=0
CSeq_align::TLengthRange range
Definition: Seq_align.cpp:2111
void AddRange(CSeq_align::TLengthRange new_range)
Definition: Seq_align.cpp:2101
void AddLength(TSeqPos length)
Definition: Seq_align.cpp:2092
Template structure SStaticPair is simlified replacement of STL pair<> Main reason of introducing this...
Definition: static_set.hpp:60
first_type first
Definition: static_set.hpp:64
second_type second
Definition: static_set.hpp:65
Strict weak ordering for pairs (by first) Used by CreateDensegFromDisc.
Definition: Seq_align.cpp:1005
Pred m_Pred
Definition: Seq_align.cpp:1010
bool operator()(const T &x, const T &y)
Definition: Seq_align.cpp:1006
Definition: type.c:6
#define _ASSERT
Modified on Tue Apr 23 07:37:27 2024 by modify_doxy.py rev. 669887