NCBI C++ ToolKit
align_compare.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: align_compare.cpp 97943 2022-09-09 19:16:56Z mozese2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Mike DiCuccio
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
49 
52 
56 
57 #include <corelib/rwstream.hpp>
58 
59 #include <util/checksum.hpp>
60 
61 #include <cmath>
62 #include <ctype.h>
63 
66 
67 
68 // Retrieve a list of interval-by-interval accounting within an alignment
69 //
71 
// Record one aligned interval pair in align_info.spans.
// The map key is query_range when row is e_Query and subject_range otherwise;
// the mapped value is subject_range when row is e_Subject and query_range
// otherwise. In the single-row modes the key and the value are therefore the
// same range.
72 static void s_UpdateSpans(const TSeqRange &query_range,
73  const TSeqRange &subject_range,
74  CAlignCompare::SAlignment& align_info,
76 {
77  align_info.spans[row == CAlignCompare::e_Query ? query_range : subject_range] =
78  row == CAlignCompare::e_Subject ? subject_range : query_range;
79 }
80 
// Collect the aligned intervals of `align` into align_info.spans, dispatching
// on the segment type:
//  - Disc: recurse into every member alignment;
//  - Std:  every segment must hold exactly two locs; segments with an empty or
//          null loc are treated as gaps and skipped; the total ranges of the
//          remaining interval pairs are recorded;
//  - anything else: convert rows 0 and 1 to a CPairwiseAln and record each of
//          its aligned ranges.
81 static void s_GetAlignmentSpans_Interval(const CSeq_align& align,
82  CAlignCompare::SAlignment& align_info,
84 {
85  switch (align.GetSegs().Which()) {
88  align.GetSegs().GetDisc().Get()) {
89  s_GetAlignmentSpans_Interval(**it, align_info, row);
90  }
91  break;
92 
94  {{
96  align.GetSegs().GetStd()) {
97  // our expectation is to find a set of two locs
98  // each loc is expected to be a Seq-interval
99 
100  // we expect std-seg alignments to be single intervals
101  // check this here
102  if ((*seg_it)->GetLoc().size() != 2) {
104  "Pairwise Std-seg alignments in comparison "
105  "should always have two locs");
106  }
107  CConstRef<CSeq_loc> loc1 = (*seg_it)->GetLoc()[0];
108  CConstRef<CSeq_loc> loc2 = (*seg_it)->GetLoc()[1];
109 
110  if (loc1->IsEmpty() || loc2->IsEmpty() ||
111  loc1->IsNull() || loc2->IsNull()) {
112  // gaps - omit
113  continue;
114  }
115 
116  if (!loc1->IsInt() || !loc2->IsInt()) {
118  "Pairwise Std-set alignments in comparison "
119  "should always be intervals");
120  }
122  loc2->GetTotalRange(),
123  align_info, row);
124  }
125  }}
126  break;
127 
128  default:
129  {{
// Build a pairwise view of rows 0 and 1 and walk its aligned ranges.
130  CAlnSeqId id0(align.GetSeq_id(0));
131  CAlnSeqId id1(align.GetSeq_id(1));
132 
133  TAlnSeqIdIRef r0(&id0);
134  TAlnSeqIdIRef r1(&id1);
135  CPairwiseAln pw(r0, r1);
136  ConvertSeqAlignToPairwiseAln(pw, align, 0, 1);
137  ITERATE (CPairwiseAln, it, pw) {
138  const CPairwiseAln::TAlignRange& r = *it;
139  s_UpdateSpans(TSeqRange(r.GetFirstFrom(), r.GetFirstTo()),
140  TSeqRange(r.GetSecondFrom(), r.GetSecondTo()),
141  align_info, row);
142  }
143  }}
144  break;
145  }
146 }
147 
148 static void s_GetAlignmentMismatches(const CSeq_align& align,
149  CAlignCompare::SAlignment& align_info,
151 {
152  if (!align.GetSegs().IsSpliced()) {
154  string traceback = lookup.GetTraceback(align, 0);
155  if (traceback.empty()) {
157  "Comparing mismatches for dense-seg alignments requires "
158  "traceback information");
159  }
160  int product_dir = align.GetSeqStrand(0) == eNa_strand_minus
161  ? -1 : 1;
162  TSeqPos product_pos = product_dir == 1 ? align.GetSeqStart(0)
163  : align.GetSeqStop(0);
164  int genomic_dir = align.GetSeqStrand(1) == eNa_strand_minus
165  ? -1 : 1;
166  TSeqPos genomic_pos = product_dir == 1 ? align.GetSeqStart(1)
167  : align.GetSeqStop(1);
168  unsigned match = 0;
169  ITERATE (string, it, traceback) {
170  if (isdigit(*it)) {
171  match = match * 10 + (*it - '0');
172  continue;
173  }
174  product_pos += match * product_dir;
175  genomic_pos += match * genomic_dir;
176  match = 0;
177  bool genomic_ins = *it == '-';
178  bool product_ins = *++it == '-';
179  if (!genomic_ins && !product_ins) {
180  /// mismatch
181  if (row != CAlignCompare::e_Subject) {
182  align_info.query_mismatches += TSeqRange(
183  product_pos, product_pos);
184 
185  }
186  if (row != CAlignCompare::e_Query) {
187  align_info.subject_mismatches += TSeqRange(
188  genomic_pos, genomic_pos);
189  }
190  }
191  if (!genomic_ins) {
192  product_pos += product_dir;
193  }
194  if (!product_ins) {
195  genomic_pos += genomic_dir;
196  }
197  }
198  if (product_pos != (product_dir == 1 ? align.GetSeqStop(0)+1
199  : align.GetSeqStart(0)-1)
200  || genomic_pos != (genomic_dir == 1 ? align.GetSeqStop(1)+1
201  : align.GetSeqStart(1)-1))
202  {
204  "Inconsistent length of traceback string " + traceback);
205  }
206  return;
207  }
208 
209  bool is_product_minus = align.GetSegs().GetSpliced().IsSetProduct_strand() &&
211  bool is_genomic_minus = align.GetSegs().GetSpliced().IsSetGenomic_strand() &&
214  {
215  const CSpliced_exon& exon = **it;
216  if (!exon.IsSetParts()) {
217  continue;
218  }
219  int product_dir = is_product_minus ||
220  (exon.IsSetProduct_strand() &&
222  ? -1 : 1;
223  TSeqPos product_pos = (product_dir == 1 ? exon.GetProduct_start()
224  : exon.GetProduct_end())
225  . AsSeqPos();
226  int genomic_dir = is_genomic_minus ||
227  (exon.IsSetGenomic_strand() &&
229  ? -1 : 1;
230  TSeqPos genomic_pos = genomic_dir == 1 ? exon.GetGenomic_start()
231  : exon.GetGenomic_end();
232  ITERATE (CSpliced_exon::TParts, part_it, exon.GetParts()) {
233  switch ((*part_it)->Which()) {
235  if (row != CAlignCompare::e_Subject) {
236  TSeqPos product_mismatch_end = product_pos +
237  product_dir * ((*part_it)->GetMismatch()-1);
238  align_info.query_mismatches += TSeqRange(
239  min(product_pos,product_mismatch_end),
240  max(product_pos,product_mismatch_end));
241 
242  }
243  if (row != CAlignCompare::e_Query) {
244  TSeqPos genomic_mismatch_end = genomic_pos +
245  genomic_dir * ((*part_it)->GetMismatch()-1);
246  align_info.subject_mismatches += TSeqRange(
247  min(genomic_pos,genomic_mismatch_end),
248  max(genomic_pos,genomic_mismatch_end));
249  }
250  product_pos += product_dir * (*part_it)->GetMismatch();
251  genomic_pos += genomic_dir * (*part_it)->GetMismatch();
252  break;
253 
255  product_pos += product_dir * (*part_it)->GetMatch();
256  genomic_pos += genomic_dir * (*part_it)->GetMatch();
257  break;
258 
260  product_pos += product_dir * (*part_it)->GetProduct_ins();
261  break;
262 
264  genomic_pos += genomic_dir * (*part_it)->GetGenomic_ins();
265  break;
266 
267  default:
269  "Unsupported exon part");
270  }
271  }
272  }
273 }
274 
275 // Retrieve a list of exon-by-exon accounting within an alignment
276 //
// Record one span per top-level unit of the alignment rather than per
// aligned interval: the whole-alignment range for the first case, one range
// per member of a Disc alignment, one per Std segment (total ranges of locs 0
// and 1), and one per exon of a Spliced alignment. Other segment types fall
// into the default branch, which reports "unhandled alignment type".
277 static void s_GetAlignmentSpans_Exon(const CSeq_align& align,
278  CAlignCompare::SAlignment &align_info,
280 {
281  switch (align.GetSegs().Which()) {
283  s_UpdateSpans(align.GetSeqRange(0), align.GetSeqRange(1), align_info, row);
284  break;
285 
287  /* UNTESTED */
289  align.GetSegs().GetDisc().Get()) {
290  s_UpdateSpans((*it)->GetSeqRange(0), (*it)->GetSeqRange(1), align_info, row);
291  }
292  break;
293 
295  /* UNTESTED */
296  ITERATE (CSeq_align::TSegs::TStd, it, align.GetSegs().GetStd()) {
297  const CStd_seg& seg = **it;
298  s_UpdateSpans(seg.GetLoc()[0]->GetTotalRange(),
299  seg.GetLoc()[1]->GetTotalRange(), align_info, row);
300  }
301  break;
302 
305  align.GetSegs().GetSpliced().GetExons()) {
306  const CSpliced_exon& exon = **it;
// The product range is passed as the query range and the genomic range as the
// subject range.
307  TSeqRange genomic(exon.GetGenomic_start(), exon.GetGenomic_end());
308  TSeqRange product;
309  product.SetFrom(exon.GetProduct_start().AsSeqPos());
310  product.SetTo(exon.GetProduct_end().AsSeqPos());
311  s_UpdateSpans(product, genomic, align_info, row);
312  }
313  break;
314 
315  default:
317  "unhandled alignment type");
318  }
319 }
320 
321 
322 // Retrieve a list of intron-by-intron accounting within an alignment;
323 // meaningful only for Spliced-seg alignments
324 //
// Record one span per pair of consecutive exons. The alignment must be a
// Spliced-seg with a retrievable product strand (a further condition of the
// guard is not visible here); otherwise the "intron mode only meaningful..."
// error is raised.
325 static void s_GetAlignmentSpans_Intron(const CSeq_align& align,
326  CAlignCompare::SAlignment &align_info,
328 {
329  if (!align.GetSegs().IsSpliced() ||
330  !align.GetSegs().GetSpliced().CanGetProduct_strand() ||
332  {
334  "intron mode only meaningful for Spliced-seg alignments");
335  }
336 
// NOTE(review): the right-hand side of this comparison is not visible in this
// listing; presumably the genomic strand -- confirm.
337  bool is_reverse = align.GetSegs().GetSpliced().GetProduct_strand() !=
339 
340  CRef<CSpliced_exon> last_exon;
342  align.GetSegs().GetSpliced().GetExons()) {
343  CRef<CSpliced_exon> exon = *it;
344  if (last_exon) {
// On the genomic row the exon order is swapped when is_reverse is set, so the
// range always runs from the end of one exon to the start of the other.
345  CRef<CSpliced_exon> first_exon = is_reverse ? exon : last_exon;
346  CRef<CSpliced_exon> second_exon = is_reverse ? last_exon : exon;
347  TSeqRange genomic(first_exon->GetGenomic_end(),
348  second_exon->GetGenomic_start());
// The product range always runs from the previous exon's end to this exon's
// start, in iteration order.
349  TSeqRange product;
350  product.SetFrom(last_exon->GetProduct_end().AsSeqPos());
351  product.SetTo(exon->GetProduct_start().AsSeqPos());
352  s_UpdateSpans(product, genomic, align_info, row);
353  }
354  last_exon = exon;
355  }
356 }
357 
358 
359 // Retrieve a list of total range spans for an alignment
360 //
// Record a single span covering the total range of rows 0 and 1 of `align`.
361 static void s_GetAlignmentSpans_Span(const CSeq_align& align,
362  CAlignCompare::SAlignment &align_info,
364 {
365  s_UpdateSpans(align.GetSeqRange(0), align.GetSeqRange(1), align_info, row);
366 }
367 
// Append to `scores` one value per name in `score_list`, in list order,
// obtained through lookup.GetScore(align, name).
// When `required` is false, a CAlgoAlignUtilException whose error code is
// eScoreNotFound is absorbed and 0 is stored for that score; any other
// exception of that type, and every such exception when `required` is true,
// is rethrown.
368 template<typename T>
369 void s_PopulateScores(const CSeq_align &align,
370  const vector<string> &score_list,
371  vector<T> &scores,
372  bool required = true)
373 {
375  ITERATE (vector<string>, it, score_list) {
376  T value = 0;
377  try {
378  value = lookup.GetScore(align, *it);
379  } catch(CAlgoAlignUtilException &e) {
380  /// If scores are not required, use value of 0 for scores that were not found
381  if (required || e.GetErrCode() != CAlgoAlignUtilException::eScoreNotFound) {
382  throw;
383  }
384  }
385  scores.push_back(value);
386  }
387 }
388 
// Fill integer_scores / real_scores with the scores to be compared.
// Blacklist mode: copy every string-named score stored on the alignment whose
// name is NOT in score_set, choosing the destination map by whether the
// stored value is an int; an alignment without scores contributes nothing.
// Otherwise: look up every name in score_set through `lookup` and store it as
// an int (truncating cast) when IsIntegerScore() says so, else as a real.
389 static void s_PopulateScoreSet(const CSeq_align &align,
390  const set<string> &score_set,
391  bool score_set_as_blacklist,
392  CAlignCompare::TIntegerScoreSet &integer_scores,
393  CAlignCompare::TRealScoreSet &real_scores)
394 {
395  if (score_set_as_blacklist) {
396  if (!align.IsSetScore()) {
397  return;
398  }
399  ITERATE (CSeq_align::TScore, score_it, align.GetScore()) {
400  if ((*score_it)->GetId().IsStr() &&
401  !score_set.count((*score_it)->GetId().GetStr()))
402  {
403  if ((*score_it)->GetValue().IsInt()) {
404  integer_scores[(*score_it)->GetId().GetStr()] =
405  (*score_it)->GetValue().GetInt();
406  } else {
407  real_scores[(*score_it)->GetId().GetStr()] =
408  (*score_it)->GetValue().GetReal();
409  }
410  }
411  }
412  } else {
414  ITERATE (set<string>, score_it, score_set) {
415  double value = lookup.GetScore(align, *score_it);
416  if (lookup.IsIntegerScore(align, *score_it)) {
417  integer_scores[*score_it] = static_cast<int>(value);
418  } else {
419  real_scores[*score_it] = value;
420  }
421  }
422  }
423 }
424 
426  const CAlignCompare::TRealScoreSet &scores2,
427  double real_score_tolerance)
428 {
// Walk both score maps in lockstep. They are equivalent when they hold the
// same keys and each pair of values differs by no more than
// real_score_tolerance times the larger of the two magnitudes (a relative
// tolerance).
430  it2 = scores2.begin();
431  it1 != scores1.end() || it2 != scores2.end(); ++it1, ++it2)
432  {
// One map ending before the other, or a key mismatch, means the key sets
// differ.
433  if (it1 == scores1.end() || it2 == scores2.end()
434  || it1->first != it2->first)
435  {
436  /// The two don't have the same set of real-value scores
437  return false;
438  }
439  double allowed_diff = max(abs(it1->second), abs(it2->second))
440  * real_score_tolerance;
441  if (abs(it1->second - it2->second) > allowed_diff) {
442  return false;
443  }
444  }
445  return true;
446 }
447 
// Collect into `exts` the string-typed exts of `align` selected by ext_set:
// in blacklist mode those whose type is NOT listed, otherwise those whose
// type IS listed. Exts whose type is not a string are ignored.
// In whitelist mode a CException (eUnknown, "Not all listed exts found") is
// thrown when the alignment has no exts but ext_set is non-empty, or when
// `exts` ends up with fewer entries than ext_set.
448 static void s_PopulateExtSet(const CSeq_align &align,
449  const set<string> &ext_set,
450  bool ext_set_as_blacklist,
452 {
453  if (!align.IsSetExt()) {
454  if (!ext_set_as_blacklist && !ext_set.empty()) {
455  NCBI_THROW(CException, eUnknown, "Not all listed exts found");
456  }
457  return;
458  }
459  ITERATE (CSeq_align::TExt, ext_it, align.GetExt()) {
460  if (!(*ext_it)->GetType().IsStr()) {
461  continue;
462  }
463  string ext_type = (*ext_it)->GetType().GetStr();
464  bool is_in_set = ext_set.count(ext_type);
465  if ((ext_set_as_blacklist && !is_in_set)||
466  (!ext_set_as_blacklist && is_in_set))
467  {
// Keyed by type, so a later ext of the same type replaces an earlier one.
468  exts[ext_type] = *ext_it;
469  }
470  }
471  if (!ext_set_as_blacklist && exts.size() < ext_set.size()) {
472  NCBI_THROW(CException, eUnknown, "Not all listed exts found");
473  }
474 }
475 
/// Render a byte buffer as hexadecimal text.
///
/// @param ptr    first byte of the buffer; must address at least `length` bytes
/// @param length number of bytes to convert
/// @return a string of exactly 2 * length characters, high nibble first for
///         each byte; empty when length is 0
///
/// Digits come from a lookup table rather than a numeric-to-string call per
/// nibble, and the result is sized up front so it is allocated once.
/// NOTE(review): upper-case digits are emitted, which is believed to match the
/// default of the NStr conversion used previously -- confirm if any caller
/// depends on letter case.
std::string s_ConvertToHexString(unsigned char * ptr, unsigned int length)
{
    static const char kHexDigits[] = "0123456789ABCDEF";

    std::string retnString;
    retnString.reserve(2 * static_cast<std::string::size_type>(length));

    for (unsigned int i = 0; i < length; ++i) {
        retnString += kHexDigits[(ptr[i] >> 4) & 0x0f];
        retnString += kHexDigits[ptr[i] & 0x0f];
    }

    return retnString;
}
// Decide whether two ext sets are equivalent: they must hold the same keys,
// and the ASN.1 binary serialisations of their values, streamed in key order
// into two MD5 accumulators, must give the same digest.
489 static bool s_EquivalentExts(const CAlignCompare::TExtSet &exts1,
490  const CAlignCompare::TExtSet &exts2)
491 {
493  exts2_md5(CChecksum::eMD5);
494  CWStream exts1_wstr(&exts1_md5), exts2_wstr(&exts2_md5);
496  it2 = exts2.begin();
497  it1 != exts1.end() || it2 != exts2.end(); ++it1, ++it2)
498  {
499  if (it1 == exts1.end() || it2 == exts2.end()
500  || it1->first != it2->first)
501  {
502  /// The two don't have the same set of exts
503  return false;
504  }
505  exts1_wstr << MSerial_AsnBinary << *it1->second;
506  exts2_wstr << MSerial_AsnBinary << *it2->second;
507  }
// Flush so that all serialised bytes have reached the checksum objects before
// the digests are read.
508  exts1_wstr.flush();
509  exts2_wstr.flush();
510  unsigned char exts1_md5_array[16], exts2_md5_array[16];
511  memset(exts1_md5_array, 0, 16);
512  memset(exts2_md5_array, 0, 16);
513  exts1_md5.GetChecksum().GetMD5Digest(exts1_md5_array);
514  exts2_md5.GetChecksum().GetMD5Digest(exts2_md5_array);
// The 16-byte digests are compared through their hexadecimal renderings.
515  return s_ConvertToHexString(exts1_md5_array, 16)
516  == s_ConvertToHexString(exts2_md5_array, 16);
517 }
518 
519 //////////////////////////////////////////////////////////////////////////////
520 //
521 // SComparison constructor does the hard work of verifying that two sets of alignment
522 // span ranges actually overlap, and determines by how much
523 //
525 {
// Default constructor: zeroes every counter (see its definition).
526  SComparison();
527 
// Remaining parameters of the comparing constructor; the tolerance is handed
// to the real-valued score comparison.
529  const CAlignCompare::SAlignment& second,
530  double real_score_tolerance);
531 
// Similarity of the two span sets: 0 when nothing intersects, otherwise
// dot / sqrt(sum_a * sum_b) as computed by the comparing constructor.
537  float overlap;
538 };
539 
541 : spans_in_common(0)
542 , spans_overlap(0)
543 , spans_unique_first(0)
544 , spans_unique_second(0)
545 , is_equivalent(false)
546 , overlap(0)
547 {
// Nothing to do: an empty comparison has all counters at zero, is not
// equivalent, and has zero overlap.
548 }
549 
551  const CAlignCompare::SAlignment& second,
552  double real_score_tolerance)
553 : spans_in_common(0)
554 , spans_overlap(0)
555 , spans_unique_first(0)
556 , spans_unique_second(0)
557 , is_equivalent(false)
558 , overlap(0)
559 {
560  if (first.CompareGroup(second, false) != 0) {
561  /// Alignments have different disambiguiting score values, can't be compared
562  return;
563  }
564 
565  float dot = 0;
566  float sum_a = 0;
567  float sum_b = 0;
568 
569  CAlignCompare::TAlignmentSpans::const_iterator first_it = first.spans.begin();
571  for ( ; first_it != first.spans.end() && second_it != second.spans.end(); ) {
572  if (*first_it == *second_it) {
573  TSeqPos intersecting_len = first_it->first.GetLength();
574  dot += float(intersecting_len) * float(intersecting_len);
575  sum_a += first_it->first.GetLength() * first_it->first.GetLength();
576  sum_b += second_it->first.GetLength() * second_it->first.GetLength();
577 
578  spans_in_common += intersecting_len;
579  ++first_it;
580  ++second_it;
581  } else {
582  bool overlap =
583  first_it->first.IntersectingWith(second_it->first) &&
584  first_it->second.IntersectingWith(second_it->second);
585  TSeqPos intersecting_len = 0;
586  if (overlap) {
587  TSeqRange r = first_it->first;
588  r.IntersectWith(second_it->first);
589 
590  intersecting_len = r.GetLength();
591  dot += float(intersecting_len) * float(intersecting_len);
592 
593  spans_overlap += intersecting_len;
594  spans_unique_first -= intersecting_len;
595  spans_unique_second -= intersecting_len;
596  }
597  if (*first_it < *second_it) {
598  sum_a += first_it->first.GetLength() * first_it->first.GetLength();
599  spans_unique_first += first_it->first.GetLength();
600  ++first_it;
601  } else {
602  sum_b += second_it->first.GetLength() * second_it->first.GetLength();
603  spans_unique_second += second_it->first.GetLength();
604  ++second_it;
605  }
606  }
607  }
608  is_equivalent = spans_in_common == first.length &&
609  spans_in_common == second.length &&
610  first.query_mismatches == second.query_mismatches &&
611  first.subject_mismatches == second.subject_mismatches &&
612  first.integer_scores == second.integer_scores &&
613  s_EquivalentScores(first.real_scores, second.real_scores,
614  real_score_tolerance) &&
615  s_EquivalentExts(first.exts, second.exts);
616  for ( ; first_it != first.spans.end(); ++first_it) {
617  sum_a += first_it->first.GetLength() * first_it->first.GetLength();
618  spans_unique_first += first_it->first.GetLength();
619  }
620  for ( ; second_it != second.spans.end(); ++second_it) {
621  sum_b += second_it->first.GetLength() * second_it->first.GetLength();
622  spans_unique_second += second_it->first.GetLength();
623  }
624 
625  overlap = dot == 0 ? 0 : dot / ::sqrt(sum_a * sum_b);
626 }
627 
628 
630 {
// Strict ordering of alignments through pointers: by query, subject, scores,
// query strand, subject strand, subject range, query range, and finally by
// pointer value, so that two distinct objects never compare equal.
632  const CAlignCompare::SAlignment *ptr2) const
633  {
634  const CAlignCompare::SAlignment& k1 = *ptr1;
635  const CAlignCompare::SAlignment& k2 = *ptr2;
636 
637  if (k1.query < k2.query) { return true; }
638  if (k2.query < k1.query) { return false; }
639  if (k1.subject < k2.subject) { return true; }
640  if (k2.subject < k1.subject) { return false; }
641 
642  if (k1.scores < k2.scores) { return true; }
643  if (k2.scores < k1.scores) { return false; }
644 
645  if (k1.query_strand < k2.query_strand) { return true; }
646  if (k2.query_strand < k1.query_strand) { return false; }
647  if (k1.subject_strand < k2.subject_strand) { return true; }
648  if (k2.subject_strand < k1.subject_strand) { return false; }
649 
650  if (k1.subject_range < k2.subject_range) { return true; }
651  if (k2.subject_range < k1.subject_range) { return false; }
652  if (k1.query_range < k2.query_range) { return true; }
653  if (k2.query_range < k1.query_range) { return false; }
654 
// Final tie-break on address.
655  return ptr1 < ptr2;
656  }
657 };
658 
// A pair of alignment pointers; NextGroup() stores the set-1 alignment first
// and the set-2 alignment second.
660 typedef pair<CAlignCompare::SAlignment *, CAlignCompare::SAlignment *> TPtrPair;
// An alignment pair together with the result of comparing its two members.
661 typedef pair<TPtrPair, SComparison> TComp;
662 
664 {
// Sort predicate for comparison records. The equivalence flag is the primary
// key: equivalent pairs sort first in strict mode and last otherwise.
// Ties are broken by the first alignment's subject_range, larger first, and
// then by the second alignment's query_range, smaller first.
666 
667  SComp_Less(bool strict = false)
668  : strict_compare(strict)
669  {
670  }
671 
672  bool operator()(const TComp& c1, const TComp& c2) const
673  {
674  // strict comparison amounts to placing all identical pairs either before
675  // or after non-identical ones
676  // putting identical pairs first means that we evaluate the best examples first, and
677  // can establish equality without polluting the comparison with weaker
678  // alignments; non-strict means we combine weaker overlapping
679  // alignments together into equivalence groups with alignments that are
680  // identical
681  if (strict_compare) {
682  if (c1.second.is_equivalent && !c2.second.is_equivalent) {
683  return true;
684  }
685  if (c2.second.is_equivalent && !c1.second.is_equivalent) {
686  return false;
687  }
688  }
689  else {
690  if (c1.second.is_equivalent && !c2.second.is_equivalent) {
691  return false;
692  }
693  if (c2.second.is_equivalent && !c1.second.is_equivalent) {
694  return true;
695  }
696  }
697 
// Note the inverted results: a smaller subject_range sorts later.
698  if (c1.first.first->subject_range < c2.first.first->subject_range)
699  {
700  return false;
701  }
702  if (c2.first.first->subject_range < c1.first.first->subject_range)
703  {
704  return true;
705  }
706  return c1.first.second->query_range < c2.first.second->query_range;
707  }
708 };
709 
711 SAlignment(int s, const CRef<CSeq_align> &al, CAlignCompare &compare,
712  bool is_slice)
713 : source_set(s)
714 , query_strand(eNa_strand_unknown)
715 , subject_strand(eNa_strand_unknown)
716 , length(0)
717 , align(al)
718 , match_level(CAlignCompare::e_NoMatch)
719 , compare_object(compare)
720 {
// Build the comparison record for one alignment: fill the per-row fields for
// the rows selected by compare.m_Row, read the disambiguating scores, and
// collect spans according to compare.m_Mode. Any CException raised while
// doing so is logged together with the alignment and leaves the record with
// no spans, and therefore a length of 0.
721  try {
722  if (compare.m_Row != e_Subject) {
726  }
727  if (compare.m_Row != e_Query) {
731  }
// The first score list is mandatory; scores missing from the second list
// default to 0 (required == false).
732  s_PopulateScores(*align, compare.m_DisambiguitingScores.first, scores.first);
733  s_PopulateScores(*align, compare.m_DisambiguitingScores.second, scores.second, false);
738  switch (compare.m_Mode) {
739  case e_Full:
740  /// If this alignment was created by slicing an input alignment,
741  /// it doesn't have traceback data so mismatches can't be calculated
742  if (!is_slice) {
743  s_GetAlignmentMismatches(*align, *this, compare.m_Row);
744  }
745  // fall through
746 
747  case e_Interval:
748  s_GetAlignmentSpans_Interval(*align, *this, compare.m_Row);
749  break;
750 
751  case e_Exon:
752  s_GetAlignmentSpans_Exon(*align, *this, compare.m_Row);
753  break;
754 
755  case e_Span:
756  s_GetAlignmentSpans_Span(*align, *this, compare.m_Row);
757  break;
758 
759  case e_Intron:
760  s_GetAlignmentSpans_Intron(*align, *this, compare.m_Row);
761  break;
762  }
763  }
764  catch (CException& e) {
765  ERR_POST(Error << "alignment not processed: " << MSerial_AsnText << *align << e);
766  spans.clear();
767  }
// length is the summed length of the span keys.
768  ITERATE (TAlignmentSpans, it, spans) {
769  length += it->first.GetLength();
770  }
771 }
772 
774 CompareGroup(const SAlignment &o, bool strict_only) const
775 {
// Three-way comparison (-1 / 0 / 1) deciding whether two alignments belong to
// the same comparison group: ordered by the string form of the query id, then
// of the subject id, then by scores.first.
// With strict_only the comparison stops there. Otherwise scores.second is
// compared position by position, but only where both values are non-zero, so
// a zero entry on either side does not separate two alignments.
776  if (query.AsString() < o.query.AsString()) { return -1; }
777  if (o.query.AsString() < query.AsString()) { return 1; }
778 
779  if (subject.AsString() < o.subject.AsString()) { return -1; }
780  if (o.subject.AsString() < subject.AsString()) { return 1; }
781 
782  if (scores.first < o.scores.first) { return -1; }
783  if (o.scores.first < scores.first) { return 1; }
784 
785  if (strict_only) {
786  return 0;
787  }
788 
// Indexes o.scores.second by this object's size; assumes both vectors have
// the same length -- TODO confirm.
789  for (unsigned score_index = 0; score_index < scores.second.size(); ++score_index) {
790  if (scores.second[score_index] && o.scores.second[score_index]) {
791  if (scores.second[score_index] < o.scores.second[score_index]) { return -1; }
792  if (o.scores.second[score_index] < scores.second[score_index]) { return 1; }
793  }
794  }
795 
796  return 0;
797 }
798 
800 {
// Decide which input set supplies the next group. The result is a bit mask:
// 1 = set 1 only, 2 = set 2 only, 3 = both.
// Each pending group is primed with one alignment if it is empty. If set 1 is
// exhausted the answer is 2, and if set 2 is exhausted it is 1; set 2's state
// is not examined when set 1 is already exhausted.
801  if (m_NextSet1Group.empty()) {
802  if (m_Set1.EndOfData()) {
803  return 2;
804  } else {
805  m_NextSet1Group.push_back(x_NextAlignment(1));
806  }
807  }
808  if (m_NextSet2Group.empty()) {
809  if (m_Set2.EndOfData()) {
810  return 1;
811  } else {
812  m_NextSet2Group.push_back(x_NextAlignment(2));
813  }
814  }
// Strict-only group comparison of the two pending heads: the smaller one goes
// first, and equal heads are processed together.
815  int compare_group = m_NextSet1Group.front()
816  ->CompareGroup(*m_NextSet2Group.front(), true);
817  if (compare_group < 0) {
818  return 1;
819  } else if (compare_group > 0) {
820  return 2;
821  } else {
822  return 3;
823  }
824 }
825 
827 x_NextAlignment(int set, bool update_counts)
828 {
// Wrap the next alignment of the chosen input (set 1 when set == 1, otherwise
// set 2) in a new SAlignment. With update_counts the per-set alignment
// counter is incremented and the per-set base counter grows by the new
// record's length.
829  AutoPtr<SAlignment> align =
830  new SAlignment(set, (set == 1 ? m_Set1 : m_Set2).GetNext(), *this);
831  if (update_counts) {
832  ++(set == 1 ? m_CountSet1 : m_CountSet2);
833  (set == 1 ? m_CountBasesSet1 : m_CountBasesSet2) += align->length;
834  }
835  return align;
836 }
837 
839 {
// Load the next group of one input set into current_group: the pending
// next_group is moved into it, then alignments are read until the source is
// exhausted or one is found that belongs to a different group (strict-only
// CompareGroup against the current front). That alignment is parked in
// next_group for the following call.
840  IAlignSource &source = set == 1 ? m_Set1 : m_Set2;
841  list< AutoPtr<SAlignment> > &current_group =
843  list< AutoPtr<SAlignment> > &next_group =
845  current_group.clear();
846  current_group.splice(current_group.end(), next_group);
847  while (!source.EndOfData() && next_group.empty()) {
849  if (current_group.empty() || align->CompareGroup(*current_group.front(), true) == 0)
850  {
851  current_group.push_back(align);
852  } else {
853  next_group.push_back(align);
854  }
855  }
856 }
857 
859 {
// Register the start and the open end of every span as boundaries in the
// owning CAlignCompare: the mapped range (it->second) under the query id and
// the key range (it->first) under the subject id. An id that tests false is
// skipped.
860  ITERATE (TAlignmentSpans, it, spans) {
861  if (query) {
862  compare_object.m_BoundariesMap[query].insert(it->second.GetFrom());
863  compare_object.m_BoundariesMap[query].insert(it->second.GetToOpen());
864  }
865  if (subject) {
866  compare_object.m_BoundariesMap[subject].insert(it->first.GetFrom());
867  compare_object.m_BoundariesMap[subject].insert(it->first.GetToOpen());
868  }
869  }
870 }
871 
872 list< AutoPtr<CAlignCompare::SAlignment> > CAlignCompare::SAlignment::
873 BreakOnBoundaries(int row) const
874 {
875  list< AutoPtr<SAlignment> > align_parts;
876  const set<TSeqPos> &boundaries =
877  compare_object.m_BoundariesMap[row == 0 ? query : subject];
878  TSeqRange range = row == 0 ? query_range : subject_range;
879  TSeqPos last_boundary = range.GetFrom();
880  for (set<TSeqPos>::const_iterator it = boundaries.upper_bound(range.GetFrom());
881  it != boundaries.end() && *it <= range.GetToOpen(); ++it)
882  {
883  /// Extract slice, as long as it's not the the entire alignment
884  if (last_boundary > range.GetFrom() || *it < range.GetToOpen()) {
885  AutoPtr<SAlignment> part = Slice(row, last_boundary, *it-1);
886  if (part.get()) {
887  align_parts.push_back(part);
888  }
889  }
890  last_boundary = *it;
891  }
892  if (!align_parts.empty() && last_boundary < range.GetToOpen()) {
893  AutoPtr<SAlignment> part = Slice(row, last_boundary, range.GetTo());
894  if (part.get()) {
895  align_parts.push_back(part);
896  }
897  }
898  return align_parts;
899 }
900 
902 Slice(int row, TSeqPos from, TSeqPos to) const
903 {
904  if (align->GetSegs().IsDisc()) {
905  vector< AutoPtr<SAlignment> > seg_slices;
906  TSeqRange slice_range(min(from, to), max(from, to));
907  ITERATE (CSeq_align_set::Tdata, seg_it, align->GetSegs().GetDisc().Get())
908  {
909  TSeqRange seg_slice_range =
910  slice_range & (*seg_it)->GetSeqRange(row);
911  if (seg_slice_range.Empty()) {
912  continue;
913  }
914  AutoPtr<SAlignment> slice =
915  SAlignment(source_set, *seg_it, compare_object) . Slice(
916  row, seg_slice_range.GetFrom(), seg_slice_range.GetTo());
917  if (slice.get()) {
918  seg_slices.push_back(slice);
919  }
920  }
921  AutoPtr<SAlignment> complete_slice;
922  if (seg_slices.size() == 1) {
923  complete_slice = seg_slices.front();
924  } else if (seg_slices.size() > 1) {
925  CRef<CSeq_align> complete_align(new CSeq_align);
926  complete_slice.reset(new SAlignment(source_set, complete_align,
927  compare_object, true));
928  ITERATE (vector< AutoPtr<SAlignment> >, seg_it, seg_slices) {
929  complete_align->SetSegs().SetDisc().Set().push_back(
930  (*seg_it)->align);
931  complete_slice->query_mismatches += (*seg_it)->query_mismatches;
932  complete_slice->subject_mismatches += (*seg_it)->query_mismatches;
933  }
934  }
935  return complete_slice;
936  }
937 
938  if (!align->GetSegs().IsDenseg()) {
940  "Alignment splitting supported only for Dense-seq and "
941  "Disc-seg alignments");
942  }
943  AutoPtr<SAlignment> slice;
944  CRef<CSeq_align> slice_align(new CSeq_align);
945  slice_align->SetType(align->GetType());
946  if (align->IsSetDim()) {
947  slice_align->SetDim(align->GetDim());
948  }
949  CRef<CDense_seg> slice_seg = align->GetSegs().GetDenseg()
950  .ExtractSlice(row, from, to);
951  bool all_gaps = true;
952  for (int seg = 0; seg < slice_seg->GetNumseg() && all_gaps; ++seg) {
953  if (slice_seg->GetStarts()[seg*2] >= 0 &&
954  slice_seg->GetStarts()[seg*2+1] >= 0)
955  {
956  all_gaps = false;
957  }
958  }
959  if (all_gaps) {
960  /// Slice is completely within gaps, so no alignment
961  return slice;
962  }
963  slice_align->SetSegs().SetDenseg(*slice_seg);
964  ITERATE (CSeq_align::TScore, score_it, align->GetScore()) {
965  if ((*score_it)->GetId().IsStr() &&
966  compare_object.m_DistributiveScores.count(
967  (*score_it)->GetId().GetStr()))
968  {
969  slice_align->SetScore().push_back(*score_it);
970  }
971  }
972  slice.reset(new SAlignment(source_set, slice_align, compare_object, true));
973  /// Special case for full mode; extract the mismatches positions from the
974  /// ones calculated for the full alignment
975  if (compare_object.m_Mode == e_Full) {
976  slice->query_mismatches = query_mismatches;
977  slice->query_mismatches &= slice_align->GetSeqRange(0);
978  slice->subject_mismatches = subject_mismatches;
979  slice->subject_mismatches &= slice_align->GetSeqRange(1);
980  }
981  return slice;
982 }
983 
985 {
// Replace every alignment of the current group of set `group` by its pieces,
// split at the registered boundaries of `row` (0 = query, otherwise subject).
// Alignments for which BreakOnBoundaries() returns nothing are kept
// unchanged. The group is left untouched when the relevant id of its first
// alignment tests false.
// NOTE(review): front() is called without an emptiness check; presumably the
// caller only invokes this on a non-empty group -- confirm.
986  list< AutoPtr<SAlignment> > orig_set;
987  list< AutoPtr<SAlignment> > &transformed_set =
988  group == 1 ? m_CurrentSet1Group : m_CurrentSet2Group;
989  CSeq_id_Handle id = row == 0 ? transformed_set.front()->query
990  : transformed_set.front()->subject;
991  if (!id) {
992  return;
993  }
// Move the originals aside and rebuild the group in place.
994  orig_set.swap(transformed_set);
995  ITERATE (list< AutoPtr<SAlignment> >, it, orig_set) {
996  list< AutoPtr<CAlignCompare::SAlignment> > parts =
997  (*it)->BreakOnBoundaries(row);
998  if (parts.empty()) {
999  transformed_set.push_back(*it);
1000  } else {
1001  transformed_set.splice(transformed_set.end(), parts);
1002  }
1003  }
1004 }
1005 
1007 {
// Pre-pass over both inputs: read every alignment without touching the
// statistics counters (update_counts == false), register its span boundaries,
// and then reset the source.
1008  while (!m_Set1.EndOfData()) {
1009  x_NextAlignment(1, false)->PopulateBoundariesMap();
1010  }
1011  m_Set1.Reset();
1012  while (!m_Set2.EndOfData()) {
1013  x_NextAlignment(2, false)->PopulateBoundariesMap();
1014  }
1015  m_Set2.Reset();
1016 }
1017 
1018 vector<const CAlignCompare::SAlignment *> CAlignCompare::NextGroup()
1019 {
1020  int next_group_set = x_DetermineNextGroupSet();
1021  if (next_group_set & 1) {
1022  x_GetCurrentGroup(1);
1023  }
1024  if (next_group_set & 2) {
1025  x_GetCurrentGroup(2);
1026  }
1027 
1028  vector<const SAlignment *> group;
1029  switch (next_group_set) {
1030  case 1:
1031  if (!m_IgnoreNotPresent) {
1035  m_CountBasesOnlySet1 += (*it)->length;
1036  group.push_back(&**it);
1037  }
1038  }
1039  break;
1040 
1041  case 2:
1042  if (!m_IgnoreNotPresent) {
1046  m_CountBasesOnlySet2 += (*it)->length;
1047  group.push_back(&**it);
1048  }
1049  }
1050  break;
1051 
1052  default:
1053  {{
1054  if (!m_BoundariesMap.empty()) {
1055  x_SplitOnOverlaps(1, 0);
1056  x_SplitOnOverlaps(1, 1);
1057  x_SplitOnOverlaps(2, 0);
1058  x_SplitOnOverlaps(2, 1);
1061  }
1062  TAlignPtrSet set1_aligns;
1064  {
1065  set1_aligns.insert(&**it);
1066  }
1067 
1068  TAlignPtrSet set2_aligns;
1070  {
1071  set2_aligns.insert(&**it);
1072  }
1073 
1074  set<SAlignment const*> red_color; // alignments from set2 that have equivalent mate
1075  // from opposite set.
1076  vector<TComp> comparisons;
1077 
1078  ITERATE (TAlignPtrSet, set1_it, set1_aligns) {
1079  SAlignment const* lhs = *set1_it;
1080 
1081  ITERATE (TAlignPtrSet, set2_it, set2_aligns) {
1082  SAlignment const* rhs = *set2_it;
1083  // Check for equivalent alignment.
1084  // In strict mode we do not combine overlapping and equiv. alignments.
1085  if ( m_Strict && red_color.count(rhs) > 0 ) {
1086  continue;
1087  }
1088 
1089  if ( false == s_IsOverlapping(lhs, rhs, m_Row) ) {
1090  continue;
1091  }
1092  // Check for overlap.
1093  comparisons.push_back(TComp(TPtrPair(const_cast<SAlignment*>(lhs), const_cast<SAlignment*>(rhs)),
1094  SComparison(*lhs, *rhs,
1096 
1097  // Post-processing:
1098  // - if two alignments are equivalent
1099  // -- color both alignments in red,
1100  // -- break out of the loop.
1101  TComp const& record = comparisons.back();
1102  SComparison const& comp = record.second;
1103  if ( comp.is_equivalent ) {
1104  red_color.insert(rhs); // set2's alignment.
1105  // In strict mode we do not combine overlapping and equiv. alignments.
1106  if ( m_Strict ) {
1107  break;
1108  }
1109  }
1110  }
1111  }
1112 
1113  std::sort(comparisons.begin(), comparisons.end(), SComp_Less(m_Strict));
1114 
1115  typedef pair<TAlignPtrSet, EMatchLevel> TAlignGroup;
1116 
1117  list<TAlignGroup> groups;
1119 
1120  ITERATE (vector<TComp>, it, comparisons) {
1121  bool is_equivalent = it->second.is_equivalent;
1122  /// This comparison counts if the two alignments are equivalent, or
1123  /// they overlap and we haven't yet seen an equivalence for either
1124  if (is_equivalent ||
1125  (it->second.overlap > 0 &&
1126  it->first.first->match_level != e_Equiv &&
1127  it->first.second->match_level != e_Equiv))
1128  {
1129  list<TAlignGroup>::iterator align1_group = groups.end(),
1130  align2_group = groups.end();
1131  if (set1_aligns.erase(it->first.first)) {
1132  it->first.first->match_level =
1133  is_equivalent ? e_Equiv : e_Overlap;
1134  group.push_back(it->first.first);
1135  if (is_equivalent) {
1136  ++m_CountEquivSet1;
1137  m_CountBasesEquivSet1 += it->second.spans_in_common;
1138  } else {
1140  m_CountBasesOverlapSet1 += it->second.spans_in_common
1141  + it->second.spans_overlap;
1142  m_CountBasesOnlySet1 += it->second.spans_unique_first;
1143  }
1144  } else {
1145  align1_group = group_map[it->first.first];
1146  }
1147  if (set2_aligns.erase(it->first.second)) {
1148  it->first.second->match_level =
1149  is_equivalent ? e_Equiv : e_Overlap;
1150  group.push_back(it->first.second);
1151  if (is_equivalent) {
1152  ++m_CountEquivSet2;
1153  m_CountBasesEquivSet2 += it->second.spans_in_common;
1154  } else {
1156  m_CountBasesOverlapSet2 += it->second.spans_in_common
1157  + it->second.spans_overlap;
1158  m_CountBasesOnlySet2 += it->second.spans_unique_second;
1159  }
1160  } else {
1161  align2_group = group_map[it->first.second];
1162  }
1163  if (align1_group == groups.end() &&
1164  align2_group == groups.end())
1165  {
1166  /// Neither alignment was encountered before, so create
1167  /// new group
1168  list<TAlignGroup>::iterator new_group =
1169  groups.insert(groups.end(), TAlignGroup());
1170  new_group->first.insert(it->first.first);
1171  new_group->first.insert(it->first.second);
1172  new_group->second = it->first.first->match_level;
1173  group_map[it->first.first] = new_group;
1174  group_map[it->first.second] = new_group;
1175  ++(is_equivalent ? m_CountEquivGroups
1177  } else if(align1_group == groups.end()) {
1178  /// alignment 1 is new, add it to existing group
1179  align2_group->first.insert(it->first.first);
1180  group_map[it->first.first] = align2_group;
1181  } else if(align2_group == groups.end()) {
1182  /// alignment 2 is new, add it to existing group
1183  align1_group->first.insert(it->first.second);
1184  group_map[it->first.second] = align1_group;
1185  } else if (align1_group != align2_group) {
1186  /// The alignments are in two separate groups; merge them
1187  ITERATE (TAlignPtrSet, group2_it, align2_group->first) {
1188  align1_group->first.insert(*group2_it);
1189  group_map[*group2_it] = align1_group;
1190  }
1191  if (align2_group->second == e_Overlap) {
1193  if (align1_group->second == e_Equiv) {
1194  /// Change the group from equivalence to overlap
1195  align1_group->second = e_Overlap;
1198  }
1199  } else {
1201  }
1202  groups.erase(align2_group);
1203  }
1204  }
1205  }
1206 
1207  ITERATE (list<TAlignGroup>, group_it, groups) {
1208  if (group_it->second == e_NoMatch) {
1209  continue;
1210  }
1211  if (group_it->second == e_Overlap && !m_QualityScores.empty())
1212  {
1213  /// Find which side is better
1214  vector<SAlignment *> best(3, static_cast<SAlignment *>(NULL));
1215  ITERATE (TAlignPtrSet, align_it, group_it->first) {
1216  SAlignment *&side_best = best[(*align_it)->source_set];
1217  if (!side_best || (*align_it)->quality_scores >
1218  side_best->quality_scores)
1219  {
1220  side_best = *align_it;
1221  }
1222  }
1223  if (best[1]->quality_scores != best[2]->quality_scores) {
1224  int better_side =
1225  best[1]->quality_scores > best[2]->quality_scores
1226  ? 1 : 2;
1227  ITERATE (TAlignPtrSet, align_it, group_it->first) {
1228  (*align_it)->match_level =
1229  (*align_it)->source_set == better_side
1231  }
1232  }
1233  }
1234  ITERATE (TAlignPtrSet, align1_it, group_it->first) {
1235  if ((*align1_it)->source_set != 1) {
1236  continue;
1237  }
1238  ITERATE (TAlignPtrSet, align2_it, group_it->first) {
1239  if ((*align2_it)->source_set != 2) {
1240  continue;
1241  }
1242  (*align1_it)->matched_alignments.push_back(*align2_it);
1243  (*align2_it)->matched_alignments.push_back(*align1_it);
1244  }
1245  }
1246  }
1247 
1248  /// Add remaining alignments, for which no match was found, in order
1249  /// of their appearance in alignment comparisons
1250  m_CountOnlySet1 += set1_aligns.size();
1251  m_CountOnlySet2 += set2_aligns.size();
1252  ITERATE (vector<TComp>, comp_it, comparisons) {
1253  if (set1_aligns.empty() && set2_aligns.empty()) {
1254  /// Found best comparison for all alignments
1255  break;
1256  }
1257  if (comp_it->second.overlap == 0) {
1258  if (set1_aligns.erase(comp_it->first.first)) {
1259  group.push_back(comp_it->first.first);
1260  m_CountBasesOnlySet1 += comp_it->first.first->length;
1261  }
1262  if (set2_aligns.erase(comp_it->first.second)) {
1263  group.push_back(comp_it->first.second);
1264  m_CountBasesOnlySet2 += comp_it->first.second->length;
1265  }
1266  }
1267  }
1268  ITERATE (TAlignPtrSet, set1_it, set1_aligns) {
1269  group.push_back(*set1_it);
1270  m_CountBasesOnlySet1 += (*set1_it)->length;
1271  }
1272  ITERATE (TAlignPtrSet, set2_it, set2_aligns) {
1273  group.push_back(*set2_it);
1274  m_CountBasesOnlySet2 += (*set2_it)->length;
1275  }
1276  }}
1277  }
1278 
1279  return group;
1280 }
1281 
1283 {
      // Decide whether the two alignments intersect, on the row(s) selected
      // by `row`, by comparing their query_range / subject_range members with
      // IntersectingWith(). The result is returned as a bool.
      // NOTE(review): this listing omits the function's signature line and
      // the labels of the first two switch cases (listing lines 1282, 1287
      // and 1291 are absent); confirm them against the real source file.
1284  bool overlap;
1285 
1286  switch ( row ) {
      // First case (label not visible here): only the query ranges are compared.
1288  overlap = lhs->query_range.IntersectingWith(rhs->query_range);
1289  break;
1290 
      // Second case (label not visible here): only the subject ranges are compared.
1292  overlap = lhs->subject_range.IntersectingWith(rhs->subject_range);
1293  break;
1294 
      // Any other value: the pair counts as overlapping when EITHER the
      // subject ranges or the query ranges intersect; it is rejected only
      // when neither pair of ranges intersects.
1295  default: //CAlignCompare::e_Both:
1296  if ( false == lhs->subject_range.IntersectingWith(rhs->subject_range) &&
1297  false == lhs->query_range.IntersectingWith(rhs->query_range) )
1298  {
1299  overlap = false;
1300  }
1301  else {
1302  overlap = true;
1303  }
1304  break;
1305  }
1306  return overlap;
1307 }
1308 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
USING_SCOPE(objects)
bool s_IsOverlapping(CAlignCompare::SAlignment const *lhs, CAlignCompare::SAlignment const *rhs, CAlignCompare::ERowComparison row)
static bool s_EquivalentScores(const CAlignCompare::TRealScoreSet &scores1, const CAlignCompare::TRealScoreSet &scores2, double real_score_tolerance)
static void s_GetAlignmentMismatches(const CSeq_align &align, CAlignCompare::SAlignment &align_info, CAlignCompare::ERowComparison row)
static void s_GetAlignmentSpans_Exon(const CSeq_align &align, CAlignCompare::SAlignment &align_info, CAlignCompare::ERowComparison row)
static void s_PopulateScoreSet(const CSeq_align &align, const set< string > &score_set, bool score_set_as_blacklist, CAlignCompare::TIntegerScoreSet &integer_scores, CAlignCompare::TRealScoreSet &real_scores)
pair< TPtrPair, SComparison > TComp
static void s_PopulateExtSet(const CSeq_align &align, const set< string > &ext_set, bool ext_set_as_blacklist, CAlignCompare::TExtSet &exts)
pair< CAlignCompare::SAlignment *, CAlignCompare::SAlignment * > TPtrPair
static bool s_EquivalentExts(const CAlignCompare::TExtSet &exts1, const CAlignCompare::TExtSet &exts2)
void s_PopulateScores(const CSeq_align &align, const vector< string > &score_list, vector< T > &scores, bool required=true)
static void s_UpdateSpans(const TSeqRange &query_range, const TSeqRange &subject_range, CAlignCompare::SAlignment &align_info, CAlignCompare::ERowComparison row)
static void s_GetAlignmentSpans_Interval(const CSeq_align &align, CAlignCompare::SAlignment &align_info, CAlignCompare::ERowComparison row)
string s_ConvertToHexString(unsigned char *ptr, unsigned int length)
set< CAlignCompare::SAlignment *, SAlignment_PtrLess > TAlignPtrSet
static void s_GetAlignmentSpans_Intron(const CSeq_align &align, CAlignCompare::SAlignment &align_info, CAlignCompare::ERowComparison row)
static void s_GetAlignmentSpans_Span(const CSeq_align &align, CAlignCompare::SAlignment &align_info, CAlignCompare::ERowComparison row)
void ConvertSeqAlignToPairwiseAln(CPairwiseAln &pairwise_aln, const objects::CSeq_align &sa, objects::CSeq_align::TDim row_1, objects::CSeq_align::TDim row_2, CAlnUserOptions::EDirection direction=CAlnUserOptions::eBothDirections, const TAlnSeqIdVec *ids=0)
Build pairwise alignment from the selected rows of a seq-align.
Checksum and hash calculation classes.
AutoPtr –.
Definition: ncbimisc.hpp:401
size_t m_CountEquivSet2
ERowComparison m_Row
vector< const SAlignment * > NextGroup()
size_t m_CountOverlapGroups
int x_DetermineNextGroupSet()
Determine whether the next group of alignments should be taken from set 1 or 2.
list< AutoPtr< SAlignment > > m_NextSet2Group
list< AutoPtr< SAlignment > > m_NextSet1Group
size_t m_CountBasesOverlapSet2
list< AutoPtr< SAlignment > > m_CurrentSet2Group
IAlignSource & m_Set1
size_t m_CountBasesOnlySet2
size_t m_CountBasesEquivSet1
map< CSeq_id_Handle, set< TSeqPos > > m_BoundariesMap
size_t m_CountBasesEquivSet2
void x_SplitOnOverlaps(int group, int row)
size_t m_CountOverlapSet1
void PopulateBoundariesMap()
double m_RealScoreTolerance
set< string > m_ExtSet
IAlignSource & m_Set2
bool m_ScoreSetAsBlacklist
size_t m_CountBasesSet1
AutoPtr< SAlignment > x_NextAlignment(int set, bool update_counts=true)
Get next alignment from the correct set.
TDisambiguatingScoreList m_DisambiguitingScores
size_t m_CountBasesOverlapSet1
void x_GetCurrentGroup(int set)
size_t m_CountEquivSet1
size_t m_CountBasesSet2
list< AutoPtr< SAlignment > > m_CurrentSet1Group
set< string > m_ScoreSet
size_t m_CountBasesOnlySet1
size_t m_CountSplitSet2
vector< string > m_QualityScores
size_t m_CountOnlySet2
size_t m_CountEquivGroups
friend struct SAlignment
size_t m_CountOverlapSet2
size_t m_CountSplitSet1
size_t m_CountOnlySet1
CAlignRange Represents an element of pairwise alignment of two sequences.
Definition: align_range.hpp:63
Default IAlnSeqId implementation based on CSeq_id_Handle.
Definition: aln_seqid.hpp:116
CChecksumStreamWriter –.
Definition: checksum.hpp:454
CRef< CDense_seg > ExtractSlice(TDim row, TSeqPos from, TSeqPos to) const
Extract a slice of the alignment that includes the specified range.
Definition: Dense_seg.cpp:747
A pairwise aln is a collection of ranges for a pair of rows.
TSeqPos AsSeqPos() const
Definition: Product_pos.cpp:56
CRef –.
Definition: ncbiobj.hpp:618
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
Definition: Seq_align.cpp:153
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
Writer-based output stream.
Definition: rwstream.hpp:171
virtual void Reset()
virtual bool EndOfData() const =0
size_type size() const
Definition: map.hpp:148
container_type::const_iterator const_iterator
Definition: map.hpp:53
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
bool empty() const
Definition: map.hpp:149
void clear()
Definition: map.hpp:169
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator upper_bound(const key_type &key) const
Definition: set.hpp:139
size_type size() const
Definition: set.hpp:132
bool empty() const
Definition: set.hpp:133
void erase(iterator pos)
Definition: set.hpp:151
const_iterator end() const
Definition: set.hpp:136
#define T(s)
Definition: common.h:230
#define false
Definition: bool.h:36
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static int lookup(const char *name, const struct lookup_int *table)
Definition: attributes.c:50
void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)
Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...
Definition: ncbimisc.hpp:480
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:469
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
void GetMD5Digest(unsigned char digest[16]) const
Return calculated MD5 digest.
Definition: checksum.hpp:637
const CChecksum & GetChecksum(void) const
Return checksum.
Definition: checksum.hpp:660
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbiexpt.cpp:453
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
@ eUnknown
Definition: app_popup.hpp:72
#define MSerial_AsnBinary
Definition: serialbase.hpp:697
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
bool IntersectingWith(const TThisType &r) const
Definition: range.hpp:331
bool Empty(void) const
Definition: range.hpp:148
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
Definition: range.hpp:419
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
bool IsSetProduct_strand(void) const
should be 'plus' or 'minus' Check if a value has been assigned to Product_strand data member.
TScore & SetScore(void)
Assign a value to Score data member.
Definition: Seq_align_.hpp:902
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
bool IsSetParts(void) const
basic segments are always in biologic order. Check if a value has been assigned to Parts data member.
vector< CRef< CScore > > TScore
Definition: Seq_align_.hpp:398
const TLoc & GetLoc(void) const
Get the Loc member data.
Definition: Std_seg_.hpp:357
list< CRef< CStd_seg > > TStd
Definition: Seq_align_.hpp:196
bool IsSetExt(void) const
extra info Check if a value has been assigned to Ext data member.
Definition: Seq_align_.hpp:989
bool IsSetProduct_strand(void) const
should be 'plus' or 'minus' Check if a value has been assigned to Product_strand data member.
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Seq_align_.hpp:865
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
list< CRef< CUser_object > > TExt
Definition: Seq_align_.hpp:402
bool IsSetGenomic_strand(void) const
genomic-strand represents the strand of translation Check if a value has been assigned to Genomic_str...
bool CanGetProduct_strand(void) const
Check if it is safe to call GetProduct_strand method.
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
Definition: Seq_align_.cpp:219
bool IsSetGenomic_strand(void) const
Check if a value has been assigned to Genomic_strand data member.
const TStd & GetStd(void) const
Get the variant data.
Definition: Seq_align_.hpp:752
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
const TExt & GetExt(void) const
Get the Ext member data.
list< CRef< CSpliced_exon_chunk > > TParts
bool IsSetScore(void) const
for whole alignment Check if a value has been assigned to Score data member.
Definition: Seq_align_.hpp:884
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
Definition: Seq_align_.hpp:778
TNumseg GetNumseg(void) const
Get the Numseg member data.
Definition: Dense_seg_.hpp:465
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
list< CRef< CSeq_align > > Tdata
const TScore & GetScore(void) const
Get the Score member data.
Definition: Seq_align_.hpp:896
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool CanGetGenomic_strand(void) const
Check if it is safe to call GetGenomic_strand method.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
bool IsEmpty(void) const
Check if variant Empty is selected.
Definition: Seq_loc_.hpp:516
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
bool IsNull(void) const
Check if variant Null is selected.
Definition: Seq_loc_.hpp:504
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
int i
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
const CharType(& source)[N]
Definition: pointer.h:1149
#define abs(a)
Definition: ncbi_heapmgr.c:130
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
T max(T x_, T y_)
T min(T x_, T y_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
Definition: pcre_exec.c:513
Reader-writer based streams.
#define row(bind, expected)
Definition: string_bind.c:73
CRef< CSeq_align > align
TDisambiguatingScoreValues scores
vector< double > quality_scores
int CompareGroup(const SAlignment &o, bool strict_only) const
void PopulateBoundariesMap() const
list< AutoPtr< SAlignment > > BreakOnBoundaries(int row) const
SAlignment(int s, const CRef< CSeq_align > &al, CAlignCompare &compare, bool is_slice=false)
TIntegerScoreSet integer_scores
CRangeCollection< TSeqPos > query_mismatches
AutoPtr< SAlignment > Slice(int row, TSeqPos from, TSeqPos to) const
CRangeCollection< TSeqPos > subject_mismatches
bool operator()(const CAlignCompare::SAlignment *ptr1, const CAlignCompare::SAlignment *ptr2) const
bool operator()(const TComp &c1, const TComp &c2) const
SComp_Less(bool strict=false)
size_t spans_in_common
Int8 spans_unique_second
Int8 spans_unique_first
size_t spans_overlap
static string subject
static string query
Modified on Wed Apr 17 13:09:40 2024 by modify_doxy.py rev. 669887