NCBI C++ ToolKit
compare_feats.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: compare_feats.cpp 91306 2020-10-08 11:57:15Z gouriano $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Alex Astashyn
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
35 
39 
40 
43 #include <queue>
44 
45 
48 
49 /// Comparison functor for pqueue storing related comparisons
51 {
52 public:
// Ordering predicate: returns true when comparison c1 ranks below c2.
// Criteria are applied in order: having a match, same feature type, same product,
// the weighted similarity score, and finally same subtype. Returns false on a full tie.
53  bool operator()(const CRef<CCompareFeats>& c1, const CRef<CCompareFeats>& c2) const
54  {
// Disabled debugging aid: builds content labels for the four features involved.
55 #if 0
56  string s1q = "";
57  feature::GetLabel(*c1->GetFeatQ(), &s1q, feature::eContent);
58  string s1t = "";
59  feature::GetLabel(*c1->GetFeatT(), &s1t, feature::eContent);
60  string s2q = "";
61  feature::GetLabel(*c2->GetFeatQ(), &s2q, feature::eContent);
62  string s2t = "";
63  feature::GetLabel(*c2->GetFeatT(), &s2t, feature::eContent);
64
65 #endif
66
67
68  //any match is better than no match
69  if(!c1->IsMatch() && c2->IsMatch()) return true;
70  if(c1->IsMatch() && !c2->IsMatch()) return false;
// NOTE(review): when neither side is a match the result depends on c1 only. If both
// comparisons have a null query feature, comp(c1, c2) and comp(c2, c1) are both true,
// which is not a strict weak ordering - confirm the container using this functor tolerates it.
71  if(!c1->IsMatch() && !c2->IsMatch()) return c1->GetFeatQ().IsNull();
72
73  //Same type is better
74  bool c1_sameType = c1->IsSameType();
75  bool c2_sameType = c2->IsSameType();
76  if(c1_sameType && !c2_sameType) return false;
77  if(!c1_sameType && c2_sameType) return true;
78
79
80  //same product is better
// Any CException raised while evaluating the products is swallowed below, in which case
// this criterion is skipped and the decision falls through to the similarity score.
81  try {
82  bool c1_same_product = !c1->GetFeatQ().IsNull() && c1->GetFeatQ()->CanGetProduct()
83  && !c1->GetFeatT().IsNull() && c1->GetFeatT()->CanGetProduct()
85  sequence::GetId(c1->GetFeatQ()->GetProduct(), NULL),
86  sequence::GetId(c1->GetFeatT()->GetProduct(), NULL),
87  NULL);
88  bool c2_same_product = !c2->GetFeatQ().IsNull() && c2->GetFeatQ()->CanGetProduct()
89  && !c2->GetFeatT().IsNull() && c2->GetFeatT()->CanGetProduct()
91  sequence::GetId(c2->GetFeatQ()->GetProduct(), NULL),
92  sequence::GetId(c2->GetFeatT()->GetProduct(), NULL),
93  NULL);
94
95  if(c1_same_product && !c2_same_product) return false;
96  if(!c1_same_product && c2_same_product) return true;
97  } catch(CException&) {
98  ;
99  }
100
101
102  //the similarity score is a composite of the score based on shared sites, and the symmetrical overlap.
103  //(both are 0..1). The constant below is used to give more weight to shared sites score
104  const float k = 0.8f;
105
106  float score1(0.0f);
107  float score2(0.0f);
// GetSplicingSimilarity writes its result into the float passed by reference.
108  c1->GetComparison()->GetSplicingSimilarity(score1);
109  c2->GetComparison()->GetSplicingSimilarity(score2);
110
// The weighted sum is evaluated in double (1.0 is a double literal) and narrowed to float on assignment.
111  score1 = k * score1 + (1.0 - k) * c1->GetComparison()->GetSymmetricalOverlap();
112  score2 = k * score2 + (1.0 - k) * c2->GetComparison()->GetSymmetricalOverlap();
113
114  if(score1 < score2) return true;
115  if(score2 < score1) return false;
116
117  //same subtype is better than different subtypes (I bet we NEVER get to this point)
118  bool c1_sameSubtype = c1->IsSameSubtype();
119  bool c2_sameSubtype = c2->IsSameSubtype();
120  if(c1_sameSubtype && !c2_sameSubtype) return false;
121  if(!c1_sameSubtype && c2_sameSubtype) return true;
122
// Every criterion tied: the two comparisons are treated as equivalent.
123  return false;
124  }
125 private:
126
127 };
128 
129 
130 
131 
132 
133 
135 {
// Writes one tab-separated record describing the comparison `cf` to `out` and returns `out`.
// Fields: label and location of m_feat1, label and location of m_feat2, then - unless the
// comparison is flagged m_unmatched - evidence string, textual result, mapped identity and
// relative overlap. Absent parts are written as bare tabs.
136  if(!cf.m_feat1.IsNull()) {
137  out << CCompareFeats::s_GetFeatLabel(*cf.m_feat1) << "\t";
138  out << CCompareFeats::s_GetLocLabel(cf.m_feat1->GetLocation(), true) << "\t";
140  } else {
141  out << "\t\t\t";
142  }
143 
144  if(!cf.m_feat2.IsNull()) {
145  out << CCompareFeats::s_GetFeatLabel(*cf.m_feat2) << "\t";
146  out << CCompareFeats::s_GetLocLabel(cf.m_feat2->GetLocation(), true) << "\t";
147  } else {
148  out << "\t\t";
149  }
150 
151  if(!cf.m_unmatched) {
// NOTE(review): these three calls change the formatting state of `out` and nothing here
// restores it, so later output on the same stream is affected - assuming `out` is a
// standard ostream; confirm this is intended.
152  out.setf(ios::fixed);
153  out.setf(ios::showpoint);
154  out.precision(2);
155 
156  string sResult;
// NOTE(review): GetResult is called twice with the same argument; sResult is not read
// between the calls, so the first call looks redundant - confirm and drop one.
157  cf.m_compare->GetResult(&sResult);
158  out << cf.m_compare->GetEvidenceString() << "\t";
159  cf.m_compare->GetResult(&sResult);
160  out << sResult << "\t";
161 
162  out << cf.GetMappedIdentity() << "\t";
163  out << cf.m_compare->GetRelativeOverlap() << "\t";
165  } else {
166  out << "\t\t\t\t";
167  }
168  return out;
169 }
170 
171 
172 
174 {
175  CNcbiOstrstream strm;
176  string s_pos1;
177  string s_pos2;
181  } else {
182  if(m_first.m_exon_ordinal1 == 0) {
183  s_pos1 = (m_first.m_result == fCmp_Unknown ? "?" : "~");
184  } else {
186  }
187 
188  if(m_first.m_exon_ordinal2 == 0) {
189  s_pos2 = (m_first.m_result == fCmp_Unknown ? "?" : "~");
190  } else {
192  }
193  }
194 
195  strm << s_pos1 << ":";
196 
198  strm << "strand-mismatch(" << s_pos2 << ")";
199  } else {
200  bool overlap_5p = (m_first.m_result & fCmp_Overlap) && (m_first.m_position_comparison < 0);
201  bool overlap_3p = (m_first.m_result & fCmp_Overlap) && (m_first.m_position_comparison > 0);
202 
203  strm << ((m_first.m_result & (fCmp_5pExtension | fCmp_Superset) || overlap_5p) ? ">" :
204  ((m_first.m_result & (fCmp_5pTruncation | fCmp_Subset) || overlap_3p) ? "<" : ""));
205  strm << s_pos2;
206  strm << ((m_first.m_result & (fCmp_3pTruncation | fCmp_Subset) || overlap_5p) ? ">" :
207  ((m_first.m_result & (fCmp_3pExtension | fCmp_Superset) || overlap_3p) ? "<" : ""));
208  }
209 
210 
211  return CNcbiOstrstreamToString(strm);
212 }
213 
215 {
216  CNcbiOstrstream strm;
217  strm << "[";
218  string sep = "";
219 
221 
222  int ii(0);
223  ITERATE(vector<SIntervalComparisonResult>, it, m_IntComparisons) {
224  SIntervalComparisonResult comp = *it;
225  if(!grp.Add(comp)) {
226  if(grp.IsValid()) {
227  strm << sep << grp.ToString();
228  sep = " ";
229  }
230  grp.Reset(comp);
231  }
232  ii++;
233  }
234 
235  strm << sep << grp.ToString() << "](" << ii << ")";
236  return CNcbiOstrstreamToString(strm);
237 }
238 
239 
240 
241 /// Compare two loc primitives
243 {
244  bool is5p_match = loc1.GetStart(eExtreme_Biological) == loc2.GetStart(eExtreme_Biological);
245  bool is3p_match = loc1.GetStop(eExtreme_Biological) == loc2.GetStop(eExtreme_Biological);
246 
248  m_scope_t, sequence::fCompareOverlapping); //we are comparing mapped query loc vs
249  //the target, hence the target scope
250 
251  switch(cmp) {
252  case sequence::eSame: return fCmp_Match;
254  case sequence::eOverlap: return fCmp_Overlap;
255  case sequence::eContains: return is5p_match ? fCmp_3pExtension : (is3p_match ? fCmp_5pExtension : fCmp_Superset);
256  case sequence::eContained: return is5p_match ? fCmp_3pTruncation : (is3p_match ? fCmp_5pTruncation : fCmp_Subset);
257  default: return fCmp_Unknown;
258  }
259 }
260 
261 
262 
263 /// Cross-compare the intervals on both locs
265 {
266  m_IntComparisons.clear(); //our job here is to refill this
267 
268 
269  // loc1 and loc2 will be the locations that we will compare.
270  // If all intervals on all locations are on the same sequence we sort the intervals.
271  // (because we need to know about 3'|5' terminality) Hence, the exon comparisons will be
272  // in sorted order regardless of their positional order in the loc-mix.
273  //
274  // If some intervals refer to other bioseqs we assume that our locs have them in order.
275  // The fact is stored in this->m_sameBioseq
276  CRef<CSeq_loc> loc1;
277  CRef<CSeq_loc> loc2;
278 
279  try {
280  const CSeq_id& seq_id1 = sequence::GetId(*m_loc1, m_scope_t);
281  const CSeq_id& seq_id2 = sequence::GetId(*m_loc2, m_scope_t);
282  this->m_sameBioseq = sequence::IsSameBioseq(seq_id1, seq_id2, m_scope_t);
283  } catch(...) { //GetId may throw if intervals refer to multiple bioseqs
284  this->m_sameBioseq = false;
285  }
286 
287  if(false && this->m_sameBioseq) { //disabled sorting
288  //this "merge" is actually just a sort
291  } else {
292  loc1.Reset(new CSeq_loc);
293  loc2.Reset(new CSeq_loc);
294  loc1->Assign(*m_loc1);
295  loc2->Assign(*m_loc2);
296  }
297 
298 
299  //If we have a packed int, then when iterating, CSeq_loc_CI::GetSeq_loc() will
300  //fetch the whole packed int instead of an individual interval which we need.
301  //Hence,
302  loc1->ChangeToMix();
303  loc2->ChangeToMix();
304 
305 
307 
308 
309  //To avoid issues if some interval points to another sequence or they are not sorted (as a result of
310  //mapping or otherwise), we compare every interval against every other interval
311  //leading to O(exon_count1*exon_count2) complexity.
312  //TODO: optimize to O(exon_count1+exon_count2) complexity after self-mapping seq-loc filter is implemented.
313 
314  set<unsigned> loc2_reported_set; //keep track of matched exons on loc2 so we can report the unmatched
315  unsigned it1_exon_ordinal = 1;
316 
317 
318 
319  int adjust_for_strand = IsReverse(sequence::GetStrand(*loc1, m_scope_t)) ? -1 : 1;
320 
321  //have to use eEmpty_Allow to conserve the numbering of exons in the presence of non-mapping intervals
323  it1;
324  ++it1, ++it1_exon_ordinal)
325  {
326  CConstRef<CSeq_loc> ci_loc1 = it1.GetRangeAsSeq_loc();
327  unsigned it2_exon_ordinal = 1;
328  bool loc1_found_Overlap = false;
329 
330  int it1_cmp_it2 = 0; //positive iff loc1 is 5' further and does not overlap loc1
331 
332 
334  it2;
335  ++it2, ++it2_exon_ordinal)
336  {
337  CConstRef<CSeq_loc> ci_loc2 = it2.GetRangeAsSeq_loc();
338  FCompareLocs cmp_res = x_CompareInts(*ci_loc1, *ci_loc2);
339 
340  try {
341  it1_cmp_it2 = adjust_for_strand *
342  (ci_loc1->GetStart(eExtreme_Biological) >
343  ci_loc2->GetStop(eExtreme_Biological) ? 1 : -1);
344  } catch (...) {
345  ; //reuse the last value
346  }
347 
348 
349  // if no overlap and the segment on the other loc hasn't been reported and we already passed it
350  if((cmp_res == fCmp_Unknown || cmp_res == fCmp_NoOverlap)
351  && loc2_reported_set.find(it2_exon_ordinal) == loc2_reported_set.end()
352  && it1_cmp_it2 > 0)
353  {
354  loc2_reported_set.insert(it2_exon_ordinal);
355 
357  0 //no corresponding exon on loc1
358  , it2_exon_ordinal //for this exon on loc2
359  , cmp_res, it1_cmp_it2);
360 
361  m_IntComparisons.push_back(sRes);
362 
363  } else if (cmp_res != fCmp_Unknown && cmp_res != fCmp_NoOverlap) {
364  //also check the matching of directions; if problem - report it as strand mismatch
365  if(!m_sameStrand) {
366  cmp_res = fCmp_StrandDifferent;
367  }
368 
369  SIntervalComparisonResult sRes(it1_exon_ordinal, it2_exon_ordinal, cmp_res, it1_cmp_it2);
370  m_IntComparisons.push_back(sRes);
371 
372  loc2_reported_set.insert(it2_exon_ordinal);
373  loc1_found_Overlap = true;
374  }
375  }
376 
377  if(!loc1_found_Overlap) {
378  SIntervalComparisonResult sRes(it1_exon_ordinal, 0, fCmp_NoOverlap);
379  m_IntComparisons.push_back(sRes);
380  }
381  }
382 
383  //add the missing / unknown intervals from the loc2
384  unsigned it2_exon_ordinal = 1;
385  for(CSeq_loc_CI it2(*loc2); it2; ++it2, ++it2_exon_ordinal) {
386  if(loc2_reported_set.find(it2_exon_ordinal) == loc2_reported_set.end()) {
387  SIntervalComparisonResult sRes(0, it2_exon_ordinal, fCmp_NoOverlap, 0);
388  m_IntComparisons.push_back(sRes);
389  }
390  }
391 
392 
393 
394 
395  //compute the stats
396  for(vector<SIntervalComparisonResult>::iterator it = m_IntComparisons.begin();
397  it != m_IntComparisons.end();
398  ++it)
399  {
400 
401  if(m_counts.missing_5p == 0 && it->missing_first()) {
402  m_counts.extra_5p++;
403  } else if(m_counts.extra_5p == 0 && it->missing_second()) {
405  } else {
406  break;
407  }
408  }
409 
410  for(vector<SIntervalComparisonResult>::reverse_iterator it = m_IntComparisons.rbegin();
411  it != m_IntComparisons.rend();
412  ++it)
413  {
414  if(m_counts.missing_3p == 0 && it->missing_first()) {
415  m_counts.extra_3p++;
416  } else if(m_counts.extra_3p == 0 && it->missing_second()) {
418  } else {
419  break;
420  }
421  }
422 
423  for(vector<SIntervalComparisonResult>::iterator it = m_IntComparisons.begin(); it != m_IntComparisons.end(); ++it) {
424  m_counts.loc1_int = std::max(m_counts.loc1_int, it->m_exon_ordinal1);
425  m_counts.loc2_int = std::max(m_counts.loc2_int, it->m_exon_ordinal2);
426 
427  if(it->missing_first()) {
428  (it->m_result & fCmp_Unknown) ? m_counts.unknown++ : m_counts.missing++;
429  } else if(it->missing_second()) {
430  (it->m_result & fCmp_Unknown) ? m_counts.unknown++ : m_counts.extra++;
431  } else {
432  (it->m_result & fCmp_Match) ? m_counts.matched++ : m_counts.partially_matched++;
433  }
434  }
435 }
436 
437 /// Recompute m_len_seqloc_overlap, m_len_seqloc1, m_len_seqloc2, and m_shared_sites_score
439 {
442 
443  //fix: if have strand mismatches - the overlaps are forced to zero
444  if(!m_sameStrand) {
445  merged_loc1->SetEmpty();
446  merged_loc2->SetEmpty();
447  }
448 
449 
450  CRef<CSeq_loc> subtr_loc1 = sequence::Seq_loc_Subtract(*merged_loc1, *merged_loc2, CSeq_loc::fMerge_All, m_scope_t);
451 
452  TSeqPos subtr_len;
453  try {
454  subtr_len = sequence::GetLength(*subtr_loc1, m_scope_t);
455  } catch (...) {
456  subtr_len = 0;
457  }
458 
459  try {
461  } catch(...) {
462  m_len_seqloc1 = 0;
463  }
464 
465  try {
467  } catch(...) {
468  m_len_seqloc2 = 0;
469  }
470 
471  m_len_seqloc_overlap = m_len_seqloc1 - subtr_len;
472 
473 
474 
475  //Compute shared sites score
476  m_shared_sites_score = 0.0f;
479 
480  try {
481  merged_loc1->ChangeToMix();
482  merged_loc2->ChangeToMix();
483 
484  TSeqPos terminal_start = min(sequence::GetStart(*merged_loc1, NULL), sequence::GetStart(*merged_loc2, NULL));
485  TSeqPos terminal_stop = max(sequence::GetStop(*merged_loc1, NULL), sequence::GetStop(*merged_loc2, NULL));
486 
487  //if splice site matches exactly, it gets the score of 1.
488  //if it is not exact, it linearly drops down to zero at thr.
489  const float terminal_jitter_thr = 20.0f;
490  const float splice_jitter_thr = 5.0f;
491 
492  for(CSeq_loc_CI ci1(*m_loc1); ci1; ++ci1) {
493  CConstRef<CSeq_loc> ci_loc1 = ci1.GetRangeAsSeq_loc();
494  TSeqPos seg1_start = sequence::GetStart(*ci_loc1, NULL);
495  TSeqPos seg1_stop = sequence::GetStop(*ci_loc1, NULL);
496  float best_match_start = 0.0f;
497  float best_match_stop = 0.0f;
499  for(CSeq_loc_CI ci2(*merged_loc2); ci2; ++ci2) {
500  CConstRef<CSeq_loc> ci_loc2 = ci2.GetRangeAsSeq_loc();
501  if(m_loc1_interval_count == 1) {
502  m_loc2_interval_count++; //compute only once in this loop
503  }
504  ENa_strand strand1 = sequence::GetStrand(*ci_loc1, NULL);
505  ENa_strand strand2 = sequence::GetStrand(*ci_loc2, NULL);
506  bool same_strand = strand1 == strand2
507  || strand1 == eNa_strand_both
508  || strand2 == eNa_strand_both
509  || (strand1 == eNa_strand_unknown && strand2 != eNa_strand_minus)
510  || (strand2 == eNa_strand_unknown && strand1 != eNa_strand_minus);
511  if(!same_strand) {
512  continue;
513  }
514 
515  TSeqPos seg2_start = sequence::GetStart(*ci_loc2, NULL);
516  TSeqPos seg2_stop = sequence::GetStop(*ci_loc2, NULL);
517 
518  float thr = (seg1_start == terminal_start
519  || seg1_stop == terminal_stop
520  || seg2_start == terminal_start
521  || seg2_stop == terminal_stop ) ? terminal_jitter_thr : splice_jitter_thr;
522  float match_start = max(0.0f, 1.0f - abs((long)seg1_start - (long)seg2_start) / thr);
523  best_match_start = max(match_start, best_match_start);
524  float match_stop = max(0.0f, 1.0f - abs((long)seg1_stop - (long)seg2_stop) / thr);
525  best_match_stop = max(match_stop, best_match_stop);
526  }
527  m_shared_sites_score += best_match_start;
528  m_shared_sites_score += best_match_stop;
529  }
530 
532  } catch (CException&) {
533  ;
534  }
535 
536  m_cachedOverlapValues = true;
537 }
538 
540 {
// Summarizes the interval-level comparison as a combination of fCmp_* flags, which is
// returned. When str_result is non-null it also receives a human-readable summary made of
// "; "-terminated phrases.
541  TCompareLocsFlags result_flags(0);
542 
543  CNcbiOstrstream strm;
544 
545  //
546  //Fuzz: to be implemented. set the fCmp_Fuzz flag if necessary; do not return;
547  //
548 
// Early exits: these write str_result directly (bypassing strm) and return a single flag.
549  if(!m_sameStrand) {
550  if(str_result) *str_result = "strand mismatch; ";
551  return result_flags | fCmp_StrandDifferent;
553  if(str_result) *str_result = "complete match; ";
554  return result_flags | fCmp_Match;
555  }
556 
557  if(m_counts.matched > 0 ) {
558  strm << m_counts.matched << " exact; ";
559  }
560 
561  if(m_counts.partially_matched > 0) {
562  strm << m_counts.partially_matched << " partial; ";
563  }
564 
565  //if we have no mismatches internally, report mismatches on the ends if we have them
566  //otherwise just report the total mismatches
567  if(m_counts.missing_internal() == 0 && m_counts.extra_internal() == 0) {
568  if(m_counts.extra_5p > 0) strm << m_counts.extra_5p << " novel @5'; ";
569  if(m_counts.extra_3p > 0) strm << m_counts.extra_3p << " novel @3'; ";
570  if(m_counts.missing_5p > 0) strm << m_counts.missing_5p << " missing @5'; ";
571  if(m_counts.missing_3p > 0) strm << m_counts.missing_3p << " missing @3'; ";
572  } else {
573  if(m_counts.missing != 0) strm << m_counts.missing << " missing; ";
574  if(m_counts.extra != 0) strm << m_counts.extra << " novel; ";
575  }
576 
577  //set the corresponding flags
// Unlike the text above, the flags are set regardless of whether internal mismatches exist.
578  if(m_counts.extra_5p > 0) result_flags |= fCmp_intsExtra_5p;
579  if(m_counts.missing_5p > 0) result_flags |= fCmp_intsMissing_5p;
580  if(m_counts.extra_3p > 0) result_flags |= fCmp_intsExtra_3p;
581  if(m_counts.missing_3p > 0) result_flags |= fCmp_intsMissing_3p;
582  if(m_counts.missing_internal() > 0) result_flags |= fCmp_intsMissing_internal;
583  if(m_counts.extra_internal() > 0) result_flags |= fCmp_intsExtra_internal;
584 
585 
586  //report extensions / truncations of the terminal exons
// NOTE(review): front()/back() require m_IntComparisons to be non-empty; on an empty
// vector this is undefined behavior - confirm an entry is always present at this point.
587  SIntervalComparisonResult terminal5p_comparison = m_IntComparisons.front();
588  SIntervalComparisonResult terminal3p_comparison = m_IntComparisons.back();
589 
// Only an exact equality with the extension/truncation value is reported for the first
// (5') and last (3') interval comparisons.
590  if(terminal5p_comparison.m_result == fCmp_5pExtension) {
591  result_flags |= fCmp_5pExtension;
592  strm << "5'extended; ";
593  } else if(terminal5p_comparison.m_result == fCmp_5pTruncation) {
594  result_flags |= fCmp_5pTruncation;
595  strm << "5'truncated; ";
596  }
597 
598  if(terminal3p_comparison.m_result == fCmp_3pExtension) {
599  result_flags |= fCmp_3pExtension;
600  strm << "3'extended; ";
601  } else if(terminal3p_comparison.m_result == fCmp_3pTruncation) {
602  result_flags |= fCmp_3pTruncation;
603  strm << "3'truncated; ";
604  }
605 
// Fallback used only when none of the interval-level checks above set a flag:
// classify by the overall comparison value `cmp`.
606  if(!result_flags) {
609 
610  switch(cmp) {
611  case sequence::eSame:
612  result_flags |= fCmp_Match;
613  strm << "complete match; ";
614  break;
615 
616  case sequence::eNoOverlap:
618  result_flags |= fCmp_RegionOverlap;
619  strm << "region overlap; ";
620  } else {
621  result_flags |= fCmp_NoOverlap;
622  strm << "no overlap; ";
623  }
624  break;
625 
626  case sequence::eOverlap:
627  result_flags |= fCmp_Overlap;
628  strm << "overlap; ";
629  break;
630 
631  case sequence::eContains:
632  result_flags |= fCmp_Superset;
633  strm << "superset; ";
634  break;
635 
637  result_flags |= fCmp_Subset;
638  strm << "subset; ";
639  break;
640 
641  default:
642  break;
643  }
644  }
645 
// Last resort: nothing could be classified.
646  if(!result_flags) {
647  result_flags |= fCmp_Unknown;
648  strm << "unknown; ";
649  }
650 
651  if(str_result) {
652  *str_result = CNcbiOstrstreamToString(strm);
653  }
654 
655  return result_flags;
656 }
657 
658 
659 
660 //return feature's gene_id/locus_id or prelocuslink gene number
662 {
663  //normally locus_id is in feat's dbxref
664  if(feat.IsSetDbxref()) {
665  ITERATE (CSeq_feat::TDbxref, dbxref, feat.GetDbxref()) {
666  if ((*dbxref)->GetDb() == "GeneID" || (*dbxref)->GetDb() == "LocusID") {
667  return (*dbxref)->GetTag().GetId();
668  }
669  }
670  }
671 
672 
673  //but sometimes it is in Db
674  if(feat.CanGetData() )
675  {
676  if(feat.GetData().IsGene()) {
677  ITERATE (CSeq_feat::TDbxref, dbxref, feat.GetData().GetGene().GetDb()) {
678  if ((*dbxref)->GetDb() == "GeneID" || (*dbxref)->GetDb() == "LocusID") {
679  return (*dbxref)->GetTag().GetId();
680  }
681  }
682 
683  //for prelocuslink gene annots the gene number is here:
684  try {
685  return NStr::StringToInt(feat.GetData().GetGene().GetLocus());
686  } catch (...) {};
687  } else if(feat.GetData().IsRna()) {
688  /*for prelocuslink merged ASN, rnas look like this:
689  *
690  Seq-feat ::= {
691  data rna {
692  type mRNA,
693  ext name "67485"
694  },
695  product whole local str "MmUn_53691_37.35221.67485.m",
696  ...
697 
698  * The gene_id is the second dot-separated field of the product label, here 35221
699  */
700 
701 
702 
703  try {
704  std::vector<string> tokens;
705  string label = "";
707  NStr::Split(label, ".", tokens);
708 
709  if(tokens.size() == 4 && (tokens[3] == "m" || tokens[3] == "p")) {
710  int num1 = NStr::StringToInt(tokens[1]);
711  (void) NStr::StringToInt(tokens[2]); //make sure this one is a number too
712  return num1;
713  }
714  } catch (...) {}
715  }
716  }
717 
718  return 0;
719 }
720 
721 
722 
723 /// Return the next group of comparisons on the region (return true iff found any)
724 /// A group is a set of features on the query region where each feature overlaps at least one other feature in the group.
725 /// (normally related gene, introns, exons, mRNAs and CDSes).
726 /// Comparison for each feature contains best match from the other sequence and their relationship.
727 /// (may be more than one best)
728 /// If no matching (overlapping) feature of the same type can be found on the target, the one(s) of the
729 /// semantically closest type are chosen instead.
730 /// TODO: need to report features on target unaccounted for in the reported comparisons
732 {
// Fills vComparisons with the next group of comparisons and returns true iff it is non-empty.
// The query iterator m_loc_q_ci is a member, so each call resumes where the previous one
// stopped. Once the query side yields nothing, target features not seen so far are reported
// once (guarded by m_already_processed_unmatched_targets).
733  vComparisons.clear();
734  CRef<CSeq_loc> group_loc_q(new CSeq_loc);
735  CRef<CSeq_loc> group_loc_t(new CSeq_loc);
736  group_loc_q->SetNull();
737  group_loc_t->SetNull();
738 
739  _TRACE("Starting next comparison group");
740 
741  for(; m_loc_q_ci; ++m_loc_q_ci) {
742  CConstRef<CSeq_feat> feat1(&m_loc_q_ci->GetMappedFeature()); //original feat could be on segments, so self-mapper will happily remap to nothing
743 
744  //get raw matches for this feat
745  vector<CRef<CCompareFeats> > feat_matches;
746  x_GetPutativeMatches(feat_matches, feat1);
747 
748  //compute the combined location of the raw matches
749  CSeq_loc aggregate_match_loc_t;
750  aggregate_match_loc_t.SetNull();
751  ITERATE(vector<CRef<CCompareFeats> >, it, feat_matches) {
752  if(!(*it)->GetFeatT().IsNull() && !(*it)->GetSelfLocT().IsNull()) {
753  aggregate_match_loc_t.SetMix();
754  aggregate_match_loc_t.Add(*(*it)->GetSelfLocT());
755  }
756  }
757 
758 
// Group boundary test: leaving the loop via `break` does not advance m_loc_q_ci, so the
// current feature becomes the first one of the next group.
759  if(!group_loc_q->IsNull()
760  && sequence::Compare(*group_loc_q, feat1->GetLocation(),
762  {
763  //the feature on query does not overlap anything in the current overlap group:
764  //We might think that we've reached the next group; however, we want to keep
765  //multiple non-overlapping features on Q matching the same larger feat on T in the same
766  //group to avoid the ambiguity in selecting best matches. Hence we also require
767  //the same on the target side:
768 
769  if(group_loc_t->IsNull() || sequence::Compare(*group_loc_t, aggregate_match_loc_t,
771  break;
772  }
773  }
774 
775  vComparisons.insert(vComparisons.end(), feat_matches.begin(), feat_matches.end());
776 
777  //update group locs
778  CConstRef<CSeq_loc> feat1_self_range_loc = x_GetSelfLoc(feat1->GetLocation(), m_scope_q, true);
779  group_loc_q = sequence::Seq_loc_Add(*group_loc_q, *feat1_self_range_loc, CSeq_loc::fMerge_SingleRange, m_scope_q);
780  group_loc_t = sequence::Seq_loc_Add(*group_loc_t, aggregate_match_loc_t, CSeq_loc::fMerge_SingleRange, m_scope_t);
781 
782 
783 #if _DEBUG
784  string label = "";
785  feature::GetLabel(*feat1, &label, feature::fFGL_Both, m_scope_q);
786  _TRACE(" " + label);
787 
788  label = "";
789  group_loc_q->GetLabel(&label);
790  _TRACE(" group_loc_q: " + label);
791 
792  label = "";
793  group_loc_t->GetLabel(&label);
794  _TRACE(" group_loc_t: " + label);
795 #endif
796 
797  }
798 
// Optional pruning of the collected comparisons to the best ones.
799  if(m_comp_options & CCompareSeqRegions::fSelectBest) {SelectMatches(vComparisons);}
800 
801  if(!vComparisons.empty()) {
802  return true;
803  }
804 
805  if(m_already_processed_unmatched_targets) {
806  return false;
807  }
808 
809  //process unmatched targets
// `dummy` only receives the second output of Map() and is not used afterwards.
810  double dummy(0.0f);
811  CConstRef<CSeq_loc> tgt_loc = m_mapper->Map(*this->m_loc_q, &dummy);
812 
813  _TRACE("Processing unmatched targets");
814  SAnnotSelector sel = m_selector_t; //because original is const
815  sel.SetOverlapIntervals();
816  for(CFeat_CI it2(*m_scope_t, *tgt_loc, sel); it2; ++it2) {
817  CConstRef<CSeq_feat> feat(&it2->GetMappedFeature());
// Targets are identified by their location label; labels already recorded in
// m_seen_targets are skipped.
818  string loc_label;
819  feat->GetLocation().GetLabel(&loc_label);
820  if(m_seen_targets.find(loc_label) != m_seen_targets.end()) {
821  continue;
822  }
823 
824  CConstRef<CSeq_loc> feat_self_loc = x_GetSelfLoc(feat->GetLocation(), m_scope_t, false);
825 
826  //compute the remapped ratio
827  CRef<CSeq_loc> subtr_loc = sequence::Seq_loc_Subtract(*feat_self_loc, *tgt_loc, CSeq_loc::fMerge_All, m_scope_t);
828 
// Unset, null and empty locations are given length 0 instead of calling GetLength on them.
829  TSeqPos len_subtr = subtr_loc->Which() == CSeq_loc::e_not_set
830  || subtr_loc->IsNull()
831  || subtr_loc->IsEmpty() ? 0 : sequence::GetLength(*subtr_loc, m_scope_t);
832 
833  TSeqPos feat_len = feat_self_loc->Which() == CSeq_loc::e_not_set
834  || feat_self_loc->IsNull()
835  || feat_self_loc->IsEmpty() ? 0 : sequence::GetLength(*feat_self_loc, m_scope_t);
836 
// NOTE(review): both operands of the division are TSeqPos. If TSeqPos is an integral type
// (TODO confirm), `len_subtr / feat_len` truncates to a whole number before the subtraction,
// so `mapped` is not a fraction; one operand probably needs a cast to double.
837  double mapped = feat_len == 0 ? 0.0f : 1.0 - (len_subtr / feat_len);
838 
839 
841  *feat
842  , *feat_self_loc
843  , mapped
844  , m_scope_t
845  ));
846  vComparisons.push_back(cf);
847 
848  }
849  m_already_processed_unmatched_targets = true;
850 
851  _TRACE("Finished processing this group");
852  return !vComparisons.empty();
853 }
854 
855 
857  vector<CRef<CCompareFeats> >& vComparisons,
858  CConstRef<CSeq_feat> feat1)
859 {
860  double mapped_identity(0);
861  CConstRef<CSeq_loc> feat1_mapped_loc = m_mapper->Map(feat1->GetLocation(), &mapped_identity);
862  CConstRef<CSeq_loc> feat1_mapped_range_loc = sequence::Seq_loc_Merge(*feat1_mapped_loc, CSeq_loc::fMerge_SingleRange, m_scope_t);
863  CConstRef<CSeq_loc> feat1_self_loc = x_GetSelfLoc(feat1->GetLocation(), m_scope_q, false);
864  CConstRef<CSeq_loc> feat1_self_range_loc = x_GetSelfLoc(feat1->GetLocation(), m_scope_q, true);
865 
866  _ASSERT(!feat1_mapped_loc.IsNull());
867  _ASSERT(!feat1_mapped_range_loc.IsNull());
868  _ASSERT(!feat1_self_loc.IsNull());
869  _ASSERT(!feat1_self_range_loc.IsNull());
870 
871 
872 
873 
874  int feat1_gene_id = s_GetGeneId(*feat1);
875 
876 #if 0
877  if(feat1_gene_id == 0) {
878  ERR_POST(Info << "Unable to determine gene_id for");
879  NcbiCerr << MSerial_AsnText << *feat1;
880  }
881 #endif
882 
883  bool had_some_matches = false;
884  for(CFeat_CI it2(*m_scope_t, *feat1_mapped_loc, m_selector_t); it2; ++it2) {
885  CConstRef<CSeq_feat> feat_t(&it2->GetMappedFeature());
886 
887  if((m_comp_options & CCompareSeqRegions::fDifferentGenesOnly)
888  && feat1_gene_id == s_GetGeneId(*feat_t))
889  {
890  continue;
891  }
892 
893  string loc_label = "";
894  feat_t->GetLocation().GetLabel(&loc_label);
895  this->m_seen_targets.insert(loc_label);
896 
897 
898  if(m_comp_options & CCompareSeqRegions::fSameTypeOnly
899  && feat_t->GetData().Which() != feat1->GetData().Which())
900  {
901  continue;
902  }
903 
904 
905  bool usingRangeComparison = feat_t->GetData().GetSubtype() == CSeqFeatData::eSubtype_gene
907 
908  CConstRef<CSeq_loc> feat_t_self_loc = x_GetSelfLoc(feat_t->GetLocation(), m_scope_t, usingRangeComparison);
910  *feat1
911  , usingRangeComparison ? *feat1_mapped_range_loc : *feat1_mapped_loc
912  , mapped_identity
913  , usingRangeComparison ? *feat1_self_range_loc : *feat1_self_loc
914  , m_scope_q
915  , *feat_t
916  , *feat_t_self_loc
917  , m_scope_t));
918 
919 
920  vComparisons.push_back(cf);
921  had_some_matches = true;
922 
923  }
924 
925  if(!had_some_matches) {
926  string s = "";
928 
930  *feat1
931  , *feat1_mapped_loc
932  , mapped_identity
933  , *feat1_self_loc
934  , m_scope_q));
935 
936  _ASSERT(!cf->GetSelfLocQ().IsNull());
937  _ASSERT(!cf->GetMappedLocQ().IsNull());
938  vComparisons.push_back(cf);
939  }
940 
941 }
942 
944  const CSeq_loc& loc,
945  CScope* scope,
946  bool merge_single_range)
947 {
948  CRef<CSeq_loc> new_loc;
949 
950  if(!sequence::IsOneBioseq(loc, scope)) {
951  CSeq_loc_Mapper& mapper = (scope == m_scope_q ? *m_self_mapper_q : *m_self_mapper_t);
952  new_loc = mapper.Map(loc);
953  }
954 
955  if(merge_single_range){
956  new_loc = sequence::Seq_loc_Merge(
957  (new_loc.IsNull() ? loc : *new_loc),
959  scope);
960  }
961 
962  return new_loc.IsNull() ? CConstRef<CSeq_loc>(&loc) : new_loc;
963 }
964 
965 
966 
968 {
969 public:
971  {
972  if(f1 == f2) {
973  return false;
974  }
975 
976  if(f1.IsNull() && !f2.IsNull()) {
977  return true;
978  }
979 
980  if(!f1.IsNull() && f2.IsNull()) {
981  return false;
982  }
983 
984  //compare by feat-ids, if have them
985  CConstRef<CObject_id> obj_id1(f1->CanGetId() && f1->GetId().IsLocal() ? &f1->GetId().GetLocal() : NULL);
986  CConstRef<CObject_id> obj_id2(f2->CanGetId() && f2->GetId().IsLocal() ? &f2->GetId().GetLocal() : NULL);
987  if(obj_id1.IsNull() && !obj_id2.IsNull()) {
988  return true;
989  } else if(!obj_id1.IsNull() && obj_id2.IsNull()) {
990  return false;
991  } else if(!obj_id1.IsNull() && !obj_id2.IsNull()) {
992  return *obj_id1 < *obj_id2;
993  }
994 
995 
996  //compare by locations
997  try {
998  int res = f1->Compare(*f2);
999 
1000  if(res != 0) {
1001  return res < 0;
1002  }
1003  } catch (...) {
1004  //Compare fails on multi-seq-id features
1005  }
1006 
1007 
1008  //Compare does not always go all the way (or throws), so here we manually try to distinguish by products and labels
1009  //Potential problem: may not be transitive?
1010 
1011  string s1 = "";
1012  if(f1->CanGetProduct()) {
1013  f1->GetProduct().GetLabel(&s1);
1014  }
1015 
1016  string s2 = "";
1017  if(f2->CanGetProduct()) {
1018  f2->GetProduct().GetLabel(&s2);
1019  }
1020 
1021  if(s1 != s2) {
1022  return s1 < s2;
1023  }
1024 
1025  s1 = "";
1027 
1028  s2 = "";
1030 
1031  return s1 < s2;
1032  }
1033 };
1034 
1036 {
// Reduces vComparisons to the selected matches and tags each kept entry via SetIrrelevance:
//   0 - the comparison is the top entry for both its query and its target feature;
//   1 - otherwise, it is the top entry for its query feature (or has no query feature);
//   2 - otherwise, it is the top entry for its target feature (or has no target feature).
// Entries that satisfy none of these are dropped. The vector is rebuilt from a std::set, so
// the output follows the set's iteration order rather than the input order.
1037  typedef priority_queue<CRef<CCompareFeats>,
1038  vector<CRef<CCompareFeats> >,
1040  > TMatchesQueue;
1041 
1042  //Note: used to use default key comparator that assumed that the same feats
1043  //have same addresses. However, that is not always the case, so
1044  //we have to use custom content-based comparator, SFeats_OpLess
1045  typedef map<CConstRef<CSeq_feat>, TMatchesQueue, SFeats_OpLess > TMatchesMap;
1046 
1047  TMatchesMap q_map;
1048  TMatchesMap t_map;
1049 
// `i` is only incremented here; its other uses are in the disabled diagnostics below.
1050  int i = 0;
1051  ITERATE(vector<CRef<CCompareFeats> >, it, vComparisons) {
1052  CRef<CCompareFeats> cf = *it;
1053 
1054 
// Null features are never used as keys when the maps are populated.
1055  if(!cf->GetFeatQ().IsNull()) {
1056  q_map[cf->GetFeatQ()].push(cf);
1057  }
1058 
1059  if(!cf->GetFeatT().IsNull()) {
1060  t_map[cf->GetFeatT()].push(cf);
1061  }
1062 
1063  ++i;
1064  }
1065 
// Disabled diagnostics: logs the best match per query and per target feature.
1066 #if 0
1067  i = 0;
1068  ERR_POST(Info << "q->t");
1069  ITERATE(TMatchesMap, it, q_map) {
1070  const CConstRef<CSeq_feat> feat = it->first;
1071  const CConstRef<CCompareFeats> best_match = it->second.top();
1072 
1073  string s0 = "";
1075 
1076  string s1 = "";
1077  if(!best_match.IsNull() && best_match->IsMatch()) {
1078  feature::GetLabel(*best_match->GetFeatT(), &s1, feature::fFGL_Both);
1079  }
1080  ERR_POST(Info << "Best match for " << s0 << " : " << s1 << ", out of " << it->second.size());
1081  ++i;
1082  }
1083 
1084  i = 0;
1085  ERR_POST(Info << "t->q");
1086  ITERATE(TMatchesMap, it, t_map) {
1087  const CConstRef<CSeq_feat> feat = it->first;
1088  const CConstRef<CCompareFeats> best_match = it->second.top();
1089 
1090  string s0 = "";
1092 
1093  string s1 = "";
1094  if(!best_match.IsNull() && best_match->IsMatch()) {
1095  feature::GetLabel(*best_match->GetFeatQ(), &s1, feature::fFGL_Both);
1096  }
1097  ERR_POST(Info << "Best match for " << s0 << " : " << s1 << ", out of " << it->second.size());
1098 
1099  }
1100  ++i;
1101  ERR_POST(Info << "");
1102 #endif
1103 
1104  set<CRef<CCompareFeats> > compset;
1105 
// NOTE(review): map::operator[] default-inserts an empty queue for a key that was never
// pushed (e.g. a null GetFeatQ()/GetFeatT(), which the population loop skips), and calling
// top() on an empty priority_queue is undefined behavior. In the two later loops top() is
// evaluated before the IsNull() test, so that test does not guard it. Confirm null features
// cannot reach these lookups, or check IsNull()/empty() first.
1106  ITERATE(vector<CRef<CCompareFeats> >, it, vComparisons) {
1107  CRef<CCompareFeats> cf = *it;
1108  if(q_map[cf->GetFeatQ()].top() == cf && t_map[cf->GetFeatT()].top() == cf) {
1109  cf->SetIrrelevance(0);
1110  compset.insert(cf);
1111  }
1112  }
1113 
1114  ITERATE(vector<CRef<CCompareFeats> >, it, vComparisons) {
1115  CRef<CCompareFeats> cf = *it;
1116  if(compset.find(cf) == compset.end() &&
1117  (q_map[cf->GetFeatQ()].top() == cf || cf->GetFeatQ().IsNull()))
1118  {
1119  cf->SetIrrelevance(1);
1120  compset.insert(cf);
1121  }
1122  }
1123 
1124  ITERATE(vector<CRef<CCompareFeats> >, it, vComparisons) {
1125  CRef<CCompareFeats> cf = *it;
1126  if(compset.find(cf) == compset.end() &&
1127  (t_map[cf->GetFeatT()].top() == cf || cf->GetFeatT().IsNull()))
1128  {
1129  cf->SetIrrelevance(2);
1130  compset.insert(cf);
1131  }
1132  }
1133 
// Replace the input with the selected comparisons only.
1134  vComparisons.clear();
1135  ITERATE(set<CRef<CCompareFeats> >, it, compset) {
1136 
1137  //do not report non-matches if we want different genes only
1138  //this should really be filtered out earlier, in GetPutativeMatches but for some reason
1139  //that removes 99% of comparisons - need to look into it
1140  //cout << **it;
1141  //if((*it)->GetFeatT().IsNull() && (m_comp_options & CCompareSeqRegions::fDifferentGenesOnly)) {
1142  // continue;
1143  // }
1144  vComparisons.push_back(*it);
1145  }
1146 }
1147 
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
Definition: Na_strand.hpp:75
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
bool SameOrientation(ENa_strand a, ENa_strand b)
Definition: Na_strand.hpp:83
unsigned dummy
Definition: block_cipher.h:0
CCompareFeats represents a result of comparison of two features.
CConstRef< CSeq_feat > m_feat1
CConstRef< CSeq_loc > m_feat1_mapped_loc
static string s_GetLocLabel(const CSeq_loc &loc, bool merged=false)
CRef< CCompareSeq_locs > m_compare
static string s_GetFeatLabel(const CSeq_feat &gene_feat, feature::TFeatLabelFlags type=feature::fFGL_Both)
CConstRef< CSeq_feat > m_feat2
double GetMappedIdentity() const
static int s_GetGeneId(const CSeq_feat &feat)
CConstRef< CSeq_loc > x_GetSelfLoc(const CSeq_loc &loc, CScope *scope, bool merge_single_range)
bool NextComparisonGroup(vector< CRef< CCompareFeats > > &v)
Return the next group of comparisons on the region (return true iff found any). A group is a set of fe...
void SelectMatches(vector< CRef< CCompareFeats > > &v)
void x_GetPutativeMatches(vector< CRef< CCompareFeats > > &v, CConstRef< CSeq_feat > q_feat)
double GetRelativeOverlap() const
Relative overlap is defined as ratio of the length of the overlap to the length of the shorter featur...
vector< SIntervalComparisonResult > m_IntComparisons
string GetEvidenceString() const
The evidence string is a whitespace-separated list of exon comparisons Each exon comparison is a pair...
TSeqPos m_len_seqloc_overlap
void x_ComputeOverlapValues() const
Recompute m_len_seqloc_overlap, m_len_seqloc1, and m_len_seqloc2.
TCompareLocsFlags GetResult(string *str_out=NULL) const
str_out will contain human-readable summary of the internal comparison
CRef< CSeq_loc > m_loc2
FCompareLocs x_CompareInts(const CSeq_loc &loc1, const CSeq_loc &loc2) const
Compare two exons.
ResultCounts m_counts
CRef< CSeq_loc > m_loc1
double GetSymmetricalOverlap() const
Symmetrical overlap is defined as length(intersection(loc1, loc2)) / (length(loc1) + length(loc2)) int...
@ fCmp_Subset
comparison loc is a subset of the reference loc; some interval boundaries do not match
@ fCmp_5pExtension
5' terminal interval extended (other splice junction matches)
@ fCmp_intsExtra_internal
comparison loc has extra interval(s) internally
@ fCmp_5pTruncation
5' terminal interval truncated (other splice junction matches)
@ fCmp_Superset
comparison loc is a superset of the reference loc; some interval boundaries do not match
@ fCmp_intsExtra_5p
comparison loc has extra interval(s) at 5' end
@ fCmp_RegionOverlap
overlap of the extremes
@ fCmp_intsMissing_3p
comparison loc is missing interval(s) at 3' end
@ fCmp_3pExtension
3' terminal interval extended (other splice junction matches)
@ fCmp_3pTruncation
3' terminal interval truncated (other splice junction matches)
@ fCmp_NoOverlap
seq_locs do not overlap at all
@ fCmp_StrandDifferent
different strand
@ fCmp_Unknown
failed to compare
@ fCmp_Overlap
at least one interval overlaps
@ fCmp_intsMissing_internal
comparison loc is missing interval(s) internally
@ fCmp_intsMissing_5p
comparison loc is missing interval(s) at 5' end
@ fCmp_Match
all junctions match (fuzz-agnostic)
@ fCmp_intsExtra_3p
comparison loc has extra interval(s) at 3' end
void x_Compare()
Process the seq_locs and generate the m_IntComparisons vector; Recompute the counts.
CFeat_CI –.
Definition: feat_ci.hpp:64
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
Definition: ncbistre.hpp:802
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
ESubtype GetSubtype(void) const
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
int Compare(const CSeq_feat &f2) const
Compare relative order of this feature and feature f2, ordering first by features' coordinates,...
Definition: Seq_feat.hpp:242
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeq_loc_Mapper –.
Definition: map.hpp:338
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
USING_SCOPE(objects)
CNcbiOstream & operator<<(CNcbiOstream &out, const CCompareFeats &cf)
static Uint4 match_start
std::ofstream out("events_result.xml")
main entry point for tests
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
#define _TRACE(message)
Definition: ncbidbg.hpp:122
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
string GetLabel(const CSeq_id &id)
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
void SetMix(TMix &v)
Definition: Seq_loc.hpp:987
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
void SetEmpty(TEmpty &v)
Definition: Seq_loc.hpp:981
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
Definition: Seq_loc.cpp:3875
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages) label must point to an existing string ob...
Definition: Seq_loc.cpp:3467
void ChangeToMix(void)
Definition: Seq_loc.cpp:3633
void SetNull(void)
Override all setters to incorporate cache invalidation.
Definition: Seq_loc.hpp:960
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
@ eOrder_Positional
Definition: Seq_loc.hpp:461
@ eEmpty_Allow
ignore empty locations
Definition: Seq_loc.hpp:458
@ fMerge_SingleRange
Definition: Seq_loc.hpp:332
@ fMerge_All
Definition: Seq_loc.hpp:331
@ eContent
Definition: feature.hpp:87
@ fFGL_Both
Definition: feature.hpp:74
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc. Returns eNa_strand_other if multiple strands in...
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CRef< CSeq_loc > Seq_loc_Subtract(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Subtract the second seq-loc from the first one.
CRef< CSeq_loc > Seq_loc_Merge(const CSeq_loc &loc, CSeq_loc::TOpFlags flags, CScope *scope)
Merge ranges in the seq-loc.
bool IsOneBioseq(const CSeq_loc &loc, CScope *scope)
Returns true if all embedded CSeq_ids represent the same CBioseq, else false.
ECompare
CRef< CSeq_loc > Seq_loc_Add(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Add two seq-locs.
int TestForOverlap(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, TSeqPos circular_len=kInvalidSeqPos, CScope *scope=0)
Calls TestForOverlap64() and if the result is greater than kMax_Int truncates it to kMax_Int.
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eOverlap_Simple
any overlap of extremes
@ eContains
First CSeq_loc contains second.
@ eOverlap
CSeq_locs overlap.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
@ eNoOverlap
CSeq_locs do not overlap or abut.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
SAnnotSelector & SetOverlapIntervals(void)
Check overlapping of individual intervals.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:1401
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NcbiCerr
Definition: ncbistre.hpp:544
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static const char label[]
const TDb & GetDb(void) const
Get the Db member data.
Definition: Gene_ref_.hpp:743
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
vector< CRef< CDbtag > > TDbxref
Definition: Seq_feat_.hpp:123
E_Choice Which(void) const
Which variant is currently selected.
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_feat_.hpp:904
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Feat_id_.cpp:134
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Feat_id_.hpp:353
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
bool CanGetData(void) const
Check if it is safe to call GetData method.
Definition: Seq_feat_.hpp:919
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
Definition: Seq_feat_.hpp:1333
bool CanGetId(void) const
Check if it is safe to call GetId method.
Definition: Seq_feat_.hpp:898
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
const TGene & GetGene(void) const
Get the variant data.
bool CanGetProduct(void) const
Check if it is safe to call GetProduct method.
Definition: Seq_feat_.hpp:1090
bool IsSetDbxref(void) const
support for xref to other databases Check if a value has been assigned to Dbxref data member.
Definition: Seq_feat_.hpp:1321
bool IsRna(void) const
Check if variant Rna is selected.
bool IsEmpty(void) const
Check if variant Empty is selected.
Definition: Seq_loc_.hpp:516
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
const TWhole & GetWhole(void) const
Get the variant data.
Definition: Seq_loc_.cpp:172
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
bool IsNull(void) const
Check if variant Null is selected.
Definition: Seq_loc_.hpp:504
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
@ e_not_set
No variant selected.
Definition: Seq_loc_.hpp:97
int i
#define abs(a)
Definition: ncbi_heapmgr.c:130
T max(T x_, T y_)
T min(T x_, T y_)
This helper struct is used to accumulate the neighboring comparisons of the same class,...
bool Add(const SIntervalComparisonResult &r)
if the comparison is neighboring and of the same class, set the terminal comparison to it and return ...
void Reset(const SIntervalComparisonResult &r)
SAnnotSelector –.
Comparison functor for pqueue storing related comparisons.
bool operator()(const CRef< CCompareFeats > &c1, const CRef< CCompareFeats > &c2) const
bool operator()(CConstRef< CSeq_feat > f1, CConstRef< CSeq_feat > f2) const
#define _ASSERT
CRef< CTestThread > thr[k_NumThreadsMax]
Definition: test_mt.cpp:267
Modified on Wed May 29 18:34:34 2024 by modify_doxy.py rev. 669887