NCBI C++ ToolKit
seq_loc_util.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seq_loc_util.cpp 100589 2023-08-14 14:23:37Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Clifford Clausen, Aaron Ucko, Aleksey Grichenko
27 *
28 * File Description:
29 * Seq-loc utilities requiring CScope
30 */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbi_limits.hpp>
34 
35 #include <serial/iterator.hpp>
36 
40 #include <objects/seq/Bioseq.hpp>
41 #include <objects/seq/MolInfo.hpp>
42 
49 
51 #include <objmgr/util/sequence.hpp>
52 #include <objmgr/scope.hpp>
53 #include <objmgr/seqdesc_ci.hpp>
57 #include <objmgr/impl/synonyms.hpp>
58 #include <objmgr/error_codes.hpp>
59 
60 #include <util/range_coll.hpp>
61 
62 #include <algorithm>
63 
64 #define NCBI_USE_ERRCODE_X ObjMgr_SeqUtil
65 
68 BEGIN_SCOPE(sequence)
69 
70 
72 {
73  if ( !scope ) {
75  }
76 #if 0
77  CBioseq_Handle hnd = scope->GetBioseqHandle(id);
78  if ( !hnd ) {
80  }
81  return hnd.GetBioseqLength();
82 #else
83  return scope->GetSequenceLength(id);
84 #endif
85 }
86 
87 
88 TSeqPos GetLength(const CSeq_loc& loc, CScope* scope)
89 {
90  switch (loc.Which()) {
91  case CSeq_loc::e_Pnt:
92  return 1;
93  case CSeq_loc::e_Int:
94  return loc.GetInt().GetLength();
95  case CSeq_loc::e_Null:
96  case CSeq_loc::e_Empty:
97  return 0;
98  case CSeq_loc::e_Whole:
99  return GetLength(loc.GetWhole(), scope);
101  return loc.GetPacked_int().GetLength();
102  case CSeq_loc::e_Mix:
103  return GetLength(loc.GetMix(), scope);
104  case CSeq_loc::e_Packed_pnt: // just a bunch of points
105  return TSeqPos(loc.GetPacked_pnt().GetPoints().size());
106  case CSeq_loc::e_Bond: // return number of points
107  return loc.GetBond().IsSetB() + loc.GetBond().IsSetA();
108  case CSeq_loc::e_not_set: //can't calculate length
109  case CSeq_loc::e_Feat:
110  case CSeq_loc::e_Equiv: // unless actually the same length...
111  default:
112  NCBI_THROW(CObjmgrUtilException, eUnknownLength,
113  "Unable to determine length");
114  }
115 }
116 
117 
118 namespace {
119  struct SCoverageCollector {
120  SCoverageCollector(const CSeq_loc& loc, CScope* scope)
121  {
122  Add(loc, scope);
123  }
124  void Add(const CSeq_loc& loc, CScope* scope)
125  {
126  switch (loc.Which()) {
127  case CSeq_loc::e_Pnt:
128  Add(loc.GetPnt());
129  return;
130  case CSeq_loc::e_Int:
131  Add(loc.GetInt());
132  return;
133  case CSeq_loc::e_Null:
134  case CSeq_loc::e_Empty:
135  return;
136  case CSeq_loc::e_Whole:
137  AddWhole(loc.GetWhole(), scope);
138  return;
139  case CSeq_loc::e_Mix:
140  Add(loc.GetMix(), scope);
141  return;
143  Add(loc.GetPacked_int());
144  return;
146  Add(loc.GetPacked_pnt());
147  return;
148  case CSeq_loc::e_Bond:
149  Add(loc.GetBond().GetA());
150  if ( loc.GetBond().IsSetB() ) {
151  Add(loc.GetBond().GetB());
152  }
153  return;
154  case CSeq_loc::e_not_set: //can't calculate coverage
155  case CSeq_loc::e_Feat:
156  case CSeq_loc::e_Equiv: // unless actually the same length...
157  default:
158  NCBI_THROW(CObjmgrUtilException, eUnknownLength,
159  "Unable to determine coverage");
160  }
161  }
162  void Add(const CSeq_id_Handle& idh, TSeqPos from, TSeqPos to)
163  {
164  m_Intervals[idh] += TRange(from, to);
165  }
166  void Add(const CSeq_id& id, TSeqPos from, TSeqPos to)
167  {
168  Add(CSeq_id_Handle::GetHandle(id), from, to);
169  }
170  void AddWhole(const CSeq_id& id, CScope* scope)
171  {
172  Add(id, 0, GetLength(id, scope)-1);
173  }
174  void Add(const CSeq_point& seq_pnt)
175  {
176  Add(seq_pnt.GetId(), seq_pnt.GetPoint(), seq_pnt.GetPoint());
177  }
178  void Add(const CSeq_interval& seq_int)
179  {
180  Add(seq_int.GetId(), seq_int.GetFrom(), seq_int.GetTo());
181  }
182  void Add(const CPacked_seqint& packed_int)
183  {
184  ITERATE ( CPacked_seqint::Tdata, it, packed_int.Get() ) {
185  Add(**it);
186  }
187  }
188  void Add(const CPacked_seqpnt& packed_pnt)
189  {
190  CSeq_id_Handle idh =
191  CSeq_id_Handle::GetHandle(packed_pnt.GetId());
192  ITERATE(CPacked_seqpnt::TPoints, it, packed_pnt.GetPoints()) {
193  Add(idh, *it, *it);
194  }
195  }
196  void Add(const CSeq_loc_mix& mix, CScope* scope)
197  {
198  ITERATE ( CSeq_loc_mix::Tdata, it, mix.Get() ) {
199  Add(**it, scope);
200  }
201  }
202 
203  TSeqPos GetCoverage(void) const
204  {
205  TSeqPos coverage = 0;
206  ITERATE ( TIntervals, it, m_Intervals ) {
207  ITERATE ( TRanges, it2, it->second ) {
208  coverage += it2->GetLength();
209  }
210  }
211  return coverage;
212  }
213 
214  private:
215  typedef CRange<TSeqPos> TRange;
217  typedef map<CSeq_id_Handle, TRanges> TIntervals;
218  TIntervals m_Intervals;
219  };
220 }
221 
222 
223 TSeqPos GetCoverage(const CSeq_loc& loc, CScope* scope)
224 {
225  switch (loc.Which()) {
226  case CSeq_loc::e_Pnt:
227  return 1;
228  case CSeq_loc::e_Int:
229  return loc.GetInt().GetLength();
230  case CSeq_loc::e_Null:
231  case CSeq_loc::e_Empty:
232  return 0;
233  case CSeq_loc::e_Whole:
234  return GetLength(loc.GetWhole(), scope);
236  case CSeq_loc::e_Mix:
238  case CSeq_loc::e_Bond:
239  return SCoverageCollector(loc, scope).GetCoverage();
240  case CSeq_loc::e_not_set: // can't calculate length
241  case CSeq_loc::e_Feat:
242  case CSeq_loc::e_Equiv: // unless actually the same length...
243  default:
244  NCBI_THROW(CObjmgrUtilException, eUnknownLength,
245  "Unable to determine length");
246  }
247 }
248 
249 
251 {
252  TSeqPos length = 0;
253 
254  ITERATE( CSeq_loc_mix::Tdata, i, mix.Get() ) {
255  TSeqPos ret = GetLength((**i), scope);
256  if (ret < numeric_limits<TSeqPos>::max()) {
257  length += ret;
258  }
259  }
260  return length;
261 }
262 
263 
264 bool IsValid(const CSeq_point& pt, CScope* scope)
265 {
266  if (static_cast<TSeqPos>(pt.GetPoint()) >=
267  GetLength(pt.GetId(), scope) )
268  {
269  return false;
270  }
271 
272  return true;
273 }
274 
275 
276 bool IsValid(const CPacked_seqpnt& pts, CScope* scope)
277 {
278  TSeqPos length = GetLength(pts.GetId(), scope);
280  if (*it >= length) {
281  return false;
282  }
283  }
284  return true;
285 }
286 
287 
288 bool IsValid(const CSeq_interval& interval, CScope* scope)
289 {
290  if (interval.GetFrom() > interval.GetTo() ||
291  interval.GetTo() >= GetLength(interval.GetId(), scope))
292  {
293  return false;
294  }
295 
296  return true;
297 }
298 
299 
300 bool IsSameBioseq(const CSeq_id& id1, const CSeq_id& id2, CScope* scope,
301  CScope::EGetBioseqFlag get_flag)
302 {
303  if ( id1.Match(id2) ) {
304  return true;
305  }
308  scope, get_flag);
309 }
310 
311 
312 bool IsSameBioseq(const CSeq_id_Handle& id1, const CSeq_id_Handle& id2, CScope* scope,
313  CScope::EGetBioseqFlag get_flag)
314 {
315  // Compare CSeq_ids directly
316  if (id1 == id2) {
317  return true;
318  }
319 
320  // Compare handles
321  return scope && scope->IsSameBioseq(id1, id2, get_flag);
322 }
323 
324 
325 static const CSeq_id* s_GetId(const CSeq_loc& loc, CScope* scope,
326  string* msg = NULL)
327 {
328  const CSeq_id* sip = NULL;
329  if (msg != NULL) {
330  msg->erase();
331  }
332 
333  for (CSeq_loc_CI it(loc, CSeq_loc_CI::eEmpty_Allow); it; ++it) {
334  const CSeq_id& id = it.GetSeq_id();
335  if (id.Which() == CSeq_id::e_not_set) {
336  continue;
337  }
338  if (sip == NULL) {
339  sip = &id;
340  } else {
341  if (!IsSameBioseq(*sip, id, scope)) {
342  if (msg != NULL) {
343  *msg = "Location contains segments on more than one bioseq.";
344  }
345  sip = NULL;
346  break;
347  }
348  }
349  }
350 
351  if (sip == NULL && msg != NULL && msg->empty()) {
352  *msg = "Location contains no IDs.";
353  }
354 
355  return sip;
356 }
357 
358 
359 const CSeq_id& GetId(const CSeq_loc& loc, CScope* scope)
360 {
361  string msg;
362  const CSeq_id* sip = s_GetId(loc, scope, &msg);
363 
364  if (sip == NULL) {
365  NCBI_THROW(CObjmgrUtilException, eNotUnique, msg);
366  }
367 
368  return *sip;
369 }
370 
371 
373 {
374  CSeq_id_Handle retval;
375 
376  try {
377  if (!loc.IsNull()) {
378  retval = CSeq_id_Handle::GetHandle(GetId(loc, scope));
379  }
380  } catch (CObjmgrUtilException&) {}
381 
382  return retval;
383 }
384 
385 
386 bool IsOneBioseq(const CSeq_loc& loc, CScope* scope)
387 {
388  return loc.GetId() != NULL || s_GetId(loc, scope) != NULL;
389 }
390 
391 
392 inline
393 static ENa_strand s_GetStrand(const CSeq_loc& loc)
394 {
395  switch (loc.Which()) {
396  case CSeq_loc::e_Bond:
397  {
398  const CSeq_bond& bond = loc.GetBond();
399  ENa_strand a_strand = bond.GetA().IsSetStrand() ?
400  bond.GetA().GetStrand() : eNa_strand_unknown;
401  ENa_strand b_strand = eNa_strand_unknown;
402  if ( bond.IsSetB() ) {
403  b_strand = bond.GetB().IsSetStrand() ?
404  bond.GetB().GetStrand() : eNa_strand_unknown;
405  }
406 
407  if ( a_strand == eNa_strand_unknown &&
408  b_strand != eNa_strand_unknown ) {
409  a_strand = b_strand;
410  } else if ( a_strand != eNa_strand_unknown &&
411  b_strand == eNa_strand_unknown ) {
412  b_strand = a_strand;
413  }
414 
415  return (a_strand != b_strand) ? eNa_strand_other : a_strand;
416  }
417  case CSeq_loc::e_Whole:
418  return eNa_strand_both;
419  case CSeq_loc::e_Int:
420  return loc.GetInt().IsSetStrand() ? loc.GetInt().GetStrand() :
422  case CSeq_loc::e_Pnt:
423  return loc.GetPnt().IsSetStrand() ? loc.GetPnt().GetStrand() :
426  return loc.GetPacked_pnt().IsSetStrand() ?
429  {
431  bool strand_set = false;
433  ENa_strand istrand = (*i)->IsSetStrand() ? (*i)->GetStrand() :
435  if (strand == eNa_strand_unknown && istrand == eNa_strand_plus) {
436  strand = eNa_strand_plus;
437  strand_set = true;
438  } else if (strand == eNa_strand_plus &&
439  istrand == eNa_strand_unknown) {
440  strand_set = true;
441  } else if (!strand_set) {
442  strand = istrand;
443  strand_set = true;
444  } else if (istrand != strand) {
445  return eNa_strand_other;
446  }
447  }
448  return strand;
449  }
450  case CSeq_loc::e_Mix:
451  {
453  bool strand_set = false;
454  ITERATE(CSeq_loc_mix::Tdata, it, loc.GetMix().Get()) {
455  if ((*it)->IsNull() || (*it)->IsEmpty()) {
456  continue;
457  }
458  ENa_strand istrand = GetStrand(**it);
459  if (strand == eNa_strand_unknown && istrand == eNa_strand_plus) {
460  strand = eNa_strand_plus;
461  strand_set = true;
462  } else if (strand == eNa_strand_plus &&
463  istrand == eNa_strand_unknown) {
464  strand_set = true;
465  } else if (!strand_set) {
466  strand = istrand;
467  strand_set = true;
468  } else if (istrand != strand) {
469  return eNa_strand_other;
470  }
471  }
472  return strand;
473  }
474  default:
475  return eNa_strand_unknown;
476  }
477 }
478 
479 
480 
481 ENa_strand GetStrand(const CSeq_loc& loc, CScope* scope)
482 {
483  switch (loc.Which()) {
484  case CSeq_loc::e_Int:
485  if (loc.GetInt().IsSetStrand()) {
486  return loc.GetInt().GetStrand();
487  }
488  break;
489 
490  case CSeq_loc::e_Whole:
491  return eNa_strand_both;
492 
493  case CSeq_loc::e_Pnt:
494  if (loc.GetPnt().IsSetStrand()) {
495  return loc.GetPnt().GetStrand();
496  }
497  break;
498 
500  return loc.GetPacked_pnt().IsSetStrand() ?
502 
503  default:
504  if (!IsOneBioseq(loc, scope)) {
505  return eNa_strand_unknown; // multiple bioseqs
506  } else {
507  return s_GetStrand(loc);
508  }
509  }
510 
511  /// default to unknown strand
512  return eNa_strand_unknown;
513 }
514 
515 
517 {
518  // Throw CObjmgrUtilException if loc does not represent one CBioseq
519  GetId(loc, scope);
520 
521  return loc.GetStart(ext);
522 }
523 
524 
526 {
527  // Throw CObjmgrUtilException if loc does not represent one CBioseq
528  GetId(loc, scope);
529 
530  if (loc.IsWhole() && scope != NULL) {
531  CBioseq_Handle seq = GetBioseqFromSeqLoc(loc, *scope);
532  if (seq) {
533  return seq.GetBioseqLength() - 1;
534  }
535  }
536  return loc.GetStop(ext);
537 }
538 
539 
540 void ChangeSeqId(CSeq_id* id, bool best, CScope* scope)
541 {
542  // Return if no scope
543  if (!scope || !id) {
544  return;
545  }
546 
547  // Get CBioseq represented by *id
548  CBioseq_Handle bsh = scope->GetBioseqHandle(*id);
549  if (!bsh) {
550  return;
551  }
553  bsh.GetBioseqCore();
554 
555  // Get pointer to the best/worst id of *seq
556  const CSeq_id* tmp_id;
557  if (best) {
558  tmp_id = FindBestChoice(seq->GetId(), CSeq_id::BestRank).GetPointer();
559  } else {
560  tmp_id = FindBestChoice(seq->GetId(), CSeq_id::WorstRank).GetPointer();
561  }
562 
563  // Change the contents of *id to that of *tmp_id
564  id->Reset();
565  id->Assign(*tmp_id);
566 }
567 
568 
569 void ChangeSeqLocId(CSeq_loc* loc, bool best, CScope* scope)
570 {
571  if (!scope) {
572  return;
573  }
574 
575  for (CTypeIterator<CSeq_id> id(Begin(*loc)); id; ++id) {
576  ChangeSeqId(&(*id), best, scope);
577  }
578 }
579 
580 
582 (const CBioseq_Handle& bsh,
583  const CSeq_loc& loc)
584 {
585  try {
587  CConstRef<CSeq_loc> mapped_loc = mapper.Map(loc);
588  if (!mapped_loc) {
589  return false;
590  }
591 
592  // Check that loc segments are in order
593  CSeq_loc::TRange last_range;
594  bool first = true;
595  for (CSeq_loc_CI lit(*mapped_loc); lit; ++lit) {
596  if (first) {
597  last_range = lit.GetRange();
598  first = false;
599  continue;
600  }
601  if (lit.GetStrand() == eNa_strand_minus) {
602  if (last_range.GetTo() < lit.GetRange().GetTo()) {
603  return true;
604  }
605  } else {
606  if (last_range.GetFrom() > lit.GetRange().GetFrom()) {
607  return true;
608  }
609  }
610  last_range = lit.GetRange();
611  }
612  } catch (CException&) {
613  // exception will be thrown if references far sequence and not remote fetching
614  }
615  return false;
616 }
617 
618 
620 (const CBioseq& seq,
621  const CSeq_loc& loc,
622  CScope* scope)
623 {
624  if (scope) {
625  return BadSeqLocSortOrder (scope->GetBioseqHandle(seq), loc);
626  } else {
627  return false;
628  }
629 }
630 
631 
633 {
635 
636  ENa_strand strand = GetStrand(loc, scope);
637  if (strand == eNa_strand_unknown || strand == eNa_strand_other) {
638  rtn = eSeqLocCheck_warning;
639  }
640 
642  for (;lit; ++lit) {
643  switch (lit->Which()) {
644  case CSeq_loc::e_Int:
645  if (!IsValid(lit->GetInt(), scope)) {
646  rtn = eSeqLocCheck_error;
647  }
648  break;
650  {
652  for(;sit; ++sit) {
653  if (!IsValid(*sit, scope)) {
654  rtn = eSeqLocCheck_error;
655  break;
656  }
657  }
658  break;
659  }
660  case CSeq_loc::e_Pnt:
661  if (!IsValid(lit->GetPnt(), scope)) {
662  rtn = eSeqLocCheck_error;
663  }
664  break;
666  if (!IsValid(lit->GetPacked_pnt(), scope)) {
667  rtn = eSeqLocCheck_error;
668  }
669  break;
670  default:
671  break;
672  }
673  }
674  return rtn;
675 }
676 
677 
678 TSeqPos LocationOffset(const CSeq_loc& outer, const CSeq_loc& inner,
679  EOffsetType how, CScope* scope)
680 {
681  SRelLoc rl(outer, inner, scope);
682  if (rl.m_Ranges.empty()) {
683  return (TSeqPos)-1;
684  }
685  bool want_reverse = false;
686  {{
687  bool outer_is_reverse = IsReverse(GetStrand(outer, scope));
688  switch (how) {
689  case eOffset_FromStart:
690  want_reverse = false;
691  break;
692  case eOffset_FromEnd:
693  want_reverse = true;
694  break;
695  case eOffset_FromLeft:
696  want_reverse = outer_is_reverse;
697  break;
698  case eOffset_FromRight:
699  want_reverse = !outer_is_reverse;
700  break;
701  }
702  }}
703  if (want_reverse) {
704  // Subtract 1 to compensate comparing 0-based coordinates to a
705  // 1-based length.
706  return GetLength(outer, scope) - rl.m_Ranges.back()->GetTo() - 1;
707  } else {
708  return rl.m_Ranges.front()->GetFrom();
709  }
710 }
711 
712 
714  unsigned int& retval,
715  bool is_first,
716  bool is_last,
717  CScope& scope)
718 {
719  if (itv.IsSetFuzz_from()) {
720  const CInt_fuzz& fuzz = itv.GetFuzz_from();
721  if (fuzz.Which() == CInt_fuzz::e_Lim) {
722  CInt_fuzz::ELim lim = fuzz.GetLim();
723  if (lim == CInt_fuzz::eLim_gt) {
724  retval |= eSeqlocPartial_Limwrong;
725  } else if (lim == CInt_fuzz::eLim_lt ||
726  lim == CInt_fuzz::eLim_unk) {
727  if (itv.IsSetStrand() &&
728  itv.GetStrand() == eNa_strand_minus) {
729  if ( is_last ) {
730  retval |= eSeqlocPartial_Stop;
731  } else {
732  retval |= eSeqlocPartial_Internal;
733  }
734  if (itv.GetFrom() != 0) {
735  if ( is_last ) {
736  retval |= eSeqlocPartial_Nostop;
737  } else {
738  retval |= eSeqlocPartial_Nointernal;
739  }
740  }
741  } else {
742  if ( is_first ) {
743  retval |= eSeqlocPartial_Start;
744  } else {
745  retval |= eSeqlocPartial_Internal;
746  }
747  if (itv.GetFrom() != 0) {
748  if ( is_first ) {
749  retval |= eSeqlocPartial_Nostart;
750  } else {
751  retval |= eSeqlocPartial_Nointernal;
752  }
753  }
754  }
755  }
756  } else if (fuzz.Which() == CInt_fuzz::e_Range) {
757  // range
758  if (itv.IsSetStrand() && itv.GetStrand() == eNa_strand_minus) {
759  if (is_last) {
760  retval |= eSeqlocPartial_Stop;
761  }
762  } else {
763  if (is_first) {
764  retval |= eSeqlocPartial_Start;
765  }
766  }
767  }
768  }
769 
770  if (itv.IsSetFuzz_to()) {
771  const CInt_fuzz& fuzz = itv.GetFuzz_to();
772  CInt_fuzz::ELim lim = fuzz.IsLim() ?
773  fuzz.GetLim() : CInt_fuzz::eLim_unk;
774  if (lim == CInt_fuzz::eLim_lt) {
775  retval |= eSeqlocPartial_Limwrong;
776  } else if (lim == CInt_fuzz::eLim_gt ||
777  lim == CInt_fuzz::eLim_unk) {
778  CBioseq_Handle hnd =
779  scope.GetBioseqHandle(itv.GetId());
780  bool miss_end = false;
781  if ( hnd ) {
782  if (itv.GetTo() != hnd.GetBioseqLength() - 1) {
783  miss_end = true;
784  }
785  }
786  if (itv.IsSetStrand() &&
787  itv.GetStrand() == eNa_strand_minus) {
788  if ( is_first ) {
789  retval |= eSeqlocPartial_Start;
790  } else {
791  retval |= eSeqlocPartial_Internal;
792  }
793  if (miss_end) {
794  if ( is_first /* was last */) {
795  retval |= eSeqlocPartial_Nostart;
796  } else {
797  retval |= eSeqlocPartial_Nointernal;
798  }
799  }
800  } else {
801  if ( is_last ) {
802  retval |= eSeqlocPartial_Stop;
803  } else {
804  retval |= eSeqlocPartial_Internal;
805  }
806  if ( miss_end ) {
807  if ( is_last ) {
808  retval |= eSeqlocPartial_Nostop;
809  } else {
810  retval |= eSeqlocPartial_Nointernal;
811  }
812  }
813  }
814  }
815  }
816 }
817 
818 
819 int SeqLocPartialCheck(const CSeq_loc& loc, CScope* scope)
820 {
821  unsigned int retval = 0;
822  if (!scope) {
823  return retval;
824  }
825 
826  // Find first and last Seq-loc
827  const CSeq_loc *first = 0, *last = 0;
828  for ( CSeq_loc_CI loc_iter(loc); loc_iter; ++loc_iter ) {
829  if ( first == 0 ) {
830  first = &(loc_iter.GetEmbeddingSeq_loc());
831  }
832  last = &(loc_iter.GetEmbeddingSeq_loc());
833  }
834  if (!first) {
835  return retval;
836  }
837 
839  while ( i2 ) {
840  const CSeq_loc* slp = &(i2.GetEmbeddingSeq_loc());
841  switch (slp->Which()) {
842  case CSeq_loc::e_Null:
843  if (slp == first) {
844  retval |= eSeqlocPartial_Start;
845  } else if (slp == last) {
846  retval |= eSeqlocPartial_Stop;
847  } else {
848  retval |= eSeqlocPartial_Internal;
849  }
850  break;
851  case CSeq_loc::e_Int:
852  {
853  SeqIntPartialCheck(slp->GetInt(), retval,
854  slp == first, slp == last, *scope);
855  }
856  break;
858  {
859  const CPacked_seqint::Tdata& ints = slp->GetPacked_int().Get();
860  const CSeq_interval* first_int =
861  ints.empty() ? 0 : ints.front().GetPointer();
862  const CSeq_interval* last_int =
863  ints.empty() ? 0 : ints.back().GetPointer();
864  ITERATE(CPacked_seqint::Tdata, it, ints) {
865  SeqIntPartialCheck(**it, retval,
866  slp == first && *it == first_int,
867  slp == last && *it == last_int,
868  *scope);
869  ++i2;
870  }
871  continue;
872  }
873  case CSeq_loc::e_Pnt:
874  if (slp->GetPnt().IsSetFuzz()) {
875  const CInt_fuzz& fuzz = slp->GetPnt().GetFuzz();
876  if (fuzz.Which() == CInt_fuzz::e_Lim) {
877  CInt_fuzz::ELim lim = fuzz.GetLim();
878  if (lim == CInt_fuzz::eLim_gt ||
879  lim == CInt_fuzz::eLim_lt ||
880  lim == CInt_fuzz::eLim_unk) {
881  if (slp == first) {
882  retval |= eSeqlocPartial_Start;
883  } else if (slp == last) {
884  retval |= eSeqlocPartial_Stop;
885  } else {
886  retval |= eSeqlocPartial_Internal;
887  }
888  }
889  }
890  }
891  break;
893  if (slp->GetPacked_pnt().IsSetFuzz()) {
894  const CInt_fuzz& fuzz = slp->GetPacked_pnt().GetFuzz();
895  if (fuzz.Which() == CInt_fuzz::e_Lim) {
896  CInt_fuzz::ELim lim = fuzz.GetLim();
897  if (lim == CInt_fuzz::eLim_gt ||
898  lim == CInt_fuzz::eLim_lt ||
899  lim == CInt_fuzz::eLim_unk) {
900  if (slp == first) {
901  retval |= eSeqlocPartial_Start;
902  } else if (slp == last) {
903  retval |= eSeqlocPartial_Stop;
904  } else {
905  retval |= eSeqlocPartial_Internal;
906  }
907  }
908  }
909  }
910  break;
911  case CSeq_loc::e_Whole:
912  {
913  // Get the Bioseq referred to by Whole
914  CBioseq_Handle bsh = scope->GetBioseqHandle(slp->GetWhole());
915  if ( !bsh ) {
916  break;
917  }
918  // Check for CMolInfo on the biodseq
919  CSeqdesc_CI di( bsh, CSeqdesc::e_Molinfo );
920  if ( !di ) {
921  // If no CSeq_descr, nothing can be done
922  break;
923  }
924  // First try to loop through CMolInfo
925  const CMolInfo& mi = di->GetMolinfo();
926  if (!mi.IsSetCompleteness()) {
927  ++i2;
928  continue;
929  }
930  switch (mi.GetCompleteness()) {
932  if (slp == first) {
933  retval |= eSeqlocPartial_Start;
934  } else {
935  retval |= eSeqlocPartial_Internal;
936  }
937  break;
939  if (slp == last) {
940  retval |= eSeqlocPartial_Stop;
941  } else {
942  retval |= eSeqlocPartial_Internal;
943  }
944  break;
946  retval |= eSeqlocPartial_Other;
947  break;
949  retval |= eSeqlocPartial_Start;
950  retval |= eSeqlocPartial_Stop;
951  break;
952  default:
953  break;
954  }
955  break;
956  }
957  default:
958  break;
959  }
960  ++i2;
961  }
962  return retval;
963 }
964 
965 /////////////////////////////////////////////////////////////////////
966 //
967 // Implementation of SeqLocRevCmpl()
968 //
969 
971 {
973  return GetReverseComplement( loc, &helper );
974 }
975 
976 /////////////////////////////////////////////////////////////////////
977 //
978 // Implementation of Compare()
979 //
980 
981 
983 
984 // Map the id to it's 'main' synonym.
986 {
987  TSynMap::const_iterator syn_it = syns.find(id);
988  if (syn_it != syns.end()) {
989  // known id
990  return syn_it->second;
991  }
992  // Unknown id, try to find a match
993  ITERATE(TSynMap, sit, syns) {
994  if ( IsSameBioseq(sit->first, id, scope) ) {
995  // Found a synonym
996  CSeq_id_Handle map_to = sit->second;
997  syns[id] = map_to;
998  return map_to;
999  }
1000  }
1001  syns[id] = id;
1002  return id;
1003 }
1004 
1005 
1007 typedef list<TRangeInfo> TRangeInfoList;
1009 
1010 // Convert the seq-loc to TRangeInfos. The id map is used to
1011 // normalize ids.
1013  TRangeInfoMap& infos,
1014  TSynMap& syns,
1015  CScope* scope)
1016 {
1017  CSeq_loc_CI it(loc,
1019  for ( ; it; ++it) {
1020  TRangeInfo info;
1021  if ( it.IsWhole() ) {
1022  info.SetOpen(0, GetLength(it.GetSeq_id(), scope));
1023  }
1024  else {
1025  info.SetOpen(it.GetRange().GetFrom(), it.GetRange().GetToOpen());
1026  }
1027  CSeq_id_Handle id = s_GetSynHandle(it.GetSeq_id_Handle(), syns, scope);
1028  infos[id].push_back(info);
1029  }
1031  info->second.sort();
1032  }
1033 }
1034 
1035 
1037  const CSeq_loc& you,
1038  TSynMap& syns,
1039  CScope* scope)
1040 {
1041  TRangeInfoMap me_infos, you_infos;
1042  s_SeqLocToRangeInfoMap(me, me_infos, syns, scope);
1043  s_SeqLocToRangeInfoMap(you, you_infos, syns, scope);
1044 
1045  // Check if locs are equal. The ranges are sorted now, so the original
1046  // order is not important.
1047  if (me_infos.size() == you_infos.size()) {
1048  bool equal = true;
1049  TRangeInfoMap::const_iterator mid_it = me_infos.begin();
1050  TRangeInfoMap::const_iterator yid_it = you_infos.begin();
1051  for ( ; mid_it != me_infos.end(); ++mid_it, ++yid_it) {
1052  _ASSERT(yid_it != you_infos.end());
1053  if (mid_it->first != yid_it->first ||
1054  mid_it->second.size() != yid_it->second.size()) {
1055  equal = false;
1056  break;
1057  }
1058  TRangeInfoList::const_iterator mit = mid_it->second.begin();
1059  TRangeInfoList::const_iterator yit = yid_it->second.begin();
1060  for ( ; mit != mid_it->second.end(); ++mit, ++yit) {
1061  _ASSERT(yit != yid_it->second.end());
1062  if (*mit != *yit) {
1063  equal = false;
1064  break;
1065  }
1066  }
1067  if ( !equal ) break;
1068  }
1069  if ( equal ) {
1070  return eSame;
1071  }
1072  }
1073 
1074  // Check if 'me' is contained or overlapping with 'you'.
1075  bool me_contained = true;
1076  bool overlap = false;
1077 
1078  ITERATE(TRangeInfoMap, mid_it, me_infos) {
1079  TRangeInfoMap::const_iterator yid_it = you_infos.find(mid_it->first);
1080  if (yid_it == you_infos.end()) {
1081  // The id is missing from 'you'.
1082  me_contained = false;
1083  if ( overlap ) {
1084  break; // nothing else to check
1085  }
1086  continue; // check for overlap
1087  }
1088  ITERATE(TRangeInfoList, mit, mid_it->second) {
1089  // current range is contained in 'you'?
1090  bool mit_contained = false;
1091  ITERATE(TRangeInfoList, yit, yid_it->second) {
1092  if (yit->GetToOpen() > mit->GetFrom() &&
1093  yit->GetFrom() < mit->GetToOpen()) {
1094  overlap = true;
1095  if (yit->GetFrom() <= mit->GetFrom() &&
1096  yit->GetToOpen() >= mit->GetToOpen()) {
1097  mit_contained = true;
1098  break;
1099  }
1100  }
1101  }
1102  if ( !mit_contained ) {
1103  me_contained = false;
1104  if ( overlap ) break; // nothing else to check
1105  }
1106  }
1107  if (!me_contained && overlap) {
1108  break; // nothing else to check
1109  }
1110  }
1111 
1112  // Reverse check: if 'you' is contained in 'me'.
1113  bool you_contained = true;
1114 
1115  ITERATE(TRangeInfoMap, yid_it, you_infos) {
1116  TRangeInfoMap::const_iterator mid_it = me_infos.find(yid_it->first);
1117  if (mid_it == me_infos.end()) {
1118  // The id is missing from 'me'.
1119  you_contained = false;
1120  break; // nothing else to check
1121  }
1122  ITERATE(TRangeInfoList, yit, yid_it->second) {
1123  // current range is contained in 'me'?
1124  bool yit_contained = false;
1125  ITERATE(TRangeInfoList, mit, mid_it->second) {
1126  if (mit->GetFrom() <= yit->GetFrom() &&
1127  mit->GetToOpen() >= yit->GetToOpen()) {
1128  yit_contained = true;
1129  break;
1130  }
1131  }
1132  if ( !yit_contained ) {
1133  you_contained = false;
1134  break;
1135  }
1136  }
1137  if ( !you_contained ) {
1138  break; // nothing else to check
1139  }
1140  }
1141 
1142  // Always prefere 'eContains' over 'eContained'.
1143  if ( you_contained ) {
1144  return eContains;
1145  }
1146  if ( me_contained ) {
1147  return eContained;
1148  }
1149  return overlap ? eOverlap : eNoOverlap;
1150 }
1151 
1152 
1153 bool s_CheckAbutting(const CSeq_loc& loc1,
1154  const CSeq_loc& loc2,
1155  TSynMap& syns,
1156  CScope* scope,
1157  ESeqLocExtremes ext)
1158 {
1159  // Compare biological end of loc1 and start of loc2.
1160  // If not abutting, use Compare.
1161  CSeq_loc_CI it1_last(loc1,
1163  ext == eExtreme_Positional ?
1165  it1_last.SetPos(it1_last.GetSize() - 1);
1166  CSeq_loc_CI it2_first(loc2,
1168  ext == eExtreme_Positional ?
1170 
1171  CSeq_id_Handle id1_last = s_GetSynHandle(it1_last.GetSeq_id_Handle(), syns, scope);
1172  CSeq_id_Handle id2_first = s_GetSynHandle(it2_first.GetSeq_id_Handle(), syns, scope);
1173 
1174  // Empty and whole locations can not abut.
1175  if (it1_last.IsEmpty() || it2_first.IsEmpty() ||
1176  it1_last.IsWhole() || it2_first.IsWhole() ||
1177  id1_last != id2_first) {
1178  return false;
1179  }
1180 
1181  if (ext == eExtreme_Positional) {
1182  return it1_last.GetRange().GetToOpen() == it2_first.GetRange().GetFrom();
1183  }
1184 
1185  // Plus strand
1186  if (!IsReverse(it1_last.GetStrand()) &&
1187  !IsReverse(it2_first.GetStrand()) &&
1188  it1_last.GetRange().GetToOpen() == it2_first.GetRange().GetFrom()) {
1189  return true;
1190  }
1191  // Minus strand
1192  if (IsReverse(it1_last.GetStrand()) &&
1193  IsReverse(it2_first.GetStrand()) &&
1194  it1_last.GetRange().GetFrom() == it2_first.GetRange().GetToOpen()) {
1195  return true;
1196  }
1197 
1198  return false;
1199 }
1200 
1201 
1203  const CSeq_loc& you,
1204  CScope* scope)
1205 {
1206  TSynMap syns;
1207  return s_CompareOverlapping(me, you, syns, scope);
1208 }
1209 
1210 
1212  const CSeq_loc& loc2,
1213  CScope* scope,
1215 {
1216  TSynMap syns;
1217 
1218  bool abut = false;
1219  ECompare ret = eNoOverlap;
1220 
1221  if (flags & fCompareAbutting) {
1222  abut = s_CheckAbutting(loc1, loc2, syns, scope,
1225  }
1226  if (flags & fCompareOverlapping) {
1227  ret = s_CompareOverlapping(loc1, loc2, syns, scope);
1228  }
1229  if ( abut ) {
1230  if (ret == eNoOverlap) {
1231  ret = eAbutting;
1232  }
1233  else {
1234  ret = eAbutAndOverlap;
1235  }
1236  }
1237  return ret;
1238 }
1239 
1240 
1241 /////////////////////////////////////////////////////////////////////
1242 //
1243 // Implementation of TestForOverlap()
1244 //
1245 
1246 bool s_Test_Strands(ENa_strand strand1, ENa_strand strand2)
1247 {
1248  // Check strands. Overlapping rules for strand:
1249  // - equal strands overlap
1250  // - "both" overlaps with any other
1251  // - "unknown" overlaps with any other except "minus"
1252  // - "other" indicates mixed strands and needs a closer look
1253  if (strand1 == eNa_strand_other || strand2 == eNa_strand_other) {
1254  return false;
1255  }
1256  return strand1 == strand2
1257  || strand1 == eNa_strand_both
1258  || strand2 == eNa_strand_both
1259  || (strand1 == eNa_strand_unknown && strand2 != eNa_strand_minus)
1260  || (strand2 == eNa_strand_unknown && strand1 != eNa_strand_minus);
1261 }
1262 
1263 
1264 inline
1266 {
1267  return x < 0 ? -x : x;
1268 }
1269 
1270 
1271 typedef pair<TRangeInfo, TRangeInfo> TRangeInfoByStrand;
1273 
1274 // Convert the seq-loc to TTotalRangeInfoMap, which stores total
1275 // ranges by id and strand. The id map is used to normalize ids.
1277  TTotalRangeInfoMap& infos,
1278  TSynMap& syns,
1279  CScope* scope)
1280 {
1281  CSeq_loc_CI it(loc,
1283  for ( ; it; ++it) {
1284  TRangeInfo rg;
1285  if ( it.IsWhole() ) {
1286  rg.SetOpen(0, GetLength(it.GetSeq_id(), scope));
1287  }
1288  else {
1289  rg.SetOpen(it.GetRange().GetFrom(), it.GetRange().GetToOpen());
1290  }
1291  CSeq_id_Handle id = s_GetSynHandle(it.GetSeq_id_Handle(), syns, scope);
1292  if ( IsReverse(it.GetStrand()) ) {
1293  infos[id].second.CombineWith(rg);
1294  }
1295  else {
1296  infos[id].first.CombineWith(rg);
1297  }
1298  }
1299 }
1300 
1301 
1302 int TestForOverlap(const CSeq_loc& loc1,
1303  const CSeq_loc& loc2,
1305  TSeqPos circular_len,
1306  CScope* scope)
1307 {
1308  Int8 ret = TestForOverlap64(loc1, loc2, type, circular_len, scope);
1309  return ret <= kMax_Int ? int(ret) : kMax_Int;
1310 }
1311 
1312 
1313 typedef pair<TRangeInfoList, TRangeInfoList> TRangeInfoListByStrand;
1315 
1316 // Convert the seq-loc to TRangeInfos for each strand. The id map is used to
1317 // normalize ids.
1319  TRangeInfoMapByStrand& infos,
1320  TSynMap& syns,
1321  CScope* scope)
1322 {
1323  CSeq_loc_CI it(loc,
1325  for ( ; it; ++it) {
1326  TRangeInfo info;
1327  if ( it.IsWhole() ) {
1328  info.SetOpen(0, GetLength(it.GetSeq_id(), scope));
1329  }
1330  else {
1331  info.SetOpen(it.GetRange().GetFrom(), it.GetRange().GetToOpen());
1332  }
1333  CSeq_id_Handle id = s_GetSynHandle(it.GetSeq_id_Handle(), syns, scope);
1334  if (it.IsSetStrand() && IsReverse(it.GetStrand())) {
1335  infos[id].second.push_back(info);
1336  }
1337  else {
1338  infos[id].first.push_back(info);
1339  }
1340  }
1342  info->second.first.sort();
1343  info->second.second.sort();
1344  }
1345 }
1346 
1347 
1349 {
1350  bool circular;
1352 };
1353 
1355 
1357  TTopologyMap& topologies,
1359  CScope* scope)
1360 {
1361  TTopologyMap::const_iterator found = topologies.find(idh);
1362  if (found != topologies.end()) {
1363  return found->second;
1364  }
1366  info.circular = false;
1367  info.length = kInvalidSeqPos;
1368  if ( scope ) {
1369  CBioseq_Handle bh = scope->GetBioseqHandle(idh);
1370  if ( bh ) {
1371  if ((flags & fOverlap_IgnoreTopology) == 0) {
1372  info.circular = (bh.IsSetInst_Topology() &&
1374  }
1375  info.length = bh.GetBioseqLength();
1376  }
1377  }
1378  topologies[idh] = info;
1379  return info;
1380 }
1381 
1382 
1383 // Convert the seq-loc to TRangeInfos using extremes for each bioseq,
1384 // strand and ordered set of ranges. The id map is used to normalize ids.
1386  TRangeInfoMapByStrand& infos,
1387  TSynMap& syns,
1388  TTopologyMap& topologies,
1390  CScope* scope)
1391 {
1392  CSeq_loc_CI it(loc,
1394  if ( !it ) return;
1395 
1396  CSeq_id_Handle last_id = s_GetSynHandle(it.GetSeq_id_Handle(), syns, scope);
1397  TRangeInfo last_rg;
1398  bool last_reverse = it.IsSetStrand() && IsReverse(it.GetStrand());
1399  // In case of circular bioseq allow to cross zero only once.
1400  bool crossed_zero = false;
1401 
1402  TRangeInfo total_range;
1403  for ( ; it; ++it) {
1404  CSeq_id_Handle id = s_GetSynHandle(it.GetSeq_id_Handle(), syns, scope);
1405  TRangeInfo rg = it.GetRange();
1406  STopologyInfo topo =
1407  s_GetTopology(id, topologies, flags, scope);
1408  bool reverse = it.IsSetStrand() && IsReverse(it.GetStrand());
1409  bool break_range = reverse != last_reverse || id != last_id;
1410 
1411  bool bad_order = false;
1412  // Don't try to check the order if id or strand have just changed.
1413  // Also don't check it for the first range.
1414  if ( !break_range && !last_rg.Empty() ) {
1415  if ( reverse ) {
1416  if (rg.GetFrom() > last_rg.GetFrom()) {
1417  bad_order = true;
1418  if ( topo.circular ) {
1419  if ( !crossed_zero ) {
1420  total_range.SetFrom(0);
1421  }
1422  crossed_zero = true;
1423  }
1424  }
1425  }
1426  else {
1427  if (rg.GetFrom() < last_rg.GetFrom()) {
1428  bad_order = true;
1429  if ( topo.circular ) {
1430  if ( !crossed_zero ) {
1431  total_range.SetToOpen(topo.length != kInvalidSeqPos ?
1433  crossed_zero = true;
1434  }
1435  }
1436  }
1437  }
1438  }
1439  if (break_range || bad_order) {
1440  // Push the next total range, start the new one
1441  if ( last_reverse ) {
1442  infos[last_id].second.push_back(total_range);
1443  }
1444  else {
1445  infos[last_id].first.push_back(total_range);
1446  }
1447  total_range = TRangeInfo::GetEmpty();
1448  if ( crossed_zero ) {
1449  if ( reverse ) {
1450  rg.SetToOpen(topo.length != kInvalidSeqPos ?
1452  }
1453  else {
1454  rg.SetFrom(0);
1455  }
1456  }
1457  crossed_zero = false;
1458  }
1459  last_rg = rg;
1460  total_range.CombineWith(rg);
1461  last_id = id;
1462  last_reverse = reverse;
1463  }
1464  if ( !total_range.Empty() ) {
1465  if ( last_reverse ) {
1466  infos[last_id].second.push_back(total_range);
1467  }
1468  else {
1469  infos[last_id].first.push_back(total_range);
1470  }
1471  }
1473  info->second.first.sort();
1474  info->second.second.sort();
1475  }
1476 }
1477 
1478 
1480  const TRangeInfoList& ranges2)
1481 {
1482  Int8 diff = 0;
1483  ITERATE(TRangeInfoList, rg_it1, ranges1) {
1484  TRangeInfo rg = *rg_it1;
1485  ITERATE(TRangeInfoList, rg_it2, ranges2) {
1486  if (rg_it2->GetFrom() > rg.GetTo()) break;
1487  if ( !rg.IntersectingWith(*rg_it2) ) continue;
1488  if (rg_it2->GetFrom() > rg.GetFrom()) {
1489  diff += static_cast<Int8>(rg_it2->GetFrom() - rg.GetFrom());
1490  }
1491  if (rg_it2->GetTo() < rg.GetTo()) {
1492  rg.SetFrom(rg_it2->GetToOpen());
1493  }
1494  else {
1495  rg = TRangeInfo::GetEmpty();
1496  break;
1497  }
1498  }
1499  if (rg.IsWhole()) return numeric_limits<Int8>::max();
1500  diff += rg.GetLength();
1501  }
1502  return diff;
1503 }
1504 
1505 
1506 // Calculate sum of all subranges from ranges1 not covered by ranges2.
1508  const TRangeInfoMapByStrand& ranges2)
1509 {
1510  Int8 diff = 0;
1511  ITERATE(TRangeInfoMapByStrand, id_it1, ranges1) {
1512  TRangeInfoMapByStrand::const_iterator id_it2 = ranges2.find(id_it1->first);
1513  if (id_it2 != ranges2.end()) {
1514  Int8 diff_plus = s_GetUncoveredLength(id_it1->second.first, id_it2->second.first);
1515  Int8 diff_minus = s_GetUncoveredLength(id_it1->second.second, id_it2->second.second);
1516  if (diff_plus == numeric_limits<Int8>::max()) return diff_plus;
1517  if (diff_minus == numeric_limits<Int8>::max()) return diff_minus;
1518  diff += diff_plus + diff_minus;
1519  }
1520  else {
1521  ITERATE(TRangeInfoList, rg_it, id_it1->second.first) {
1522  if (rg_it->IsWhole()) return numeric_limits<Int8>::max();
1523  diff += rg_it->GetLength();
1524  }
1525  ITERATE(TRangeInfoList, rg_it, id_it1->second.second) {
1526  if (rg_it->IsWhole()) return numeric_limits<Int8>::max();
1527  diff += rg_it->GetLength();
1528  }
1529  }
1530  }
1531  return diff;
1532 }
1533 
1534 
1535 bool s_Test_Subset(const CSeq_loc& loc1,
1536  const CSeq_loc& loc2,
1537  CScope* scope)
1538 {
1539  TSynMap syns;
1540  TRangeInfoMapByStrand rm1, rm2;
1541  s_SeqLocToRangeInfoMapByStrand(loc1, rm1, syns, scope);
1542  s_SeqLocToRangeInfoMapByStrand(loc2, rm2, syns, scope);
1543  ITERATE(TRangeInfoMapByStrand, id_it2, rm2) {
1544  // For each id from loc2 find an entry in loc1.
1545  TRangeInfoMapByStrand::iterator id_it1 = rm1.find(id_it2->first);
1546  if (id_it1 == rm1.end()) {
1547  return false; // unmatched id in loc2
1548  }
1549  const TRangeInfoList& rglist1_plus = id_it1->second.first;
1550  const TRangeInfoList& rglist1_minus = id_it1->second.second;
1551  const TRangeInfoList& rglist2_plus = id_it2->second.first;
1552  const TRangeInfoList& rglist2_minus = id_it2->second.second;
1553  ITERATE(TRangeInfoList, it2, rglist2_plus) {
1554  bool contained = false;
1555  ITERATE(TRangeInfoList, it1, rglist1_plus) {
1556  // Already missed the rage on loc2?
1557  if (!contained && it1->GetFrom() > it2->GetFrom()) {
1558  return false;
1559  }
1560  // found a contaning range?
1561  if (it1->IsWhole() ||
1562  (it1->GetFrom() <= it2->GetFrom() &&
1563  it1->GetTo() >= it2->GetTo())) {
1564  contained = true;
1565  break;
1566  }
1567  }
1568  if ( !contained ) return false;
1569  }
1570  ITERATE(TRangeInfoList, it2, rglist2_minus) {
1571  bool contained = false;
1572  ITERATE(TRangeInfoList, it1, rglist1_minus) {
1573  // Already missed the rage on loc2?
1574  if (!contained && it1->GetFrom() > it2->GetFrom()) {
1575  return false;
1576  }
1577  // found a contaning range?
1578  if (it1->IsWhole() ||
1579  (it1->GetFrom() <= it2->GetFrom() &&
1580  it1->GetTo() >= it2->GetTo())) {
1581  contained = true;
1582  break;
1583  }
1584  }
1585  if ( !contained ) return false;
1586  }
1587  }
1588  return true;
1589 }
1590 
1591 
1593  CSeq_loc_CI it2,
1594  bool minus_strand,
1595  CScope* scope,
1596  bool single_id)
1597 {
1598  // Check intervals one by one
1599  while ( it1 && it2 ) {
1600  bool same_it_id = single_id;
1601  if ( !same_it_id ) {
1602  if ( !IsSameBioseq(it1.GetSeq_id(), it2.GetSeq_id(), scope) ) {
1603  return false;
1604  }
1605  }
1606  if ( !s_Test_Strands(it1.GetStrand(), it2.GetStrand()) ) {
1607  return false;
1608  }
1609  if ( minus_strand ) {
1610  if (it1.GetRange().GetFrom() != it2.GetRange().GetFrom() ) {
1611  // The last interval from loc2 may be shorter than the
1612  // current interval from loc1
1613  if (it1.GetRange().GetFrom() > it2.GetRange().GetFrom() ||
1614  ++it2) {
1615  return false;
1616  }
1617  break;
1618  }
1619  }
1620  else {
1621  if (it1.GetRange().GetTo() != it2.GetRange().GetTo() ) {
1622  // The last interval from loc2 may be shorter than the
1623  // current interval from loc1
1624  if (it1.GetRange().GetTo() < it2.GetRange().GetTo() ||
1625  ++it2) {
1626  return false;
1627  }
1628  break;
1629  }
1630  }
1631  // Go to the next interval start
1632  if ( !(++it2) ) {
1633  break;
1634  }
1635  if ( !(++it1) ) {
1636  return false; // loc1 has not enough intervals
1637  }
1638  if ( minus_strand ) {
1639  if (it1.GetRange().GetTo() != it2.GetRange().GetTo()) {
1640  return false;
1641  }
1642  }
1643  else {
1644  if (it1.GetRange().GetFrom() != it2.GetRange().GetFrom()) {
1645  return false;
1646  }
1647  }
1648  }
1649  return true;
1650 }
1651 
1652 
1653 // Test for overlap using extremes rather than ranges (simple, contained).
1654 // Used for multi-id, multi-strand and out-of-order locations.
1656  const CSeq_loc& loc2,
1658  TSynMap& syns,
1659  TTopologyMap& topologies,
1661  CScope* scope)
1662 {
1663  // Here we accept only two overlap types.
1665 
1666  TRangeInfoMapByStrand ranges1, ranges2;
1667 
1668  s_SeqLocToTotalRangesInfoMapByStrand(loc1, ranges1, syns, topologies, flags, scope);
1669  s_SeqLocToTotalRangesInfoMapByStrand(loc2, ranges2, syns, topologies, flags, scope);
1670 
1671  bool overlap = false;
1672  ITERATE(TRangeInfoMapByStrand, id_it2, ranges2) {
1673  TRangeInfoMapByStrand::const_iterator id_it1 = ranges1.find(id_it2->first);
1674  if (id_it1 == ranges1.end()) {
1675  if (type == eOverlap_Contained) {
1676  // loc2 has parts not contained in loc1
1677  return -1;
1678  }
1679  else { // eOverlap_Simple
1680  continue; // next id_it2
1681  }
1682  }
1683  // Found the same id in loc1 - check ranges.
1684  // Plus strand
1685  ITERATE(TRangeInfoList, rg_it2, id_it2->second.first) {
1686  bool contained = false;
1687  ITERATE(TRangeInfoList, rg_it1, id_it1->second.first) {
1688  if ( !rg_it2->IntersectingWith(*rg_it1) ) {
1689  // Ranges are sorted, we can stop as soon as
1690  // we go beyond the right end of rg2.
1691  if (rg_it2->GetTo() < rg_it1->GetFrom()) break;
1692  continue;
1693  }
1694  overlap = true;
1695  if (type == eOverlap_Contained) {
1696  if (rg_it2->GetFrom() >= rg_it1->GetFrom() &&
1697  rg_it2->GetTo() <= rg_it1->GetTo()) {
1698  contained = true;
1699  break;
1700  }
1701  }
1702  else { // eOverlap_Simple
1703  break; // found overlap, go to next range from loc2
1704  }
1705  }
1706  if (type == eOverlap_Contained) {
1707  if ( !contained ) return -1;
1708  }
1709  else if ( overlap ) break;
1710  }
1711  // Munis strand
1712  ITERATE(TRangeInfoList, rg_it2, id_it2->second.second) {
1713  bool contained = false;
1714  ITERATE(TRangeInfoList, rg_it1, id_it1->second.second) {
1715  if ( !rg_it2->IntersectingWith(*rg_it1) ) {
1716  // Ranges are sorted, we can stop as soon as
1717  // we go beyond the right end of rg2.
1718  if (rg_it2->GetTo() < rg_it1->GetFrom()) break;
1719  continue;
1720  }
1721  overlap = true;
1722  if (type == eOverlap_Contained) {
1723  if (rg_it2->GetFrom() >= rg_it1->GetFrom() &&
1724  rg_it2->GetTo() <= rg_it1->GetTo()) {
1725  contained = true;
1726  break;
1727  }
1728  }
1729  else { // eOverlap_Simple
1730  break; // found overlap, go to next range from loc2
1731  }
1732  }
1733  if (type == eOverlap_Contained) {
1734  if ( !contained ) return -1;
1735  }
1736  else if ( overlap ) break;
1737  }
1738  }
1739 
1740  // Now it's time to calculate quality of the overlap. Take into
1741  // account that a single range from one location may contain/overlap
1742  // multiple ranges from another location.
1743  if (type == eOverlap_Contained) {
1744  // There should be no subranges in ranges2 not covered by ranges1.
1745  return s_GetUncoveredLength(ranges1, ranges2);
1746  }
1747  if (type == eOverlap_Simple && overlap) {
1748  Int8 diff1 = s_GetUncoveredLength(ranges1, ranges2);
1749  Int8 diff2 = s_GetUncoveredLength(ranges2, ranges1);
1750  if (diff1 == numeric_limits<Int8>::max()) return diff1;
1751  if (diff2 == numeric_limits<Int8>::max()) return diff2;
1752  return diff1 + diff2;
1753  }
1754  return -1;
1755 }
1756 
1757 
1759  const CSeq_loc& loc2,
1760  TSynMap& syns,
1761  TTopologyMap& topologies,
1763  CScope* scope)
1764 {
1765  TRangeInfoMapByStrand ranges1, ranges2;
1766 
1767  s_SeqLocToRangeInfoMapByStrand(loc1, ranges1, syns, scope);
1768  s_SeqLocToRangeInfoMapByStrand(loc2, ranges2, syns, scope);
1769 
1770  bool overlap = false;
1771  ITERATE(TRangeInfoMapByStrand, id_it1, ranges1) {
1772  TRangeInfoMapByStrand::const_iterator id_it2 = ranges2.find(id_it1->first);
1773  if (id_it2 == ranges2.end()) continue;
1774  // Plus strand ranges
1775  ITERATE(TRangeInfoList, rg_it1, id_it1->second.first) {
1776  ITERATE(TRangeInfoList, rg_it2, id_it2->second.first) {
1777  if ( rg_it1->IntersectingWith(*rg_it2) ) {
1778  overlap = true;
1779  break;
1780  }
1781  }
1782  if ( overlap ) break;
1783  }
1784  if ( overlap ) break;
1785  // Minus strand ranges
1786  ITERATE(TRangeInfoList, rg_it1, id_it1->second.second) {
1787  ITERATE(TRangeInfoList, rg_it2, id_it2->second.second) {
1788  if ( rg_it1->IntersectingWith(*rg_it2) ) {
1789  overlap = true;
1790  break;
1791  }
1792  }
1793  if ( overlap ) break;
1794  }
1795  if ( overlap ) break;
1796  }
1797 
1798  if ( !overlap ) return -1;
1799 
1800  ranges1.clear();
1801  ranges2.clear();
1803  syns, topologies, flags, scope);
1805  syns, topologies, flags, scope);
1806 
1807  Int8 diff1 = s_GetUncoveredLength(ranges1, ranges2);
1808  Int8 diff2 = s_GetUncoveredLength(ranges2, ranges1);
1809  if (diff1 == numeric_limits<Int8>::max()) return diff1;
1810  if (diff2 == numeric_limits<Int8>::max()) return diff2;
1811  return diff1 + diff2;
1812 }
1813 
1814 
1816  const CSeq_loc& loc2,
1819  TSeqPos circular_len,
1820  CScope* scope)
1821 {
1822  if (circular_len == 0) {
1823  circular_len = kInvalidSeqPos;
1824  }
1825  // Do not allow conflicting values.
1826  if (circular_len != kInvalidSeqPos && (flags & fOverlap_IgnoreTopology)) {
1827  NCBI_THROW(CObjmgrUtilException, eBadSequenceType,
1828  "Circular length can not be combined with no-topology flag.");
1829  }
1830 
1831  const CSeq_loc* ploc1 = &loc1;
1832  const CSeq_loc* ploc2 = &loc2;
1833  const CSeq_id *id1 = NULL;
1834  const CSeq_id *id2 = NULL;
1835  id1 = loc1.GetId();
1836  id2 = loc2.GetId();
1837  TSynMap syns;
1838  TTopologyMap topologies;
1839  bool single_seq = true;
1840 
1841  // Get seq-ids. They should be cached by GetTotalRange() above and should
1842  // not be null.
1843  if (id1 && id2) {
1844  if ( !IsSameBioseq(*id1, *id2, scope) ) return -1;
1845  // Use known id and topology if there's just one sequence.
1848  syns[idh1] = idh1;
1849  if (idh2 != idh1) {
1850  syns[idh2] = idh1;
1851  }
1852  if (circular_len != kInvalidSeqPos) {
1853  STopologyInfo topo;
1854  topo.circular = true;
1855  topo.length = circular_len;
1856  topologies[idh1] = topo;
1857  }
1858  }
1859  else {
1860  if (flags & fOverlap_NoMultiSeq) {
1861  NCBI_THROW(CObjmgrUtilException, eBadLocation,
1862  "Multi-bioseq locations are disabled by the flags.");
1863  }
1864  // Multi-id locations - no circular_len allowed
1865  if ( circular_len != kInvalidSeqPos &&
1866  type != eOverlap_Subset ) {
1867  NCBI_THROW(CObjmgrUtilException, eBadLocation,
1868  "Circular bioseq length can not be specified "
1869  "for multi-bioseq locations.");
1870  }
1871  single_seq = false;
1872  }
1873 
1874  // Shortcut - if strands do not intersect, don't even look at the ranges.
1875  ENa_strand strand1 = GetStrand(loc1);
1876  ENa_strand strand2 = GetStrand(loc2);
1877  if ( !s_Test_Strands(strand1, strand2) ) {
1878  // For multi-seq strand is unknown rather than other -
1879  // can not use this test.
1880  if (single_seq &&
1881  strand1 != eNa_strand_other && strand2 != eNa_strand_other ) {
1882  // singular but incompatible strands
1883  return -1;
1884  }
1885  // There is a possibility of multiple strands that needs to be
1886  // checked too (if allowed by the flags).
1887  if (flags & fOverlap_NoMultiStrand) {
1888  NCBI_THROW(CObjmgrUtilException, eBadLocation,
1889  "Multi-strand locations are disabled by the flags.");
1890  }
1891  }
1892 
1893  switch (type) {
1894  case eOverlap_Simple:
1895  return s_Test_Extremes(loc1, loc2, eOverlap_Simple,
1896  syns, topologies, flags, scope);
1897  case eOverlap_Contains:
1898  swap(ploc1, ploc2);
1899  // Go on to the next case
1900  case eOverlap_Contained:
1901  return s_Test_Extremes(*ploc1, *ploc2, eOverlap_Contained,
1902  syns, topologies, flags, scope);
1903  case eOverlap_SubsetRev:
1904  swap(ploc1, ploc2);
1905  // continue to eOverlap_Subset case
1906  case eOverlap_Subset:
1907  if ( !s_Test_Subset(*ploc1, *ploc2, scope) ) return -1;
1908  return Int8(GetCoverage(*ploc1, scope)) -
1909  Int8(GetCoverage(*ploc2, scope));
1910  case eOverlap_CheckIntRev:
1911  swap(ploc1, ploc2);
1912  // Go on to the next case
1914  {
1915  // Check intervals' boundaries.
1916  CSeq_loc_CI it1(*ploc1);
1917  CSeq_loc_CI it2(*ploc2);
1918  if (!it1 || !it2) {
1919  return -1;
1920  }
1921  TSeqPos loc2start = it2.GetRange().GetFrom();
1922  TSeqPos loc2end = it2.GetRange().GetTo();
1923  bool loc2rev = it2.GetStrand() == eNa_strand_minus;
1924  bool single_id = (id1 && id2);
1925  for ( ; it1; ++it1) {
1926  // If there are multiple ids per seq-loc, check each pair.
1927  if ( !single_id ) {
1928  if ( !IsSameBioseq(it1.GetSeq_id(), it2.GetSeq_id(),
1929  scope) ) continue;
1930  }
1931  // Find the first range in loc1 containing the first range
1932  // of loc2. s_Test_CheckIntervals will do the rest.
1933  if (it1.GetRange().GetFrom() <= loc2start &&
1934  it1.GetRange().GetTo() >= loc2end &&
1935  s_Test_CheckIntervals(it1, it2, loc2rev, scope, single_id)) {
1936  // GetLength adds up all strands/seqs, but for this overlap
1937  // type it's ok.
1938  return Int8(GetLength(*ploc1, scope)) -
1939  Int8(GetLength(*ploc2, scope));
1940  }
1941  }
1942  return -1;
1943  }
1944  case eOverlap_Interval:
1945  return s_Test_Interval(loc1, loc2, syns, topologies, flags, scope);
1946  }
1947  return -1;
1948 }
1949 
1950 
1952  const CSeq_loc& loc2,
1954  TSeqPos circular_len,
1955  CScope* scope)
1956 {
1957  return s_TestForOverlapEx(loc1, loc2, type, fOverlap_Default, circular_len, scope);
1958 }
1959 
1960 
1962  const CSeq_loc& loc2,
1964  CScope* scope,
1966 {
1967  return s_TestForOverlapEx(loc1, loc2, type, flags, kInvalidSeqPos, scope);
1968 }
1969 
1970 
1971 /////////////////////////////////////////////////////////////////////
1972 //
1973 // Seq-loc operations
1974 //
1975 
1976 class CDefaultSynonymMapper : public ISynonymMapper
1977 {
1978 public:
1979  CDefaultSynonymMapper(CScope* scope);
1980  virtual ~CDefaultSynonymMapper(void);
1981 
1982  virtual CSeq_id_Handle GetBestSynonym(const CSeq_id& id);
1983 
1984 private:
1986 
1990 };
1991 
1992 
1994  : m_IdMapper(CSeq_id_Mapper::GetInstance()),
1995  m_Scope(scope)
1996 {
1997  return;
1998 }
1999 
2000 
2002 {
2003  return;
2004 }
2005 
2006 
2008 {
2010  if ( !m_Scope || id.Which() == CSeq_id::e_not_set ) {
2011  return idh;
2012  }
2013  TSynonymMap::iterator id_syn = m_SynMap.find(idh);
2014  if (id_syn != m_SynMap.end()) {
2015  return id_syn->second;
2016  }
2017  CSeq_id_Handle best;
2018  int best_rank = CSeq_id::kMaxScore;
2020 #ifdef _DEBUG
2021  TGi gi = ZERO_GI;
2022 #endif
2023  ITERATE(CSynonymsSet, syn_it, *syn_set) {
2024  CSeq_id_Handle synh = syn_set->GetSeq_id_Handle(syn_it);
2025 #ifdef _DEBUG
2026  if (synh.IsGi()) {
2027  gi = synh.GetGi();
2028  }
2029 #endif
2030  int rank = synh.GetSeqId()->BestRankScore();
2031  if (rank < best_rank) {
2032  best = synh;
2033  best_rank = rank;
2034  }
2035  }
2036  if ( !best ) {
2037  // Synonyms set was empty
2038  m_SynMap[idh] = idh;
2039  return idh;
2040  }
2041  ITERATE(CSynonymsSet, syn_it, *syn_set) {
2042  m_SynMap[syn_set->GetSeq_id_Handle(syn_it)] = best;
2043  }
2044 #ifdef _DEBUG
2045  const CTextseq_id* txt_id = best.GetSeqId()->GetTextseq_Id();
2046  if (txt_id && !txt_id->IsSetVersion() && gi != ZERO_GI) {
2047  ERR_POST("Using version-less accession " << txt_id->GetAccession()
2048  << " instead of GI " << gi);
2049  }
2050 #endif
2051  return best;
2052 }
2053 
2054 
2056 {
2057 public:
2058  CDefaultLengthGetter(CScope* scope);
2059  virtual ~CDefaultLengthGetter(void);
2060 
2061  virtual TSeqPos GetLength(const CSeq_id& id);
2062 
2063 protected:
2065 };
2066 
2067 
2069  : m_Scope(scope)
2070 {
2071  return;
2072 }
2073 
2074 
2076 {
2077  return;
2078 }
2079 
2080 
2082 {
2083  if (id.Which() == CSeq_id::e_not_set) {
2084  return 0;
2085  }
2086  CBioseq_Handle bh;
2087  if ( m_Scope ) {
2088  bh = m_Scope->GetBioseqHandle(id);
2089  }
2090  if ( !bh ) {
2092  "Can not get length of whole location");
2093  }
2094  return bh.GetBioseqLength();
2095 }
2096 
2097 
2100  CScope* scope)
2101 {
2102  CDefaultSynonymMapper syn_mapper(scope);
2103  return loc.Merge(flags, &syn_mapper);
2104 }
2105 
2106 
2108  const CSeq_loc& loc2,
2110  CScope* scope)
2111 {
2112  CDefaultSynonymMapper syn_mapper(scope);
2113  return loc1.Add(loc2, flags, &syn_mapper);
2114 }
2115 
2116 
2118  const CSeq_loc& loc2,
2120  CScope* scope)
2121 {
2122  CDefaultSynonymMapper syn_mapper(scope);
2123  CDefaultLengthGetter len_getter(scope);
2124  return loc1.Subtract(loc2, flags, &syn_mapper, &len_getter);
2125 }
2126 
2127 
2128 END_SCOPE(sequence)
static CRef< CScope > m_Scope
bool IsReverse(ENa_strand s)
Definition: Na_strand.hpp:75
ESeqLocExtremes
Used to determine the meaning of a location's Start/Stop positions.
Definition: Na_strand.hpp:61
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
vector< TRangeWithFuzz > TRanges
Definition: Seq_loc.cpp:4277
CLocalRange< TOffset > TRange
define for the fundamental building block of sequence ranges
Definition: base.hpp:115
CBioseq_Handle –.
CDefaultLengthGetter(CScope *scope)
virtual ~CDefaultLengthGetter(void)
virtual TSeqPos GetLength(const CSeq_id &id)
virtual ~CDefaultSynonymMapper(void)
CDefaultSynonymMapper(CScope *scope)
map< CSeq_id_Handle, CSeq_id_Handle > TSynonymMap
CRef< CSeq_id_Mapper > m_IdMapper
virtual CSeq_id_Handle GetBestSynonym(const CSeq_id &id)
Exceptions for objmgr/util library.
TSeqPos GetLength(void) const
Wraps up any functionality needed that might be outside the scope of this library.
CScope –.
Definition: scope.hpp:92
TSeqPos GetLength(void) const
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeq_loc_Mapper –.
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
Template class for iteration on objects of class C (non-medifiable version)
Definition: iterator.hpp:767
Template class for iteration on objects of class C.
Definition: iterator.hpp:673
Interface for getting bioseq length.
Definition: Seq_loc.hpp:420
Interface for mapping IDs to the best synonym.
Definition: Seq_loc.hpp:408
size_type size() const
Definition: map.hpp:148
container_type::iterator iterator
Definition: map.hpp:54
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
static uch flags
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define ZERO_GI
Definition: ncbimisc.hpp:1088
#define NULL
Definition: ncbistd.hpp:225
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
CConstRef< CSeq_id > GetSeqId(void) const
int BestRankScore(void) const
Definition: Seq_id.hpp:754
bool IsGi(void) const
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1065
static int WorstRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:776
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
static int BestRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:774
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
TGi GetGi(void) const
@ kMaxScore
Definition: Seq_id.hpp:733
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
CRef< CSeq_loc > Merge(TOpFlags flags, ISynonymMapper *syn_mapper) const
All functions create and return a new seq-loc object.
Definition: Seq_loc.cpp:5037
bool IsSetStrand(void) const
Get strand.
Definition: Seq_loc.hpp:1049
CSeq_id_Handle GetSeq_id_Handle(void) const
Definition: Seq_loc.hpp:1035
bool IsWhole(void) const
True if the current location is a whole sequence.
Definition: Seq_loc.hpp:1077
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
Definition: Seq_loc.cpp:3875
CRef< CSeq_loc > Subtract(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper, ILengthGetter *len_getter) const
Subtract seq-loc from this, merge/sort resulting ranges depending on flags.
Definition: Seq_loc.cpp:5087
const CSeq_loc & GetEmbeddingSeq_loc(void) const
Get the nearest seq-loc containing the current range.
Definition: Seq_loc.cpp:2573
bool IsEmpty(void) const
True if the current location is empty.
Definition: Seq_loc.hpp:1084
void SetPos(size_t pos)
Set iterator's position.
Definition: Seq_loc.cpp:2642
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
size_t GetSize(void) const
Get number of ranges.
Definition: Seq_loc.cpp:2636
TRange GetRange(void) const
Get the range.
Definition: Seq_loc.hpp:1042
ENa_strand GetStrand(void) const
Definition: Seq_loc.hpp:1056
const CSeq_id & GetSeq_id(void) const
Get seq_id of the current location.
Definition: Seq_loc.hpp:1028
int TOpFlags
Definition: Seq_loc.hpp:336
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
@ eOrder_Biological
Iterate sub-locations in positional order.
Definition: Seq_loc.hpp:462
@ eOrder_Positional
Definition: Seq_loc.hpp:461
@ eEmpty_Allow
ignore empty locations
Definition: Seq_loc.hpp:458
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
Definition: iterator.hpp:1012
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
Definition: iterator.hpp:1004
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
bool BadSeqLocSortOrder(const CBioseq_Handle &bsh, const CSeq_loc &loc)
Returns true if the order of Seq_locs is bad, otherwise, false.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ESeqLocCheck
SeqLocCheck results.
EOverlapType
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
bool IsValid(const CSeq_point &pt, CScope *scope)
Checks that point >= 0 and point < length of Bioseq.
Int8 TestForOverlapEx(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, CScope *scope, TOverlapFlags flags)
Updated version of TestForOverlap64().
int TOverlapFlags
TSeqPos LocationOffset(const CSeq_loc &outer, const CSeq_loc &inner, EOffsetType how, CScope *scope)
returns (TSeqPos)-1 if the locations don't overlap
int TCompareFlags
void ChangeSeqLocId(CSeq_loc *loc, bool best, CScope *scope)
Change each of the CSeq_ids embedded in a CSeq_loc to the best or worst CSeq_id accoring to the value...
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
int SeqLocPartialCheck(const CSeq_loc &loc, CScope *scope)
ECompare Compare(const CSeq_loc &me, const CSeq_loc &you, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
TSeqPos GetCoverage(const CSeq_loc &loc, CScope *scope)
Get number of unique bases in the location.
CRef< CSeq_loc > Seq_loc_Subtract(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Subtract the second seq-loc from the first one.
Int8 TestForOverlap64(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, TSeqPos circular_len, CScope *scope)
64-bit version of TestForOverlap() Check if the two locations have ovarlap of a given type.
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
void ChangeSeqId(CSeq_id *id, bool best, CScope *scope)
Change a CSeq_id to the one for the CBioseq that it represents that has the best rank or worst rank a...
CRef< CSeq_loc > Seq_loc_Merge(const CSeq_loc &loc, CSeq_loc::TOpFlags flags, CScope *scope)
Merge ranges in the seq-loc.
bool IsOneBioseq(const CSeq_loc &loc, CScope *scope)
Returns true if all embedded CSeq_ids represent the same CBioseq, else false.
ECompare
CRef< CSeq_loc > Seq_loc_Add(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Add two seq-locs.
int TestForOverlap(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, TSeqPos circular_len, CScope *scope)
Calls TestForOverlap64() and if the result is greater than kMax_Int truncates it to kMax_Int.
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag)
Determines if two CSeq_ids represent the same CBioseq.
EOffsetType
CSeq_loc * SeqLocRevCmpl(const CSeq_loc &loc, CScope *)
Get reverse complement of the seq-loc (?)
ESeqLocCheck SeqLocCheck(const CSeq_loc &loc, CScope *scope)
Checks that a CSeq_loc is all on one strand on one CBioseq.
@ eSeqlocPartial_Nostart
@ eSeqlocPartial_Nostop
@ eSeqlocPartial_Internal
@ eSeqlocPartial_Nointernal
@ eSeqlocPartial_Other
@ eSeqlocPartial_Stop
@ eSeqlocPartial_Limwrong
@ eSeqlocPartial_Start
@ fCompareAbutting
Check if seq-locs are abutting (loc2 follows loc1)
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ fComparePositional
Use positional coordinates (ignore strands) when looking for abutting locations.
@ eSeqLocCheck_ok
@ eSeqLocCheck_error
@ eSeqLocCheck_warning
@ eOverlap_SubsetRev
1st is a subset of 2nd ranges
@ eOverlap_CheckIntervals
2nd is a subset of 1st with matching boundaries
@ eOverlap_Contains
2nd contains 1st extremes
@ eOverlap_CheckIntRev
1st is a subset of 2nd with matching boundaries
@ eOverlap_Simple
any overlap of extremes
@ eOverlap_Interval
at least one pair of intervals must overlap
@ eOverlap_Contained
2nd contained within 1st extremes
@ eOverlap_Subset
2nd is a subset of 1st ranges
@ eContains
First CSeq_loc contains second.
@ eOverlap
CSeq_locs overlap.
@ eSame
CSeq_locs contain each other.
@ eAbutting
Abutting seq-locs.
@ eContained
First CSeq_loc contained by second.
@ eAbutAndOverlap
Seq-locs do both abut and overlap.
@ eNoOverlap
CSeq_locs do not overlap or abut.
@ fOverlap_NoMultiSeq
Throw if locations reference multiple bioseqs.
@ fOverlap_NoMultiStrand
Throw if locations reference multiple strands.
@ fOverlap_IgnoreTopology
Ignore sequence topology (circularity)
@ fOverlap_Default
Enable multi-id, multi-strand, check topology.
@ eOffset_FromLeft
relative to low-numbered end
@ eOffset_FromRight
relative to high-numbered end
@ eOffset_FromEnd
relative to end of location
@ eOffset_FromStart
For positive-orientation strands, start = left and end = right; for reverse-orientation strands,...
CBioseq_Handle GetBioseqFromSeqLoc(const CSeq_loc &loc, CScope &scope, CScope::EGetBioseqFlag flag=CScope::eGetBioseq_Loaded)
Retrieve the Bioseq Handle from a location.
Definition: sequence.cpp:308
TRanges m_Ranges
Definition: sequence.hpp:1149
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CConstRef< CSynonymsSet > GetSynonyms(const CSeq_id &id)
Get bioseq synonyms, resolving to the bioseq in this scope.
Definition: scope.cpp:486
EGetBioseqFlag
Definition: scope.hpp:125
bool IsSameBioseq(const CSeq_id_Handle &id1, const CSeq_id_Handle &id2, EGetBioseqFlag get_flag)
Check if two seq-ids are resolved to the same Bioseq.
Definition: scope.cpp:168
@ eSeqMap_Up
map from segments to the top level bioseq
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
TSeqPos GetBioseqLength(void) const
TInst_Topology GetInst_Topology(void) const
bool IsSetInst_Topology(void) const
#define kMax_Int
Definition: ncbi_limits.h:184
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
position_type GetLength(void) const
Definition: range.hpp:158
bool IntersectingWith(const TThisType &r) const
Definition: range.hpp:331
TThisType & CombineWith(const TThisType &r)
Definition: range.hpp:345
TThisType & SetToOpen(position_type toOpen)
Definition: range.hpp:175
position_type GetToOpen(void) const
Definition: range.hpp:138
static TThisType GetEmpty(void)
Definition: range.hpp:306
bool Empty(void) const
Definition: range.hpp:148
TThisType & SetOpen(position_type from, position_type toOpen)
Definition: range.hpp:184
bool IsWhole(void) const
Definition: range.hpp:284
static position_type GetWholeToOpen(void)
Definition: range.hpp:260
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsLim(void) const
Check if variant Lim is selected.
Definition: Int_fuzz_.hpp:636
TLim GetLim(void) const
Get the variant data.
Definition: Int_fuzz_.hpp:642
ELim
some limit value
Definition: Int_fuzz_.hpp:209
E_Choice Which(void) const
Which variant is currently selected.
Definition: Int_fuzz_.hpp:547
@ eLim_gt
greater than
Definition: Int_fuzz_.hpp:211
@ eLim_unk
unknown
Definition: Int_fuzz_.hpp:210
@ eLim_lt
less than
Definition: Int_fuzz_.hpp:212
const TFuzz_from & GetFuzz_from(void) const
Get the Fuzz_from member data.
const TB & GetB(void) const
Get the B member data.
Definition: Seq_bond_.hpp:243
list< CRef< CSeq_interval > > Tdata
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
const Tdata & Get(void) const
Get the member data.
TStrand GetStrand(void) const
Get the Strand member data.
const TId & GetId(void) const
Get the Id member data.
const TPnt & GetPnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:238
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
const TFuzz_to & GetFuzz_to(void) const
Get the Fuzz_to member data.
bool IsSetA(void) const
connection to a least one residue Check if a value has been assigned to A data member.
Definition: Seq_bond_.hpp:201
const TWhole & GetWhole(void) const
Get the variant data.
Definition: Seq_loc_.cpp:172
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
TFrom GetFrom(void) const
Get the From member data.
bool IsSetFuzz(void) const
Check if a value has been assigned to Fuzz data member.
Definition: Seq_point_.hpp:408
list< CRef< CSeq_loc > > Tdata
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
Definition: Seq_point_.hpp:420
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
const TId & GetId(void) const
Get the Id member data.
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_point_.hpp:390
TStrand GetStrand(void) const
Get the Strand member data.
Definition: Seq_point_.hpp:350
bool IsSetFuzz(void) const
Check if a value has been assigned to Fuzz data member.
const Tdata & Get(void) const
Get the member data.
const TPacked_pnt & GetPacked_pnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:260
vector< TSeqPos > TPoints
const TA & GetA(void) const
Get the A member data.
Definition: Seq_bond_.hpp:213
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
bool IsSetFuzz_to(void) const
Check if a value has been assigned to Fuzz_to data member.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
Definition: Seq_point_.hpp:331
const TPoints & GetPoints(void) const
Get the Points member data.
TStrand GetStrand(void) const
Get the Strand member data.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
bool IsNull(void) const
Check if variant Null is selected.
Definition: Seq_loc_.hpp:504
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
bool IsSetFuzz_from(void) const
Check if a value has been assigned to Fuzz_from data member.
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
const TMix & GetMix(void) const
Get the variant data.
Definition: Seq_loc_.cpp:282
bool IsSetB(void) const
other end may not be available Check if a value has been assigned to B data member.
Definition: Seq_bond_.hpp:231
const TPacked_int & GetPacked_int(void) const
Get the variant data.
Definition: Seq_loc_.cpp:216
const TAccession & GetAccession(void) const
Get the Accession member data.
const TBond & GetBond(void) const
Get the variant data.
Definition: Seq_loc_.cpp:326
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_other
Definition: Na_strand_.hpp:70
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
@ e_not_set
No variant selected.
Definition: Seq_id_.hpp:94
@ e_not_set
No variant selected.
Definition: Seq_loc_.hpp:97
@ e_Null
not placed
Definition: Seq_loc_.hpp:98
@ e_Equiv
equivalent sets of locations
Definition: Seq_loc_.hpp:106
@ e_Empty
to NULL one Seq-id in a collection
Definition: Seq_loc_.hpp:99
@ e_Feat
indirect, through a Seq-feat
Definition: Seq_loc_.hpp:108
@ e_Int
from to
Definition: Seq_loc_.hpp:101
@ e_Whole
whole sequence
Definition: Seq_loc_.hpp:100
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
Definition: MolInfo_.hpp:569
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
Definition: MolInfo_.hpp:594
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
@ eCompleteness_no_left
missing 5' or NH3 end
Definition: MolInfo_.hpp:158
@ eCompleteness_partial
partial but no details given
Definition: MolInfo_.hpp:157
@ eCompleteness_no_right
missing 3' or COOH end
Definition: MolInfo_.hpp:159
@ eCompleteness_no_ends
missing both ends
Definition: MolInfo_.hpp:160
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
Definition of all error codes used in objmgr libraries (xobjmgr.lib, xobjutil.lib and others).
int i
static MDB_envinfo info
Definition: mdb_load.c:37
T max(T x_, T y_)
CSeq_loc * GetReverseComplement(const CSeq_loc &loc, CReverseComplementHelper *helper)
Get reverse complement of the seq-loc (?).
bool s_Test_Strands(ENa_strand strand1, ENa_strand strand2)
void s_SeqLocToRangeInfoMap(const CSeq_loc &loc, TRangeInfoMap &infos, TSynMap &syns, CScope *scope)
ECompare s_CompareOverlapping(const CSeq_loc &me, const CSeq_loc &you, TSynMap &syns, CScope *scope)
bool s_CheckAbutting(const CSeq_loc &loc1, const CSeq_loc &loc2, TSynMap &syns, CScope *scope, ESeqLocExtremes ext)
CSeq_id_Handle s_GetSynHandle(CSeq_id_Handle id, TSynMap &syns, CScope *scope)
void SeqIntPartialCheck(const CSeq_interval &itv, unsigned int &retval, bool is_first, bool is_last, CScope &scope)
map< CSeq_id_Handle, TRangeInfoListByStrand > TRangeInfoMapByStrand
void s_SeqLocToRangeInfoMapByStrand(const CSeq_loc &loc, TRangeInfoMapByStrand &infos, TSynMap &syns, CScope *scope)
Int8 s_Test_Extremes(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, TSynMap &syns, TTopologyMap &topologies, TOverlapFlags flags, CScope *scope)
static ENa_strand s_GetStrand(const CSeq_loc &loc)
CRange< TSeqPos > TRangeInfo
STopologyInfo s_GetTopology(CSeq_id_Handle idh, TTopologyMap &topologies, TOverlapFlags flags, CScope *scope)
map< CSeq_id_Handle, STopologyInfo > TTopologyMap
Int8 s_Test_Interval(const CSeq_loc &loc1, const CSeq_loc &loc2, TSynMap &syns, TTopologyMap &topologies, TOverlapFlags flags, CScope *scope)
list< TRangeInfo > TRangeInfoList
void s_SeqLocToTotalRangeInfoMap(const CSeq_loc &loc, TTotalRangeInfoMap &infos, TSynMap &syns, CScope *scope)
bool s_Test_Subset(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
pair< TRangeInfoList, TRangeInfoList > TRangeInfoListByStrand
void s_SeqLocToTotalRangesInfoMapByStrand(const CSeq_loc &loc, TRangeInfoMapByStrand &infos, TSynMap &syns, TTopologyMap &topologies, TOverlapFlags flags, CScope *scope)
map< CSeq_id_Handle, TRangeInfoByStrand > TTotalRangeInfoMap
pair< TRangeInfo, TRangeInfo > TRangeInfoByStrand
bool s_Test_CheckIntervals(CSeq_loc_CI it1, CSeq_loc_CI it2, bool minus_strand, CScope *scope, bool single_id)
map< CSeq_id_Handle, TRangeInfoList > TRangeInfoMap
map< CSeq_id_Handle, CSeq_id_Handle > TSynMap
static const CSeq_id * s_GetId(const CSeq_loc &loc, CScope *scope, string *msg=NULL)
Int8 s_TestForOverlapEx(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, TOverlapFlags flags, TSeqPos circular_len, CScope *scope)
Int8 AbsInt8(Int8 x)
Int8 s_GetUncoveredLength(const TRangeInfoList &ranges1, const TRangeInfoList &ranges2)
Location relative to a base Seq-loc: one (usually) or more ranges of offsets.
Definition: sequence.hpp:1124
Definition: type.c:6
#define _ASSERT
#define const
Definition: zconf.h:232
Modified on Wed Apr 17 13:09:16 2024 by modify_doxy.py rev. 669887