NCBI C++ ToolKit
loc_edit.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: loc_edit.cpp 95056 2021-10-01 16:40:21Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin, NCBI
27 *
28 * File Description:
29 * functions for editing and working with locations
30 */
31 #include <ncbi_pch.hpp>
32 #include <corelib/ncbistd.hpp>
33 #include <corelib/ncbiobj.hpp>
37 #include <objects/seq/Seqdesc.hpp>
40 #include <objects/seq/Seq_ext.hpp>
41 #include <objects/seq/Seq_inst.hpp>
55 #include <objmgr/bioseq_handle.hpp>
57 #include <objmgr/util/sequence.hpp>
58 #include <objmgr/seq_vector.hpp>
59 
63 
65 {
66  string best_id(kEmptyStr);
67 
68  // Best seq_id
70  if (sid_hl) {
71  CConstRef <CSeq_id> new_id = sid_hl.GetSeqId();
72  if (new_id) {
73  best_id = sid_hl.GetSeqId()->AsFastaString();
74  }
75  }
76  else best_id = sid.AsFastaString();
77 
78  return best_id;
79 };
80 
81 static const string strand_symbol[] = {"", "", "c","b", "r"};
82 string PrintSeqIntUseBestID(const CSeq_interval& seq_int, CScope& scope, bool range_only)
83 {
84  string location(kEmptyStr);
85 
86  // Best seq_id
87  if (!range_only) {
88  location = PrintBestSeqId(seq_int.GetId(), scope) + ":";
89  }
90 
91  // strand
93  this_strand
94  = (seq_int.CanGetStrand()) ? seq_int.GetStrand(): eNa_strand_unknown;
95  location += strand_symbol[(int)this_strand];
96  int from, to;
97  string lab_from(kEmptyStr), lab_to(kEmptyStr);
98  if (eNa_strand_minus == this_strand
99  || eNa_strand_both_rev == this_strand) {
100  to = seq_int.GetFrom();
101  from = seq_int.GetTo();
102  if (seq_int.CanGetFuzz_to()) {
103  const CInt_fuzz& f_from = seq_int.GetFuzz_to();
104  f_from.GetLabel(&lab_from, from, false);
105  }
106  else lab_from = NStr::IntToString(++from);
107  if (seq_int.CanGetFuzz_from()) {
108  const CInt_fuzz& f_to = seq_int.GetFuzz_from();
109  f_to.GetLabel(&lab_to, to);
110  }
111  else lab_to = NStr::IntToString(++to);
112  }
113  else {
114  to = seq_int.GetTo();
115  from = seq_int.GetFrom();
116  if (seq_int.CanGetFuzz_from()) {
117  const CInt_fuzz& f_from = seq_int.GetFuzz_from();
118  f_from.GetLabel(&lab_from, from, false);
119  }
120  else lab_from = NStr::IntToString(++from);
121  if (seq_int.CanGetFuzz_to()) {
122  const CInt_fuzz& f_to = seq_int.GetFuzz_to();
123  f_to.GetLabel(&lab_to, to);
124  }
125  else lab_to = NStr::IntToString(++to);
126  }
127  location += lab_from + "-" + lab_to;
128  return location;
129 };
130 
131 string PrintPntAndPntsUseBestID(const CSeq_loc& seq_loc, CScope& scope, bool range_only)
132 {
133  string location(kEmptyStr);
134 
135  // Best seq_id
136  if (!range_only) {
137  if (seq_loc.IsPnt()) {
138  location = PrintBestSeqId(seq_loc.GetPnt().GetId(), scope) + ":";
139  }
140  else if (seq_loc.IsPacked_pnt()) {
141  location = PrintBestSeqId(seq_loc.GetPacked_pnt().GetId(), scope) + ":";
142  }
143  }
144 
145  if (!location.empty()) {
146  string strtmp;
147  seq_loc.GetLabel(&strtmp);
148  location += strtmp.substr(strtmp.find(":")+1);
149  }
150  return location;
151 }
152 
153 string SeqLocPrintUseBestID(const CSeq_loc& seq_loc, CScope& scope, bool range_only)
154 {
155  string location(kEmptyStr);
156  if (seq_loc.IsInt()) {
157  location = PrintSeqIntUseBestID(seq_loc.GetInt(), scope, range_only);
158  }
159  else if (seq_loc.IsMix() || seq_loc.IsEquiv()) {
160  location = "(";
161  const list <CRef <CSeq_loc> >* seq_loc_ls;
162  if (seq_loc.IsMix()) {
163  seq_loc_ls = &(seq_loc.GetMix().Get());
164  }
165  else {
166  seq_loc_ls = &(seq_loc.GetEquiv().Get());
167  }
168  ITERATE (list <CRef <CSeq_loc> >, it, *seq_loc_ls) {
169  if (it == seq_loc.GetMix().Get().begin()) {
170  location += SeqLocPrintUseBestID(**it, scope, range_only);
171  }
172  else location += SeqLocPrintUseBestID(**it, scope, true);
173  location += ", ";
174  }
175  if (!location.empty()) {
176  location = location.substr(0, location.size()-2);
177  }
178  location += ")";
179  }
180  else if (seq_loc.IsPacked_int()) {
181  location = "(";
182  ITERATE (list <CRef <CSeq_interval> >, it, seq_loc.GetPacked_int().Get()) {
183  if (it == seq_loc.GetPacked_int().Get().begin()) {
184  location += PrintSeqIntUseBestID(**it, scope, range_only);
185  }
186  else {
187  location += PrintSeqIntUseBestID(**it, scope, true);
188  }
189  location += ", ";
190  }
191  if (!location.empty()) {
192  location = location.substr(0, location.size()-2);
193  }
194  location += ")";
195  }
196  else if (seq_loc.IsPnt() || seq_loc.IsPacked_pnt()) {
197  location = PrintPntAndPntsUseBestID(seq_loc, scope, range_only);
198  }
199  else if (seq_loc.IsBond()) {
200  CSeq_loc tmp_loc;
201  tmp_loc.SetBond().Assign(seq_loc.GetBond().GetA());
202  location = PrintPntAndPntsUseBestID(tmp_loc, scope, range_only);
203  if (seq_loc.GetBond().CanGetB()) {
204  tmp_loc.SetBond().Assign(seq_loc.GetBond().GetB());
205  location
206  += "=" + PrintPntAndPntsUseBestID(tmp_loc, scope, range_only);
207  }
208  }
209  else {
210  seq_loc.GetLabel(&location);
211  }
212  return location;
213 }
214 
215 
217 {
218  bool rval = false;
219 
220  ENa_strand strand = loc.GetStrand();
221  if (strand == eNa_strand_minus) {
222  if (bsh && loc.GetStart(eExtreme_Biological) == bsh.GetInst_Length() - 1) {
223  rval = true;
224  }
225  } else {
226  if (loc.GetStart(eExtreme_Biological) == 0) {
227  rval = true;
228  }
229  }
230  return rval;
231 }
232 
233 
235 {
236  bool rval = false;
237  ENa_strand strand = loc.GetStrand();
238 
239  if (strand == eNa_strand_minus) {
240  if (loc.GetStop(eExtreme_Biological) == 0) {
241  rval = true;
242  }
243  } else {
244  if (bsh && loc.GetStop(eExtreme_Biological) == bsh.GetInst_Length() - 1) {
245  rval = true;
246  }
247  }
248  return rval;
249 }
250 
251 
252 bool CLocationEditPolicy::Is5AtEndOfSeq(const CSeq_loc& loc, CScope& scope, bool& confident)
253 {
254  bool rval = false;
255  confident = true;
256  CSeq_loc_CI first_l(loc);
257  if (!first_l.IsSetStrand() || first_l.GetStrand() != eNa_strand_minus) {
258  // positive strand, just need to know if it's at zero
259  rval = (first_l.GetRange().GetFrom() == 0);
260  } else {
261  // negative strand
262  try {
263  CBioseq_Handle bsh = scope.GetBioseqHandle(first_l.GetSeq_id());
264  rval = (first_l.GetRange().GetTo() == bsh.GetBioseqLength() - 1);
265  } catch (CException&) {
266  confident = false;
267  }
268  }
269  return rval;
270 }
271 
272 
273 bool CLocationEditPolicy::Is3AtEndOfSeq(const CSeq_loc& loc, CScope& scope, bool& confident)
274 {
275  bool rval = false;
276  confident = true;
277  CSeq_loc_CI last_l(loc);
278  size_t num_intervals = last_l.GetSize();
279  last_l.SetPos(num_intervals - 1);
280  if (!last_l.IsSetStrand() || last_l.GetStrand() != eNa_strand_minus) {
281  // positive strand
282  try {
283  CBioseq_Handle bsh = scope.GetBioseqHandle(last_l.GetSeq_id());
284  if (bsh) {
285  rval = (last_l.GetRange().GetTo() == bsh.GetBioseqLength() - 1);
286  } else {
287  confident = false;
288  }
289  } catch (CException&) {
290  confident = false;
291  }
292  } else {
293  // negative strand, just need to know if it's at zero
294  rval = (last_l.GetRange().GetFrom() == 0);
295  }
296  return rval;
297 }
298 
299 
301 (const CSeq_feat& orig_feat,
302  CScope& scope,
303  bool& do_set_5_partial,
304  bool& do_clear_5_partial) const
305 {
306  do_set_5_partial = false;
307  do_clear_5_partial = false;
308  const CSeq_loc& loc = orig_feat.GetLocation();
309 
310  switch (m_PartialPolicy5) {
312  // do nothing
313  break;
314  case ePartialPolicy_eSet:
315  if (!orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)) {
316  do_set_5_partial = true;
317  } else if (m_Extend5) {
318  bool confident = false;
319  bool at_5 = Is5AtEndOfSeq(loc, scope, confident);
320  if (confident && !at_5) {
321  do_set_5_partial = true;
322  }
323  }
324  break;
326  if (!orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)) {
327  bool confident = false;
328  if (Is5AtEndOfSeq(loc, scope, confident) && confident) {
329  do_set_5_partial = true;
330  }
331  }
332  break;
335  && orig_feat.GetData().IsCdregion()) {
336  string transl_prot;
337  bool confident = true;
338  try {
339  CSeqTranslator::Translate(orig_feat, scope, transl_prot,
340  false, // do not include stop codons
341  false); // do not remove trailing X/B/Z
342 
343  } catch ( const CException& ) {
344  confident = false;
345  }
346  if (confident && !NStr::StartsWith(transl_prot, "M", NStr::eNocase)) {
347  do_set_5_partial = true;
348  }
349  }
350  break;
353  && orig_feat.GetData().IsCdregion()
354  && orig_feat.GetData().GetCdregion().IsSetFrame()
356  && orig_feat.GetData().GetCdregion().GetFrame() != CCdregion::eFrame_one) {
357  do_set_5_partial = true;
358  }
359  break;
362  do_clear_5_partial = true;
363  }
364  break;
367  bool confident = false;
368  if (!Is5AtEndOfSeq(loc, scope, confident) && confident) {
369  do_clear_5_partial = true;
370  }
371  }
372  break;
375  && orig_feat.GetData().IsCdregion()
376  && (!orig_feat.GetData().GetCdregion().IsSetFrame() ||
378  orig_feat.GetData().GetCdregion().GetFrame() == CCdregion::eFrame_one)) {
379  string transl_prot;
380  bool confident = true;
381  try {
382  CSeqTranslator::Translate(orig_feat, scope, transl_prot,
383  false, // do not include stop codons
384  false); // do not remove trailing X/B/Z
385 
386  } catch ( const CException& ) {
387  confident = false;
388  }
389  if (confident && NStr::StartsWith(transl_prot, "M", NStr::eNocase)) {
390  do_clear_5_partial = true;
391  }
392  }
393  break;
394  }
395  return do_set_5_partial || do_clear_5_partial;
396 }
397 
398 
400 (const CSeq_feat& orig_feat,
401  CScope& scope,
402  bool& do_set_3_partial,
403  bool& do_clear_3_partial) const
404 {
405  do_set_3_partial = false;
406  do_clear_3_partial = false;
407  const CSeq_loc& loc = orig_feat.GetLocation();
408 
409  switch (m_PartialPolicy3) {
411  // do nothing
412  break;
413  case ePartialPolicy_eSet:
415  do_set_3_partial = true;
416  } else if (m_Extend3) {
417  bool confident = false;
418  bool at_3 = Is3AtEndOfSeq(loc, scope, confident);
419  if (confident && !at_3) {
420  do_set_3_partial = true;
421  }
422  }
423  break;
426  bool confident = false;
427  bool at_3 = Is3AtEndOfSeq(loc, scope, confident);
428  if (at_3 && confident) {
429  do_set_3_partial = true;
430  }
431  }
432  break;
435  && orig_feat.GetData().IsCdregion()) {
436  string transl_prot;
437  bool confident = true;
438  try {
439  CSeqTranslator::Translate(orig_feat, scope, transl_prot,
440  true, // include stop codons
441  false); // do not remove trailing X/B/Z
442 
443  } catch ( const CException& ) {
444  confident = false;
445  }
446  if (confident && !NStr::EndsWith(transl_prot, "*", NStr::eNocase)) {
447  do_set_3_partial = true;
448  }
449  }
450  break;
452  // not allowed for 3' end
453  break;
456  do_clear_3_partial = true;
457  }
458  break;
461  bool confident = false;
462  bool at_3 = Is3AtEndOfSeq(loc, scope, confident);
463  if (!at_3 && confident) {
464  do_clear_3_partial = true;
465  }
466  }
467  break;
470  && orig_feat.GetData().IsCdregion()) {
471  string transl_prot;
472  bool confident = true;
473  try {
474  CSeqTranslator::Translate(orig_feat, scope, transl_prot,
475  true, // include stop codons
476  false); // do not remove trailing X/B/Z
477 
478  } catch ( const CException& ) {
479  confident = false;
480  }
481  if (confident && NStr::EndsWith(transl_prot, "*", NStr::eNocase)) {
482  do_clear_3_partial = true;
483  }
484  }
485  break;
486  }
487  return do_set_3_partial || do_clear_3_partial;
488 }
489 
490 
492 {
493  CSeq_loc_CI first_l(loc);
494  CConstRef<CSeq_loc> first_loc = first_l.GetRangeAsSeq_loc();
495 
496  TSeqPos loc_start = first_loc->GetStart(eExtreme_Biological);
497  bool partial_start = first_loc->IsPartialStart(eExtreme_Biological);
498  ENa_strand strand = first_loc->IsSetStrand() ? first_loc->GetStrand() : eNa_strand_plus;
499  CRef<CSeq_loc> new_loc;
500 
501  CRef<CSeq_id> id(new CSeq_id());
502  id->Assign(first_l.GetSeq_id());
503 
504  if (pos < loc_start && strand != eNa_strand_minus) {
505  CRef<CSeq_loc> add(new CSeq_loc(*id, pos, loc_start - 1, strand));
506  add->SetPartialStart(partial_start, eExtreme_Positional);
508  } else if (pos > loc_start && strand == eNa_strand_minus) {
509  CRef<CSeq_loc> add(new CSeq_loc(*id, loc_start + 1, pos, strand));
510  add->SetPartialStop(partial_start, eExtreme_Positional);
512  }
513  return new_loc;
514 }
515 
516 
518 {
519  CSeq_loc_CI last_l(loc);
520  size_t num_intervals = last_l.GetSize();
521  last_l.SetPos(num_intervals - 1);
522  CConstRef<CSeq_loc> last_loc = last_l.GetRangeAsSeq_loc();
523 
524  TSeqPos loc_stop = last_loc->GetStop(eExtreme_Biological);
525  bool partial_stop = last_loc->IsPartialStop(eExtreme_Biological);
526  ENa_strand strand = last_loc->IsSetStrand() ? last_loc->GetStrand() : eNa_strand_plus;
527  CRef<CSeq_loc> new_loc;
528 
529  CRef<CSeq_id> id(new CSeq_id());
530  id->Assign(last_l.GetSeq_id());
531 
532  if (pos > loc_stop && strand != eNa_strand_minus) {
533  CRef<CSeq_loc> add(new CSeq_loc(*id, loc_stop + 1, pos, strand));
534  add->SetPartialStop(partial_stop, eExtreme_Positional);
536  } else if (pos < loc_stop && strand == eNa_strand_minus) {
537  CRef<CSeq_loc> add(new CSeq_loc(*id, pos, loc_stop - 1, strand));
538  add->SetPartialStart(partial_stop, eExtreme_Positional);
540  }
541  return new_loc;
542 }
543 
544 
545 
546 CRef<CSeq_loc> SeqLocExtend(const CSeq_loc& loc, size_t pos, CScope* scope)
547 {
548  TSeqPos loc_start = loc.GetStart(eExtreme_Positional);
549  TSeqPos loc_stop = loc.GetStop(eExtreme_Positional);
550  bool partial_start = loc.IsPartialStart(eExtreme_Positional);
551  bool partial_stop = loc.IsPartialStop(eExtreme_Positional);
552  ENa_strand strand = loc.GetStrand();
553  CRef<CSeq_loc> new_loc;
554 
555  if (pos < loc_start) {
556  CRef<CSeq_id> id(new CSeq_id());
557  id->Assign(*(loc.GetId()));
558  CRef<CSeq_loc> add(
559  new CSeq_loc(*id, static_cast<TSeqPos>(pos), loc_start - 1, strand));
560  add->SetPartialStart(partial_start, eExtreme_Positional);
561  new_loc = sequence::Seq_loc_Add(
562  loc, *add, CSeq_loc::fSort | CSeq_loc::fMerge_AbuttingOnly, scope);
563  } else if (pos > loc_stop) {
564  CRef<CSeq_id> id(new CSeq_id());
565  id->Assign(*(loc.GetId()));
566  CRef<CSeq_loc> add(new CSeq_loc(
567  *id, loc_stop + 1, static_cast<TSeqPos>(pos), strand));
568  add->SetPartialStop(partial_stop, eExtreme_Positional);
569  new_loc = sequence::Seq_loc_Add(
570  loc, *add, CSeq_loc::fSort | CSeq_loc::fMerge_AbuttingOnly, scope);
571  }
572  return new_loc;
573 }
574 
575 
577 {
581  return false;
582  }
583 
584  bool any_change = false;
585 
586  // make changes to 5' end
587  bool do_set_5_partial = false;
588  bool do_clear_5_partial = false;
589  any_change |= Interpret5Policy(feat, scope, do_set_5_partial, do_clear_5_partial);
590  if (do_set_5_partial) {
591  feat.SetLocation().SetPartialStart(true, eExtreme_Biological);
592  if (m_Extend5) {
593  // extend end
594  Extend5(feat, scope);
595  }
596  } else if (do_clear_5_partial) {
597  feat.SetLocation().SetPartialStart(false, eExtreme_Biological);
598  }
599 
600  // make changes to 3' end
601  bool do_set_3_partial = false;
602  bool do_clear_3_partial = false;
603  any_change |= Interpret3Policy(feat, scope, do_set_3_partial, do_clear_3_partial);
604  if (do_set_3_partial) {
605  feat.SetLocation().SetPartialStop(true, eExtreme_Biological);
606  if (m_Extend3) {
607  // extend end
608  Extend3(feat, scope);
609  }
610  } else if (do_clear_3_partial) {
611  feat.SetLocation().SetPartialStop(false, eExtreme_Biological);
612  }
613 
614  // make merge changes
615  switch (m_MergePolicy) {
617  {
618  // remove NULLS between, if present
619  bool changed = false;
620  CRef<CSeq_loc> new_loc = ConvertToJoin(feat.GetLocation(), changed);
621  if (changed) {
622  feat.SetLocation().Assign(*new_loc);
623  any_change = true;
624  }
625  }
626  break;
628  {
629  // add NULLS between if not present
630  bool changed = false;
631  CRef<CSeq_loc> new_loc = ConvertToOrder(feat.GetLocation(), changed);
632  if (changed) {
633  feat.SetLocation().Assign(*new_loc);
634  any_change = true;
635  }
636  }
637  break;
639  {
642  feat.SetLocation().Assign(*new_loc);
643  any_change = true;
644  }
645  }
646  break;
648  break;
649  }
650 
652 
653  return any_change;
654 }
655 
656 
658 {
659  if (orig_loc.Which() == CSeq_loc::e_Mix) {
660  ITERATE(CSeq_loc_mix::Tdata, it, orig_loc.GetMix().Get()) {
661  if ((*it)->IsNull()) {
662  return true;
663  }
664  }
665  }
666  return false;
667 }
668 
669 
671 {
672  changed = false;
673  CRef<CSeq_loc> new_loc(new CSeq_loc());
674  if (!HasNulls(orig_loc)) {
675  new_loc->Assign(orig_loc);
676  } else {
677  CSeq_loc_CI ci(orig_loc);
678  new_loc->SetMix();
679  while (ci) {
681  if (subloc && !subloc->IsNull()) {
682  CRef<CSeq_loc> add(new CSeq_loc());
683  add->Assign(*subloc);
684  new_loc->SetMix().Set().push_back(add);
685  }
686  ++ci;
687  }
688  changed = true;
689  }
690  return new_loc;
691 }
692 
693 
695 {
696  changed = false;
697  CRef<CSeq_loc> new_loc(new CSeq_loc());
698  if (HasNulls(orig_loc)) {
699  new_loc->Assign(orig_loc);
700  return new_loc;
701  }
702  switch(orig_loc.Which()) {
703  case CSeq_loc::e_not_set:
704  case CSeq_loc::e_Null:
705  case CSeq_loc::e_Empty:
706  case CSeq_loc::e_Whole:
707  case CSeq_loc::e_Int:
708  case CSeq_loc::e_Pnt:
709  case CSeq_loc::e_Bond:
710  case CSeq_loc::e_Feat:
711  case CSeq_loc::e_Equiv:
712  new_loc->Assign(orig_loc);
713  break;
716  case CSeq_loc::e_Mix:
717  {
718  new_loc->SetMix();
719  CSeq_loc_CI ci(orig_loc);
721  first->Assign(*(ci.GetRangeAsSeq_loc()));
722  new_loc->SetMix().Set().push_back(first);
723  ++ci;
724  while (ci) {
725  CRef<CSeq_loc> null_loc(new CSeq_loc());
726  null_loc->SetNull();
727  new_loc->SetMix().Set().push_back(null_loc);
728  CRef<CSeq_loc> add(new CSeq_loc());
729  add->Assign(*(ci.GetRangeAsSeq_loc()));
730  new_loc->SetMix().Set().push_back(add);
731  ++ci;
732  }
733  changed = true;
734  }
735  break;
736 
737  }
738  return new_loc;
739 }
740 
741 void AdjustFrameFor5Extension(CSeq_feat& feat, size_t diff)
742 {
743  // adjust frame to maintain consistency
744  if (diff % 3 > 0 && feat.GetData().IsCdregion()) {
745  int orig_frame = 1;
746  if (feat.GetData().GetCdregion().IsSetFrame()) {
748  orig_frame = 2;
749  } else if (feat.GetData().GetCdregion().GetFrame() == CCdregion::eFrame_three) {
750  orig_frame = 3;
751  }
752  }
754  switch ((orig_frame + diff % 3) % 3) {
755  case 1:
756  new_frame = CCdregion::eFrame_not_set;
757  break;
758  case 2:
759  new_frame = CCdregion::eFrame_two;
760  break;
761  case 0:
762  new_frame = CCdregion::eFrame_three;
763  break;
764  }
765  feat.SetData().SetCdregion().SetFrame(new_frame);
766  }
767 }
768 
769 
771 {
772  bool extend = false;
773 
774  bool confident = false;
775  if (!Is5AtEndOfSeq(feat.GetLocation(), scope, confident) && confident) {
776  int diff = 0;
777  CSeq_loc_CI first_l(feat.GetLocation());
778  if (first_l.IsSetStrand() && first_l.GetStrand() == eNa_strand_minus) {
779  CBioseq_Handle bsh = scope.GetBioseqHandle(first_l.GetSeq_id());
780  diff = bsh.GetInst().GetLength() - first_l.GetRange().GetTo() - 1;
781  CRef<CSeq_loc> new_loc = SeqLocExtend5(feat.GetLocation(), bsh.GetInst_Length() - 1, &scope);
782  if (new_loc) {
783  feat.SetLocation().Assign(*new_loc);
784  extend = true;
785  } else {
786  diff = 0;
787  }
788  } else {
789  diff = first_l.GetRange().GetFrom();
790  CRef<CSeq_loc> new_loc = SeqLocExtend5(feat.GetLocation(), 0, &scope);
791  if (new_loc) {
792  feat.SetLocation().Assign(*new_loc);
793  extend = true;
794  } else {
795  diff = 0;
796  }
797  }
798 
799  // adjust frame to maintain consistency
800  AdjustFrameFor5Extension(feat, diff);
801  }
802  return extend;
803 }
804 
806 {
807  bool extend = false;
808 
809  bool confident = false;
810  if (!Is3AtEndOfSeq(feat.GetLocation(), scope, confident) && confident) {
811  CSeq_loc_CI last_l(feat.GetLocation());
812  size_t num_intervals = last_l.GetSize();
813  last_l.SetPos(num_intervals - 1);
814 
815  ENa_strand strand = last_l.GetStrand();
816  if (strand == eNa_strand_minus) {
817  CRef<CSeq_loc> new_loc = SeqLocExtend3(feat.GetLocation(), 0, &scope);
818  if (new_loc) {
819  feat.SetLocation().Assign(*new_loc);
820  extend = true;
821  }
822  } else {
823  CBioseq_Handle bsh = scope.GetBioseqHandle(last_l.GetSeq_id());
824  CRef<CSeq_loc> new_loc = SeqLocExtend3(feat.GetLocation(), bsh.GetInst_Length() - 1, &scope);
825  if (new_loc) {
826  feat.SetLocation().Assign(*new_loc);
827  extend = true;
828  }
829  }
830  }
831  return extend;
832 }
833 
834 
835 bool ApplyPolicyToFeature(const CLocationEditPolicy& policy, const CSeq_feat& orig_feat,
836  CScope& scope, bool adjust_gene, bool retranslate_cds)
837 {
838  CRef<CSeq_feat> new_feat(new CSeq_feat());
839  new_feat->Assign(orig_feat);
840 
841  bool any_change = policy.ApplyPolicyToFeature(*new_feat, scope);
842  if (any_change) {
843  CSeq_feat_Handle fh = scope.GetSeq_featHandle(orig_feat);
844  // This is necessary, to make sure that we are in "editing mode"
845  const CSeq_annot_Handle& annot_handle = fh.GetAnnot();
846  CSeq_entry_EditHandle eh = annot_handle.GetParentEntry().GetEditHandle();
847  CSeq_feat_EditHandle feh(fh);
848 
849  // adjust gene feature
850  if (adjust_gene) {
851  CConstRef<CSeq_feat> old_gene = sequence::GetOverlappingGene(orig_feat.GetLocation(), scope);
852  if (old_gene) {
853  TSeqPos feat_start = new_feat->GetLocation().GetStart(eExtreme_Biological);
854  TSeqPos feat_stop = new_feat->GetLocation().GetStop(eExtreme_Biological);
855  CRef<CSeq_feat> new_gene(new CSeq_feat());
856  new_gene->Assign(*old_gene);
857  bool gene_change = false;
858  // adjust ends of gene to match ends of feature
859  CRef<CSeq_loc> new_loc = SeqLocExtend5(new_gene->GetLocation(), feat_start, &scope);
860  if (new_loc) {
861  new_gene->SetLocation().Assign(*new_loc);
862  gene_change = true;
863  }
864  new_loc = SeqLocExtend3(new_gene->GetLocation(), feat_stop, &scope);
865  if (new_loc) {
866  new_gene->SetLocation().Assign(*new_loc);
867  gene_change = true;
868  }
869  if (gene_change) {
870  CSeq_feat_Handle gh = scope.GetSeq_featHandle(*old_gene);
871  // This is necessary, to make sure that we are in "editing mode"
872  const CSeq_annot_Handle& ah = gh.GetAnnot();
874  CSeq_feat_EditHandle geh(gh);
875  geh.Replace(*new_gene);
876  }
877  }
878  }
879  feh.Replace(*new_feat);
880 
881  // retranslate or resynch if coding region
882  if (new_feat->IsSetProduct() && new_feat->GetData().IsCdregion()) {
883  if (!retranslate_cds || !feature::RetranslateCDS(*new_feat, scope)) {
884  CSeq_loc_CI l(new_feat->GetLocation());
885  feature::AdjustForCDSPartials(*new_feat, scope);
886  }
887  }
888  }
889  return any_change;
890 }
891 
892 
894 {
895  // flip strand
896  interval.FlipStrand();
897  if (interval.IsSetId()) {
898  CBioseq_Handle bsh = scope.GetBioseqHandle(interval.GetId());
899  if (bsh) {
900  if (interval.IsSetFrom()) {
901  interval.SetFrom(bsh.GetInst_Length() - interval.GetFrom() - 1);
902  }
903  if (interval.IsSetTo()) {
904  interval.SetTo(bsh.GetInst_Length() - interval.GetTo() - 1);
905  }
906 
907  // reverse from and to
908  if (interval.IsSetFrom()) {
909  TSeqPos from = interval.GetFrom();
910  if (interval.IsSetTo()) {
911  interval.SetFrom(interval.GetTo());
912  } else {
913  interval.ResetFrom();
914  }
915  interval.SetTo(from);
916  } else if (interval.IsSetTo()) {
917  interval.SetFrom(interval.GetTo());
918  interval.ResetTo();
919  }
920 
921  if (interval.IsSetFuzz_from()) {
922  interval.SetFuzz_from().Negate(bsh.GetInst_Length());
923  }
924  if (interval.IsSetFuzz_to()) {
925  interval.SetFuzz_to().Negate(bsh.GetInst_Length());
926  }
927 
928  // swap fuzz
929  if (interval.IsSetFuzz_from()) {
931  swap->Assign(interval.GetFuzz_from());
932  if (interval.IsSetFuzz_to()) {
933  interval.SetFuzz_from().Assign(interval.GetFuzz_to());
934  } else {
935  interval.ResetFuzz_from();
936  }
937  interval.SetFuzz_to(*swap);
938  } else if (interval.IsSetFuzz_to()) {
939  interval.SetFuzz_from().Assign(interval.GetFuzz_to());
940  interval.ResetFuzz_to();
941  }
942  }
943  }
944 }
945 
946 
948 {
949  // flip strand
950  pnt.FlipStrand();
951  if (pnt.IsSetId()) {
952  CBioseq_Handle bsh = scope.GetBioseqHandle(pnt.GetId());
953  if (bsh) {
954  if (pnt.IsSetPoint()) {
955  pnt.SetPoint(bsh.GetInst_Length() - pnt.GetPoint() - 1);
956  }
957  if (pnt.IsSetFuzz()) {
958  pnt.SetFuzz().Negate(bsh.GetInst_Length());
959  }
960  }
961  }
962 }
963 
964 
966 {
967  // flip strand
968  ppnt.FlipStrand();
969  CBioseq_Handle bsh = scope.GetBioseqHandle(ppnt.GetId());
970  if (bsh) {
971  // flip fuzz
972  if (ppnt.IsSetFuzz()) {
973  ppnt.SetFuzz().Negate(bsh.GetInst_Length());
974  }
975  //complement points
976  if (ppnt.IsSetPoints()) {
977  vector<int> new_pnts;
979  new_pnts.push_back(bsh.GetInst_Length() - *it - 1);
980  }
981  ppnt.ResetPoints();
982  ITERATE(vector<int>, it, new_pnts) {
983  ppnt.SetPoints().push_back(*it);
984  }
985  }
986  }
987 
988 }
989 
990 
992 {
993  switch (loc.Which()) {
994  case CSeq_loc::e_Empty:
995  case CSeq_loc::e_Whole:
996  case CSeq_loc::e_Feat:
997  case CSeq_loc::e_Null:
998  case CSeq_loc::e_not_set:
999  // do nothing
1000  break;
1001  case CSeq_loc::e_Int:
1002  ReverseComplementLocation(loc.SetInt(), scope);
1003  loc.InvalidateCache();
1004  break;
1005  case CSeq_loc::e_Pnt:
1006  ReverseComplementLocation(loc.SetPnt(), scope);
1007  loc.InvalidateCache();
1008  break;
1009  case CSeq_loc::e_Bond:
1010  if (loc.GetBond().IsSetA()) {
1011  ReverseComplementLocation(loc.SetBond().SetA(), scope);
1012  }
1013  if (loc.GetBond().IsSetB()) {
1014  ReverseComplementLocation(loc.SetBond().SetB(), scope);
1015  }
1016  loc.InvalidateCache();
1017  break;
1018  case CSeq_loc::e_Mix:
1019  // revcomp individual elements
1020  NON_CONST_ITERATE(CSeq_loc_mix::Tdata, it, loc.SetMix().Set()) {
1021  ReverseComplementLocation(**it, scope);
1022  }
1023  loc.InvalidateCache();
1024  break;
1025  case CSeq_loc::e_Equiv:
1026  // revcomp individual elements
1028  ReverseComplementLocation(**it, scope);
1029  }
1030  loc.InvalidateCache();
1031  break;
1033  // revcomp individual elements
1035  ReverseComplementLocation(**it, scope);
1036  }
1037  loc.InvalidateCache();
1038  break;
1041  loc.InvalidateCache();
1042  break;
1043 
1044  }
1045 }
1046 
1047 
1049 {
1050  if (cdr.IsSetCode_break()) {
1052  if ((*it)->IsSetLoc()) {
1053  ReverseComplementLocation((*it)->SetLoc(), scope);
1054  }
1055  }
1056  }
1057 }
1058 
1059 
1061 {
1062  if (trna.IsSetAnticodon()) {
1063  ReverseComplementLocation(trna.SetAnticodon(), scope);
1064  }
1065 }
1066 
1067 
1069 {
1070  if (feat.IsSetLocation()) {
1071  ReverseComplementLocation(feat.SetLocation(), scope);
1072  }
1073  if (feat.IsSetData()) {
1074  switch (feat.GetData().GetSubtype()) {
1076  ReverseComplementCDRegion(feat.SetData().SetCdregion(), scope);
1077  break;
1079  ReverseComplementTrna(feat.SetData().SetRna().SetExt().SetTRNA(), scope);
1080  break;
1081  default:
1082  break;
1083  }
1084  }
1085 }
1086 
1087 
1088 bool OkToAdjustLoc(const CSeq_interval& interval, const CSeq_id* seqid)
1089 {
1090  bool rval = true;
1091  if (seqid) {
1092  if (!interval.IsSetId() || interval.GetId().Compare(*seqid) != CSeq_id::e_YES) {
1093  rval = false;
1094  }
1095  }
1096  return rval;
1097 }
1098 
1099 
1100 bool OkToAdjustLoc(const CSeq_point& pnt, const CSeq_id* seqid)
1101 {
1102  bool rval = true;
1103  if (seqid) {
1104  if (!pnt.IsSetId() || pnt.GetId().Compare(*seqid) != CSeq_id::e_YES) {
1105  rval = false;
1106  }
1107  }
1108  return rval;
1109 }
1110 
1111 
1112 bool OkToAdjustLoc(const CPacked_seqpnt& pack, const CSeq_id* seqid)
1113 {
1114  bool rval = true;
1115  if (seqid) {
1116  if (!pack.IsSetId() || pack.GetId().Compare(*seqid) != CSeq_id::e_YES) {
1117  rval = false;
1118  }
1119  }
1120  return rval;
1121 }
1122 
1123 
1125 {
1126  switch (loc.Which()) {
1127  case CSeq_loc::e_Equiv:
1128  {{
1129  CSeq_loc::TEquiv::Tdata::iterator it = loc.SetEquiv().Set().begin();
1130  while (it != loc.SetEquiv().Set().end()) {
1131  NormalizeLoc(**it);
1132  if (loc.Which() == CSeq_loc::e_not_set) {
1133  it = loc.SetEquiv().Set().erase(it);
1134  } else {
1135  ++it;
1136  }
1137  }
1138 
1139  // if only one, make regular loc
1140  if (loc.GetEquiv().Get().size() == 1) {
1141  CRef<CSeq_loc> sub(new CSeq_loc());
1142  sub->Assign(*(loc.GetEquiv().Get().front()));
1143  loc.Assign(*sub);
1144  } else if (loc.GetEquiv().Get().size() == 0) {
1145  // no sub intervals, reset
1146  loc.Reset();
1147  }
1148  }}
1149  break;
1150  case CSeq_loc::e_Mix:
1151  {{
1152  CSeq_loc::TMix::Tdata::iterator it = loc.SetMix().Set().begin();
1153  while (it != loc.SetMix().Set().end()) {
1154  NormalizeLoc(**it);
1155  if (loc.Which() == CSeq_loc::e_not_set) {
1156  it = loc.SetMix().Set().erase(it);
1157  } else {
1158  ++it;
1159  }
1160  }
1161 
1162  // if only one, make regular loc
1163  if (loc.GetMix().Get().size() == 1) {
1164  CRef<CSeq_loc> sub(new CSeq_loc());
1165  sub->Assign(*(loc.GetMix().Get().front()));
1166  loc.Assign(*sub);
1167  } else if (loc.GetMix().Get().size() == 0) {
1168  // no sub intervals, reset
1169  loc.Reset();
1170  }
1171  }}
1172  break;
1174  if (loc.GetPacked_int().Get().size() == 0) {
1175  loc.Reset();
1176  } else if (loc.GetPacked_int().Get().size() == 1) {
1178  sub->Assign(*(loc.GetPacked_int().Get().front()));
1179  loc.SetInt().Assign(*sub);
1180  }
1181  break;
1183  if (loc.GetPacked_pnt().GetPoints().size() == 0) {
1184  loc.Reset();
1185  } else if (loc.GetPacked_pnt().GetPoints().size() == 1) {
1186  CRef<CSeq_point> sub(new CSeq_point());
1187  if (loc.GetPacked_pnt().IsSetStrand()) {
1188  sub->SetStrand(loc.GetPacked_pnt().GetStrand());
1189  }
1190  if (loc.GetPacked_pnt().IsSetId()) {
1191  sub->SetId().Assign(loc.GetPacked_pnt().GetId());
1192  }
1193  if (loc.GetPacked_pnt().IsSetFuzz()) {
1194  sub->SetFuzz().Assign(loc.GetPacked_pnt().GetFuzz());
1195  }
1196  sub->SetPoint(loc.GetPacked_pnt().GetPoints()[0]);
1197  loc.SetPnt().Assign(*sub);
1198  }
1199  break;
1200  default:
1201  // do nothing
1202  break;
1203  }
1204 }
1205 
1206 
1208  TSeqPos cut_from, TSeqPos cut_to,
1209  const CSeq_id* seqid,
1210  bool& bCompleteCut,
1211  TSeqPos& trim5,
1212  bool& bAdjusted)
1213 {
1214  if (!OkToAdjustLoc(interval, seqid)) {
1215  return;
1216  }
1217 
1218  // These are required fields
1219  if ( !(interval.CanGetFrom() && interval.CanGetTo()) )
1220  {
1221  return;
1222  }
1223 
1224  // Feature location
1225  TSeqPos feat_from = interval.GetFrom();
1226  TSeqPos feat_to = interval.GetTo();
1227 
1228  // Size of the cut
1229  TSeqPos cut_size = cut_to - cut_from + 1;
1230 
1231  // Case 1: feature is located completely before the cut
1232  if (feat_to < cut_from)
1233  {
1234  // Nothing needs to be done - cut does not affect feature
1235  return;
1236  }
1237 
1238  // Case 2: feature is completely within the cut
1239  if (feat_from >= cut_from && feat_to <= cut_to)
1240  {
1241  // Feature should be deleted
1242  bCompleteCut = true;
1243  trim5 += feat_from - feat_to + 1;
1244  return;
1245  }
1246 
1247  // Case 3: feature is completely past the cut
1248  if (feat_from > cut_to)
1249  {
1250  // Shift the feature by the cut_size
1251  feat_from -= cut_size;
1252  feat_to -= cut_size;
1253  interval.SetFrom(feat_from);
1254  interval.SetTo(feat_to);
1255  bAdjusted = true;
1256  return;
1257  }
1258 
1259  /***************************************************************************
1260  * Cases below are partial overlapping cases
1261  ***************************************************************************/
1262  // Case 4: Cut is completely inside the feature
1263  // OR
1264  // Cut is to the "left" side of the feature (i.e., feat_from is
1265  // inside the cut)
1266  // OR
1267  // Cut is to the "right" side of the feature (i.e., feat_to is
1268  // inside the cut)
1269  if (feat_to > cut_to) {
1270  // Left side cut or cut is completely inside feature
1271  feat_to -= cut_size;
1272  }
1273  else {
1274  // Right side cut
1275  if (interval.IsSetStrand() && interval.GetStrand() == eNa_strand_minus) {
1276  TSeqPos diff = cut_from - 1 - feat_to;
1277  trim5 += diff;
1278  }
1279  feat_to = cut_from - 1;
1280  }
1281 
1282  // Take care of the feat_from from the left side cut case
1283  if (feat_from >= cut_from) {
1284  if (!interval.IsSetStrand() || interval.GetStrand() != eNa_strand_minus) {
1285  TSeqPos diff = cut_to + 1 - feat_from;
1286  trim5 += diff;
1287  }
1288  feat_from = cut_to + 1;
1289  feat_from -= cut_size;
1290  }
1291 
1292  interval.SetFrom(feat_from);
1293  interval.SetTo(feat_to);
1294  bAdjusted = true;
1295 }
1296 
1297 
1299  TSeqPos from, TSeqPos to,
1300  const CSeq_id* seqid,
1301  bool& bCompleteCut,
1302  TSeqPos& trim5,
1303  bool& bAdjusted)
1304 {
1305  if (packint.IsSet()) {
1306  bool from5 = true;
1307  // Process each interval in the list
1308  CPacked_seqint::Tdata::iterator it;
1309  for (it = packint.Set().begin();
1310  it != packint.Set().end(); )
1311  {
1312  bool bDeleted = false;
1313  TSeqPos this_trim = 0;
1314  SeqLocAdjustForTrim(**it, from, to, seqid,
1315  bDeleted, this_trim, bAdjusted);
1316 
1317  if (from5) {
1318  trim5 += this_trim;
1319  }
1320  // Should interval be deleted from list?
1321  if (bDeleted) {
1322  it = packint.Set().erase(it);
1323  }
1324  else {
1325  from5 = false;
1326  ++it;
1327  }
1328  }
1329  if (packint.Get().empty()) {
1330  packint.Reset();
1331  }
1332  }
1333  if (!packint.IsSet()) {
1334  bCompleteCut = true;
1335  }
1336 }
1337 
1338 
1340  TSeqPos from, TSeqPos to,
1341  const CSeq_id* seqid,
1342  bool& bCompleteCut,
1343  TSeqPos& trim5,
1344  bool& bAdjusted)
1345 {
1346  if (mix.IsSet()) {
1347  bool from5 = true;
1348  // Process each seqloc in the list
1349  CSeq_loc_mix::Tdata::iterator it;
1350  for (it = mix.Set().begin();
1351  it != mix.Set().end(); )
1352  {
1353  bool bDeleted = false;
1354  TSeqPos this_trim = 0;
1355  SeqLocAdjustForTrim(**it, from, to, seqid, bDeleted, this_trim, bAdjusted);
1356 
1357  if (from5) {
1358  trim5 += this_trim;
1359  }
1360  // Should seqloc be deleted from list?
1361  if (bDeleted) {
1362  it = mix.Set().erase(it);
1363  }
1364  else {
1365  from5 = false;
1366  ++it;
1367  }
1368  }
1369  }
1370  if (!mix.IsSet() || mix.Set().empty()) {
1371  bCompleteCut = true;
1372  }
1373 }
1374 
1375 
1377  TSeqPos from, TSeqPos to,
1378  const CSeq_id* seqid,
1379  bool& bCompleteCut,
1380  TSeqPos& trim5,
1381  bool& bAdjusted)
1382 {
1383  if (!OkToAdjustLoc(pnt, seqid)) {
1384  return;
1385  }
1386 
1387  if (to < pnt.GetPoint()) {
1388  auto diff = to - from + 1;
1389  pnt.SetPoint(pnt.GetPoint() - diff);
1390  bAdjusted = true;
1391  } else if (from < pnt.GetPoint()) {
1392  bCompleteCut = true;
1393  trim5 += 1;
1394  }
1395 }
1396 
1397 
1399  TSeqPos from, TSeqPos to,
1400  const CSeq_id* seqid,
1401  bool& bCompleteCut, TSeqPos& trim5, bool& bAdjusted)
1402 {
1403  if (!OkToAdjustLoc(pack, seqid)) {
1404  return;
1405  }
1406 
1407  if (pack.IsSetPoints()) {
1408  bool from5 = true;
1409  auto it = pack.SetPoints().begin();
1410  while (it != pack.SetPoints().end()) {
1411  if (to < *it) {
1412  auto diff = to - from + 1;
1413  *it -= diff;
1414  it++;
1415  bAdjusted = true;
1416  from5 = false;
1417  } else if (from < *it) {
1418  it = pack.SetPoints().erase(it);
1419  bAdjusted = true;
1420  if (from5) {
1421  trim5 += 1;
1422  }
1423  } else {
1424  it++;
1425  from5 = false;
1426  }
1427  }
1428  }
1429  if (pack.SetPoints().empty()) {
1430  bCompleteCut = true;
1431  }
1432 }
1433 
1434 
1436  TSeqPos from, TSeqPos to,
1437  const CSeq_id* seqid,
1438  bool& bCompleteCut,
1439  TSeqPos& trim5,
1440  bool& bAdjusted)
1441 {
1442  bool cutA = false, cutB = false;
1443  if (bond.IsSetA()) {
1444  SeqLocAdjustForTrim(bond.SetA(), from, to, seqid, cutA, trim5, bAdjusted);
1445  } else {
1446  cutA = true;
1447  }
1448 
1449  if (bond.IsSetB()) {
1450  SeqLocAdjustForTrim(bond.SetB(), from, to, seqid, cutB, trim5, bAdjusted);
1451  } else {
1452  cutB = true;
1453  }
1454  if (cutA && cutB) {
1455  bCompleteCut = true;
1456  }
1457 }
1458 
1459 
1461  TSeqPos from, TSeqPos to,
1462  const CSeq_id* seqid,
1463  bool& bCompleteCut,
1464  TSeqPos& trim5,
1465  bool& bAdjusted)
1466 {
1467  TSeqPos max_trim5 = 0;
1468  CSeq_loc_equiv::Tdata::iterator it = equiv.Set().begin();
1469  while (it != equiv.Set().end()) {
1470  bool cut = false;
1471  TSeqPos this_trim5 = 0;
1472  SeqLocAdjustForTrim(**it, from, to, seqid, cut, this_trim5, bAdjusted);
1473  if (this_trim5 > max_trim5) {
1474  max_trim5 = this_trim5;
1475  }
1476  if (cut) {
1477  it = equiv.Set().erase(it);
1478  } else {
1479  it++;
1480  }
1481  }
1482  if (equiv.Set().empty()) {
1483  bCompleteCut = true;
1484  }
1485  trim5 = max_trim5;
1486 }
1487 
1488 
1490  TSeqPos from, TSeqPos to,
1491  const CSeq_id* seqid,
1492  bool& bCompleteCut,
1493  TSeqPos& trim5, bool& bAdjusted)
1494 {
1495  // Given a seqloc and a range, cut the seqloc
1496 
1497  switch(loc.Which())
1498  {
1499  // Single interval
1500  case CSeq_loc::e_Int:
1501  SeqLocAdjustForTrim(loc.SetInt(), from, to, seqid,
1502  bCompleteCut, trim5, bAdjusted);
1503  break;
1504 
1505  // Multiple intervals
1507  SeqLocAdjustForTrim(loc.SetPacked_int(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1508  break;
1509 
1510  // Multiple seqlocs
1511  case CSeq_loc::e_Mix:
1512  SeqLocAdjustForTrim(loc.SetMix(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1513  break;
1514  case CSeq_loc::e_Pnt:
1515  SeqLocAdjustForTrim(loc.SetPnt(), from, to , seqid, bCompleteCut, trim5, bAdjusted);
1516  break;
1518  SeqLocAdjustForTrim(loc.SetPacked_pnt(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1519  break;
1520  case CSeq_loc::e_Bond:
1521  SeqLocAdjustForTrim(loc.SetBond(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1522  break;
1523  case CSeq_loc::e_Equiv:
1524  SeqLocAdjustForTrim(loc.SetEquiv(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1525  break;
1526  case CSeq_loc::e_Empty:
1527  case CSeq_loc::e_Null:
1528  case CSeq_loc::e_not_set:
1529  case CSeq_loc::e_Whole:
1530  case CSeq_loc::e_Feat:
1531  // no adjustment needeed
1532  break;
1533  }
1534  if (!bCompleteCut) {
1535  NormalizeLoc(loc);
1536  }
1537 }
1538 
1539 
1541  TSeqPos insert_from, TSeqPos insert_to,
1542  const CSeq_id* seqid)
1543 {
1544  if (!OkToAdjustLoc(interval, seqid)) {
1545  return;
1546  }
1547 
1548  // These are required fields
1549  if ( !(interval.CanGetFrom() && interval.CanGetTo()) )
1550  {
1551  return;
1552  }
1553 
1554  // Feature location
1555  TSeqPos feat_from = interval.GetFrom();
1556  TSeqPos feat_to = interval.GetTo();
1557 
1558  // Size of the insert
1559  TSeqPos insert_size = insert_to - insert_from + 1;
1560 
1561  // Case 1: feature is located before the insert
1562  if (feat_to < insert_from)
1563  {
1564  // Nothing needs to be done - cut does not affect feature
1565  return;
1566  }
1567 
1568  // Case 2: feature is located after the insert
1569  if (feat_from > insert_from) {
1570  feat_from += insert_size;
1571  feat_to += insert_size;
1572  interval.SetFrom(feat_from);
1573  interval.SetTo(feat_to);
1574  return;
1575  }
1576 
1577  // Case 3: insert occurs within interval
1578  if (feat_from <= insert_from && feat_to >= insert_from)
1579  {
1580  feat_to += insert_size;
1581  interval.SetTo(feat_to);
1582  return;
1583  }
1584 }
1585 
1586 
1588  TSeqPos insert_from, TSeqPos insert_to,
1589  const CSeq_id* seqid)
1590 {
1591  if (packint.IsSet()) {
1592  // Process each interval in the list
1593  CPacked_seqint::Tdata::iterator it;
1594  for (it = packint.Set().begin();
1595  it != packint.Set().end(); it++)
1596  {
1597  SeqLocAdjustForInsert(**it, insert_from, insert_to, seqid);
1598  }
1599  }
1600 }
1601 
1602 
1604  TSeqPos insert_from, TSeqPos insert_to,
1605  const CSeq_id* seqid)
1606 {
1607  if (mix.IsSet()) {
1608  // Process each seqloc in the list
1609  CSeq_loc_mix::Tdata::iterator it;
1610  for (it = mix.Set().begin();
1611  it != mix.Set().end(); it++)
1612  {
1613  SeqLocAdjustForInsert(**it, insert_from, insert_to, seqid);
1614  }
1615  }
1616 }
1617 
1618 
1620  TSeqPos insert_from, TSeqPos insert_to,
1621  const CSeq_id* seqid)
1622 {
1623  if (!OkToAdjustLoc(pnt, seqid)) {
1624  return;
1625  }
1626  if (!pnt.IsSetPoint()) {
1627  return;
1628  }
1629 
1630  if (insert_from < pnt.GetPoint()) {
1631  auto diff = insert_to - insert_from + 1;
1632  pnt.SetPoint(pnt.GetPoint() + diff);
1633  }
1634 }
1635 
1636 
1638  TSeqPos from, TSeqPos to,
1639  const CSeq_id* seqid)
1640 {
1641  if (!OkToAdjustLoc(packpnt, seqid)) {
1642  return;
1643  }
1644 
1645  auto it = packpnt.SetPoints().begin();
1646  while (it != packpnt.SetPoints().end()) {
1647  if (from < *it) {
1648  auto diff = to - from + 1;
1649  *it += diff;
1650  }
1651  it++;
1652  }
1653 }
1654 
1655 
1657  TSeqPos from, TSeqPos to,
1658  const CSeq_id* seqid)
1659 {
1660  if (bond.IsSetA()) {
1661  SeqLocAdjustForInsert(bond.SetA(), from, to, seqid);
1662  }
1663 
1664  if (bond.IsSetB()) {
1665  SeqLocAdjustForInsert(bond.SetB(), from, to, seqid);
1666  }
1667 }
1668 
1669 
1671  TSeqPos from, TSeqPos to,
1672  const CSeq_id* seqid)
1673 {
1674  CSeq_loc_equiv::Tdata::iterator it = equiv.Set().begin();
1675  while (it != equiv.Set().end()) {
1676  SeqLocAdjustForInsert(**it, from, to, seqid);
1677  it++;
1678  }
1679 }
1680 
1681 
1683  TSeqPos from, TSeqPos to,
1684  const CSeq_id* seqid)
1685 {
1686  // Given a seqloc and a range, insert into the seqloc
1687 
1688  switch(loc.Which())
1689  {
1690  // Single interval
1691  case CSeq_loc::e_Int:
1692  SeqLocAdjustForInsert(loc.SetInt(), from, to, seqid);
1693  break;
1694 
1695  // Multiple intervals
1697  SeqLocAdjustForInsert(loc.SetPacked_int(), from, to, seqid);
1698  break;
1699 
1700  // Multiple seqlocs
1701  case CSeq_loc::e_Mix:
1702  SeqLocAdjustForInsert(loc.SetMix(), from, to, seqid);
1703  break;
1704  case CSeq_loc::e_Pnt:
1705  SeqLocAdjustForInsert(loc.SetPnt(), from, to, seqid);
1706  break;
1707 
1709  SeqLocAdjustForInsert(loc.SetPacked_pnt(), from, to, seqid);
1710  break;
1711  case CSeq_loc::e_Bond:
1712  SeqLocAdjustForInsert(loc.SetBond(), from, to, seqid);
1713  break;
1714  case CSeq_loc::e_Equiv:
1715  SeqLocAdjustForInsert(loc.SetEquiv(), from, to, seqid);
1716  break;
1717  case CSeq_loc::e_Empty:
1718  case CSeq_loc::e_Null:
1719  case CSeq_loc::e_not_set:
1720  case CSeq_loc::e_Whole:
1721  case CSeq_loc::e_Feat:
1722  // no adjustment needeed
1723  break;
1724  }
1725 }
1726 
1727 
1729  TSeqPos start, TSeqPos stop,
1730  const CSeq_id* seqid, bool& cut,
1731  unsigned int options)
1732 {
1733  cut = false;
1734  if (!OkToAdjustLoc(before, seqid)) {
1735  return CRef<CSeq_interval>();
1736  }
1737  // These are required fields
1738  if (!(before.CanGetFrom() && before.CanGetTo()))
1739  {
1740  return CRef<CSeq_interval>();
1741  }
1742 
1743  // Feature location
1744  TSeqPos feat_from = before.GetFrom();
1745  TSeqPos feat_to = before.GetTo();
1746  CRef<CSeq_interval> after;
1747  if (feat_to < start) {
1748  // gap completely after location
1749  return after;
1750  }
1751 
1752  if (feat_from > stop && !(options & eSplitLocOption_split_in_intron)) {
1753  // if gap completely before location, but not splitting in introns,
1754  // no change
1755  return after;
1756  }
1757 
1758  if (feat_from < start && feat_to > stop) {
1759  // gap entirely in inteval
1760  if (!(options & eSplitLocOption_split_in_exon)) {
1761  return after;
1762  }
1763  }
1764 
1765  if (feat_to > stop) {
1766  after.Reset(new CSeq_interval());
1767  after->Assign(before);
1768  if (stop + 1 > feat_from) {
1769  after->SetFrom(stop + 1);
1770  if (options & eSplitLocOption_make_partial) {
1771  after->SetFuzz_from().SetLim(CInt_fuzz::eLim_lt);
1772  }
1773  }
1774  }
1775  if (feat_from < start) {
1776  before.SetTo(start - 1);
1777  if (options & eSplitLocOption_make_partial) {
1778  before.SetFuzz_to().SetLim(CInt_fuzz::eLim_gt);
1779  }
1780  } else {
1781  cut = true;
1782  }
1783  return after;
1784 }
1785 
1786 
1788  CSeq_loc::TPacked_int& after_intervals,
1789  TSeqPos start, TSeqPos stop,
1790  const CSeq_id* seqid, unsigned int options)
1791 {
1792  if (before_intervals.IsSet()) {
1793  if (before_intervals.IsReverseStrand()) {
1794  reverse(before_intervals.Set().begin(), before_intervals.Set().end());
1795  }
1796 
1797  auto it = before_intervals.Set().begin();
1798  while (it != before_intervals.Set().end()) {
1799  CSeq_interval& sub_interval = **it;
1800 
1801  TSeqPos int_from = sub_interval.GetFrom();
1802  if (int_from > stop && after_intervals.IsSet() && !after_intervals.Get().empty()) {
1803  after_intervals.Set().push_back(Ref(&sub_interval));
1804  it = before_intervals.Set().erase(it);
1805  }
1806  else {
1807  bool cut = false;
1808  CRef<CSeq_interval> after = SplitLocationForGap(sub_interval, start, stop, seqid, cut, options);
1809 
1810  // Should interval be deleted from list?
1811  if (cut) {
1812  it = before_intervals.Set().erase(it);
1813  }
1814  else {
1815  ++it;
1816  }
1817  if (after) {
1818  after_intervals.Set().push_back(after);
1819  }
1820  }
1821  }
1822 
1823  if (after_intervals.IsReverseStrand()) {
1824  reverse(after_intervals.Set().begin(), after_intervals.Set().end());
1825  }
1826  if (before_intervals.IsReverseStrand()) {
1827  reverse(before_intervals.Set().begin(), before_intervals.Set().end());
1828  }
1829  }
1830 }
1831 
1832 
1834  size_t start, size_t stop,
1835  const CSeq_id* seqid, unsigned int options)
1836 {
1837  // Given a seqloc and a range, place the portion of the location before the range
1838  // into loc1 and the remainder of the location into loc2
1839 
1840  switch(loc1.Which())
1841  {
1842  // Single interval
1843  case CSeq_loc::e_Int:
1844  {{
1845  bool cut = false;
1847  static_cast<TSeqPos>(start), static_cast<TSeqPos>(stop),
1848  seqid, cut, options);
1849  if (cut) {
1850  loc1.Reset();
1851  }
1852  if (after) {
1853  if (loc2.Which() == CSeq_loc::e_not_set) {
1854  loc2.SetInt(*after);
1855  } else {
1856  CRef<CSeq_loc> add(new CSeq_loc());
1857  add->SetInt(*after);
1858  loc2.Add(*add);
1859  }
1860  }
1861  }}
1862  break;
1863  // Single point
1864  case CSeq_loc::e_Pnt:
1865  if (OkToAdjustLoc(loc1.GetPnt(), seqid)) {
1866  if (stop < loc1.GetPnt().GetPoint()) {
1867  if (loc2.Which() == CSeq_loc::e_not_set) {
1868  loc2.SetPnt().Assign(loc1.GetPnt());
1869  } else {
1870  loc2.Add(loc1);
1871  }
1872  loc1.Reset();
1873  }
1874  }
1875  break;
1876 
1877  // Multiple intervals
1879  {{
1880  CSeq_loc::TPacked_int& before_intervals = loc1.SetPacked_int();
1882  SplitLocationForGap(before_intervals, *after_intervals,
1883  static_cast<TSeqPos>(start), static_cast<TSeqPos>(stop),
1884  seqid, options);
1885 
1886  if (before_intervals.Set().empty()) {
1887  loc1.Reset();
1888  }
1889  if (!after_intervals->Set().empty()) {
1890  if (loc2.Which() == CSeq_loc::e_not_set) {
1891  loc2.SetPacked_int().Assign(*after_intervals);
1892  } else {
1893  CRef<CSeq_loc> add(new CSeq_loc());
1894  add->SetPacked_int().Assign(*after_intervals);
1895  loc2.Add(*add);
1896  }
1897  }
1898  }}
1899  break;
1900 
1901  // Multiple seqlocs
1902  case CSeq_loc::e_Mix:
1903  {{
1904  CSeq_loc_mix& before_mix = loc1.SetMix();
1905  CRef<CSeq_loc_mix> after_mix(new CSeq_loc_mix);
1906  if (before_mix.IsSet()) {
1907  if (before_mix.IsReverseStrand()) {
1908  reverse(before_mix.Set().begin(), before_mix.Set().end());
1909  }
1910  auto it = before_mix.Set().begin();
1911  while (it != before_mix.Set().end()) {
1912  CSeq_loc& sub_loc = **it;
1913 
1914  TSeqPos from = sub_loc.GetStart(eExtreme_Positional);
1915  if (from > stop && after_mix->IsSet() && !after_mix->Get().empty()) {
1916  after_mix->Set().push_back(Ref(&sub_loc));
1917  it = before_mix.Set().erase(it);
1918  }
1919  else {
1920  CRef<CSeq_loc> after(new CSeq_loc());
1921  SplitLocationForGap(**it, *after, start, stop, seqid, options);
1922  // Should seqloc be deleted from list?
1923  if ((*it)->Which() == CSeq_loc::e_not_set) {
1924  it = before_mix.Set().erase(it);
1925  }
1926  else {
1927  ++it;
1928  }
1929  if (after->Which() != CSeq_loc::e_not_set) {
1930  after_mix->Set().push_back(after);
1931  }
1932  }
1933  }
1934 
1935  if (after_mix->IsReverseStrand()) {
1936  reverse(after_mix->Set().begin(), after_mix->Set().end());
1937  }
1938  if (before_mix.IsReverseStrand()) {
1939  reverse(before_mix.Set().begin(), before_mix.Set().end());
1940  }
1941 
1942  // Update the original list
1943  if (before_mix.Set().empty()) {
1944  loc1.Reset();
1945  }
1946  if (!after_mix->Set().empty()) {
1947  if (loc2.Which() == CSeq_loc::e_not_set) {
1948  loc2.SetMix().Assign(*after_mix);
1949  }
1950  else {
1951  CRef<CSeq_loc> add(new CSeq_loc());
1952  add->SetMix().Assign(*after_mix);
1953  loc2.Add(*add);
1954  }
1955  }
1956  }
1957  }}
1958  break;
1959  case CSeq_loc::e_Equiv:
1960  {{
1961  CSeq_loc_equiv& before_equiv = loc1.SetEquiv();
1962  CRef<CSeq_loc_equiv> after_equiv(new CSeq_loc_equiv);
1963  if (before_equiv.IsSet()) {
1964  // Process each seqloc in the list
1965  CSeq_loc_equiv::Tdata::iterator it;
1966  for (it = before_equiv.Set().begin();
1967  it != before_equiv.Set().end(); )
1968  {
1969  CRef<CSeq_loc> after(new CSeq_loc());
1970  SplitLocationForGap(**it, *after, start, stop, seqid, options);
1971  // Should seqloc be deleted from list?
1972  if ((*it)->Which() == CSeq_loc::e_not_set) {
1973  it = before_equiv.Set().erase(it);
1974  }
1975  else {
1976  ++it;
1977  }
1978  if (after->Which() != CSeq_loc::e_not_set) {
1979  after_equiv->Set().push_back(after);
1980  }
1981  }
1982 
1983  // Update the original list
1984  if (before_equiv.Set().empty()) {
1985  loc1.Reset();
1986  }
1987  if (!after_equiv->Set().empty()) {
1988  if (loc2.Which() == CSeq_loc::e_not_set) {
1989  loc2.SetMix().Assign(*after_equiv);
1990  } else {
1991  CRef<CSeq_loc> add(new CSeq_loc());
1992  add->SetMix().Assign(*after_equiv);
1993  loc2.Add(*add);
1994  }
1995  }
1996  }
1997  }}
1998  break;
2000  if (OkToAdjustLoc(loc1.GetPacked_pnt(), seqid)) {
2001  CPacked_seqpnt::TPoints& before_points = loc1.SetPacked_pnt().SetPoints();
2002  CPacked_seqpnt::TPoints after_points;
2003  CPacked_seqpnt::TPoints::iterator it = loc1.SetPacked_pnt().SetPoints().begin();
2004  while (it != loc1.SetPacked_pnt().SetPoints().end()) {
2005  if (stop < *it) {
2006  after_points.push_back(*it);
2007  }
2008  if (start >= *it) {
2009  it = before_points.erase(it);
2010  } else {
2011  it++;
2012  }
2013  }
2014  if (!after_points.empty()) {
2015  CRef<CPacked_seqpnt> after(new CPacked_seqpnt());
2016  after->Assign(loc1.GetPacked_pnt());
2017  after->SetPoints().assign(after_points.begin(), after_points.end());
2018  if (loc2.Which() == CSeq_loc::e_not_set) {
2019  loc2.SetPacked_pnt().Assign(*after);
2020  } else {
2021  CRef<CSeq_loc> add(new CSeq_loc());
2022  add->SetPacked_pnt().Assign(*after);
2023  loc2.Add(*add);
2024  }
2025  }
2026  if (before_points.empty()) {
2027  loc1.Reset();
2028  }
2029  }
2030  break;
2031  case CSeq_loc::e_Empty:
2032  case CSeq_loc::e_Null:
2033  case CSeq_loc::e_not_set:
2034  case CSeq_loc::e_Whole:
2035  case CSeq_loc::e_Feat:
2036  case CSeq_loc::e_Bond:
2037  // no adjustment needeed
2038  break;
2039  }
2040  NormalizeLoc(loc1);
2041  NormalizeLoc(loc2);
2042 }
2043 
2044 
2046  TSeqPos from, TSeqPos to,
2047  const CSeq_id* seqid)
2048 {
2049  CCdregion::TCode_break::iterator it = cdr.SetCode_break().begin();
2050  while (it != cdr.SetCode_break().end()) {
2051  if ((*it)->IsSetLoc()) {
2052  bool cut = false;
2053  bool adjusted = false;
2054  TSeqPos trim5 = 0;
2055  SeqLocAdjustForTrim((*it)->SetLoc(), from, to, seqid, cut, trim5, adjusted);
2056  if (cut) {
2057  it = cdr.SetCode_break().erase(it);
2058  } else {
2059  it++;
2060  }
2061  } else {
2062  it++;
2063  }
2064  }
2065  if (cdr.GetCode_break().empty()) {
2066  cdr.ResetCode_break();
2067  }
2068 }
2069 
2070 
2072  TSeqPos from, TSeqPos to,
2073  const CSeq_id* seqid)
2074 {
2075  if (trna.IsSetAnticodon()) {
2076  bool cut = false;
2077  bool trimmed = false;
2078  TSeqPos trim5 = 0;
2079  SeqLocAdjustForTrim(trna.SetAnticodon(), from, to, seqid, cut, trim5, trimmed);
2080  if (cut) {
2081  trna.ResetAnticodon();
2082  }
2083  }
2084 }
2085 
2086 
2088  TSeqPos from, TSeqPos to,
2089  const CSeq_id* seqid,
2090  bool& bCompleteCut,
2091  bool& bTrimmed)
2092 {
2093  TSeqPos trim5 = 0;
2094  SeqLocAdjustForTrim (feat.SetLocation(), from, to, seqid, bCompleteCut, trim5, bTrimmed);
2095  if (bCompleteCut) {
2096  return;
2097  }
2098 
2099  if (feat.IsSetData()) {
2100  switch (feat.GetData().GetSubtype()) {
2102  CdregionAdjustForTrim(feat.SetData().SetCdregion(), from, to, seqid);
2103  break;
2105  TrnaAdjustForTrim(feat.SetData().SetRna().SetExt().SetTRNA(), from, to, seqid);
2106  break;
2107  default:
2108  break;
2109  }
2110  }
2111 }
2112 
2113 
2115  TSeqPos from, TSeqPos to,
2116  const CSeq_id* seqid)
2117 {
2118  CCdregion::TCode_break::iterator it = cdr.SetCode_break().begin();
2119  while (it != cdr.SetCode_break().end()) {
2120  if ((*it)->IsSetLoc()) {
2121  SeqLocAdjustForInsert((*it)->SetLoc(), from, to, seqid);
2122  }
2123  it++;
2124  }
2125  if (cdr.GetCode_break().empty()) {
2126  cdr.ResetCode_break();
2127  }
2128 }
2129 
2130 
2132  TSeqPos from, TSeqPos to,
2133  const CSeq_id* seqid)
2134 {
2135  if (trna.IsSetAnticodon()) {
2136  SeqLocAdjustForInsert(trna.SetAnticodon(), from, to, seqid);
2137  }
2138 }
2139 
2140 
2142  TSeqPos from, TSeqPos to,
2143  const CSeq_id* seqid)
2144 {
2145  SeqLocAdjustForInsert (feat.SetLocation(), from, to, seqid);
2146 
2147  if (feat.IsSetData()) {
2148  switch (feat.GetData().GetSubtype()) {
2150  CdregionAdjustForInsert(feat.SetData().SetCdregion(), from, to, seqid);
2151  break;
2153  TrnaAdjustForInsert(feat.SetData().SetRna().SetExt().SetTRNA(), from, to, seqid);
2154  break;
2155  default:
2156  break;
2157  }
2158  }
2159 }
2160 
2161 
2162 bool s_PPntComparePlus(const TSeqPos& p1, const TSeqPos& p2)
2163 {
2164  return (p1 < p2);
2165 }
2166 
2167 
2168 bool s_PPntCompareMinus(const TSeqPos& p1, const TSeqPos& p2)
2169 {
2170  return (p1 > p2);
2171 }
2172 
2173 
2175 {
2176  bool rval = false;
2177  if (!ppnt.IsSetPoints()) {
2178  // nothing to do
2179  } else if (!ppnt.IsSetStrand() || ppnt.GetStrand() == eNa_strand_plus || ppnt.GetStrand() == eNa_strand_unknown) {
2180  if (!seq_mac_is_sorted(ppnt.GetPoints().begin(), ppnt.GetPoints().end(), s_PPntComparePlus)) {
2181  stable_sort(ppnt.SetPoints().begin(), ppnt.SetPoints().end(), s_PPntComparePlus);
2182  rval = true;
2183  }
2184  } else if (ppnt.IsSetStrand() && ppnt.GetStrand() == eNa_strand_minus) {
2185  if (!seq_mac_is_sorted(ppnt.GetPoints().begin(), ppnt.GetPoints().end(), s_PPntCompareMinus)) {
2186  stable_sort(ppnt.SetPoints().begin(), ppnt.SetPoints().end(), s_PPntCompareMinus);
2187  rval = true;
2188  }
2189  }
2190  return rval;
2191 }
2192 
2193 
2195 {
2196  if (a.IsSetStrand() && a.GetStrand() == eNa_strand_minus) {
2197  if (!b.IsSetStrand() || b.GetStrand() != eNa_strand_minus) {
2198  return false;
2199  } else {
2200  return true;
2201  }
2202  } else if (b.IsSetStrand() && b.GetStrand() == eNa_strand_minus) {
2203  return false;
2204  } else {
2205  return true;
2206  }
2207 }
2208 
2209 
2211 {
2212  if (pint.Get().size() < 2) {
2213  return false;
2214  }
2215  bool any_change = false;
2216 
2217  bool this_change = true;
2218  while (this_change) {
2219  this_change = false;
2220 
2221  // can only swap elements if they have the same strand and Seq-id
2222  CPacked_seqint::Tdata::iterator a = pint.Set().begin();
2223  CPacked_seqint::Tdata::iterator b = a;
2224  b++;
2225  while (b != pint.Set().end()) {
2226  if ((*a)->IsSetId() && (*b)->IsSetId() &&
2227  (*a)->GetId().Equals((*b)->GetId()) &&
2228  (*a)->IsSetFrom() && (*a)->IsSetTo() && (*a)->GetFrom() < (*a)->GetTo() &&
2229  (*b)->IsSetFrom() && (*b)->IsSetTo() && (*b)->GetFrom() < (*b)->GetTo() &&
2230  s_StrandsConsistent(**a, **b)) {
2231  if ((*a)->IsSetStrand() && (*a)->GetStrand() == eNa_strand_minus) {
2232  if ((*b)->GetTo() > (*a)->GetFrom()) {
2233  CRef<CSeq_interval> swp(a->GetPointer());
2234  a->Reset(b->GetPointer());
2235  b->Reset(swp.GetPointer());
2236  this_change = true;
2237  any_change = true;
2238  }
2239  } else {
2240  if ((*b)->GetTo() < (*a)->GetFrom()) {
2241  CRef<CSeq_interval> swp(a->GetPointer());
2242  a->Reset(b->GetPointer());
2243  b->Reset(swp.GetPointer());
2244  this_change = true;
2245  any_change = true;
2246  }
2247  }
2248  }
2249  ++a;
2250  ++b;
2251  }
2252  }
2253  return any_change;
2254 }
2255 
2256 
2257 bool OneIdOneStrand(const CSeq_loc& loc, const CSeq_id** id, ENa_strand& strand)
2258 {
2259  try {
2260  CSeq_loc_CI li(loc);
2261  *id = &(li.GetSeq_id());
2262  strand = li.IsSetStrand() ? li.GetStrand() : eNa_strand_plus;
2263  if (strand == eNa_strand_unknown) {
2264  strand = eNa_strand_plus;
2265  }
2266  if (strand != eNa_strand_plus && strand != eNa_strand_minus) {
2267  return false;
2268  }
2269  ++li;
2270  while (li) {
2271  if (!li.GetSeq_id().Equals(**id)) {
2272  return false;
2273  }
2274  ENa_strand this_strand = li.IsSetStrand() ? li.GetStrand() : eNa_strand_plus;
2275  if (this_strand == eNa_strand_unknown) {
2276  this_strand = eNa_strand_plus;
2277  }
2278  if (this_strand != strand) {
2279  return false;
2280  }
2281  ++li;
2282  }
2283  return true;
2284  } catch (CException&) {
2285  return false;
2286  }
2287 }
2288 
2289 
2291 {
2292  bool any_change = false;
2294  any_change |= CorrectIntervalOrder(**it);
2295  }
2296  if (mix.size() < 2) {
2297  return any_change;
2298  }
2299  bool this_change = true;
2300  while (this_change) {
2301  this_change = false;
2302 
2303  // can only swap elements if they have the same strand and Seq-id
2304  CSeq_loc::TMix::Tdata::iterator a = mix.begin();
2305  CSeq_loc::TMix::Tdata::iterator b = a;
2306  b++;
2307  while (b != mix.end()) {
2308  try {
2309  const CSeq_id* a_id;
2310  const CSeq_id* b_id;
2311  ENa_strand a_strand;
2312  ENa_strand b_strand;
2313  if (OneIdOneStrand(**a, &a_id, a_strand) &&
2314  OneIdOneStrand(**b, &b_id, b_strand) &&
2315  a_id->Equals(*b_id) &&
2316  a_strand == b_strand) {
2317  if (a_strand == eNa_strand_plus) {
2318  if ((*a)->GetStart(eExtreme_Biological) > (*b)->GetStop(eExtreme_Biological)) {
2319  CRef<CSeq_loc> swp(a->GetPointer());
2320  a->Reset(b->GetPointer());
2321  b->Reset(swp.GetPointer());
2322  this_change = true;
2323  any_change = true;
2324  }
2325  } else if (a_strand == eNa_strand_minus) {
2326  if ((*a)->GetStart(eExtreme_Biological) < (*b)->GetStop(eExtreme_Biological)) {
2327  CRef<CSeq_loc> swp(a->GetPointer());
2328  a->Reset(b->GetPointer());
2329  b->Reset(swp.GetPointer());
2330  this_change = true;
2331  any_change = true;
2332  }
2333  }
2334  }
2335  } catch (CException&) {
2336  // not just one id
2337  }
2338  ++a;
2339  ++b;
2340  }
2341  }
2342  return any_change;
2343 }
2344 
2345 
2347 {
2348  bool any_change = false;
2349  switch (loc.Which()) {
2350  case CSeq_loc::e_Bond:
2351  case CSeq_loc::e_Empty:
2352  case CSeq_loc::e_Equiv:
2353  case CSeq_loc::e_Feat:
2354  case CSeq_loc::e_Int:
2355  case CSeq_loc::e_not_set:
2356  case CSeq_loc::e_Null:
2357  case CSeq_loc::e_Pnt:
2358  case CSeq_loc::e_Whole:
2359  // nothing to do
2360  break;
2361  case CSeq_loc::e_Mix:
2362  any_change = CorrectIntervalOrder(loc.SetMix().Set());
2363  break;
2365  any_change = CorrectIntervalOrder(loc.SetPacked_int());
2366  break;
2368  any_change = CorrectIntervalOrder(loc.SetPacked_pnt());
2369  break;
2370  }
2371  return any_change;
2372 }
2373 
2374 
2375 bool IsExtendableLeft(TSeqPos left, const CBioseq& seq, CScope* scope, TSeqPos& extend_len)
2376 {
2377  bool rval = false;
2378  if (left < 3) {
2379  rval = true;
2380  extend_len = left;
2381  } else if (seq.IsSetInst() && seq.GetInst().IsSetRepr() &&
2382  seq.GetInst().GetRepr() == CSeq_inst::eRepr_delta &&
2383  seq.GetInst().IsSetExt() &&
2384  seq.GetInst().GetExt().IsDelta()) {
2385  TSeqPos offset = 0;
2386  TSeqPos last_gap_stop = 0;
2387  ITERATE(CDelta_ext::Tdata, it, seq.GetInst().GetExt().GetDelta().Get()) {
2388  if ((*it)->IsLiteral()) {
2389  offset += (*it)->GetLiteral().GetLength();
2390  if (!(*it)->GetLiteral().IsSetSeq_data()) {
2391  last_gap_stop = offset;
2392  } else if ((*it)->GetLiteral().GetSeq_data().IsGap()) {
2393  last_gap_stop = offset;
2394  }
2395  } else if ((*it)->IsLoc()) {
2396  offset += sequence::GetLength((*it)->GetLoc(), scope);
2397  }
2398  if (offset > left) {
2399  break;
2400  }
2401  }
2402  if (left >= last_gap_stop && left - last_gap_stop <= 3) {
2403  rval = true;
2404  extend_len = left - last_gap_stop;
2405  }
2406  }
2407  return rval;
2408 }
2409 
2410 
2411 bool IsExtendableRight(TSeqPos right, const CBioseq& seq, CScope* scope, TSeqPos& extend_len)
2412 {
2413  bool rval = false;
2414  if (right > seq.GetLength() - 5) {
2415  rval = true;
2416  extend_len = seq.GetLength() - right - 1;
2417  } else if (seq.IsSetInst() && seq.GetInst().IsSetRepr() &&
2418  seq.GetInst().GetRepr() == CSeq_inst::eRepr_delta &&
2419  seq.GetInst().IsSetExt() &&
2420  seq.GetInst().GetExt().IsDelta()) {
2421  TSeqPos offset = 0;
2422  TSeqPos next_gap_start = 0;
2423  ITERATE(CDelta_ext::Tdata, it, seq.GetInst().GetExt().GetDelta().Get()) {
2424  if ((*it)->IsLiteral()) {
2425  if (!(*it)->GetLiteral().IsSetSeq_data()) {
2426  next_gap_start = offset;
2427  } else if ((*it)->GetLiteral().GetSeq_data().IsGap()) {
2428  next_gap_start = offset;
2429  }
2430  offset += (*it)->GetLiteral().GetLength();
2431  } else if ((*it)->IsLoc()) {
2432  offset += sequence::GetLength((*it)->GetLoc(), scope);
2433  }
2434  if (offset > right + 4) {
2435  break;
2436  }
2437  }
2438  if (next_gap_start > right && next_gap_start - right - 1 <= 3) {
2439  rval = true;
2440  extend_len = next_gap_start - right - 1;
2441  }
2442  }
2443  return rval;
2444 }
2445 
2446 
2447 bool AdjustFeatureEnd5(CSeq_feat& cds, vector<CRef<CSeq_feat> > related_features, CScope& scope)
2448 {
2450  return false;
2451  }
2452 
2453  bool rval = false;
2454 
2455  CSeq_loc_CI first_l(cds.GetLocation());
2456  CBioseq_Handle bsh = scope.GetBioseqHandle(first_l.GetSeq_id());
2458 
2460  TSeqPos new_start = start;
2461  TSeqPos extend_len = 0;
2462  bool extendable = false;
2463 
2464  if (!first_l.IsSetStrand() || first_l.GetStrand() != eNa_strand_minus) {
2465  // positive strand
2466  if (start > 0) {
2467  extendable = IsExtendableLeft(start, *seq, &scope, extend_len);
2468  new_start = start - extend_len;
2469  }
2470  } else {
2471  if (start < seq->GetInst().GetLength() - 1) {
2472  extendable = IsExtendableRight(start, *seq, &scope, extend_len);
2473  new_start = start + extend_len;
2474  }
2475  }
2476  if (extendable) {
2477  CRef<CSeq_loc> cds_loc = SeqLocExtend5(cds.GetLocation(), new_start, &scope);
2478  if (cds_loc) {
2479  for (auto it : related_features) {
2480  if (it->GetLocation().GetStart(eExtreme_Biological) == start) {
2481  CRef<CSeq_loc> related_loc = SeqLocExtend5(it->GetLocation(), new_start, &scope);
2482  if (related_loc) {
2483  it->SetLocation().Assign(*related_loc);
2484  if (it->IsSetData() && it->GetData().IsCdregion()) {
2485  AdjustFrameFor5Extension(cds, extend_len);
2486  }
2487  }
2488  }
2489  }
2490  cds.SetLocation().Assign(*cds_loc);
2491  if (cds.IsSetData() && cds.GetData().IsCdregion()) {
2492  AdjustFrameFor5Extension(cds, extend_len);
2493  }
2494 
2495  rval = true;
2496  }
2497  }
2498 
2499  return rval;
2500 }
2501 
2502 
2503 bool AdjustFeatureEnd3(CSeq_feat& cds, vector<CRef<CSeq_feat> > related_features, CScope& scope)
2504 {
2506  return false;
2507  }
2508 
2509  bool rval = false;
2510 
2511  CSeq_loc_CI last_l(cds.GetLocation());
2512  size_t num_intervals = last_l.GetSize();
2513  last_l.SetPos(num_intervals - 1);
2514 
2515  CBioseq_Handle bsh = scope.GetBioseqHandle(last_l.GetSeq_id());
2517 
2519  TSeqPos new_stop = stop;
2520  TSeqPos extend_len = 0;
2521  bool extendable = false;
2522 
2523  if (!last_l.IsSetStrand() || last_l.GetStrand() != eNa_strand_minus) {
2524  // positive strand
2525  if (stop < seq->GetInst().GetLength() - 1) {
2526  extendable = IsExtendableRight(stop, *seq, &scope, extend_len);
2527  new_stop = stop + extend_len;
2528  }
2529  } else {
2530  if (stop > 0) {
2531  extendable = IsExtendableLeft(stop, *seq, &scope, extend_len);
2532  new_stop = stop - extend_len;
2533  }
2534  }
2535  if (extendable) {
2536  CRef<CSeq_loc> cds_loc = SeqLocExtend3(cds.GetLocation(), new_stop, &scope);
2537  if (cds_loc) {
2538  for (auto it : related_features) {
2539  if (it->GetLocation().GetStop(eExtreme_Biological) == stop) {
2540  CRef<CSeq_loc> related_loc = SeqLocExtend3(it->GetLocation(), new_stop, &scope);
2541  if (related_loc) {
2542  it->SetLocation().Assign(*related_loc);
2543  }
2544  }
2545  }
2546  cds.SetLocation().Assign(*cds_loc);
2547  rval = true;
2548  }
2549  }
2550 
2551  return rval;
2552 }
2553 
2554 
2555 bool IsExtendable(const CSeq_feat& cds, CScope& scope)
2556 {
2558  CSeq_loc_CI first_l(cds.GetLocation());
2559  CBioseq_Handle bsh = scope.GetBioseqHandle(first_l.GetSeq_id());
2561  TSeqPos extend_len = 0;
2562  TSeqPos start = first_l.GetRange().GetFrom();
2563  if (IsExtendableLeft(start, *seq, &scope, extend_len) && extend_len > 0) {
2564  return true;
2565  }
2566  }
2568  CSeq_loc_CI last_l(cds.GetLocation());
2569  size_t num_intervals = last_l.GetSize();
2570  last_l.SetPos(num_intervals - 1);
2571  CBioseq_Handle bsh = scope.GetBioseqHandle(last_l.GetSeq_id());
2573 
2575  TSeqPos extend_len = 0;
2576 
2577  if (IsExtendableRight(stop, *seq, &scope, extend_len) && extend_len > 0) {
2578  return true;
2579  }
2580  }
2581  return false;
2582 }
2583 
2584 
2586 {
2587  bool any_change = false;
2588  CFeat_CI f(bsh);
2589  CRef<feature::CFeatTree> tr(new feature::CFeatTree(f));
2590  while (f) {
2591  if (f->GetData().IsCdregion()) {
2592  CMappedFeat gene = tr->GetBestGene(*f);
2593  CMappedFeat mRNA = tr->GetParent(*f, CSeqFeatData::eSubtype_mRNA);
2594  vector<CRef<CSeq_feat> > related_features;
2595  if (gene) {
2596  CRef<CSeq_feat> new_gene(new CSeq_feat());
2597  new_gene->Assign(*(gene.GetOriginalSeq_feat()));
2598  related_features.push_back(new_gene);
2599  }
2600  if (mRNA) {
2601  CRef<CSeq_feat> new_mRNA(new CSeq_feat());
2602  new_mRNA->Assign(*(mRNA.GetOriginalSeq_feat()));
2603  related_features.push_back(new_mRNA);
2604  }
2605  CRef<CSeq_feat> new_cds(new CSeq_feat());
2606  new_cds->Assign(*(f->GetOriginalSeq_feat()));
2607 
2608  const bool adjusted_5prime = AdjustFeatureEnd5(*new_cds, related_features, bsh.GetScope());
2609  const bool adjusted_3prime = AdjustFeatureEnd3(*new_cds, related_features, bsh.GetScope());
2610 
2611  if (adjusted_5prime || adjusted_3prime) {
2612  feature::RetranslateCDS(*new_cds, bsh.GetScope());
2613  CSeq_feat_EditHandle feh(*f);
2614  feh.Replace(*new_cds);
2615  if (gene) {
2616  CSeq_feat_EditHandle geh(gene);
2617  geh.Replace(*(related_features.front()));
2618  }
2619  if (mRNA) {
2620  CSeq_feat_EditHandle meh(mRNA);
2621  meh.Replace(*(related_features.back()));
2622  }
2623  any_change = true;
2624  }
2625  }
2626  ++f;
2627  }
2628  return any_change;
2629 }
2630 
2631 
2632 END_SCOPE(edit)
2635 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CBioseq_Handle –.
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
CCdregion –.
Definition: Cdregion.hpp:66
CFeat_CI –.
Definition: feat_ci.hpp:64
void GetLabel(string *label, TSeqPos pos, bool right=true) const
Definition: Int_fuzz.cpp:61
static CRef< CSeq_loc > ConvertToOrder(const CSeq_loc &orig_loc, bool &changed)
Definition: loc_edit.cpp:694
bool Interpret5Policy(const CSeq_feat &orig_feat, CScope &scope, bool &do_set_5_partial, bool &do_clear_5_partial) const
Definition: loc_edit.cpp:301
static bool HasNulls(const CSeq_loc &orig_loc)
Definition: loc_edit.cpp:657
static bool Extend3(CSeq_feat &feat, CScope &scope)
Definition: loc_edit.cpp:805
static bool Extend5(CSeq_feat &feat, CScope &scope)
Definition: loc_edit.cpp:770
static CRef< CSeq_loc > ConvertToJoin(const CSeq_loc &orig_loc, bool &changed)
Definition: loc_edit.cpp:670
EPartialPolicy m_PartialPolicy3
Definition: loc_edit.hpp:118
static bool Is3AtEndOfSeq(const CSeq_loc &loc, CBioseq_Handle bsh)
Definition: loc_edit.cpp:234
static bool Is5AtEndOfSeq(const CSeq_loc &loc, CBioseq_Handle bsh)
Definition: loc_edit.cpp:216
EPartialPolicy m_PartialPolicy5
Definition: loc_edit.hpp:117
@ ePartialPolicy_eClearForGoodEnd
Definition: loc_edit.hpp:62
EMergePolicy m_MergePolicy
Definition: loc_edit.hpp:121
bool Interpret3Policy(const CSeq_feat &orig_feat, CScope &scope, bool &do_set_3_partial, bool &do_clear_3_partial) const
Definition: loc_edit.cpp:400
bool ApplyPolicyToFeature(CSeq_feat &feat, CScope &scope) const
Definition: loc_edit.cpp:576
CMappedFeat –.
Definition: mapped_feat.hpp:59
bool IsReverseStrand(void) const
void FlipStrand(void)
CScope –.
Definition: scope.hpp:92
ESubtype GetSubtype(void) const
CSeq_annot_Handle –.
CSeq_entry_Handle –.
CSeq_feat_EditHandle –.
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
void FlipStrand(void)
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
bool IsReverseStrand(void) const
void FlipStrand(void)
Definition: Seq_point.cpp:208
Include a standard set of the NCBI C++ Toolkit most basic headers.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static const char * pack(const char *fmt,...)
Definition: genparams.c:254
int offset
Definition: replacements.h:160
static const char location[]
Definition: config.c:97
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
string
Definition: cgiapp.hpp:687
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
CConstRef< CSeq_id > GetSeqId(void) const
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
Definition: Seq_id.cpp:411
@ e_YES
SeqIds compared, but are different.
Definition: Seq_id.hpp:583
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:984
void SetMix(TMix &v)
Definition: Seq_loc.hpp:987
void InvalidateCache(void) const
Combine invalidation of all cached values.
Definition: Seq_loc.hpp:897
void SetPacked_pnt(TPacked_pnt &v)
Definition: Seq_loc.hpp:986
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
CConstRef< CSeq_loc > GetRangeAsSeq_loc(void) const
Get seq-loc for the current iterator position.
Definition: Seq_loc.cpp:2585
void SetPnt(TPnt &v)
Definition: Seq_loc.hpp:985
bool IsSetStrand(void) const
Get strand.
Definition: Seq_loc.hpp:1049
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
Definition: Seq_loc.cpp:3875
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
Definition: Seq_loc.cpp:858
void SetPos(size_t pos)
Set iterator's position.
Definition: Seq_loc.cpp:2642
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
size_t GetSize(void) const
Get number of ranges.
Definition: Seq_loc.cpp:2636
TRange GetRange(void) const
Get the range.
Definition: Seq_loc.hpp:1042
void SetPartialStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)
Definition: Seq_loc.cpp:3280
void SetEquiv(TEquiv &v)
Definition: Seq_loc.hpp:988
ENa_strand GetStrand(void) const
Definition: Seq_loc.hpp:1056
void SetBond(TBond &v)
Definition: Seq_loc.hpp:989
const CSeq_id & GetSeq_id(void) const
Get seq_id of the current location.
Definition: Seq_loc.hpp:1028
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages) label must point to an existing string ob...
Definition: Seq_loc.cpp:3467
void SetPartialStop(bool val, ESeqLocExtremes ext)
Definition: Seq_loc.cpp:3313
void SetNull(void)
Override all setters to incorporate cache invalidation.
Definition: Seq_loc.hpp:960
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
@ fMerge_SingleRange
Definition: Seq_loc.hpp:332
@ fMerge_AbuttingOnly
Definition: Seq_loc.hpp:327
bool RetranslateCDS(const CSeq_feat &cds, CScope &scope)
RetranslateCDS A function to replace the protein Bioseq pointed to by cds.product with the current tr...
Definition: feature.cpp:4121
bool AdjustFeaturePartialFlagForLocation(CSeq_feat &new_feat)
AdjustFeaturePartialFlagForLocation A function to ensure that Seq-feat.partial is set if either end o...
Definition: feature.cpp:3983
bool AdjustForCDSPartials(const CSeq_feat &cds, CSeq_entry_Handle seh)
AdjustForCDSPartials A function to make all of the necessary related changes to a Seq-entry after the...
Definition: feature.cpp:4115
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CRef< CSeq_loc > Seq_loc_Merge(const CSeq_loc &loc, CSeq_loc::TOpFlags flags, CScope *scope)
Merge ranges in the seq-loc.
CRef< CSeq_loc > Seq_loc_Add(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Add two seq-locs.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eSame
CSeq_locs contain each other.
CConstRef< CSeq_feat > GetOverlappingGene(const CSeq_loc &loc, CScope &scope, ETransSplicing eTransSplicing=eTransSplicing_Auto)
Definition: sequence.cpp:1366
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:4095
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
Definition: sequence.hpp:101
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
Definition: scope.cpp:200
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to seq-annot for this feature.
TSeqPos GetBioseqLength(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
TInst_Length GetInst_Length(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
const TInst & GetInst(void) const
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
@ eLim_gt
greater than
Definition: Int_fuzz_.hpp:211
@ eLim_lt
less than
Definition: Int_fuzz_.hpp:212
bool IsSetAnticodon(void) const
location of anticodon Check if a value has been assigned to Anticodon data member.
Definition: Trna_ext_.hpp:637
void SetAnticodon(TAnticodon &value)
Assign a value to Anticodon data member.
Definition: Trna_ext_.cpp:158
void ResetAnticodon(void)
Reset Anticodon data member.
Definition: Trna_ext_.cpp:153
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
bool IsCdregion(void) const
Check if variant Cdregion is selected.
void ResetCode_break(void)
Reset Code_break data member.
Definition: Cdregion_.cpp:80
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
list< CRef< CCode_break > > TCode_break
Definition: Cdregion_.hpp:111
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
TCode_break & SetCode_break(void)
Assign a value to Code_break data member.
Definition: Cdregion_.hpp:739
const TCdregion & GetCdregion(void) const
Get the variant data.
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
Definition: Cdregion_.hpp:733
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
bool IsSetCode_break(void) const
individual exceptions Check if a value has been assigned to Code_break data member.
Definition: Cdregion_.hpp:721
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:509
bool IsSetLocation(void) const
feature made from Check if a value has been assigned to Location data member.
Definition: Seq_feat_.hpp:1105
@ eFrame_not_set
not set, code uses one
Definition: Cdregion_.hpp:95
@ eFrame_three
reading frame
Definition: Cdregion_.hpp:98
bool IsSet(void) const
Check if a value has been assigned to data member.
void SetTo(TTo value)
Assign a value to To data member.
const TFuzz_from & GetFuzz_from(void) const
Get the Fuzz_from member data.
void SetPoint(TPoint value)
Assign a value to Point data member.
Definition: Seq_point_.hpp:312
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_point_.cpp:61
void SetA(TA &value)
Assign a value to A data member.
Definition: Seq_bond_.cpp:60
bool IsSetId(void) const
WARNING: this used to be optional Check if a value has been assigned to Id data member.
Definition: Seq_point_.hpp:378
void ResetFuzz_to(void)
Reset Fuzz_to data member.
bool IsMix(void) const
Check if variant Mix is selected.
Definition: Seq_loc_.hpp:552
bool CanGetFuzz_to(void) const
Check if it is safe to call GetFuzz_to method.
const TB & GetB(void) const
Get the B member data.
Definition: Seq_bond_.hpp:243
list< CRef< CSeq_interval > > Tdata
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
const Tdata & Get(void) const
Get the member data.
bool CanGetStrand(void) const
Check if it is safe to call GetStrand method.
void ResetPoints(void)
Reset Points data member.
TStrand GetStrand(void) const
Get the Strand member data.
const TId & GetId(void) const
Get the Id member data.
bool IsSetPoint(void) const
Check if a value has been assigned to Point data member.
Definition: Seq_point_.hpp:284
bool IsSetPoints(void) const
Check if a value has been assigned to Points data member.
bool IsPacked_pnt(void) const
Check if variant Packed_pnt is selected.
Definition: Seq_loc_.hpp:546
Tdata & Set(void)
Assign a value to data member.
const TPnt & GetPnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:238
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
const TFuzz_to & GetFuzz_to(void) const
Get the Fuzz_to member data.
bool IsSetA(void) const
connection to a least one residue Check if a value has been assigned to A data member.
Definition: Seq_bond_.hpp:201
void SetStrand(TStrand value)
Assign a value to Strand data member.
Definition: Seq_point_.hpp:359
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
TFrom GetFrom(void) const
Get the From member data.
bool CanGetTo(void) const
Check if it is safe to call GetTo method.
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
Definition: Seq_point_.cpp:71
bool IsSetFuzz(void) const
Check if a value has been assigned to Fuzz data member.
Definition: Seq_point_.hpp:408
list< CRef< CSeq_loc > > Tdata
bool IsEquiv(void) const
Check if variant Equiv is selected.
Definition: Seq_loc_.hpp:558
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
const TId & GetId(void) const
Get the Id member data.
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_point_.hpp:390
bool IsSet(void) const
Check if a value has been assigned to data member.
void SetFrom(TFrom value)
Assign a value to From data member.
bool IsBond(void) const
Check if variant Bond is selected.
Definition: Seq_loc_.hpp:564
bool IsSetFuzz(void) const
Check if a value has been assigned to Fuzz data member.
const Tdata & Get(void) const
Get the member data.
const TPacked_pnt & GetPacked_pnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:260
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
const Tdata & Get(void) const
Get the member data.
void ResetFuzz_from(void)
Reset Fuzz_from data member.
const TEquiv & GetEquiv(void) const
Get the variant data.
Definition: Seq_loc_.cpp:304
TPoints & SetPoints(void)
Assign a value to Points data member.
list< CRef< CSeq_loc > > Tdata
vector< TSeqPos > TPoints
virtual void Reset(void)
Reset the whole object.
Definition: Seq_loc_.cpp:59
void SetFuzz_to(TFuzz_to &value)
Assign a value to Fuzz_to data member.
void SetFuzz_from(TFuzz_from &value)
Assign a value to Fuzz_from data member.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
Definition: Seq_loc_.hpp:534
const TA & GetA(void) const
Get the A member data.
Definition: Seq_bond_.hpp:213
bool IsSetTo(void) const
Check if a value has been assigned to To data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
bool CanGetB(void) const
Check if it is safe to call GetB method.
Definition: Seq_bond_.hpp:237
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
Tdata & Set(void)
Assign a value to data member.
bool IsSetFuzz_to(void) const
Check if a value has been assigned to Fuzz_to data member.
void Reset(void)
Reset data member.
Tdata & Set(void)
Assign a value to data member.
void ResetFrom(void)
Reset From data member.
const TPoints & GetPoints(void) const
Get the Points member data.
TStrand GetStrand(void) const
Get the Strand member data.
bool IsSetId(void) const
WARNING: this used to be optional Check if a value has been assigned to Id data member.
TTo GetTo(void) const
Get the To member data.
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
bool IsNull(void) const
Check if variant Null is selected.
Definition: Seq_loc_.hpp:504
void SetB(TB &value)
Assign a value to B data member.
Definition: Seq_bond_.cpp:70
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
bool IsSetFuzz_from(void) const
Check if a value has been assigned to Fuzz_from data member.
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
const TMix & GetMix(void) const
Get the variant data.
Definition: Seq_loc_.cpp:282
void ResetTo(void)
Reset To data member.
bool IsPnt(void) const
Check if variant Pnt is selected.
Definition: Seq_loc_.hpp:540
bool IsSetB(void) const
other end may not be available Check if a value has been assigned to B data member.
Definition: Seq_bond_.hpp:231
const TPacked_int & GetPacked_int(void) const
Get the variant data.
Definition: Seq_loc_.cpp:216
const TBond & GetBond(void) const
Get the variant data.
Definition: Seq_loc_.cpp:326
bool CanGetFrom(void) const
Check if it is safe to call GetFrom method.
bool CanGetFuzz_from(void) const
Check if it is safe to call GetFuzz_from method.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ eNa_strand_both_rev
in reverse orientation
Definition: Na_strand_.hpp:69
@ e_not_set
No variant selected.
Definition: Seq_loc_.hpp:97
@ e_Null
not placed
Definition: Seq_loc_.hpp:98
@ e_Equiv
equivalent sets of locations
Definition: Seq_loc_.hpp:106
@ e_Empty
to NULL one Seq-id in a collection
Definition: Seq_loc_.hpp:99
@ e_Feat
indirect, through a Seq-feat
Definition: Seq_loc_.hpp:108
@ e_Int
from to
Definition: Seq_loc_.hpp:101
@ e_Whole
whole sequence
Definition: Seq_loc_.hpp:100
TRepr GetRepr(void) const
Get the Repr member data.
Definition: Seq_inst_.hpp:565
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
bool IsSetRepr(void) const
Check if a value has been assigned to Repr data member.
Definition: Seq_inst_.hpp:546
bool IsSetExt(void) const
extensions for special types Check if a value has been assigned to Ext data member.
Definition: Seq_inst_.hpp:826
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
Definition: Bioseq_.hpp:324
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
bool IsDelta(void) const
Check if variant Delta is selected.
Definition: Seq_ext_.hpp:336
const TExt & GetExt(void) const
Get the Ext member data.
Definition: Seq_inst_.hpp:838
const TDelta & GetDelta(void) const
Get the variant data.
Definition: Seq_ext_.cpp:180
const Tdata & Get(void) const
Get the member data.
Definition: Delta_ext_.hpp:164
list< CRef< CDelta_seq > > Tdata
Definition: Delta_ext_.hpp:89
@ eRepr_delta
sequence made by changes (delta) to others
Definition: Seq_inst_.hpp:100
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
CRef< CSeq_loc > SeqLocExtend5(const CSeq_loc &loc, TSeqPos pos, CScope *scope)
Definition: loc_edit.cpp:491
bool IsExtendableRight(TSeqPos right, const CBioseq &seq, CScope *scope, TSeqPos &extend_len)
Definition: loc_edit.cpp:2411
bool IsExtendableLeft(TSeqPos left, const CBioseq &seq, CScope *scope, TSeqPos &extend_len)
Definition: loc_edit.cpp:2375
void AdjustFrameFor5Extension(CSeq_feat &feat, size_t diff)
Definition: loc_edit.cpp:741
bool OkToAdjustLoc(const CSeq_interval &interval, const CSeq_id *seqid)
Definition: loc_edit.cpp:1088
void SeqLocAdjustForInsert(CSeq_interval &interval, TSeqPos insert_from, TSeqPos insert_to, const CSeq_id *seqid)
Definition: loc_edit.cpp:1540
void CdregionAdjustForTrim(CCdregion &cdr, TSeqPos from, TSeqPos to, const CSeq_id *seqid)
Definition: loc_edit.cpp:2045
void ReverseComplementCDRegion(CCdregion &cdr, CScope &scope)
Definition: loc_edit.cpp:1048
bool AdjustFeatureEnd3(CSeq_feat &cds, vector< CRef< CSeq_feat > > related_features, CScope &scope)
Definition: loc_edit.cpp:2503
void FeatureAdjustForInsert(CSeq_feat &feat, TSeqPos from, TSeqPos to, const CSeq_id *seqid)
Definition: loc_edit.cpp:2141
bool ExtendPartialFeatureEnds(CBioseq_Handle bsh)
Definition: loc_edit.cpp:2585
bool s_StrandsConsistent(const CSeq_interval &a, const CSeq_interval &b)
Definition: loc_edit.cpp:2194
void ReverseComplementFeature(CSeq_feat &feat, CScope &scope)
Definition: loc_edit.cpp:1068
bool s_PPntCompareMinus(const TSeqPos &p1, const TSeqPos &p2)
Definition: loc_edit.cpp:2168
string PrintSeqIntUseBestID(const CSeq_interval &seq_int, CScope &scope, bool range_only)
Definition: loc_edit.cpp:82
void TrnaAdjustForTrim(CTrna_ext &trna, TSeqPos from, TSeqPos to, const CSeq_id *seqid)
Definition: loc_edit.cpp:2071
void SeqLocAdjustForTrim(CSeq_interval &interval, TSeqPos cut_from, TSeqPos cut_to, const CSeq_id *seqid, bool &bCompleteCut, TSeqPos &trim5, bool &bAdjusted)
Definition: loc_edit.cpp:1207
void NormalizeLoc(CSeq_loc &loc)
Definition: loc_edit.cpp:1124
bool CorrectIntervalOrder(CPacked_seqpnt &ppnt)
Definition: loc_edit.cpp:2174
bool AdjustFeatureEnd5(CSeq_feat &cds, vector< CRef< CSeq_feat > > related_features, CScope &scope)
Definition: loc_edit.cpp:2447
string PrintBestSeqId(const CSeq_id &sid, CScope &scope)
Definition: loc_edit.cpp:64
void TrnaAdjustForInsert(CTrna_ext &trna, TSeqPos from, TSeqPos to, const CSeq_id *seqid)
Definition: loc_edit.cpp:2131
bool OneIdOneStrand(const CSeq_loc &loc, const CSeq_id **id, ENa_strand &strand)
Definition: loc_edit.cpp:2257
void ReverseComplementLocation(CSeq_interval &interval, CScope &scope)
Definition: loc_edit.cpp:893
bool ApplyPolicyToFeature(const CLocationEditPolicy &policy, const CSeq_feat &orig_feat, CScope &scope, bool adjust_gene, bool retranslate_cds)
Definition: loc_edit.cpp:835
bool IsExtendable(const CSeq_feat &cds, CScope &scope)
Definition: loc_edit.cpp:2555
string SeqLocPrintUseBestID(const CSeq_loc &seq_loc, CScope &scope, bool range_only)
Definition: loc_edit.cpp:153
CRef< CSeq_loc > SeqLocExtend3(const CSeq_loc &loc, TSeqPos pos, CScope *scope)
Definition: loc_edit.cpp:517
string PrintPntAndPntsUseBestID(const CSeq_loc &seq_loc, CScope &scope, bool range_only)
Definition: loc_edit.cpp:131
bool s_PPntComparePlus(const TSeqPos &p1, const TSeqPos &p2)
Definition: loc_edit.cpp:2162
void FeatureAdjustForTrim(CSeq_feat &feat, TSeqPos from, TSeqPos to, const CSeq_id *seqid, bool &bCompleteCut, bool &bTrimmed)
Definition: loc_edit.cpp:2087
void CdregionAdjustForInsert(CCdregion &cdr, TSeqPos from, TSeqPos to, const CSeq_id *seqid)
Definition: loc_edit.cpp:2114
static const string strand_symbol[]
Definition: loc_edit.cpp:81
void ReverseComplementTrna(CTrna_ext &trna, CScope &scope)
Definition: loc_edit.cpp:1060
CRef< CSeq_interval > SplitLocationForGap(CSeq_interval &before, TSeqPos start, TSeqPos stop, const CSeq_id *seqid, bool &cut, unsigned int options)
Definition: loc_edit.cpp:1728
CRef< CSeq_loc > SeqLocExtend(const CSeq_loc &loc, size_t pos, CScope *scope)
Definition: loc_edit.cpp:546
@ eSplitLocOption_split_in_intron
Definition: loc_edit.hpp:163
@ eSplitLocOption_make_partial
Definition: loc_edit.hpp:161
@ eSplitLocOption_split_in_exon
Definition: loc_edit.hpp:162
Definition: fix_pub.hpp:45
unsigned int a
Definition: ncbi_localip.c:102
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
Generic utility macros and templates for exploring NCBI objects.
bool seq_mac_is_sorted(Iter first, Iter last, Comp comp)
#define const
Definition: zconf.h:232
Modified on Wed Apr 17 13:10:02 2024 by modify_doxy.py rev. 669887