NCBI C++ ToolKit
feature_edit.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 
2 /* $Id: feature_edit.cpp 90252 2020-05-28 11:20:32Z foleyjp $
3 * ===========================================================================
4 *
5 * PUBLIC DOMAIN NOTICE
6 * National Center for Biotechnology Information
7 *
8 * This software/database is a "United States Government Work" under the
9 * terms of the United States Copyright Act. It was written as part of
10 * the author's official duties as a United States Government employee and
11 * thus cannot be copyrighted. This software/database is freely available
12 * to the public for use. The National Library of Medicine and the U.S.
13 * Government have not placed any restriction on its use or reproduction.
14 *
15 * Although all reasonable efforts have been taken to ensure the accuracy
16 * and reliability of the software and data, the NLM and the U.S.
17 * Government do not and cannot warrant the performance or results that
18 * may be obtained by using this software or data. The NLM and the U.S.
19 * Government disclaim all warranties, express or implied, including
20 * warranties of performance, merchantability or fitness for any particular
21 * purpose.
22 *
23 * Please cite the author in any work or product based on this material.
24 *
25 * ===========================================================================
26 *
27 * Author: Justin Foley, NCBI
28 *
29 * File Description:
30 * Feature trimming code
31 */
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <objmgr/mapped_feat.hpp>
36 
41 
44 BEGIN_SCOPE(sequence)
45 
46 
48 {
49  SOutsideRange(const CRange<TSeqPos>& range) : m_Range(range) {}
50 
51  bool operator()(const CRef<CCode_break>& code_break) {
52  CRange<TSeqPos> cb_range = code_break->GetLoc().GetTotalRange();
53  return cb_range.IntersectionWith(m_Range).Empty();
54  }
56 };
57 
58 
60  const CRange<TSeqPos>& range)
61 {
62  CRef<CCode_break> trimmed_cb;
63 
64  if (code_break.GetLoc().GetTotalRange().IntersectionWith(range).NotEmpty())
65  {
66  trimmed_cb = Ref(new CCode_break());
67  trimmed_cb->Assign(code_break);
68  const auto strand = code_break.GetLoc().GetStrand();
69  // Trim the 3' end - RW-301
70  if (strand != eNa_strand_minus) {
71  const TSeqPos to = range.GetTo();
72  const TSeqPos cb_to = code_break.GetLoc().GetTotalRange().GetTo();
73  if (cb_to > to) {
74  x_TrimCodeBreak(0, to, *trimmed_cb);
75  }
76 
77  }
78  else { // strand == eNa_strand_minus
79  const TSeqPos from = range.GetFrom();
80  const TSeqPos cb_from = code_break.GetLoc().GetTotalRange().GetFrom();
81  if (cb_from < from) {
82  x_TrimCodeBreak(from, kMax_UInt, *trimmed_cb);
83  }
84  }
85  }
86  return trimmed_cb;
87 }
88 
89 
91  const CRange<TSeqPos>& range)
92 {
93  auto trimmed_ext = Ref(new CTrna_ext());
95  {
96  trimmed_ext->Assign(trna_ext);
97  x_TrimTrnaExt(range.GetFrom(), range.GetTo(), *trimmed_ext);
98  }
99  return trimmed_ext;
100 }
101 
102 
104  const CRange<TSeqPos>& range)
105 {
106  const bool set_partial = true;
107  const TSeqPos from = range.GetFrom();
108  const TSeqPos to = range.GetTo();
109 
110  CRef<CSeq_loc> trimmed_loc(new CSeq_loc());
111  trimmed_loc->Assign(loc);
112 
113  x_TrimLocation(from, to, set_partial, trimmed_loc);
114 
115  return trimmed_loc;
116 }
117 
118 
120  const CRange<TSeqPos>& range)
121 {
122  CRef<CSeq_loc> loc = Ref(new CSeq_loc());
123  loc->Assign(feat.GetLocation());
124 
125  const TSeqPos from = range.GetFrom();
126  const TSeqPos to = range.GetTo();
127 
128  const bool set_partial = true;
129 
130  x_TrimLocation(from, to, set_partial, loc);
131  if (loc->IsNull()) {
132  return Ref(new CSeq_feat());
133  }
134 
135  // Create a new seq-feat with the trimmed location
136  CRef<CSeq_feat> new_sf(new CSeq_feat());
137  new_sf->Assign(feat);
138  new_sf->SetLocation(*loc);
139  if (!loc->IsNull() &&
142  new_sf->SetPartial(true);
143  }
144 
145 
146  // If Cdregion need to consider changes in frameshift
147  if (new_sf->GetData().IsCdregion()) {
148  const TSeqPos offset = x_GetStartOffset(feat, from, to);
149  x_UpdateFrame(offset, new_sf->SetData().SetCdregion());
150 
151  if (new_sf->SetData().SetCdregion().IsSetCode_break()) {
152  // iterate over code breaks and remove if they fall outside the range
153  list<CRef<CCode_break>>& code_breaks = new_sf->SetData().SetCdregion().SetCode_break();
154  //code_breaks.remove_if(SOutsideRange(from,to));
155  code_breaks.remove_if(SOutsideRange(range));
156  if (code_breaks.empty()) {
157  new_sf->SetData().SetCdregion().ResetCode_break();
158  }
159  else {
160  const auto strand = loc->GetStrand();
161  // Trim the 3' end - RW-301
162  if (strand != eNa_strand_minus) {
163  for (auto code_break : code_breaks) {
164  const TSeqPos cb_to = code_break->GetLoc().GetTotalRange().GetTo();
165  if (cb_to > to) {
166  x_TrimCodeBreak(0, to, *code_break);
167  }
168  }
169  }
170  else { // strand == eNa_strand_minus
171  for (auto code_break : code_breaks) {
172  const TSeqPos cb_from = code_break->GetLoc().GetTotalRange().GetFrom();
173  if (cb_from < from) {
174  x_TrimCodeBreak(from, kMax_UInt, *code_break);
175  }
176  }
177  }
178  }
179  }
180  }
181  else
182  if (new_sf->GetData().GetSubtype() == CSeqFeatData::eSubtype_tRNA) {
183  auto& rna = new_sf->SetData().SetRna();
184  if (rna.IsSetExt() && rna.GetExt().IsTRNA()) {
185  x_TrimTrnaExt(from, to, rna.SetExt().SetTRNA());
186  }
187  }
188  return new_sf;
189 }
190 
191 
192 void CFeatTrim::x_TrimCodeBreak(const TSeqPos from, const TSeqPos to,
193  CCode_break& code_break)
194 {
195  const bool not_partial = false;
196  CRef<CSeq_loc> cb_loc(new CSeq_loc());
197  cb_loc->Assign(code_break.GetLoc());
198  x_TrimLocation(from, to, not_partial, cb_loc);
199  code_break.ResetLoc();
200  code_break.SetLoc(*cb_loc);
201 }
202 
203 
204 void CFeatTrim::x_TrimLocation(const TSeqPos from, const TSeqPos to,
205  const bool set_partial,
206  CRef<CSeq_loc>& loc)
207 {
208  if (loc.IsNull()) {
209  return;
210  }
211 
212  bool partial_start = false;
213  bool partial_stop = false;
214  const auto strand = loc->GetStrand();
215 
216 
217  for(CSeq_loc_CI loc_it(*loc); loc_it; ++loc_it) {
218 
219  const auto& current_range = loc_it.GetRange();
220  const auto current_from = current_range.GetFrom();
221  const auto current_to = current_range.GetTo();
222 
223  CRef<CSeq_id> current_seqid = Ref(new CSeq_id());
224  current_seqid->Assign(loc_it.GetSeq_id());
225 
226  // May be able to do this more succinctly and efficiently using CSeq_loc::Intersect
227  if ((current_to < from) ||
228  (current_from > to)) {
229  CRef<CSeq_loc> trim(new CSeq_loc(*current_seqid,
230  current_from,
231  current_to,
232  strand));
233 
234  loc = loc->Subtract(*trim, 0, NULL, NULL);
235  if (current_to < from) {
236  partial_start = true;
237  }
238  if (current_from > to) {
239  partial_stop = true;
240  }
241  continue;
242  }
243 
244  if (current_from < from) {
245  CRef<CSeq_loc> trim(new CSeq_loc(*current_seqid,
246  current_from,
247  from-1,
248  strand));
249 
250  loc = loc->Subtract(*trim, 0, NULL, NULL);
251  partial_start = true;
252  }
253 
254  if (current_to > to) {
255  CRef<CSeq_loc> trim(new CSeq_loc(*current_seqid,
256  to+1,
257  current_to,
258  strand));
259 
260  loc = loc->Subtract(*trim, 0, NULL, NULL);
261  partial_stop = true;
262  }
263  }
264 
265  if (loc->IsNull() || !set_partial) {
266  return;
267  }
268 
269 
270  if (strand == eNa_strand_minus) {
271  swap(partial_start, partial_stop);
272  }
273 
274 
275  if (partial_start) {
277  }
278 
279  if (partial_stop) {
281  }
282 }
283 
284 
285 static TSeqPos s_GetTrimmedLength(const CSeq_loc& trimmed_loc)
286 {
287 
288  if (trimmed_loc.IsEmpty() || trimmed_loc.IsNull()) {
289  return 0;
290  }
291 
292  if (trimmed_loc.IsPnt()) {
293  return 1;
294  }
295 
296  if (trimmed_loc.IsInt()) {
297  return trimmed_loc.GetInt().GetLength();
298  }
299 
300  if (trimmed_loc.IsPacked_int()) {
301  TSeqPos length=0;
302  for (auto pSubInt : trimmed_loc.GetPacked_int().Get()) {
303  length += pSubInt->GetLength();
304  }
305  return length;
306  }
307 
308  if (trimmed_loc.IsPacked_pnt()) {
309  return trimmed_loc.GetPacked_pnt().GetPoints().size();
310  }
311 
312  if (trimmed_loc.IsMix()) {
313  TSeqPos length=0;
314  for (auto pSubLoc : trimmed_loc.GetMix().Get()) {
315  length += s_GetTrimmedLength(*pSubLoc);
316  }
317  return length;
318  }
319 
320  return 0;
321 }
322 
323 static TSeqPos s_GetTrimmedLength(const CSeq_loc& loc, TSeqPos from, TSeqPos to)
324 {
325  auto pTrimmedInt = Ref(new CSeq_loc());
326  CSeq_loc_CI loc_it(loc);
327  pTrimmedInt->SetInt().SetId().Assign(loc_it.GetSeq_id());
328  pTrimmedInt->SetInt().SetFrom(from);
329  pTrimmedInt->SetInt().SetTo(to);
330  auto pTrimmedLoc = loc.Intersect(*pTrimmedInt, CSeq_loc::fStrand_Ignore, nullptr);
331  if (pTrimmedLoc) {
332  return s_GetTrimmedLength(*pTrimmedLoc);
333  }
334  return 0;
335 }
336 
337 
339  TSeqPos from, TSeqPos to)
340 {
341  TSeqPos offset = 0;
342  const auto strand = feat.GetLocation().GetStrand();
343  CRange<TSeqPos> feat_range = feat.GetLocation().GetTotalRange();
344 
345  if (strand != eNa_strand_minus) {
346  TSeqPos feat_from = feat_range.GetFrom();
347  if (feat_from < from) {
348  if (feat.GetLocation().IsInt()) {
349  return (from - feat_from);
350  }
351  return s_GetTrimmedLength(feat.GetLocation(), feat_from, from-1);
352  }
353  }
354  else { // eNa_strand_minus
355  TSeqPos feat_to = feat_range.GetTo();
356  if (feat_to > to) {
357  if (feat.GetLocation().IsInt()) {
358  return (feat_to - to);
359  }
360  return s_GetTrimmedLength(feat.GetLocation(), to+1, feat_to);
361  }
362  }
363  return offset;
364 }
365 
366 
368 {
369  switch(cds.GetFrame()) {
372  return 0;
374  return 1;
376  return 2;
377  default:
378  return 0;
379 
380  }
381  return 0;
382 }
383 
384 
386 {
387  const TSeqPos offset = x_GetStartOffset(cds_feature, range.GetFrom(), range.GetTo());
388  return x_GetNewFrame(offset, cds_feature.GetData().GetCdregion());
389 }
390 
391 
393 {
394 
395  const TSeqPos frame_change = offset%3;
396  if (!frame_change) {
397  return cdregion.GetFrame();
398  }
399 
400  const TSeqPos old_frame = x_GetFrame(cdregion);
401 
402  // RW-1098
403  const TSeqPos new_frame = 3 - ((3 + offset - old_frame)%3);
404  // Note new_frame, thus defined, takes values 1,2,3,
405  // whereas old_frame takes values 0,1,2.
406  // However, 0 == 3 in modulo 3 arithmetic.
407  if (new_frame == 1) {
408  return CCdregion::eFrame_two;
409  }
410  if (new_frame == 2) {
412  }
413  return CCdregion::eFrame_one;
414 }
415 
416 
418 {
419  const TSeqPos frame_change = offset%3;
420  if (!frame_change) {
421  return;
422  }
423 
424  cdregion.ResetFrame();
425  cdregion.SetFrame(x_GetNewFrame(offset, cdregion));
426 }
427 
428 
429 void CFeatTrim::x_TrimTrnaExt(const TSeqPos from, const TSeqPos to, CTrna_ext& ext)
430 {
431  if (!ext.IsSetAnticodon()) {
432  return;
433  }
434 
435  CRange<TSeqPos> ac_range = ext.GetAnticodon().GetTotalRange();
436 
437  const TSeqPos ac_from = ac_range.GetFrom();
438  const TSeqPos ac_to = ac_range.GetTo();
439 
440  if (from <= ac_from && to >= ac_to) {
441  return;
442  }
443 
444  if (from > ac_to || to < ac_from) {
445  ext.ResetAnticodon();
446  return;
447  }
448 
449  const bool set_partial=true;
450  // else there is some overlap
451  CRef<CSeq_loc> loc(new CSeq_loc());
452  loc->Assign(ext.GetAnticodon());
453  x_TrimLocation(from, to, set_partial, loc);
454  ext.ResetAnticodon();
455  ext.SetAnticodon(*loc);
456 
457  return;
458 }
459 
460 
461 END_SCOPE(sequence)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
CCdregion –.
Definition: Cdregion.hpp:66
CCode_break –.
Definition: Code_break.hpp:66
static CRef< CSeq_feat > Apply(const CSeq_feat &feat, const CRange< TSeqPos > &range)
static void x_TrimCodeBreak(TSeqPos from, TSeqPos to, CCode_break &cod_break)
static CCdregion::EFrame GetCdsFrame(const CSeq_feat &cds_feature, const CRange< TSeqPos > &range)
static TSeqPos x_GetStartOffset(const CSeq_feat &feat, TSeqPos from, TSeqPos to)
static TSeqPos x_GetFrame(const CCdregion &cdregion)
static CCdregion::EFrame x_GetNewFrame(TSeqPos offset, const CCdregion &region)
static void x_TrimTrnaExt(TSeqPos from, TSeqPos to, CTrna_ext &ext)
static void x_UpdateFrame(TSeqPos offset, CCdregion &cdregion)
static void x_TrimLocation(TSeqPos from, TSeqPos to, bool set_partial, CRef< CSeq_loc > &loc)
ESubtype GetSubtype(void) const
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
TSeqPos GetLength(void) const
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
Include a standard set of the NCBI C++ Toolkit most basic headers.
static TSeqPos s_GetTrimmedLength(const CSeq_loc &trimmed_loc)
int offset
Definition: replacements.h:160
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define NULL
Definition: ncbistd.hpp:225
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
CRef< CSeq_loc > Subtract(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper, ILengthGetter *len_getter) const
Subtract seq-loc from this, merge/sort resulting ranges depending on flags.
Definition: Seq_loc.cpp:5087
void SetPartialStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)
Definition: Seq_loc.cpp:3280
CRef< CSeq_loc > Intersect(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper) const
Find the intersection with the seq-loc, merge/sort resulting ranges depending on flags.
Definition: Seq_loc.cpp:5183
const CSeq_id & GetSeq_id(void) const
Get seq_id of the current location.
Definition: Seq_loc.hpp:1028
void SetPartialStop(bool val, ESeqLocExtremes ext)
Definition: Seq_loc.cpp:3313
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
@ fStrand_Ignore
Definition: Seq_loc.hpp:325
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
#define kMax_UInt
Definition: ncbi_limits.h:185
bool NotEmpty(void) const
Definition: range.hpp:152
TThisType IntersectionWith(const TThisType &r) const
Definition: range.hpp:312
bool Empty(void) const
Definition: range.hpp:148
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
const TAnticodon & GetAnticodon(void) const
Get the Anticodon member data.
Definition: Trna_ext_.hpp:649
bool IsSetAnticodon(void) const
location of anticodon Check if a value has been assigned to Anticodon data member.
Definition: Trna_ext_.hpp:637
void SetAnticodon(TAnticodon &value)
Assign a value to Anticodon data member.
Definition: Trna_ext_.cpp:158
void ResetAnticodon(void)
Reset Anticodon data member.
Definition: Trna_ext_.cpp:153
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
bool IsCdregion(void) const
Check if variant Cdregion is selected.
const TLoc & GetLoc(void) const
Get the Loc member data.
void SetPartial(TPartial value)
Assign a value to Partial data member.
Definition: Seq_feat_.hpp:971
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
void ResetLoc(void)
Reset Loc data member.
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void ResetFrame(void)
Reset Frame data member.
Definition: Cdregion_.hpp:521
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const TCdregion & GetCdregion(void) const
Get the variant data.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
void SetFrame(TFrame value)
Assign a value to Frame data member.
Definition: Cdregion_.hpp:540
@ eFrame_not_set
not set, code uses one
Definition: Cdregion_.hpp:95
@ eFrame_three
reading frame
Definition: Cdregion_.hpp:98
bool IsMix(void) const
Check if variant Mix is selected.
Definition: Seq_loc_.hpp:552
bool IsEmpty(void) const
Check if variant Empty is selected.
Definition: Seq_loc_.hpp:516
const Tdata & Get(void) const
Get the member data.
bool IsPacked_pnt(void) const
Check if variant Packed_pnt is selected.
Definition: Seq_loc_.hpp:546
const Tdata & Get(void) const
Get the member data.
const TPacked_pnt & GetPacked_pnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:260
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
Definition: Seq_loc_.hpp:534
const TPoints & GetPoints(void) const
Get the Points member data.
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
bool IsNull(void) const
Check if variant Null is selected.
Definition: Seq_loc_.hpp:504
const TMix & GetMix(void) const
Get the variant data.
Definition: Seq_loc_.cpp:282
bool IsPnt(void) const
Check if variant Pnt is selected.
Definition: Seq_loc_.hpp:540
const TPacked_int & GetPacked_int(void) const
Get the variant data.
Definition: Seq_loc_.cpp:216
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
range(_Ty, _Ty) -> range< _Ty >
SOutsideRange(const CRange< TSeqPos > &range)
bool operator()(const CRef< CCode_break > &code_break)
CRange< TSeqPos > m_Range
Modified on Wed Apr 17 13:08:05 2024 by modify_doxy.py rev. 669887