NCBI C++ ToolKit
recompute_intervals.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: recompute_intervals.cpp 47479 2023-05-02 13:24:02Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Igor Filippov
27  */
28 
29 
30 #include <ncbi_pch.hpp>
32 
35 
36 void CRecomputeIntervals::ExtendIntervalToEnd (objects::CSeq_interval& ival, size_t len)
37 {
38  if (ival.IsSetStrand() && ival.GetStrand() == objects::eNa_strand_minus) {
39  if (ival.GetFrom() > 3) {
40  ival.SetFrom(ival.GetFrom() - 3);
41  } else {
42  ival.SetFrom(0);
43  }
44  } else {
45  if (ival.GetTo() < len - 4) {
46  ival.SetTo(ival.GetTo() + 3);
47  } else {
48  ival.SetTo(static_cast<CSeq_interval::TTo>(len - 1));
49  }
50  }
51 }
52 
54 {
 // Builds a composite "Recompute Intervals" command. For every feature
 // visited through `fi`, the feature's product is aligned against the whole
 // sequence the feature lies on (ProSplign); if the location derived from
 // that alignment differs from the current one, a change command for the
 // feature is queued. Returns `cmd`, which is reset when nothing was queued.
 //
 // NOTE(review): the function signature, the declaration of `cmd` and the
 // loop header that defines `fi` are not visible in this listing; the
 // comments below describe only the statements that are shown.

 // Invalid entry handle: hand back `cmd` as it is.
56  if (!tse)
57  return cmd;
58  cmd.Reset(new CCmdComposite("Recompute Intervals"));
59  CScope& scope = tse.GetScope();
 // Set as soon as one feature change has been queued.
60  bool modified = false;
61 
63  {
 // Work on a copy; the original feature is only read.
64  const CSeq_feat& cds = fi->GetOriginalFeature();
65  CRef<CSeq_feat> new_feat(new CSeq_feat());
66  new_feat->Assign(cds);
 // Skip features whose product cannot be resolved to a bioseq in this scope.
67  CBioseq_Handle prot_bsh = scope.GetBioseqHandle(cds.GetProduct());
68  if (!prot_bsh)
69  continue;
70 
71  CProSplign prosplign(CProSplignScoring(), false, true, false, false);
72 
 // Bioseq the feature location refers to.
 // NOTE(review): `bsh` is used below without a validity check - confirm
 // that the lookup cannot fail here.
73  CBioseq_Handle bsh = scope.GetBioseqHandle(cds.GetLocation());
74 
 // Location spanning that bioseq from position 0 to length - 1; the product
 // is aligned against all of it.
75  CRef<objects::CSeq_id> seq_id(new objects::CSeq_id());
76  seq_id->Assign(*(bsh.GetSeqId()));
77  CRef<objects::CSeq_loc> match_loc(new objects::CSeq_loc(*seq_id, 0, bsh.GetBioseqLength() - 1));
78 
 // An exception from the aligner is treated as "no alignment": the reference
 // is cleared and the feature is skipped further down.
79  CRef<objects::CSeq_align> alignment;
80  try
81  {
82  alignment = prosplign.FindAlignment(scope, *(prot_bsh.GetSeqId()), *match_loc, CProSplignOutputOptions(CProSplignOutputOptions::ePassThrough));
83  }
84  catch(exception &e)
85  {
86  alignment.Reset();
87  }
88 
 // Rebuild the location from the spliced alignment, if there is one.
89  CRef<objects::CSeq_loc> cds_loc(new objects::CSeq_loc());
90  bool found_start_codon = false;
91  bool found_stop_codon = false;
92  if (alignment && alignment->IsSetSegs() && alignment->GetSegs().IsSpliced())
93  {
94  CRef<objects::CSeq_id> seq_id (new objects::CSeq_id());
95  seq_id->Assign(*match_loc->GetId());
 // One location per exon (genomic start..end, strand copied when set),
 // appended to a mix in the order the alignment lists the exons.
96  ITERATE (objects::CSpliced_seg::TExons, exon_it, alignment->GetSegs().GetSpliced().GetExons())
97  {
98  CRef<objects::CSeq_loc> exon(new objects::CSeq_loc(*seq_id, (*exon_it)->GetGenomic_start(), (*exon_it)->GetGenomic_end()));
99  if ((*exon_it)->IsSetGenomic_strand())
100  {
101  exon->SetStrand((*exon_it)->GetGenomic_strand());
102  }
103  cds_loc->SetMix().Set().push_back(exon);
104  }
 // Pick up the start/stop codon flags from the alignment's modifiers.
105  ITERATE (objects::CSpliced_seg::TModifiers, mod_it, alignment->GetSegs().GetSpliced().GetModifiers())
106  {
107  if ((*mod_it)->IsStart_codon_found())
108  {
109  found_start_codon = (*mod_it)->GetStart_codon_found();
110  }
111  if ((*mod_it)->IsStop_codon_found())
112  {
113  found_stop_codon = (*mod_it)->GetStop_codon_found();
114  }
115  }
116  }
117 
 // cds_loc only becomes a mix when at least one exon was appended above;
 // otherwise there is nothing to recompute for this feature.
118  if (!cds_loc->IsMix())
119  {
120  continue;
121  }
122  else
123  {
 // A single exon: replace the one-element mix with a copy of that element.
124  if (cds_loc->GetMix().Get().size() == 1)
125  {
126  CRef<objects::CSeq_loc> exon = cds_loc->SetMix().Set().front();
127  cds_loc->Assign(*exon);
128  }
129  }
130 
 // No start codon reported: mark the biological start as partial.
131  if (!found_start_codon)
132  {
133  cds_loc->SetPartialStart(true, objects::eExtreme_Biological);
134  }
135  if (found_stop_codon)
136  {
137  // extend to cover stop codon
 // Only the last element of the mix (or the single location) is extended.
 // NOTE(review): SetInt() presumes that element is an interval - confirm.
138  size_t len = bsh.GetInst_Length();
139  if (cds_loc->IsMix())
140  {
141  ExtendIntervalToEnd(cds_loc->SetMix().Set().back()->SetInt(), len);
142  }
143  else
144  {
145  ExtendIntervalToEnd(cds_loc->SetInt(), len);
146  }
147  }
148  else
149  {
 // No stop codon reported: mark the biological stop as partial.
150  cds_loc->SetPartialStop(true, objects::eExtreme_Biological);
151  }
152 
 // Recomputed location equals the current one: nothing to change.
153  if (sequence::Compare(cds.GetLocation(), *cds_loc, &scope, 0) == sequence::eSame)
154  continue;
155 
 // Queue the replacement of the feature with the relocated copy.
156  new_feat->SetLocation(*cds_loc);
158  cmd->AddCommand(*CRef<CCmdChangeSeq_feat>(new CCmdChangeSeq_feat(fi->GetSeq_feat_Handle(), *new_feat)));
159  modified = true;
160 
161  if (update_genes)
162  {
 // NOTE(review): `gene` is defined on a line missing from this listing
 // (presumably an overlapping-gene lookup - confirm against the real file).
164  if (gene)
165  {
 // The copied gene gets a single interval covering the total range of the
 // new location, with the same strand and the same biological partial
 // start/stop flags.
166  CRef<objects::CSeq_feat> new_gene(new CSeq_feat);
167  new_gene->Assign(*gene);
168  objects::CSeq_loc::TRange range = cds_loc->GetTotalRange();
169  CRef<objects::CSeq_loc> new_gene_loc(new objects::CSeq_loc);
170  CRef<objects::CSeq_id> id(new objects::CSeq_id);
171  id->Assign(*(cds_loc->GetId()));
172  CRef<objects::CSeq_interval> new_int(new objects::CSeq_interval(*id, range.GetFrom(), range.GetTo(), cds_loc->GetStrand()));
173  new_gene_loc->SetInt(*new_int);
174  new_gene_loc->SetPartialStart(cds_loc->IsPartialStart(eExtreme_Biological), objects::eExtreme_Biological);
175  new_gene_loc->SetPartialStop(cds_loc->IsPartialStop(eExtreme_Biological), objects::eExtreme_Biological);
176  new_gene->SetLocation().Assign(*new_gene_loc);
 // The gene's partial flag is set when either positional end is partial.
177  new_gene->SetPartial(new_gene->GetLocation().IsPartialStart(eExtreme_Positional) || new_gene->GetLocation().IsPartialStop(eExtreme_Positional));
178  cmd->AddCommand(*CRef< CCmdChangeSeq_feat >(new CCmdChangeSeq_feat(scope.GetSeq_featHandle(*gene),*new_gene)));
179  }
180  }
181  }
 // Nothing was queued: return a reset reference instead of an empty
 // composite command.
182  if (!modified)
183  cmd.Reset();
184  return cmd;
185 }
186 
187 
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
CLocalRange< TOffset > TRange
define for the fundamental building block of sequence ranges
Definition: base.hpp:115
CBioseq_Handle –.
CFeat_CI –.
Definition: feat_ci.hpp:64
Output filtering parameters.
Definition: prosplign.hpp:156
@ ePassThrough
all zeroes - no filtering
Definition: prosplign.hpp:162
spliced protein to genomic alignment
Definition: prosplign.hpp:299
CRef< objects::CSeq_align > FindAlignment(objects::CScope &scope, const objects::CSeq_id &protein, const objects::CSeq_loc &genomic, CProSplignOutputOptions output_options=CProSplignOutputOptions())
Aligns protein to a region on genomic sequence.
Definition: prosplign.hpp:326
static void ExtendIntervalToEnd(objects::CSeq_interval &ival, size_t len)
static CRef< CCmdComposite > apply(CSeq_entry_Handle tse, bool update_genes)
CScope –.
Definition: scope.hpp:92
CSeq_entry_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
@ eSame
CSeq_locs contain each other.
CConstRef< CSeq_feat > GetOverlappingGene(const CSeq_loc &loc, CScope &scope, ETransSplicing eTransSplicing=eTransSplicing_Auto)
Definition: sequence.cpp:1366
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
Definition: scope.cpp:200
TSeqPos GetBioseqLength(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
TInst_Length GetInst_Length(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
void SetPartial(TPartial value)
Assign a value to Partial data member.
Definition: Seq_feat_.hpp:971
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
int len
range(_Ty, _Ty) -> range< _Ty >
#define fi
USING_SCOPE(objects)
Modified on Tue Apr 23 07:40:30 2024 by modify_doxy.py rev. 669887