NCBI C++ ToolKit
retranslate_cds.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: retranslate_cds.cpp 43629 2019-08-09 19:00:33Z filippov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Igor Filippov
27  */
28 #include <ncbi_pch.hpp>
29 #include <objmgr/feat_ci.hpp>
31 #include <objmgr/util/sequence.hpp>
40 #include <wx/msgdlg.h>
41 
43 
46 
// Builds one composite command that retranslates the coding regions found in seh.
//
// seh    - entry whose coding regions are processed.
// title  - label given to the returned composite command.
// method - selects the per-CDS pre-processing done in the switch below.
// error  - set to "No coding regions found!" when no objects are collected;
//          not written on any other path visible here.
//
// Returns a null CRef when no objects are found, when method reaches the
// default branch, or when no per-CDS subcommand was produced.
//
// NOTE(review): this listing has lost several lines (the case labels of the
// switch and the declarations of orig_frame, new_frame and fh). The notes
// below mark each gap; confirm against the real source before editing.
47 CRef<CCmdComposite> CRetranslateCDS::GetCommand( objects::CSeq_entry_Handle seh, string title, ERetranslateMethod method, string &error)
48 {
// Candidate features are collected through the "CDS comment" table column,
// with an empty string and a null string constraint.
49  CRef<CMiscSeqTableColumn> col(new CMiscSeqTableColumn("CDS comment"));
50  vector<CConstRef<CObject> > objs = col->GetObjects(seh, "", CRef<edit::CStringConstraint>(NULL));
51  if (objs.empty()) {
52  error = "No coding regions found!";
53  return CRef<CCmdComposite>(NULL);
54  }
55  bool create_general_only = objects::edit::IsGeneralIdProtPresent(seh);
56  bool any_change = false;
// offset is passed by reference to RetranslateCDSCommand, so one counter is
// shared across all features handled by this call.
57  int offset = 1;
58  CRef<CCmdComposite> cmd(new CCmdComposite(title)); // "Retranslate coding regions"
59  ITERATE(vector<CConstRef<CObject> >, it, objs) {
// NOTE(review): dynamic_cast yields NULL if the object is not a CSeq_feat,
// and f is dereferenced below without a check.
60  const CSeq_feat* f = dynamic_cast<const CSeq_feat* >((*it).GetPointer());
61  // skip if pseudo
62  if (sequence::IsPseudo(*f, seh.GetScope())) {
63  continue;
64  }
// Features whose exception text contains "RNA editing" are skipped as well.
65  if (f->IsSetExcept_text() && NStr::Find(f->GetExcept_text(), "RNA editing") != string::npos)
66  {
67  continue;
68  }
69 
// All edits are made on a copy; the original feature f is never modified here.
70  CRef<CSeq_feat> new_cds(new CSeq_feat());
71  new_cds->Assign(*f);
72  bool cds_change = false;
73  switch (method) {
// NOTE(review): the case label for this branch is missing from the listing.
75  // do nothing
76  break;
// NOTE(review): the case label for this branch is missing from the listing.
78  // truncate at stop
79  if (edit::TruncateCDSAtStop(*new_cds, seh.GetScope())) {
80  cds_change = true;
81  }
82  break;
// NOTE(review): the case label and the declaration of orig_frame are missing
// from the listing. This branch replaces the frame with the result of
// FindBestFrame and records a change when the frame differs from before.
84  {{
86  if (new_cds->GetData().GetCdregion().IsSetFrame()) {
87  orig_frame = new_cds->GetData().GetCdregion().GetFrame();
88  }
89  new_cds->SetData().SetCdregion().SetFrame(CSeqTranslator::FindBestFrame(*f, seh.GetScope()));
// NOTE(review): the declaration of new_frame is missing from the listing.
91  if (new_cds->GetData().GetCdregion().IsSetFrame()) {
92  new_frame = new_cds->GetData().GetCdregion().GetFrame();
93  }
94  if (orig_frame != new_frame) {
95  cds_change = true;
96  }
97  }}
98  break;
99  default:
100  // should never happen
101  return CRef<CCmdComposite>(NULL);
102  break;
103  }
104  bool transl_change = false;
105  CRef<CCmdComposite> subcmd = RetranslateCDSCommand(seh.GetScope(), *f, *new_cds, transl_change, offset, create_general_only);
106  if (subcmd) {
107  cmd->AddCommand(*subcmd);
// The feature itself is only replaced when the switch above or the
// retranslation reported a change.
108  if (cds_change || transl_change) {
// NOTE(review): the declaration of fh is missing from the listing; presumably
// a CSeq_feat_Handle for *f obtained from the scope - confirm.
110  CIRef<IEditCommand> chgFeat(new CCmdChangeSeq_feat(fh, *new_cds, true));
111  cmd->AddCommand(*chgFeat);
112  }
// Any non-null subcommand counts as a change, even if the feature was not replaced.
113  any_change = true;
114  }
115  }
// With nothing to do, hand back a null CRef instead of an empty composite.
116  if (!any_change)
117  cmd.Reset();
118  return cmd;
119 }
120 
121 void CRetranslateCDS::apply( objects::CSeq_entry_Handle seh, ICommandProccessor* cmdProcessor, string title, ERetranslateMethod method )
122 {
123  string error;
124  CRef<CCmdComposite> cmd = GetCommand(seh, title, method, error);
125  if (cmd) {
126  cmdProcessor->Execute(cmd);
127  } else if (!error.empty()) {
128  wxMessageBox(wxString(error), wxT("Error"), wxOK | wxICON_ERROR, NULL);
129  } else {
130  wxMessageBox(wxT("No effect!"), wxT("Error"), wxOK | wxICON_ERROR, NULL);
131  }
132 }
133 
134 CRef<CCmdComposite> CRetranslateCDS::RetranslateCDSCommand(CScope& scope, const CSeq_feat& old_cds, CSeq_feat& cds, bool& cds_change, int& offset, bool create_general_only)
135 {
136  // feature must be cds
137  if (!cds.IsSetData() && !cds.GetData().IsCdregion()) {
139  return empty;
140  }
141  cds_change = false;
142 
143  if (!cds.IsSetProduct()) {
144  string id_label;
145  CBioseq_Handle bsh = scope.GetBioseqHandle(cds.GetLocation());
146  CRef<CSeq_id> new_prot_id = objects::edit::GetNewProtId(bsh, offset, id_label, create_general_only);
147  cds.SetProduct().SetWhole().Assign(*new_prot_id);
148  }
149  // Use Cdregion.Product to get handle to protein bioseq
150  CBioseq_Handle prot_bsh = scope.GetBioseqHandle(cds.GetProduct());
151 
152  if (!prot_bsh) {
153  return GetRetranslateCDSCommand(scope, cds, cds_change, offset, create_general_only);
154  }
155  // Should be a protein!
156  if (!prot_bsh.IsProtein())
157  {
159  return empty;
160  }
161 
162  // Make a copy of existing CSeq_inst
163  CRef<objects::CSeq_inst> new_inst(new objects::CSeq_inst());
164  new_inst->Assign(prot_bsh.GetInst());
165 
166  // Make edits to the CSeq_inst copy
167  //CRef<CBioseq> new_protein_bioseq;
168  if (new_inst->IsSetSeq_data())
169  {
170  new_inst->ResetSeq_data();
171  }
172 
173  // Generate new protein sequence data and length
174  string prot;
175  CSeqTranslator::Translate(cds, scope, prot);
176  if (NStr::EndsWith(prot, "*"))
177  {
178  prot = prot.substr(0, prot.length() - 1);
179  }
180  new_inst->ResetExt();
181  new_inst->SetRepr(objects::CSeq_inst::eRepr_raw);
182  new_inst->SetSeq_data().SetNcbieaa().Set(prot);
183  new_inst->SetLength(TSeqPos(prot.length()));
184  new_inst->SetMol(CSeq_inst::eMol_aa);
185 
186 
187  CRef<CCmdComposite> cmd(new CCmdComposite("Retranslate CDS"));
188 
189  // Update protein sequence data and length
190  CRef<CCmdChangeBioseqInst> chgInst (new CCmdChangeBioseqInst(prot_bsh, *new_inst));
191  cmd->AddCommand(*chgInst);
192 
193  // change molinfo on bioseq
194  CRef<CCmdComposite> synch_molinfo = GetSynchronizeProductMolInfoCommand(scope, cds);
195  if (synch_molinfo)
196  {
197  cmd->AddCommand(*synch_molinfo);
198  }
199 
200 
201  for ( CFeat_CI prot_feat_ci(prot_bsh, CSeqFeatData::e_Prot); prot_feat_ci; ++prot_feat_ci )
202  {
203  if (prot_feat_ci->GetFeatSubtype() != CSeqFeatData::eSubtype_prot)
204  continue;
205  CRef<CSeq_feat> new_feat(new CSeq_feat());
206  new_feat->Assign(prot_feat_ci->GetOriginalFeature());
207 
208  if ( new_feat->CanGetLocation() &&
209  new_feat->GetLocation().IsInt() &&
210  new_feat->GetLocation().GetInt().CanGetTo() )
211  {
212  new_feat->SetLocation().SetInt().SetTo(new_inst->GetLength() - 1);
213 
215 
216  CIRef<IEditCommand> chgFeat(new CCmdChangeSeq_feat(*prot_feat_ci, *new_feat));
217  cmd->AddCommand(*chgFeat);
218  }
219  }
220  bool any_actions = false;
221  try
222  {
223  NRawToDeltaSeq::RemapOtherProtFeats(old_cds, cds, prot_bsh, cmd, any_actions);
224  } catch(const CUtilException&) {}
225  return cmd;
226 }
227 
bool AdjustProteinFeaturePartialsToMatchCDS(CSeq_feat &new_prot, const CSeq_feat &cds)
AdjustProteinFeaturePartialsToMatchCDS A function to change an existing MolInfo to match a coding reg...
Definition: cds_fix.cpp:398
bool TruncateCDSAtStop(CSeq_feat &cds, CScope &scope)
TruncateCDSAtStop A function to truncate a CDS location after the first stop codon in the protein tra...
Definition: cds_fix.cpp:786
CRef< objects::CSeq_id > GetNewProtId(objects::CBioseq_Handle bsh, int &offset, string &id_label, bool general_only)
bool IsGeneralIdProtPresent(objects::CSeq_entry_Handle tse)
CBioseq_Handle –.
CFeat_CI –.
Definition: feat_ci.hpp:64
vector< CConstRef< CObject > > GetObjects(objects::CBioseq_Handle bsh)
static CRef< CCmdComposite > GetCommand(objects::CSeq_entry_Handle seh, string title, ERetranslateMethod method, string &error)
void apply(objects::CSeq_entry_Handle seh, ICommandProccessor *cmdProcessor, string title, ERetranslateMethod method)
static CRef< CCmdComposite > RetranslateCDSCommand(CScope &scope, const CSeq_feat &old_cds, CSeq_feat &cds, bool &cds_change, int &offset, bool create_general_only)
CScope –.
Definition: scope.hpp:92
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Undo/Redo interface for editing operations.
virtual void Execute(IEditCommand *command, wxWindow *window=0)=0
void RemapOtherProtFeats(const objects::CSeq_feat &old_cds, objects::CSeq_feat &cds, objects::CBioseq_Handle bh, CRef< CCmdComposite > composite, bool &any_actions)
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
int offset
Definition: replacements.h:160
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether given feature is pseudo, using gene associated with feature if necessary Checks to...
Definition: sequence.cpp:1428
static CCdregion::EFrame FindBestFrame(const CSeq_feat &cds, CScope &scope)
Find "best" frame for a coding region.
Definition: sequence.cpp:4376
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:4095
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
Definition: scope.cpp:200
bool IsProtein(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
const TInst & GetInst(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
bool IsCdregion(void) const
Check if variant Cdregion is selected.
void SetProduct(TProduct &value)
Assign a value to Product data member.
Definition: Seq_feat_.cpp:110
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
bool CanGetLocation(void) const
Check if it is safe to call GetLocation method.
Definition: Seq_feat_.hpp:1111
const TCdregion & GetCdregion(void) const
Get the variant data.
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
bool IsSetProduct(void) const
Product of process. Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:509
@ eFrame_not_set
not set, code uses one
Definition: Cdregion_.hpp:95
bool CanGetTo(void) const
Check if it is safe to call GetTo method.
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
#define wxT(x)
Definition: muParser.cpp:41
constexpr bool empty(list< Ts... >) noexcept
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
USING_SCOPE(ncbi::objects)
CRef< CCmdComposite > GetRetranslateCDSCommand(objects::CScope &scope, const objects::CSeq_feat &cds, bool create_general_only)
CRef< CCmdComposite > GetSynchronizeProductMolInfoCommand(objects::CScope &scope, const objects::CSeq_feat &cds)
Modified on Thu May 02 14:26:29 2024 by modify_doxy.py rev. 669887