NCBI C++ ToolKit
1 /* $Id: extend_cds_to_stop.cpp 47479 2023-05-02 13:24:02Z ucko $
2  * ===========================================================================
3  *
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Igor Filippov
27  */
30 #include <ncbi_pch.hpp>
32 ////@begin includes
33 ////@end includes
35 #include <wx/stattext.h>
36 #include <wx/msgdlg.h>
39 #include <objmgr/seq_vector.hpp>
40 #include <objmgr/util/sequence.hpp>
49 ////@begin XPM images
50 ////@end XPM images
56 /*!
57  * CExtendCDSToStop type definition
58  */
63 /*!
64  * CExtendCDSToStop event table definition
65  */
// Opens the wx event table for CExtendCDSToStop, naming CBulkCmdDlg as the base
// class. No handlers are registered between the generated markers below.
// NOTE(review): the matching END_EVENT_TABLE() is not visible in this listing.
67 BEGIN_EVENT_TABLE( CExtendCDSToStop, CBulkCmdDlg )
69 ////@begin CExtendCDSToStop event table entries
70 ////@end CExtendCDSToStop event table entries
75 /*!
76  * CExtendCDSToStop constructors
77  */
80 {
// Body of a constructor whose signature line is absent from this listing.
// It only resets members through Init(); Create() is not called here.
81  Init();
82 }
84 CExtendCDSToStop::CExtendCDSToStop( wxWindow* parent, IWorkbench* wb, wxWindowID id, const wxString& caption, const wxPoint& pos, const wxSize& size, long style )
85  : CBulkCmdDlg(wb)
86 {
87  Init();
88  Create(parent, id, caption, pos, size, style);
89 }
92 /*!
93  * CExtendCDSToStop creator
94  */
96 bool CExtendCDSToStop::Create( wxWindow* parent, wxWindowID id, const wxString& caption, const wxPoint& pos, const wxSize& size, long style )
97 {
98 ////@begin CExtendCDSToStop creation
99  SetExtraStyle(wxWS_EX_BLOCK_EVENTS);
100  CBulkCmdDlg::Create( parent, id, caption, pos, size, style );
102  CreateControls();
103  if (GetSizer())
104  {
105  GetSizer()->SetSizeHints(this);
106  }
107  Centre();
108 ////@end CExtendCDSToStop creation
109  return true;
110 }
113 /*!
114  * CExtendCDSToStop destructor
115  */
118 {
// Destructor body (its signature line is absent from this listing).
// Nothing is released explicitly: the generated section below is empty.
119 ////@begin CExtendCDSToStop destruction
120 ////@end CExtendCDSToStop destruction
121 }
124 /*!
125  * Member initialisation
126  */
129 {
// Member initialisation body (signature line absent from this listing).
// Puts the control pointers and the error text into a known empty state
// before any controls are created.
130 ////@begin CExtendCDSToStop member initialisation
131  m_OkCancel = NULL;
132 ////@end CExtendCDSToStop member initialisation
133  m_ErrorMessage = "";
// NOTE(review): m_ExtendProtFeats and m_Retranslate are not reset in the
// visible lines (listing lines 134-135 are absent) - confirm they are
// initialised there.
136  m_ExtendmRNA = NULL;
137  m_ResetGenes = NULL;
139 }
142 /*!
143  * Control creation for CExtendCDSToStop
144  */
147 {
// Control-creation body (signature line absent from this listing).
// Lays out four option checkboxes and an OK/Cancel panel in one vertical
// sizer owned by this dialog.
148 ////@begin CExtendCDSToStop content construction
149  CExtendCDSToStop* itemCBulkCmdDlg1 = this;
151  wxBoxSizer* itemBoxSizer2 = new wxBoxSizer(wxVERTICAL);
152  itemCBulkCmdDlg1->SetSizer(itemBoxSizer2);
// "Extend Protein Features": checked by default.
155  m_ExtendProtFeats = new wxCheckBox( itemCBulkCmdDlg1, wxID_ANY, _("Extend Protein Features"), wxDefaultPosition, wxDefaultSize, 0 );
156  m_ExtendProtFeats->SetValue(true);
157  itemBoxSizer2->Add(m_ExtendProtFeats, 0, wxALIGN_LEFT|wxALL, 5);
// "Retranslate Coding Regions for Extended Proteins": checked by default.
159  m_Retranslate = new wxCheckBox( itemCBulkCmdDlg1, wxID_ANY, _("Retranslate Coding Regions for Extended Proteins"), wxDefaultPosition, wxDefaultSize, 0 );
160  m_Retranslate->SetValue(true);
161  itemBoxSizer2->Add(m_Retranslate, 0, wxALIGN_LEFT|wxALL, 5);
// "Extend Associated mRNA Features": the only option unchecked by default.
163  m_ExtendmRNA = new wxCheckBox( itemCBulkCmdDlg1, wxID_ANY, _("Extend Associated mRNA Features"), wxDefaultPosition, wxDefaultSize, 0 );
164  m_ExtendmRNA->SetValue(false);
165  itemBoxSizer2->Add(m_ExtendmRNA, 0, wxALIGN_LEFT|wxALL, 5);
// "Reset Genes": checked by default.
167  m_ResetGenes = new wxCheckBox( itemCBulkCmdDlg1, wxID_ANY, _("Reset Genes"), wxDefaultPosition, wxDefaultSize, 0 );
168  m_ResetGenes->SetValue(true);
169  itemBoxSizer2->Add(m_ResetGenes, 0, wxALIGN_LEFT|wxALL, 5);
// OK/Cancel button panel, centred horizontally below the options.
173  m_OkCancel = new COkCancelPanel( itemCBulkCmdDlg1, wxID_ANY, wxDefaultPosition, wxSize(100, 100), 0 );
174  itemBoxSizer2->Add(m_OkCancel, 0, wxALIGN_CENTER_HORIZONTAL|wxALL, 5);
176 ////@end CExtendCDSToStop content construction
177 }
180 /*!
181  * Should we show tooltips?
182  */
185 {
// Tooltip query body (signature line absent from this listing):
// unconditionally answers "yes".
186  return true;
187 }
189 /*!
190  * Get bitmap resources
191  */
193 wxBitmap CExtendCDSToStop::GetBitmapResource( const wxString& name )
194 {
195  // Bitmap retrieval
196 ////@begin CExtendCDSToStop bitmap retrieval
197  wxUnusedVar(name);
198  return wxNullBitmap;
199 ////@end CExtendCDSToStop bitmap retrieval
200 }
202 /*!
203  * Get icon resources
204  */
206 wxIcon CExtendCDSToStop::GetIconResource( const wxString& name )
207 {
208  // Icon retrieval
209 ////@begin CExtendCDSToStop icon retrieval
210  wxUnusedVar(name);
211  return wxNullIcon;
212 ////@end CExtendCDSToStop icon retrieval
213 }
219 {
// Accessor body (signature line absent from this listing): hands back the
// stored m_ErrorMessage, which Init() sets to the empty string.
220  return m_ErrorMessage;
221 }
225 {
// Body of the command builder (the signature, the declaration of `cmd`, the
// feature-loop header and the initializer of `tbl` are absent from this
// listing). For each feature visited it scans downstream of the feature's
// biological end, codon by codon, and on reaching the first stop codon queues
// edits that stretch the feature to include it. Returns the composite command,
// or a reset (null) `cmd` when no feature was changed.
// Without a top-level entry `cmd` is returned exactly as it was declared.
227  if (!m_TopSeqEntry)
228  return cmd;
229  cmd.Reset(new CCmdComposite("Extend CDS To Stop Codon"));
230  CScope& scope = m_TopSeqEntry.GetScope();
231  bool modified = false;
// NOTE(review): the loop header that defines `fi` is not visible; presumably
// it iterates the coding-region features under m_TopSeqEntry - confirm.
234  {
235  const CSeq_loc& loc = fi->GetLocation();
236  CBioseq_Handle bsh = scope.GetBioseqHandle(loc);
237  if (!bsh)
238  continue;
239  const CSeq_feat& cds = fi->GetOriginalFeature();
// Features whose exception text mentions "RNA editing" are left untouched.
240  if (cds.IsSetExcept_text() && NStr::Find(cds.GetExcept_text(), "RNA editing") != string::npos)
241  {
242  continue;
243  }
// Work on a copy; the original is only replaced through the edit command.
244  CRef<CSeq_feat> new_feat(new CSeq_feat());
245  new_feat->Assign(cds);
// Feature-specific genetic code, if any. NOTE(review): presumably consumed
// by the (not visible) initializer of `tbl` below - confirm.
247  const CGenetic_code* code = NULL;
248  if (cds.IsSetData() && cds.GetData().IsCdregion() && cds.GetData().GetCdregion().IsSetCode()) {
249  code = &(cds.GetData().GetCdregion().GetCode());
250  }
252  size_t stop = loc.GetStop(eExtreme_Biological);
253  // figure out if we have a partial codon at the end
254  size_t orig_len = sequence::GetLength(loc, &scope);
255  size_t len = orig_len;
// Discount the leading bases skipped by the reading frame (1 for frame two,
// 2 for frame three) so that `len % 3` counts only trailing leftover bases.
256  if (cds.IsSetData() && cds.GetData().IsCdregion() && cds.GetData().GetCdregion().IsSetFrame()) {
257  CCdregion::EFrame frame = cds.GetData().GetCdregion().GetFrame();
258  if (frame == CCdregion::eFrame_two) {
259  len -= 1;
260  } else if (frame == CCdregion::eFrame_three) {
261  len -= 2;
262  }
263  }
264  size_t mod = len % 3;
// Build the window to scan. It starts at the first base of the feature's last
// complete codon (hence the +/-3 and the `mod` correction) and runs to the end
// of the sequence in the feature's direction: position 0 on the minus strand,
// the last base of the bioseq otherwise.
265  CRef<CSeq_loc> vector_loc(new CSeq_loc());
266  vector_loc->SetInt().SetId().Assign(*(loc.GetId()));
268  if (loc.IsSetStrand() && loc.GetStrand() == eNa_strand_minus) {
269  vector_loc->SetInt().SetFrom(0);
270  vector_loc->SetInt().SetTo(static_cast<CSeq_interval::TTo>(stop + mod - 1 + 3));
271  vector_loc->SetStrand(eNa_strand_minus);
272  } else {
273  vector_loc->SetInt().SetFrom(static_cast<CSeq_interval::TFrom>(stop - mod + 1 - 3));
274  vector_loc->SetInt().SetTo(bsh.GetInst_Length() - 1);
275  }
277  CSeqVector seq(*vector_loc, scope, CBioseq_Handle::eCoding_Iupac);
278  // reserve our space
279  const size_t usable_size = seq.size();
281  // get appropriate translation table
// NOTE(review): the right-hand side of this declaration is absent from the
// listing.
282  const CTrans_table & tbl =
286  // main loop through bases
287  CSeqVector::const_iterator start = seq.begin();
289  size_t i;
290  size_t k;
291  int state = 0;
// Number of whole codons available in the scan window.
292  size_t length = usable_size / 3;
// NOTE(review): this `new_loc` is never used; the declaration inside the loop
// below shadows it.
294  CRef<CSeq_loc> new_loc(NULL);
296  for (i = 0; i < length; ++i)
297  {
298  // loop through one codon at a time
299  for (k = 0; k < 3; ++k, ++start)
300  {
301  state = tbl.NextCodonState(state, *start);
302  }
304  if (tbl.GetCodonResidue (state) == '*')
305  {
// Codon 0 is the feature's own last complete codon: if that is already a
// stop there is nothing to extend.
306  if (i == 0)
307  break;
// Pick the last range of the location (iterator order) as the piece to grow.
308  CSeq_loc_CI it(loc);
309  it.SetPos(it.GetSize() - 1);
310  CConstRef<CSeq_loc> this_loc = it.GetRangeAsSeq_loc();
312  CRef<CSeq_loc> last_interval(new CSeq_loc());
313  size_t this_start = this_loc->GetStart(eExtreme_Positional);
314  size_t this_stop = this_loc->GetStop(eExtreme_Positional);
// Bases to add beyond the current end: through the end of codon i (the stop
// codon), measured from the feature's end, i.e. 3*i - mod.
315  size_t extension = ((i + 1) * 3) - mod - 3;
316  last_interval->SetInt().SetId().Assign(*(this_loc->GetId()));
// On the minus strand the feature grows toward lower coordinates, otherwise
// toward higher coordinates.
317  if (this_loc->IsSetStrand() && this_loc->GetStrand() == eNa_strand_minus)
318  {
319  last_interval->SetStrand(eNa_strand_minus);
320  last_interval->SetInt().SetFrom(static_cast<CSeq_interval::TFrom>(this_start - extension));
321  last_interval->SetInt().SetTo(static_cast<CSeq_interval::TTo>(this_stop));
322  }
323  else
324  {
325  last_interval->SetInt().SetFrom(static_cast<CSeq_interval::TFrom>(this_start));
326  last_interval->SetInt().SetTo(static_cast<CSeq_interval::TTo>(this_stop + extension));
327  }
// Merge the grown interval into the full location and mark the biological
// stop as no longer partial.
329  CRef<CSeq_loc> new_loc = sequence::Seq_loc_Add(loc, *last_interval, CSeq_loc::fMerge_All|CSeq_loc::fSort, &scope);
330  new_loc->SetPartialStop(false, eExtreme_Biological);
332  new_feat->SetLocation().Assign(*new_loc);
// Queue the feature change, then the optional follow-up edits: protein
// retranslation (may yield a null command), gene reset and mRNA extension,
// the last two governed by their checkboxes.
334  cmd->AddCommand(*CRef<CCmdChangeSeq_feat>(new CCmdChangeSeq_feat(fi->GetSeq_feat_Handle(), *new_feat)));
335  CRef<CCmdComposite> retranslate = RetranslateCDSCommand(scope, *new_feat);
336  if (retranslate)
337  cmd->AddCommand(*retranslate);
338  if (m_ResetGenes->GetValue())
339  ResetGenes(cmd, *fi, last_interval, scope);
340  if (m_ExtendmRNA->GetValue())
341  ExtendmRNA(cmd, *fi, last_interval, scope);
342  modified = true;
// Only the first stop codon matters; stop scanning this feature.
343  break;
344  }
345  }
346  }
// Nothing changed: hand back a null command instead of an empty composite.
347  if (!modified)
348  cmd.Reset();
349  return cmd;
350 }
353 {
356  // feature must be cds
357  if (!cds.IsSetData() && !cds.GetData().IsCdregion()) {
358  return empty;
359  }
361  // Use Cdregion.Product to get handle to protein bioseq
362  CBioseq_Handle prot_bsh = scope.GetBioseqHandle(cds.GetProduct());
364  if (!prot_bsh) {
365  return empty;
366  }
367  // Should be a protein!
368  if (!prot_bsh.IsProtein())
369  {
370  return empty;
371  }
373  // Make a copy of existing CSeq_inst
374  CRef<objects::CSeq_inst> new_inst(new objects::CSeq_inst());
375  new_inst->Assign(prot_bsh.GetInst());
377  // Make edits to the CSeq_inst copy
378  //CRef<CBioseq> new_protein_bioseq;
379  if (new_inst->IsSetSeq_data())
380  {
381  new_inst->ResetSeq_data();
382  }
384  // Generate new protein sequence data and length
385  string prot;
386  CSeqTranslator::Translate(cds, scope, prot);
387  if (NStr::EndsWith(prot, "*"))
388  {
389  prot = prot.substr(0, prot.length() - 1);
390  }
391  new_inst->ResetExt();
392  new_inst->SetRepr(objects::CSeq_inst::eRepr_raw);
393  new_inst->SetSeq_data().SetNcbieaa().Set(prot);
394  new_inst->SetLength(TSeqPos(prot.length()));
395  new_inst->SetMol(CSeq_inst::eMol_aa);
398  CRef<CCmdComposite> cmd(new CCmdComposite("Retranslate CDS"));
400  if (m_Retranslate->GetValue())
401  {
402  // Update protein sequence data and length
403  CRef<CCmdChangeBioseqInst> chgInst (new CCmdChangeBioseqInst(prot_bsh, *new_inst));
404  cmd->AddCommand(*chgInst);
406  // change molinfo on bioseq
407  CRef<CCmdComposite> synch_molinfo = GetSynchronizeProductMolInfoCommand(scope, cds);
408  if (synch_molinfo)
409  {
410  cmd->AddCommand(*synch_molinfo);
411  }
412  }
414  if (m_ExtendProtFeats->GetValue())
415  {
416  // If protein feature exists, update it
418  CFeat_CI prot_feat_ci(prot_bsh, sel);
419  for ( ; prot_feat_ci; ++prot_feat_ci ) {
420  CRef<CSeq_feat> new_feat(new CSeq_feat());
421  new_feat->Assign(prot_feat_ci->GetOriginalFeature());
423  if ( new_feat->CanGetLocation() &&
424  new_feat->GetLocation().IsInt() &&
425  new_feat->GetLocation().GetInt().CanGetTo() )
426  {
427  new_feat->SetLocation().SetInt().SetTo(new_inst->GetLength() - 1);
431  CIRef<IEditCommand> chgFeat(new CCmdChangeSeq_feat(*prot_feat_ci,
432  *new_feat));
433  cmd->AddCommand(*chgFeat);
434  }
435  }
436  }
438  return cmd;
439 }
// Queues an edit that re-spans the gene associated with `feat` so that it also
// covers the interval `add`.
//   cmd   - composite command that receives the gene change
//   feat  - the feature whose best gene (via GetBestGeneForCds) is adjusted
//   add   - interval merged into the gene location
//   scope - scope passed to the location merge
// Does nothing when no gene is found.
442 void CExtendCDSToStop::ResetGenes(CRef<CCmdComposite> cmd, const objects::CMappedFeat& feat, CRef<CSeq_loc> add, CScope &scope)
443 {
444  objects::CSeq_feat_Handle gene = objects::feature::GetBestGeneForCds(feat).GetSeq_feat_Handle();
445  if (gene)
446  {
// Edit a copy of the gene; the original is replaced via the command below.
447  CRef<objects::CSeq_feat> new_gene(new CSeq_feat);
448  new_gene->Assign(*gene.GetOriginalSeq_feat());
449  const objects::CSeq_loc& gene_loc = gene.GetLocation();
450  CRef<CSeq_loc> new_loc = sequence::Seq_loc_Add(gene_loc, *add, CSeq_loc::fMerge_All|CSeq_loc::fSort, &scope);
// NOTE(review): `range` is used below but its declaration is absent from this
// listing; presumably it is the overall range of `new_loc` - confirm.
// The gene is rebuilt as one interval on the original id and strand.
453  CRef<objects::CSeq_loc> new_gene_loc(new objects::CSeq_loc);
454  CRef<objects::CSeq_id> id(new objects::CSeq_id);
455  id->Assign(*gene_loc.GetId());
456  CRef<objects::CSeq_interval> new_int(new objects::CSeq_interval(*id, range.GetFrom(), range.GetTo(), gene_loc.GetStrand()));
457  new_gene_loc->SetInt(*new_int);
// Keep the original start partialness; the stop end is marked complete.
458  new_gene_loc->SetPartialStart(gene_loc.IsPartialStart(eExtreme_Biological), objects::eExtreme_Biological);
459  new_gene_loc->SetPartialStop(false, objects::eExtreme_Biological);
460  new_gene->SetLocation().Assign(*new_gene_loc);
// The feature-level partial flag mirrors the partialness of either end.
461  new_gene->SetPartial(new_gene->GetLocation().IsPartialStart(eExtreme_Positional) || new_gene->GetLocation().IsPartialStop(eExtreme_Positional));
462  cmd->AddCommand(*CRef< CCmdChangeSeq_feat >(new CCmdChangeSeq_feat(gene,*new_gene)));
463  }
464 }
466 void CExtendCDSToStop::ExtendmRNA(CRef<CCmdComposite> cmd, const objects::CMappedFeat& feat, CRef<CSeq_loc> add, CScope &scope)
467 {
468  objects::CSeq_feat_Handle mrna = objects::feature::GetBestMrnaForCds(feat).GetSeq_feat_Handle();
469  if (mrna)
470  {
471  CRef<objects::CSeq_feat> new_mrna(new CSeq_feat);
472  new_mrna->Assign(*mrna.GetOriginalSeq_feat());
473  const objects::CSeq_loc& mrna_loc = mrna.GetLocation();
474  CRef<CSeq_loc> new_loc = sequence::Seq_loc_Add(mrna_loc, *add, CSeq_loc::fMerge_All|CSeq_loc::fSort, &scope);
475  new_loc->SetPartialStart(mrna_loc.IsPartialStart(eExtreme_Biological), objects::eExtreme_Biological);
477  new_mrna->SetLocation().Assign(*new_loc);
478  new_mrna->SetPartial(new_mrna->GetLocation().IsPartialStart(eExtreme_Positional) || new_mrna->GetLocation().IsPartialStop(eExtreme_Positional));
479  cmd->AddCommand(*CRef< CCmdChangeSeq_feat >(new CCmdChangeSeq_feat(mrna,*new_mrna)));
480  }
481 }
