NCBI C++ ToolKit
cmd_add_cds.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cmd_add_cds.cpp 42385 2019-02-13 16:34:44Z asztalos $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Colleen Bollin, based on a file by Roman Katargin
27  */
28 
29 
30 #include <ncbi_pch.hpp>
31 
32 #include <objmgr/seq_entry_ci.hpp>
33 #include <objmgr/seq_annot_ci.hpp>
34 #include <objmgr/bioseq_ci.hpp>
35 #include <objmgr/seqdesc_ci.hpp>
36 #include <objmgr/util/sequence.hpp>
44 
45 
48 
// Body of an Execute() implementation (the cross-reference list at the end of
// this listing places "virtual void Execute()" at cmd_add_cds.cpp:49; the
// signature line itself is not present here).
// NOTE(review): the embedded source line numbers skip 55, 74, 76, 112, 127,
// 132-133, 143, 151-152 and 154, so the declarations of `bi`, `ftable`,
// `annot_ci`, `protein` and `aeh`, and some statements acting on `pdesc`, are
// not visible in this listing -- confirm against the real file before editing.
//
// Visible steps:
//   1. If the entry is not yet a set, convert it to a nuc-prot set and move
//      source/pub descriptors from the bioseq up to the set.
//   2. Find a feature-table annotation, or attach a new one (kept in
//      m_FTableCreated).
//   3. Pick a protein id, attach the protein bioseq, add the CDS feature.
//   4. Copy partial-start/stop state from the CDS location onto m_Prot, add
//      m_Prot to a new annot on the protein, and add the molinfo descriptor.
50 {
51  CSeq_entry_EditHandle eh = m_seh.GetEditHandle();
52  if (!eh.IsSet()) {
53  eh.ConvertSeqToSet();
54  eh.SetSet().SetClass(CBioseq_set::eClass_nuc_prot);
 // NOTE(review): `bi` is declared on a line missing from this listing.
56  CBioseq_EditHandle nuc_h = bi->GetEditHandle();
57 
58  // move all source and pub descriptors from Bioseq to Nuc-prot set
59  CSeq_descr::Tdata& seq_descr = nuc_h.SetDescr();
60  CSeq_descr::Tdata::iterator desc_it = seq_descr.begin();
61  while (desc_it != seq_descr.end()) {
62  if ((*desc_it)->IsSource() || (*desc_it)->IsPub()) {
 // a copy is added to the set before the original is erased; the
 // iterator is advanced via the value returned by erase()
63  CRef<CSeqdesc> cpy(new CSeqdesc());
64  cpy->Assign(**desc_it);
65  eh.AddSeqdesc(*cpy);
66  desc_it = seq_descr.erase(desc_it);
67  } else {
68  ++desc_it;
69  }
70  }
71  }
72 
73 
75 
 // first pass: take the first feature-table annot yielded by `annot_ci`
 // NOTE(review): `ftable` and `annot_ci` are declared on missing lines.
77  for (; annot_ci; ++annot_ci) {
78  if ((*annot_ci).IsFtable()) {
79  ftable = *annot_ci;
80  break;
81  }
82  }
83 
 // second pass: look in the parent entry of each nucleic-acid bioseq of m_seh
84  if (!ftable)
85  for (CBioseq_CI bi(m_seh, CSeq_inst::eMol_na); bi; ++bi)
86  {
87  for (CSeq_annot_CI annot_ci(bi->GetParentEntry(), CSeq_annot_CI::eSearch_entry); annot_ci; ++annot_ci)
88  {
89  if ((*annot_ci).IsFtable())
90  {
91  ftable = *annot_ci;
92  break;
93  }
94  }
95  if (ftable)
96  break;
97  }
98 
 // nothing found: attach an empty annot and remember its handle in
 // m_FTableCreated
99  if (!ftable) {
100  CRef<CSeq_annot> new_annot(new CSeq_annot());
101  ftable = m_FTableCreated = eh.AttachAnnot(*new_annot);
102  }
103 
104 
105  // add protein sequence that is translation
106  CBioseq_Handle prot_h;
107  if (m_CDS->IsSetProduct())
108  {
109  prot_h = eh.GetBioseqHandle(*(m_CDS->GetProduct().GetId()));
110  }
111 
 // NOTE(review): `protein` is created on a line missing from this listing.
113  CRef<CSeq_id> prot_id(new CSeq_id());
 // keep the CDS product id only when it is set and no bioseq with that id
 // was found above; otherwise generate a new id and point the product at it
114  if (m_CDS->IsSetProduct() && !prot_h) {
115  prot_id->Assign(*(m_CDS->GetProduct().GetId()));
116  } else {
117  // make up some new protein ID
118  CBioseq_Handle nuc_h = m_seh.GetScope().GetBioseqHandle(m_CDS->GetLocation());
119  int offset = 1;
120  string id_label;
121  prot_id->Assign(*objects::edit::GetNewProtId(nuc_h, offset, id_label, m_create_general_only));
122  m_CDS->SetProduct().SetWhole().Assign(*prot_id);
123  }
124  protein->SetId().push_back(prot_id);
125  m_ProtHandle = eh.AttachBioseq(*protein, -1);
126 
 // NOTE(review): `aeh` is declared on a line missing from this listing.
128  m_feh_CDS = aeh.AddFeat(*m_CDS);
129 
130  // create molinfo descriptor
131  CRef<CSeqdesc> pdesc(new CSeqdesc());
134 
135  // set location for protein feature
136 
 // partial start on the CDS marks the protein feature partial at its start
137  if (m_CDS->GetLocation().IsPartialStart(eExtreme_Biological)) {
138  if (m_Prot)
139  {
140  m_Prot->SetLocation().SetPartialStart(true, eExtreme_Biological);
141  m_Prot->SetPartial(true);
142  }
144  }
 // partial stop on the CDS marks the protein feature partial at its stop
 // NOTE(review): the condition opening the `else` below, and the bodies of
 // both branches, are on lines missing from this listing.
145  if (m_CDS->GetLocation().IsPartialStop(eExtreme_Biological)) {
146  if (m_Prot)
147  {
148  m_Prot->SetLocation().SetPartialStop(true, eExtreme_Biological);
149  m_Prot->SetPartial(true);
150  }
153  } else {
155  }
156  }
157 
158  if (m_Prot)
159  {
 // protein feature covers the whole protein: [0, length - 1] on prot_id
160  m_Prot->SetLocation().SetInt().SetId(*prot_id);
161  m_Prot->SetLocation().SetInt().SetFrom(0);
162  m_Prot->SetLocation().SetInt().SetTo(protein->GetLength() - 1);
163 
164  CRef<CSeq_annot> new_prot_annot(new CSeq_annot());
165  CSeq_annot_Handle prot_ftable = m_ProtHandle.AttachAnnot(*new_prot_annot);
166  CSeq_annot_EditHandle paeh(prot_ftable);
167  paeh.AddFeat(*m_Prot);
168  }
169 
170  // add molinfo descriptor
171  m_ProtHandle.AddSeqdesc(*pdesc);
172 
173 }
174 
// Body of an undo routine -- presumably the Unexecute() paired with the
// Execute() body above; the signature line is not present in this listing.
// Removes the protein bioseq, converts the set back to a single sequence when
// only one entry is left in it, removes the CDS feature, and removes the
// feature table if m_FTableCreated is set.
// NOTE(review): m_ProtHandle.Remove() is called without a validity check here,
// whereas the later undo body in this file guards it with `if (m_ProtHandle)`.
176 {
177  m_ProtHandle.Remove();
178  CSeq_entry_EditHandle eh = m_seh.GetEditHandle();
 // a set holding exactly one entry is converted back to a plain sequence
179  if (eh.IsSet() && eh.GetSet().GetCompleteBioseq_set()->GetSeq_set().size() == 1) {
180  eh.ConvertSetToSeq();
181  }
182 
183  m_feh_CDS.Remove();
184 
 // only remove the annot when its handle was stored in m_FTableCreated
185  if (m_FTableCreated) {
186  m_FTableCreated.Remove();
187  }
188 }
189 
// Body of a label accessor -- presumably GetLabel(); the signature line is not
// present in this listing. Returns a fixed string.
191 {
192  return "Convert nuc to nuc-prot set";
193 }
194 
// Body of a second Execute()-style routine -- presumably belonging to the
// command labelled "Create CDS" further down; the signature line is not
// present in this listing.
// NOTE(review): the embedded source line numbers skip 204, 225-226, 276 and
// 280, so the declarations of `bi`, `ftable`, `annot_ci` and `aeh`, plus one
// statement between AttachBioseq and x_AddProteinFeatures, are not visible.
//
// Visible steps:
//   1. Translate the CDS; when the translation is non-empty and the entry is
//      not yet a set, convert it to a nuc-prot set and move source/pub
//      descriptors up to the set.
//   2. Find a feature-table annotation, or attach a new one (kept in
//      m_FTableCreated).
//   3. When the translation is non-empty, build a raw protein bioseq from it,
//      give it the ids in m_prot_id, attach it and add the protein features.
//   4. Add the CDS feature (done whether or not a protein was created).
196 {
197  CSeq_entry_EditHandle eh = m_seh.GetEditHandle();
198 
199  string prot = s_GetProductSequence(*m_CDS, eh.GetScope());
200  if (!prot.empty()) {
201  if (!eh.IsSet()) {
202  eh.ConvertSeqToSet();
203  eh.SetSet().SetClass(CBioseq_set::eClass_nuc_prot);
 // NOTE(review): `bi` is declared on a line missing from this listing.
205  CBioseq_EditHandle nuc_h = bi->GetEditHandle();
206 
207  // move all source and pub descriptors from Bioseq to Nuc-prot set
208  CSeq_descr::Tdata& seq_descr = nuc_h.SetDescr();
209  CSeq_descr::Tdata::iterator desc_it = seq_descr.begin();
210  while (desc_it != seq_descr.end()) {
211  if ((*desc_it)->IsSource() || (*desc_it)->IsPub()) {
 // a copy is added to the set before the original is erased; the
 // iterator is advanced via the value returned by erase()
212  CRef<CSeqdesc> cpy(new CSeqdesc());
213  cpy->Assign(**desc_it);
214  eh.AddSeqdesc(*cpy);
215  desc_it = seq_descr.erase(desc_it);
216  }
217  else {
218  ++desc_it;
219  }
220  }
221  }
222  }
223 
224 
 // take the first feature-table annot yielded by `annot_ci`
 // NOTE(review): `ftable` and `annot_ci` are declared on missing lines.
227  for (; annot_ci; ++annot_ci) {
228  if ((*annot_ci).IsFtable()) {
229  ftable = *annot_ci;
230  break;
231  }
232  }
233 
234  /*
235  if (!ftable)
236  for (CBioseq_CI bi(m_seh, CSeq_inst::eMol_na); bi; ++bi)
237  {
238  for (CSeq_annot_CI annot_ci(bi->GetParentEntry(), CSeq_annot_CI::eSearch_entry); annot_ci; ++annot_ci)
239  {
240  if ((*annot_ci).IsFtable())
241  {
242  ftable = *annot_ci;
243  break;
244  }
245  }
246  if (ftable)
247  break;
248  }
249  */
 // nothing found: attach an empty annot and remember its handle in
 // m_FTableCreated
250  if (!ftable) {
251  CRef<CSeq_annot> new_annot(new CSeq_annot());
252  ftable = m_FTableCreated = eh.AttachAnnot(*new_annot);
253  }
254 
255 
256  if (!prot.empty()) {
257  CRef<CBioseq> protein(new CBioseq);
258  protein->SetInst().ResetExt();
259  protein->SetInst().SetRepr(CSeq_inst::eRepr_raw);
 // sequences containing '-' are stored as Ncbieaa, all others as Iupacaa
260  if (NStr::Find(prot, "-") == NPOS) {
261  protein->SetInst().SetSeq_data().SetIupacaa().Set(prot);
262  }
263  else {
264  protein->SetInst().SetSeq_data().SetNcbieaa().Set(prot);
265  }
266  protein->SetInst().SetLength(TSeqPos(prot.length()));
267  protein->SetInst().SetMol(CSeq_inst::eMol_aa);
268 
 // NOTE(review): m_prot_id.front() is used with no emptiness check in this
 // body -- assumes m_prot_id always has at least one id; confirm at the
 // place where m_prot_id is filled.
269  m_CDS->SetProduct().SetWhole().Assign(*m_prot_id.front());
270  for (auto new_prot_id : m_prot_id)
271  {
272  protein->SetId().push_back(new_prot_id);
273  }
274 
275  m_ProtHandle = eh.AttachBioseq(*protein, -1);
277  x_AddProteinFeatures(protein->GetLength());
278  }
279 
 // NOTE(review): `aeh` is declared on a line missing from this listing.
281  m_feh_CDS = aeh.AddFeat(*m_CDS);
282 }
283 
// Body of the helper that produces the protein string for a CDS -- presumably
// s_GetProductSequence(cds, scope), which is called with (*m_CDS,
// eh.GetScope()) earlier in this file; the signature line is not present in
// this listing.
// Returns the translation of `cds` with one trailing "*" removed, if present.
285 {
286  string prot;
287  try {
288  CSeqTranslator::Translate(cds, scope, prot);
289  }
 // CSeqVectorException is swallowed on purpose here: `prot` is returned as
 // Translate left it (presumably empty -- TODO confirm)
290  catch (const CSeqVectorException&) {}
291 
 // drop a single trailing "*"
292  if (NStr::EndsWith(prot, "*")) {
293  prot = prot.substr(0, prot.length() - 1);
294  }
295  return prot;
296 }
297 
// Body of the routine that adds a descriptor to the protein -- presumably
// x_AddMolinfoDescriptorToProtein(); the signature line is not present in this
// listing.
// NOTE(review): the embedded source line numbers skip 301-302, 312, 320-321
// and 324, so the statements that fill in `pdesc` are not visible here.
// Visible behaviour: copies partial-start/stop state from the CDS location
// onto m_Prot (when m_Prot is set) and adds `pdesc` to m_ProtHandle.
299 {
300  CRef<CSeqdesc> pdesc(new CSeqdesc());
303 
304  // set location for protein feature
305 
 // partial start on the CDS marks the protein feature partial at its start
306  if (m_CDS->GetLocation().IsPartialStart(eExtreme_Biological)) {
307  if (m_Prot)
308  {
309  m_Prot->SetLocation().SetPartialStart(true, eExtreme_Biological);
310  m_Prot->SetPartial(true);
311  }
313  }
 // partial stop on the CDS marks the protein feature partial at its stop
 // NOTE(review): the condition opening the `else` below, and the bodies of
 // both branches, are on lines missing from this listing.
314  if (m_CDS->GetLocation().IsPartialStop(eExtreme_Biological)) {
315  if (m_Prot)
316  {
317  m_Prot->SetLocation().SetPartialStop(true, eExtreme_Biological);
318  m_Prot->SetPartial(true);
319  }
322  }
323  else {
325  }
326  }
327 
328  m_ProtHandle.AddSeqdesc(*pdesc);
329 }
330 
// Body of the routine that adds features to the protein -- presumably
// x_AddProteinFeatures(seq_length), which is called with protein->GetLength()
// earlier in this file; the signature line is not present in this listing.
// Attaches a new annot to m_ProtHandle only when there is at least one feature
// to add, then adds m_Prot (if set) and every entry of m_other_prot_feats.
332 {
333  if (m_Prot || !m_other_prot_feats.empty())
334  {
335  CRef<CSeq_annot> new_prot_annot(new CSeq_annot());
336  CSeq_annot_Handle prot_ftable = m_ProtHandle.AttachAnnot(*new_prot_annot);
337  CSeq_annot_EditHandle paeh(prot_ftable);
338 
339  if (m_Prot)
340  {
 // protein feature covers [0, seq_length - 1] on the first protein id
 // NOTE(review): `seq_length - 1` would wrap around if seq_length were 0
 // (TSeqPos is unsigned int); the visible call site only runs for a
 // non-empty translation.
341  m_Prot->SetLocation().SetInt().SetId(*m_prot_id.front());
342  m_Prot->SetLocation().SetInt().SetFrom(0);
343  m_Prot->SetLocation().SetInt().SetTo(seq_length - 1);
344  paeh.AddFeat(*m_Prot);
345  }
 // NOTE(review): `const auto f` copies each element per iteration;
 // `const auto&` would avoid the copy.
346  for (const auto f : m_other_prot_feats)
347  {
348  paeh.AddFeat(*f);
349  }
350  }
351 }
352 
353 
354 
// Body of a second undo routine -- presumably the Unexecute() of the command
// labelled "Create CDS" below; the signature line is not present in this
// listing.
// Removes the protein bioseq if one was attached, converts the set back to a
// single sequence when only one entry is left in it, removes the CDS feature,
// and removes the feature table if m_FTableCreated is set.
356 {
 // the protein handle is checked before removal in this body
357  if (m_ProtHandle) {
358  m_ProtHandle.Remove();
359  }
360  CSeq_entry_EditHandle eh = m_seh.GetEditHandle();
 // a set holding exactly one entry is converted back to a plain sequence
361  if (eh.IsSet() && eh.GetSet().GetCompleteBioseq_set()->GetSeq_set().size() == 1) {
362  eh.ConvertSetToSeq();
363  }
364 
365  m_feh_CDS.Remove();
366 
 // only remove the annot when its handle was stored in m_FTableCreated
367  if (m_FTableCreated) {
368  m_FTableCreated.Remove();
369  }
370 }
371 
// Body of a label accessor -- presumably GetLabel(); the signature line is not
// present in this listing. Returns a fixed string.
373 {
374  return "Create CDS";
375 }
376 
377 
378 
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
CRef< objects::CSeq_id > GetNewProtId(objects::CBioseq_Handle bsh, int &offset, string &id_label, bool general_only)
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_EditHandle –.
CBioseq_Handle –.
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
objects::CSeq_feat_EditHandle m_feh_CDS
Definition: cmd_add_cds.hpp:68
CRef< objects::CSeq_feat > m_CDS
Definition: cmd_add_cds.hpp:66
objects::CSeq_annot_EditHandle m_FTableCreated
Definition: cmd_add_cds.hpp:69
virtual string GetLabel()
objects::CSeq_entry_Handle m_seh
Definition: cmd_add_cds.hpp:65
virtual void Unexecute()
Undo (opposite to Execute())
bool m_create_general_only
Definition: cmd_add_cds.hpp:71
objects::CBioseq_EditHandle m_ProtHandle
Definition: cmd_add_cds.hpp:70
CRef< objects::CSeq_feat > m_Prot
Definition: cmd_add_cds.hpp:67
virtual void Execute()
Do the editing action.
Definition: cmd_add_cds.cpp:49
objects::CSeq_annot_EditHandle m_FTableCreated
objects::CBioseq_EditHandle m_ProtHandle
virtual string GetLabel()
vector< CRef< objects::CSeq_feat > > m_other_prot_feats
virtual void Unexecute()
Undo (opposite to Execute())
objects::CSeq_feat_EditHandle m_feh_CDS
CRef< objects::CSeq_feat > m_CDS
vector< CRef< objects::CSeq_id > > m_prot_id
void x_AddMolinfoDescriptorToProtein()
objects::CSeq_entry_Handle m_seh
Definition: cmd_add_cds.hpp:99
CRef< objects::CSeq_feat > m_Prot
virtual void Execute()
Do the editing action.
void x_AddProteinFeatures(const TSeqPos &seq_length)
static string s_GetProductSequence(const objects::CSeq_feat &cds, objects::CScope &scope)
CScope –.
Definition: scope.hpp:92
SeqVector related exceptions.
CSeq_annot_CI –.
CSeq_annot_Handle –.
CSeq_entry_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
USING_SCOPE(objects)
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
static CRef< CBioseq > TranslateToProtein(const CSeq_feat &cds, CScope &scope)
Definition: sequence.cpp:3839
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:4095
TSeq ConvertSetToSeq(void) const
Do the same as CollapseSet() when sub-entry is of type bioseq.
void SetDescr(TDescr &v) const
TSet GetSet(void) const
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
TSet ConvertSeqToSet(TClass set_class=CBioseq_set::eClass_not_set) const
Convert the entry from Bioseq to Bioseq-set.
CSeq_feat_EditHandle AddFeat(const CSeq_feat &new_obj) const
CConstRef< CBioseq_set > GetCompleteBioseq_set(void) const
Return the complete bioseq-set object.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id) const
Get Bioseq handle from the TSE of this Seq-entry.
CBioseq_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CScope & GetScope(void) const
Get scope this handle belongs to.
bool IsSet(void) const
CBioseq_EditHandle AttachBioseq(CBioseq &seq, int index=-1) const
Attach an existing bioseq.
bool AddSeqdesc(CSeqdesc &v) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5429
#define NPOS
Definition: ncbistr.hpp:133
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2887
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
void SetCompleteness(TCompleteness value)
Assign a value to Completeness data member.
Definition: MolInfo_.hpp:600
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
Definition: MolInfo_.hpp:453
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
Definition: MolInfo_.hpp:594
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
TMolinfo & SetMolinfo(void)
Select the variant.
Definition: Seqdesc_.cpp:594
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ eCompleteness_complete
complete biological entity
Definition: MolInfo_.hpp:156
@ eCompleteness_no_left
missing 5' or NH3 end
Definition: MolInfo_.hpp:158
@ eCompleteness_no_right
missing 3' or COOH end
Definition: MolInfo_.hpp:159
@ eCompleteness_no_ends
missing both ends
Definition: MolInfo_.hpp:160
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
int offset
Definition: replacements.h:160
#define ftable
Definition: utilfeat.h:37
Modified on Fri Dec 01 04:49:30 2023 by modify_doxy.py rev. 669887