NCBI C++ ToolKit
fix_product_names.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: fix_product_names.cpp 41872 2018-10-31 15:16:50Z asztalos $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Igor Filippov
27  */
28 
29 
30 #include <ncbi_pch.hpp>
32 #include <objmgr/feat_ci.hpp>
34 #include <util/xregexp/regexp.hpp>
37 
38 #include <wx/button.h>
39 #include <wx/sizer.h>
40 
43 
44 
45 /*!
46  * CFixProductNames type definition
47  */
48 
49 IMPLEMENT_DYNAMIC_CLASS( CFixProductNames, wxDialog )
50 
51 
52 /*!
53  * CFixProductNames event table definition
54  */
55 
56 BEGIN_EVENT_TABLE( CFixProductNames, wxDialog )
57 
58 ////@begin CFixProductNames event table entries
59 ////@end CFixProductNames event table entries
60 
62 
63 
64 /*!
65  * CFixProductNames constructors
66  */
67 
// NOTE(review): the signature line for this body (listing line 68) is absent from
// this extract. Given the "constructors" banner directly above, this is presumably
// the default constructor -- confirm against the repository copy.
// The visible body does nothing except call Init().
69 {
70  Init();
71 }
72 
73 CFixProductNames::CFixProductNames( wxWindow* parent, objects::CSeq_entry_Handle seh,
74  wxWindowID id, const wxString& caption, const wxPoint& pos, const wxSize& size, long style )
75 : m_TopSeqEntry(seh)
76 {
77  Init();
78  Create(parent, id, caption, pos, size, style);
79 }
80 
81 
82 /*!
83  * CFixProductNames creator
84  */
85 
// Second-stage creation of the dialog window.
//
// Sets the wxWS_EX_BLOCK_EVENTS extra style, forwards all arguments to
// wxDialog::Create(), applies the sizer's size hints when a sizer is installed,
// and centres the dialog.
//
// Returns true unconditionally; the result of wxDialog::Create() is not checked.
//
// NOTE(review): listing line 92 is absent from this extract. It sits between the
// wxDialog::Create() call and the GetSizer() check and is presumably the call to
// CreateControls() -- confirm against the repository copy.
86 bool CFixProductNames::Create( wxWindow* parent, wxWindowID id, const wxString& caption, const wxPoint& pos, const wxSize& size, long style )
87 {
88 ////@begin CFixProductNames creation
89  SetExtraStyle(wxWS_EX_BLOCK_EVENTS);
90  wxDialog::Create( parent, id, caption, pos, size, style );
91 
// Size hints are applied only when a sizer has been installed on the dialog.
93  if (GetSizer())
94  {
95  GetSizer()->SetSizeHints(this);
96  }
97  Centre();
98 ////@end CFixProductNames creation
99 
100  return true;
101 }
102 
103 
104 /*!
105  * CFixProductNames destructor
106  */
107 
// NOTE(review): the signature line for this body (listing line 108) is absent from
// this extract; per the "destructor" banner above it is presumably
// CFixProductNames::~CFixProductNames() -- confirm.
// The body is empty apart from the code-generator markers.
109 {
110 ////@begin CFixProductNames destruction
111 ////@end CFixProductNames destruction
112 }
113 
114 
115 /*!
116  * Member initialisation
117  */
118 
// NOTE(review): the signature line (listing line 119) is absent from this extract;
// per the "Member initialisation" banner above this is presumably
// CFixProductNames::Init() -- confirm.
// The visible statements set four checkbox pointers to NULL.
// NOTE(review): listing line 121 is also absent. m_BeforePunct is not reset by any
// visible line, so that line presumably does it -- confirm.
120 {
122  m_BeforeCap = NULL;
123  m_BeforeNum = NULL;
124  m_RmCommas = NULL;
125  m_KeepLast = NULL;
126 }
127 
128 
129 /*!
130  * Control creation for CFixProductNames
131  */
132 
// NOTE(review): the signature line (listing line 133) is absent from this extract;
// per the "Control creation" banner above this is presumably
// CFixProductNames::CreateControls() -- confirm.
//
// Builds the dialog layout:
//   - a vertical box sizer installed as the dialog's sizer;
//   - five check boxes, all initially unchecked, stored in m_BeforePunct,
//     m_BeforeCap, m_BeforeNum, m_RmCommas and m_KeepLast;
//   - a horizontal box sizer holding the "Accept" (wxID_OK) and "Cancel"
//     (wxID_CANCEL) buttons.
134 {
135  CFixProductNames* itemDialog1 = this;
136 
// Top-level vertical layout; every control below is added to it in display order.
137  wxBoxSizer* itemBoxSizer2 = new wxBoxSizer(wxVERTICAL);
138  itemDialog1->SetSizer(itemBoxSizer2);
139 
140  m_BeforePunct = new wxCheckBox( itemDialog1, wxID_ANY, _("Capital letters before punctuation should remain capitalized"), wxDefaultPosition, wxDefaultSize, 0 );
141  m_BeforePunct->SetValue(false);
142  itemBoxSizer2->Add(m_BeforePunct, 0, wxALIGN_LEFT|wxALL, 5);
143 
144  m_BeforeCap = new wxCheckBox( itemDialog1, wxID_ANY, _("Capital letters before other capital letters should remain capitalized"), wxDefaultPosition, wxDefaultSize, 0 );
145  m_BeforeCap->SetValue(false);
146  itemBoxSizer2->Add(m_BeforeCap, 0, wxALIGN_LEFT|wxALL, 5);
147 
148  m_BeforeNum = new wxCheckBox( itemDialog1, wxID_ANY, _("Capital letters before numbers should remain capitalized"), wxDefaultPosition, wxDefaultSize, 0 );
149  m_BeforeNum->SetValue(false);
150  itemBoxSizer2->Add(m_BeforeNum, 0, wxALIGN_LEFT|wxALL, 5);
151 
152  m_RmCommas = new wxCheckBox( itemDialog1, wxID_ANY, _("Commas should be removed"), wxDefaultPosition, wxDefaultSize, 0 );
153  m_RmCommas->SetValue(false);
154  itemBoxSizer2->Add(m_RmCommas, 0, wxALIGN_LEFT|wxALL, 5);
155 
156  m_KeepLast = new wxCheckBox( itemDialog1, wxID_ANY, _("Keep caps single letter at the end"), wxDefaultPosition, wxDefaultSize, 0 );
157  m_KeepLast->SetValue(false);
158  itemBoxSizer2->Add(m_KeepLast, 0, wxALIGN_LEFT|wxALL, 5);
159 
// Button row, centred horizontally under the check boxes.
160  wxBoxSizer* itemBoxSizer12 = new wxBoxSizer(wxHORIZONTAL);
161  itemBoxSizer2->Add(itemBoxSizer12, 0, wxALIGN_CENTER_HORIZONTAL|wxALL, 5);
162 
163  wxButton* itemButton13 = new wxButton( itemDialog1, wxID_OK, _("Accept"), wxDefaultPosition, wxDefaultSize, 0 );
164  itemBoxSizer12->Add(itemButton13, 0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
165 
166  wxButton* itemButton14 = new wxButton( itemDialog1, wxID_CANCEL, _("Cancel"), wxDefaultPosition, wxDefaultSize, 0 );
167  itemBoxSizer12->Add(itemButton14, 0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
168 }
169 
170 
171 /*!
172  * Should we show tooltips?
173  */
174 
// NOTE(review): the signature line (listing line 175) is absent from this extract;
// per the "Should we show tooltips?" banner above this is presumably
// CFixProductNames::ShowToolTips() -- confirm.
// Always answers true.
176 {
177  return true;
178 }
179 
180 /*!
181  * Get bitmap resources
182  */
183 
184 wxBitmap CFixProductNames::GetBitmapResource( const wxString& name )
185 {
186  // Bitmap retrieval
187 ////@begin CVectorTrimDlg bitmap retrieval
188  wxUnusedVar(name);
189  return wxNullBitmap;
190 ////@end CVectorTrimDlg bitmap retrieval
191 }
192 
193 /*!
194  * Get icon resources
195  */
196 
197 wxIcon CFixProductNames::GetIconResource( const wxString& name )
198 {
199  // Icon retrieval
200 ////@begin CVectorTrimDlg icon retrieval
201  wxUnusedVar(name);
202  return wxNullIcon;
203 ////@end CVectorTrimDlg icon retrieval
204 }
205 
// NOTE(review): the signature line (listing line 206) is absent from this extract;
// the result type of the visible body is CRef<CCmdComposite>, so this is presumably
// CFixProductNames::GetCommand() -- confirm.
//
// Reads the current state of the five check boxes and passes it, together with
// m_TopSeqEntry, to apply(); the command object apply() builds is returned as is.
// The check-box pointers are dereferenced without a null check, so the controls
// must already exist when this is called.
207 {
208  CRef<CCmdComposite> cmd = apply(m_TopSeqEntry, m_BeforePunct->GetValue(), m_BeforeCap->GetValue(), m_BeforeNum->GetValue(), m_RmCommas->GetValue(), m_KeepLast->GetValue());
209  return cmd;
210 }
211 
212 CRef<CCmdComposite> CFixProductNames::apply(objects::CSeq_entry_Handle tse, bool before_punct, bool before_cap, bool before_num, bool rm_commas, bool keep_last)
213 {
214  CRef<CCmdComposite> composite( new CCmdComposite("Fix Product Names") );
215 
216  for (CFeat_CI feat_ci(tse, SAnnotSelector(CSeqFeatData::e_Prot)); feat_ci; ++feat_ci)
217  {
218  const CSeq_feat& orig = feat_ci->GetOriginalFeature();
219  CRef<CSeq_feat> new_feat(new CSeq_feat());
220  new_feat->Assign(orig);
221  bool modified = false;
222  if (new_feat->IsSetData() && new_feat->GetData().IsProt() && new_feat->GetData().GetProt().IsSetName())
223  {
224  EDIT_EACH_NAME_ON_PROTREF(name, new_feat->SetData().SetProt())
225  {
226  string new_name = *name;
227  FixCaps(new_name, before_punct, before_cap, before_num, rm_commas, keep_last);
228  modified |= new_name != *name;
229  *name = new_name;
230  }
231  }
232  if (modified)
233  composite->AddCommand(*CRef<CCmdChangeSeq_feat>(new CCmdChangeSeq_feat(feat_ci->GetSeq_feat_Handle(), *new_feat)));
234  }
235  for (CFeat_CI feat_ci(tse, SAnnotSelector(CSeqFeatData::e_Rna)); feat_ci; ++feat_ci)
236  {
237  const CSeq_feat& orig = feat_ci->GetOriginalFeature();
238  CRef<CSeq_feat> new_feat(new CSeq_feat());
239  new_feat->Assign(orig);
240  bool modified = false;
241  if (new_feat->IsSetData() && new_feat->GetData().IsRna() && new_feat->GetData().GetRna().IsSetExt() && new_feat->GetData().GetRna().GetExt().IsName())
242  {
243  string new_name = new_feat->GetData().GetRna().GetExt().GetName();
244  FixCaps(new_name, before_punct, before_cap, before_num, rm_commas, keep_last);
245  modified |= new_name != new_feat->GetData().GetRna().GetExt().GetName();
246  new_feat->SetData().SetRna().SetExt().SetName() = new_name;
247  }
248  if (new_feat->IsSetData() && new_feat->GetData().IsRna() && new_feat->GetData().GetRna().IsSetExt() && new_feat->GetData().GetRna().GetExt().IsGen() && new_feat->GetData().GetRna().GetExt().GetGen().IsSetProduct())
249  {
250  string new_name = new_feat->GetData().GetRna().GetExt().GetGen().GetProduct();
251  FixCaps(new_name, before_punct, before_cap, before_num, rm_commas, keep_last);
252  modified |= new_name != new_feat->GetData().GetRna().GetExt().GetGen().GetProduct();
253  new_feat->SetData().SetRna().SetExt().SetGen().SetProduct() = new_name;
254  }
255  if (modified)
256  composite->AddCommand(*CRef<CCmdChangeSeq_feat>(new CCmdChangeSeq_feat(feat_ci->GetSeq_feat_Handle(), *new_feat)));
257  }
258  return composite;
259 }
260 
// Terms whose spelling/capitalisation must be restored after a product name has
// been lower-cased.
//
// Each entry is used by CFixProductNames::FixCaps() in two roles: wrapped in
// "\\b" ... "\\b" as a case-insensitive regular-expression pattern, and as the
// replacement text. Regex metacharacters inside an entry are therefore written
// escaped (e.g. "H\\+", "NAD\\(P\\)").
//
// The table ends with a sentinel whose first character is '\0'; consumers stop
// at the first entry for which entry[0] == '\0'. Do not insert an empty string
// in the middle of the list.
//
// The pointers themselves are const so that neither an entry nor the sentinel
// can be reassigned at run time.
static const char* const capitalized_names[] =
{
    "ABC",
    "AAA",
    "ATP",
    "ATPase",
    "A\\/G",
    "AMP",
    "CDP",
    "coproporphyrinogen III",
    "cytochrome BD",
    "cytochrome C",
    "cytochrome C2",
    "cytochrome C550",
    "cytochrome D",
    "cytochrome O",
    "cytochrome P450",
    "cytochrome P460",
    "D\\-alanine",
    "D\\-alanyl",
    "D\\-amino",
    "D\\-beta",
    "D\\-cysteine",
    "D\\-lactate",
    "D\\-ribulose",
    "D\\-xylulose",
    "endonuclease I",
    "endonuclease II",
    "endonuclease III",
    "endonuclease V",
    "EPS I",
    "Fe\\-S",
    "ferredoxin I",
    "ferredoxin II",
    "GTP",
    "GTPase",
    "H\\+",
    "hemolysin I",
    "hemolysin II",
    "hemolysin III",
    "L\\-allo",
    "L\\-arabinose",
    "L\\-asparaginase",
    "L\\-aspartate",
    "L\\-carnitine",
    "L\\-fuculose",
    "L\\-glutamine",
    "L\\-histidinol",
    "L\\-isoaspartate",
    "L\\-serine",
    "MFS",
    "FAD\\/NAD\\(P\\)",
    "MCP",
    "Mg\\+",
    "Mg chelatase",
    "Mg\\-protoporphyrin IX",
    "N\\(5\\)",
    "N\\,N\\-",
    "N\\-\\(",
    "N\\-acetyl",
    "N\\-acyl",
    "N\\-carb",
    "N\\-form",
    "N\\-iso",
    "N\\-succ",
    "NADP",
    "Na\\+\\/H\\+",
    "NAD",
    "NAD\\(P\\)",
    "NADPH",
    "O\\-sial",
    "O\\-succ",
    "pH",
    "ribonuclease BN",
    "ribonuclease D",
    "ribonuclease E",
    "ribonuclease G",
    "ribonuclease H",
    "ribonuclease I",
    "ribonuclease II",
    "ribonuclease III",
    "ribonuclease P",
    "ribonuclease PH",
    "ribonuclease R",
    "RNAse",
    "S\\-adeno",
    "type I",
    "type II",
    "type III",
    "type IV",
    "type V",
    "type VI",
    "UDP",
    "UDP\\-N",
    "Zn",
    "\0"   // sentinel: first character is NUL
};
358 
// Normalises the capitalisation of a product name in place.
//
// Parameters:
//   name         - string to rewrite (modified in place).
//   before_punct - keep a character unchanged when the next character is punctuation.
//   before_cap   - keep a character unchanged when the next character is an
//                  upper-case letter.
//   before_num   - keep a character unchanged when the next character is a digit.
//   rm_commas    - selects the comma-removal branch (see the NOTE on that branch).
//   keep_last    - keep the final character unchanged when it is a letter.
//
// Steps visible here:
//   1. every character is lower-cased unless one of the exceptions applies;
//   2. the rm_commas branch;
//   3. FixAbbreviationsInElement(name, false);
//   4. for every entry of capitalized_names, a case-insensitive regex replace of
//      "\b<entry>\b" with the entry text.
359 void CFixProductNames::FixCaps(string &name, bool before_punct, bool before_cap, bool before_num, bool rm_commas, bool keep_last)
360 {
361 
362  size_t len = name.size();
363  for (size_t i = 0; i < len; i++)
364  {
// A character is left as it is when any of these holds:
//   - it is 's'/'S' and directly follows a digit;
//   - one of the before_* options is on and the NEXT character matches it. The
//     scan runs left to right, so name[i+1] has not been lower-cased yet and the
//     test sees its original case;
//   - keep_last is on and this is the last character and a letter.
// Otherwise it is lower-cased.
365  if ( !(i > 0 && isdigit(name[i-1]) && tolower(name[i]) == 's') &&
366  !(before_punct && i+1 < len && ispunct(name[i+1])) &&
367  !(before_cap && i+1 < len && isalpha(name[i+1]) && isupper(name[i+1])) &&
368  !(before_num && i+1 < len && isdigit(name[i+1])) &&
369  !(keep_last && i+1 == len && isalpha(name[i]))
370  )
371  name[i] = tolower(name[i]);
372  }
// NOTE(review): listing line 375, the body of this branch, is absent from this
// extract, so the branch looks empty here. It presumably strips commas from
// `name` -- confirm against the repository copy.
373  if (rm_commas)
374  {
376  }
377  FixAbbreviationsInElement(name, false);
// Restore the canonical spelling of each known term. A new CRegexpUtil is built
// for every table entry and its result is swapped back into `name`, so each
// replacement works on the output of the previous one.
// NOTE(review): cap_name, including the backslashes that escape regex
// metacharacters, is also passed as the replacement text; confirm that
// CRegexpUtil::Replace() does not copy those backslashes into the result.
378  for(unsigned int p = 0; capitalized_names[p][0] != '\0'; ++p)
379  {
380  string cap_name = capitalized_names[p];
381  CRegexpUtil replacer( name );
382  replacer.Replace( "\\b"+cap_name+"\\b", cap_name, CRegexp::fCompile_ignore_case, CRegexp::fMatch_default, 0);
383  replacer.GetResult().swap( name );
384  }
385 }
386 
void AddCommand(IEditCommand &command)
CFeat_CI –.
Definition: feat_ci.hpp:64
static void FixCaps(string &name, bool before_punct, bool before_cap, bool before_num, bool rm_commas, bool keep_last)
CFixProductNames()
Constructors.
wxCheckBox * m_BeforePunct
CRef< CCmdComposite > GetCommand()
wxCheckBox * m_RmCommas
bool Create(wxWindow *parent, wxWindowID id=wxID_ANY, const wxString &caption=_("Fix Product Names"), const wxPoint &pos=wxDefaultPosition, const wxSize &size=wxDefaultSize, long style=wxCAPTION|wxRESIZE_BORDER|wxSYSTEM_MENU|wxCLOSE_BOX|wxTAB_TRAVERSAL)
Creation.
void Init()
Initialises member variables.
void CreateControls()
Creates the controls and sizers.
wxCheckBox * m_KeepLast
wxCheckBox * m_BeforeCap
CSeq_entry_Handle m_TopSeqEntry
static CRef< CCmdComposite > apply(CSeq_entry_Handle tse, bool before_punct=false, bool before_cap=false, bool before_num=false, bool rm_commas=false, bool keep_last=false)
wxBitmap GetBitmapResource(const wxString &name)
Retrieves bitmap resources.
static bool ShowToolTips()
Should we show tooltips?
wxCheckBox * m_BeforeNum
~CFixProductNames()
Destructor.
wxIcon GetIconResource(const wxString &name)
Retrieves icon resources.
CRegexpUtil –.
Definition: regexp.hpp:312
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
void FixAbbreviationsInElement(string &result, bool fix_end_of_sentence=true)
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
#define _(proto)
Definition: ct_nlmzip_i.h:78
static void Init(void)
Definition: cursor6.c:76
USING_SCOPE(objects)
static const char * capitalized_names[]
#define NULL
Definition: ncbistd.hpp:225
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
size_t Replace(CTempStringEx search, CTempString replace, CRegexp::TCompile compile_flags=CRegexp::fCompile_default, CRegexp::TMatch match_flags=CRegexp::fMatch_default, size_t max_replace=0)
Replace occurrences of a substring within a string by pattern.
Definition: regexp.cpp:289
string GetResult(void)
Get result string.
Definition: regexp.hpp:582
@ fCompile_ignore_case
Definition: regexp.hpp:103
@ fMatch_default
Definition: regexp.hpp:127
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define kEmptyStr
Definition: ncbistr.hpp:123
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3401
bool IsSetName(void) const
protein name Check if a value has been assigned to Name data member.
Definition: Prot_ref_.hpp:366
bool IsSetProduct(void) const
Check if a value has been assigned to Product data member.
Definition: RNA_gen_.hpp:294
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
Definition: RNA_ref_.hpp:604
bool IsGen(void) const
Check if variant Gen is selected.
Definition: RNA_ref_.hpp:504
const TGen & GetGen(void) const
Get the variant data.
Definition: RNA_ref_.cpp:156
const TName & GetName(void) const
Get the variant data.
Definition: RNA_ref_.hpp:484
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: RNA_gen_.hpp:306
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
bool IsName(void) const
Check if variant Name is selected.
Definition: RNA_ref_.hpp:478
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
bool IsProt(void) const
Check if variant Prot is selected.
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const TProt & GetProt(void) const
Get the variant data.
const TRna & GetRna(void) const
Get the variant data.
bool IsRna(void) const
Check if variant Rna is selected.
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
END_EVENT_TABLE()
int i
int len
const struct ncbi::grid::netcache::search::fields::SIZE size
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int tolower(Uchar c)
Definition: ncbictype.hpp:72
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
int ispunct(Uchar c)
Definition: ncbictype.hpp:68
int isupper(Uchar c)
Definition: ncbictype.hpp:70
Utility macros and typedefs for exploring NCBI objects from seqfeat.asn.
#define EDIT_EACH_NAME_ON_PROTREF(Itr, Var)
static static static wxID_ANY
SAnnotSelector –.
C++ wrappers for the Perl-compatible regular expression (PCRE) library.
Modified on Sat Dec 09 04:44:23 2023 by modify_doxy.py rev. 669887