NCBI C++ ToolKit
uncul_tax_tool.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: uncul_tax_tool.cpp 47479 2023-05-02 13:24:02Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Igor Filippov
27  */
28 
29 
30 #include <ncbi_pch.hpp>
32 
33 #include <objmgr/seq_entry_ci.hpp>
34 #include <objmgr/bioseq_ci.hpp>
35 #include <objmgr/feat_ci.hpp>
37 #include <gui/objutils/utils.hpp>
49 
56 
58 
61 
62 
63 IMPLEMENT_DYNAMIC_CLASS( CUnculTaxTool, wxFrame )
64 
65 
66 /*!
67  * CUnculTaxTool event table definition
68  */
69 
70 BEGIN_EVENT_TABLE( CUnculTaxTool, wxFrame )
71 
78 
80 
81 
82 /*!
83  * CUnculTaxTool constructors
84  */
85 
87 {
88  Init();
89 }
90 
91 CUnculTaxTool::CUnculTaxTool( wxWindow* parent, CSeq_entry_Handle seh, IWorkbench* workbench,
92  wxWindowID id, const wxString& caption, const wxPoint& pos, const wxSize& size, long style )
93  : m_TopSeqEntry(seh), m_Workbench(workbench)
94 {
95  Init();
96  Create(parent, id, caption, pos, size, style);
98 }
99 
100 
101 /*!
102  * CUnculTaxTool creator
103  */
104 
105 bool CUnculTaxTool::Create( wxWindow* parent, wxWindowID id, const wxString& caption, const wxPoint& pos, const wxSize& size, long style )
106 {
107 ////@begin CUnculTaxTool creation
108  SetExtraStyle(wxWS_EX_BLOCK_EVENTS);
109  wxFrame::Create( parent, id, caption, pos, size, style );
110 
111  CreateControls();
112  if (GetSizer())
113  {
114  GetSizer()->SetSizeHints(this);
115  }
116  Centre(wxBOTH|wxCENTRE_ON_SCREEN);
117 ////@end CUnculTaxTool creation
118 
119  return true;
120 }
121 
122 
123 /*!
124  * CUnculTaxTool destructor
125  */
126 
128 {
129 ////@begin CUnculTaxTool destruction
130 ////@end CUnculTaxTool destruction
131 }
132 
133 
134 /*!
135  * Member initialisation
136  */
137 
139 {
140  m_GridPanel = NULL;
141 }
142 
143 
144 /*!
145  * Control creation for CUnculTaxTool
146  */
147 
149 {
150  CUnculTaxTool* itemFrame1 = this;
151 
152  wxBoxSizer* itemBoxSizer1 = new wxBoxSizer(wxVERTICAL);
153  SetSizer(itemBoxSizer1);
154 
155  wxPanel* itemDialog1 = new wxPanel( itemFrame1, wxID_ANY, wxDefaultPosition, wxDefaultSize, wxTAB_TRAVERSAL );
156  itemBoxSizer1->Add(itemDialog1, 1, wxGROW, 0);
157 
158 
159  wxBoxSizer* itemBoxSizer2 = new wxBoxSizer(wxVERTICAL);
160  itemDialog1->SetSizer(itemBoxSizer2);
161 
162  wxArrayString itemChoiceStrings, itemChoiceStringsWritable;
163 
164  wxBoxSizer* itemBoxSizer3 = new wxBoxSizer(wxHORIZONTAL);
165  itemBoxSizer2->Add(itemBoxSizer3, 0, wxALIGN_CENTER_HORIZONTAL|wxTOP, 5);
166 
167 
169 
170 
171  if (m_BioSource.empty())
172  {
173  wxMessageBox(wxT("No bad taxnames"), wxT("Error"), wxOK | wxICON_ERROR);
174  NCBI_THROW( CException, eUnknown, "No bad taxnames" );
175  }
176 
178  if (values_table->GetNum_rows() < 1)
179  {
180  wxMessageBox(wxT("No bad taxnames"), wxT("Error"), wxOK | wxICON_ERROR);
181  NCBI_THROW( CException, eUnknown, "No bad taxnames" );
182  }
183  CRef<CSeq_table> choices = GetChoices(values_table);
184  int glyph_col = GetCollapsible();
185  m_GridPanel = new CSeqTableGridPanel(itemDialog1, values_table, choices, glyph_col);
186  itemBoxSizer3->Add(m_GridPanel, 0, wxALIGN_CENTER_VERTICAL|wxALL, 2);
187 
189  CSeqTableGrid *gridAdapter = new CSeqTableGrid(values_table);
190  m_Grid->SetTable(gridAdapter, true);
191  m_Grid->AutoSizeColumns();
192  int l_height = m_Grid->GetColLabelSize();
193  m_Grid->SetColLabelSize( 2 * l_height );
194 
195  int pos = 0;
196  ITERATE (CSeq_table::TColumns, it, values_table->GetColumns())
197  {
198  if (pos > 0)
199  {
200  if ((*it)->IsSetHeader() && (*it)->GetHeader().IsSetTitle() )
201  {
202  string title = (*it)->GetHeader().GetTitle();
203  if (!title.empty())
204  {
205  itemChoiceStrings.Add(wxString(title));
206  if (!IsReadOnlyColumn(title))
207  itemChoiceStringsWritable.Add(wxString(title));
208  }
209  if (IsReadOnlyColumn(title))
210  m_GridPanel->MakeColumnReadOnly(pos - 1, true);
211  }
212  }
213  pos++;
214  }
215 
216  if (glyph_col >= 0 && glyph_col+2 < m_Grid->GetNumberCols())
217  {
218  m_GridPanel->InitColumnCollapse(glyph_col+2);
219  }
220 
221 
222  wxBoxSizer* itemBoxSizer4 = new wxBoxSizer(wxHORIZONTAL);
223  itemBoxSizer2->Add(itemBoxSizer4, 0, wxALIGN_CENTER_HORIZONTAL|wxLEFT|wxRIGHT, 5);
224 
225  CStringConstraintSelect *itemStringConstraintPanel = new CStringConstraintSelect( itemDialog1, m_GridPanel, itemChoiceStrings, wxID_ANY, wxDefaultPosition, wxDefaultSize, 0 );
226  itemBoxSizer4->Add(itemStringConstraintPanel, 0, wxALIGN_CENTER_VERTICAL|wxALL|wxFIXED_MINSIZE, 0);
227 
228  CApplyEditconvertPanel *itemAecrPanel = new CApplyEditconvertPanel( itemDialog1, m_GridPanel, itemChoiceStringsWritable, 1, false, wxID_ANY, wxDefaultPosition, wxDefaultSize);
229  itemBoxSizer2->Add(itemAecrPanel, 0, wxALIGN_CENTER_HORIZONTAL|wxALL|wxFIXED_MINSIZE, 1);
230 
231  wxBoxSizer* itemBoxSizer15 = new wxBoxSizer(wxHORIZONTAL);
232  itemBoxSizer2->Add(itemBoxSizer15, 0, wxALIGN_CENTER_HORIZONTAL|wxALL, 2);
233 
234  wxButton* itemButton13 = new wxButton( itemDialog1, ID_APPLY_BTN, _("Apply Corrections"), wxDefaultPosition, wxDefaultSize, 0 );
235  itemBoxSizer15->Add(itemButton13, 0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
236 
237  wxButton* itemButton15 = new wxButton( itemDialog1, ID_REFRESH_BTN, _("Refresh"), wxDefaultPosition, wxDefaultSize, 0 );
238  itemBoxSizer15->Add(itemButton15, 0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
239 
240  wxButton* itemButton11 = new wxButton( itemDialog1, ID_ADD_SP_UNCUL_TAXTOOL, _("Add sp."), wxDefaultPosition, wxDefaultSize, 0 );
241  itemBoxSizer15->Add(itemButton11, 0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
242 
243  wxButton* itemButton12 = new wxButton( itemDialog1, ID_ADD_BACTERIUM_UNCUL_TAXTOOL, _("Add bacterium"), wxDefaultPosition, wxDefaultSize, 0 );
244  itemBoxSizer15->Add(itemButton12, 0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
245 
246  wxButton* itemButton16 = new wxButton( itemDialog1, ID_TRIM_BTN, _("Trim Suggestion"), wxDefaultPosition, wxDefaultSize, 0 );
247  itemBoxSizer15->Add(itemButton16, 0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
248 
249  wxButton* itemButton14 = new wxButton( itemDialog1, ID_CANCEL_BTN, _("Dismiss"), wxDefaultPosition, wxDefaultSize, 0 );
250  itemBoxSizer15->Add(itemButton14, 0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
251 }
252 
253 
255 {
256 
259  id_col->SetHeader().SetTitle(kSequenceIdColLabel);
260 
261  CRef<CSeqTable_column> expand_col(new CSeqTable_column());
262  expand_col->SetHeader().SetTitle("");
263  expand_col->SetHeader().SetField_name("expand");
264  expand_col->SetData().SetString();
265 
266  CRef<CSeqTable_column> current_col(new CSeqTable_column());
267  current_col->SetHeader().SetTitle("Taxname");
268  current_col->SetHeader().SetField_name("current");
269  current_col->SetData().SetString();
270 
271  CRef<CSeqTable_column> suggested_col(new CSeqTable_column());
272  suggested_col->SetHeader().SetTitle("Suggested Correction");
273  suggested_col->SetHeader().SetField_name("suggested");
274  suggested_col->SetData().SetString();
275 
276 
277  // bogus column to include last, otherwise deletion of the previous column will not work
278  CRef<CSeqTable_column> bogus_col(new CSeqTable_column());
279  bogus_col->SetHeader().SetTitle("");
280  bogus_col->SetHeader().SetField_name("");
281  bogus_col->SetData().SetString();
282 
284  table->SetColumns().push_back(id_col);
285  table->SetColumns().push_back(expand_col);
286  table->SetColumns().push_back(current_col);
287  table->SetColumns().push_back(suggested_col);
288  table->SetColumns().push_back(bogus_col);
289 
290 
291  vector<string> suggestions;
292  vector<bool> verified;
293  LookupSuggestions(suggestions, verified);
294 
295  size_t row = 0;
296  for (unsigned int i = 0; i<m_BioSource.size(); i++)
297  {
298  string taxname = m_BioSource[i].second->GetSource().GetTaxname();
299  string suggested = suggestions[i];
300  if (taxname == suggested && verified[i])
301  continue;
302  CRef<CSeq_id> id(new CSeq_id());
303  id->SetLocal().SetId(static_cast<CObject_id::TId>(row));
304  id_col->SetData().SetId().push_back(id);
305  expand_col->SetData().SetString().push_back("");
306  current_col->SetData().SetString().push_back(taxname);
307  suggested_col->SetData().SetString().push_back(suggested);
308  bogus_col->SetData().SetString().push_back("");
309  row++;
310  }
311 
312  table->SetNum_rows(static_cast<CSeq_table::TNum_rows>(row));
313 
314  return table;
315 }
316 
317 /*!
318  * Should we show tooltips?
319  */
320 
322 {
323  return true;
324 }
325 
326 /*!
327  * Get bitmap resources
328  */
329 
330 wxBitmap CUnculTaxTool::GetBitmapResource( const wxString& name )
331 {
332  // Bitmap retrieval
333  wxUnusedVar(name);
334  return wxNullBitmap;
335 }
336 
337 /*!
338  * Get icon resources
339  */
340 
341 wxIcon CUnculTaxTool::GetIconResource( const wxString& name )
342 {
343  // Icon retrieval
344  wxUnusedVar(name);
345  return wxNullIcon;
346 }
347 
348 
350 {
351  CRef<CCmdComposite> composite( new CCmdComposite("Uncul Tax Tool") );
352  m_UnindexedObjects.clear();
353  bool select_all = false;
354  if (!m_Grid->IsSelection() && values_table->GetNum_rows() > 0)
355  {
356  if (wxMessageBox(ToWxString("Select All?"), wxT("Nothing is selected"), wxOK | wxCANCEL, NULL) == wxOK)
357  {
358  select_all = true;
359  }
360  }
361 
362  map<string,string> current_to_suggested;
363  for (unsigned int i = 0; i < values_table->GetColumn("current").GetData().GetString().size(); i++)
364  {
365  string current = values_table->GetColumn("current").GetData().GetString()[i];
366  string suggested = values_table->GetColumn("suggested").GetData().GetString()[i];
367  if (!current.empty() && !suggested.empty() && current != suggested && (m_Grid->IsInSelection(i,0) || select_all))
368  {
369  current_to_suggested[current] = suggested;
370  }
371  }
372 
373 
374 
375  for (unsigned int i=0; i<m_BioSource.size(); i++)
376  {
377 
378  string taxname = m_BioSource[i].second->GetSource().GetTaxname();
379  if (current_to_suggested.find(taxname) != current_to_suggested.end())
380  {
381  CRef<CSeqdesc> new_desc(new CSeqdesc);
382  new_desc->Assign(m_BioSource[i].second.GetObject());
383  CBioSource& edited_biosource = new_desc->SetSource();
384 
385  edited_biosource.SetOrg().SetTaxname(current_to_suggested[taxname]);
386  edit::CleanupForTaxnameChange(edited_biosource);
388  ConvertSpeciesSpecificNote(edited_biosource,"amplified with species-specific primers");
389  if (m_RemoveNote.find(i) != m_RemoveNote.end())
390  ConvertSpeciesSpecificNote(edited_biosource,"");
391 
392  CSeq_entry_Handle seh = m_TopSeqEntry.GetScope().GetSeq_entryHandle(*m_BioSource[i].first);
393  CRef<CCmdChangeSeqdesc> cmd(new CCmdChangeSeqdesc( seh, *m_BioSource[i].second, *new_desc));
394  composite->AddCommand(*cmd);
395 
396  SUnindexedObject sobj;
397  const CBioSource& biosource = m_BioSource[i].second->GetSource();
398  sobj.actual_obj.Reset(const_cast<CBioSource*>(&biosource));
399  sobj.new_obj.Reset(&edited_biosource);
400 
401  CRef<CBioSource> orig_obj(new CBioSource());
402  orig_obj->Assign(biosource);
403  sobj.orig_obj.Reset(orig_obj);
404  m_UnindexedObjects.push_back(sobj);
405  }
406  }
407 
408  return composite;
409 }
410 
411 
413 {
414  CRef<CSeq_table> values_table = m_GridPanel->GetValuesTable();
416  return cmd;
417 }
418 
419 
420 string CUnculTaxTool::StandardFixes(const CBioSource &biosource)
421 {
422  string taxname = biosource.GetTaxname();
423  string old;
424  while (old != taxname)
425  {
426  old = taxname;
427  const string uncultured = "uncultured ";
428  const string sp = " sp";
429  const string spdot = " sp.";
430  if (NStr::StartsWith(taxname,uncultured,NStr::eNocase))
431  {
432  taxname = taxname.substr(uncultured.length());
433  }
434  if (NStr::EndsWith(taxname,sp,NStr::eNocase))
435  {
436  taxname = taxname.substr(0,taxname.length()-sp.length());
437  }
438  if (NStr::EndsWith(taxname,spdot,NStr::eNocase))
439  {
440  taxname = taxname.substr(0,taxname.length()-spdot.length());
441  }
442  NStr::ReplaceInPlace(taxname,", "," ");
443  NStr::ReplaceInPlace(taxname,","," ");
444  }
445  return taxname;
446 }
447 
// NULL-terminated table of note texts describing amplification with
// species-specific primers (several BankIt wizard variants plus the plain
// "amplified with species-specific primers" wording).
// NOTE(review): presumably matched against subsource names by
// IsSpeciesSpecific / ConvertSpeciesSpecificNote -- the lines that reference
// this table are not visible in this listing; confirm against the real file.
448 static const char* sAmplifiedSpeciesSpecific[] = {
449  "[BankIt_uncultured16S_wizard]; [species_specific primers]; [tgge]",
450  "[BankIt_uncultured16S_wizard]; [species_specific primers]; [dgge]",
451  "[BankIt_uncultured16S_wizard]; [species_specific primers]",
452  "[uncultured (with species-specific primers)]",
453  "[uncultured]; [amplified with species-specific primers]",
454  "[uncultured (using species-specific primers) bacterial source]",
455  "amplified with species-specific primers",
456  NULL
457 };
458 
460 {
461  if (biosource.IsSetSubtype())
462  for (CBioSource::TSubtype::const_iterator subtype = biosource.GetSubtype().begin(); subtype != biosource.GetSubtype().end(); ++subtype)
463  if ((*subtype)->IsSetSubtype() && (*subtype)->GetSubtype() == CSubSource::eSubtype_other && (*subtype)->IsSetName())
464  {
465  string name = (*subtype)->GetName();
466  int i=0;
468  {
470  return true;
471  i++;
472  }
473  }
474  return false;
475 }
476 
477 void CUnculTaxTool::ConvertSpeciesSpecificNote(CBioSource &biosource, const string &new_note)
478 {
479  if (biosource.IsSetSubtype())
480  {
481  CBioSource::TSubtype::iterator subtype = biosource.SetSubtype().begin();
482  while ( subtype != biosource.SetSubtype().end())
483  {
484  bool erased = false;
485  if ((*subtype)->IsSetSubtype() && (*subtype)->GetSubtype() == CSubSource::eSubtype_other && (*subtype)->IsSetName())
486  {
487  string name = (*subtype)->GetName();
488  int i=0;
490  {
492  {
493  if (new_note.empty())
494  {
495  subtype = biosource.SetSubtype().erase(subtype);
496  erased = true;
497  }
498  else
499  (*subtype)->SetName(new_note);
500  break;
501  }
502  i++;
503  }
504  }
505  if (!erased)
506  ++subtype;
507  }
508  }
509 }
510 
511 
512 CRef<CT3Reply> CUnculTaxTool::GetReply(const CBioSource &biosource, const string &standard_taxname)
513 {
514  if (m_ReplyCache.find(standard_taxname) != m_ReplyCache.end())
515  return m_ReplyCache[standard_taxname];
516 
517  vector<CRef<COrg_ref> > rq_list;
518  CRef<COrg_ref> org(new COrg_ref());
519  org->Assign(biosource.GetOrg());
520  org->SetTaxname(standard_taxname);
521  rq_list.push_back(org);
522 
523  CTaxon3 taxon3;
524  taxon3.Init();
525  CRef<CTaxon3_reply> reply;
526  reply = taxon3.SendOrgRefList(rq_list);
527  CRef<CT3Reply> t3reply;
528  if (reply->IsSetReply() && !reply->GetReply().empty())
529  {
530  t3reply = reply->GetReply().front();
531  m_ReplyCache[standard_taxname] = t3reply;
532  }
533  return t3reply;
534 }
535 
537 {
538  string rank;
539 
540  if ( reply && reply->IsData() && reply->GetData().IsSetStatus())
541  for (CT3Data::TStatus::const_iterator status = reply->GetData().GetStatus().begin(); status != reply->GetData().GetStatus().end(); ++status)
542  {
543  if ((*status)->IsSetProperty() && (*status)->GetProperty() == "rank" && (*status)->IsSetValue() && (*status)->GetValue().IsStr())
544  rank = (*status)->GetValue().GetStr();
545  }
546  return rank;
547 }
548 
550 {
551  string suggestion;
552  if (reply && reply->IsData())
553  {
554  suggestion = reply->GetData().GetOrg().GetTaxname();
555  }
556 
557  return suggestion;
558 }
559 
560 bool CUnculTaxTool::CompareOrgnameLineage(CRef<CT3Reply> reply, const string &lineage)
561 {
562  bool result = false;
563  if (reply && reply->IsData() && reply->GetData().IsSetOrg() && reply->GetData().GetOrg().IsSetOrgname()
564  && reply->GetData().GetOrg().GetOrgname().IsSetLineage()
565  && NStr::FindNoCase(reply->GetData().GetOrg().GetOrgname().GetLineage(),lineage) != NPOS)
566  {
567  result = true;
568  }
569  return result;
570 }
571 
572 
573 string CUnculTaxTool::MakeUnculturedName(const string &taxname, const string suffix)
574 {
575  return "uncultured "+taxname+suffix;
576 }
577 
578 bool CUnculTaxTool::CheckSuggestedFix(const CBioSource &biosource, string &suggestion)
579 {
580  CRef<CT3Reply> reply2 = GetReply(biosource, suggestion);
581  string rank2 = GetRank(reply2);
582  if (rank2 == "species")
583  {
584  suggestion = GetSuggestion(reply2); // Note that this does not only perform a check - it also potentially modifies suggestion. This is how it's done in the original sequin code.
585  return true;
586  }
587  return false;
588 }
589 
591 {
592  bool result = false;
593  if (reply && reply->IsError() && reply->GetError().IsSetMessage() && reply->GetError().GetMessage() == "Taxname is ambiguous")
594  {
595  result = true;
596  }
597  return result;
598 }
599 
600 string CUnculTaxTool::TryRankFix(CRef<CT3Reply> reply, unsigned int i, string &name) // In case of binomial truncation the input name will be modified. This is what's happening in the original sequin code
601 {
602  const CBioSource &biosource = m_BioSource[i].second->GetSource();
603  string suggestion;
604  string rank = GetRank(reply);
605 
606  bool is_species_specific = IsSpeciesSpecific(biosource);
607  if (rank == "species")
608  {
609  if ( is_species_specific )
610  {
611  suggestion = GetSuggestion(reply);
613  }
614  else if (NStr::FindNoCase(name," ") != NPOS)
615  {
616  string tmp,name2;
617  NStr::SplitInTwo(name," ",name2,tmp);
618  name = name2;
619  CRef<CT3Reply> reply2 = GetReply(biosource,name);
620  if (!reply2)
621  return suggestion;
622  return TryRankFix(reply2,i,name);
623  }
624  }
625  else
626  {
627  if ( is_species_specific)
628  {
630  }
631  if (rank == "genus")
632  {
633  if (CompareOrgnameLineage(reply,"archaea") || CompareOrgnameLineage(reply,"bacteria"))
634  suggestion = MakeUnculturedName(GetSuggestion(reply), " sp.");
635  else if (CompareOrgnameLineage(reply," Fungi;"))
636  suggestion = MakeUnculturedName(GetSuggestion(reply));
637  }
638  else
639  {
640  if (CompareOrgnameLineage(reply,"archaea"))
641  suggestion = MakeUnculturedName (GetSuggestion(reply), " archaeon");
642  else if (CompareOrgnameLineage(reply,"bacteria"))
643  suggestion = MakeUnculturedName(GetSuggestion(reply), " bacterium");
644  else if (CompareOrgnameLineage(reply," Fungi;"))
645  suggestion = MakeUnculturedName(GetSuggestion(reply));
646  }
647  }
648  return suggestion;
649 }
650 
652 {
654  set<string> submit;
655  for (unsigned int i=0; i<m_BioSource.size(); i++)
656  {
657  const auto& bsrc = m_BioSource[i].second->GetSource();
658  if (bsrc.IsSetTaxname() && NStr::StartsWith(bsrc.GetTaxname(), "uncultured", NStr::eNocase)) {
659  submit.insert(bsrc.GetTaxname());
660  }
661 
662  string standard_taxname = StandardFixes(bsrc);
663  if (standard_taxname.empty())
664  continue;
665 
666  submit.insert(standard_taxname);
667  submit.insert(MakeUnculturedName(standard_taxname," bacterium"));
668  submit.insert(MakeUnculturedName(standard_taxname," archaeon"));
669  submit.insert(MakeUnculturedName(standard_taxname, " sp."));
670  submit.insert(MakeUnculturedName(standard_taxname));
671  if (NStr::FindNoCase(standard_taxname," ") != NPOS)
672  {
673  string tmp,name2;
674  NStr::SplitInTwo(standard_taxname," ",name2,tmp);
675  standard_taxname = name2;
676  submit.insert(standard_taxname);
677  submit.insert(MakeUnculturedName(standard_taxname," bacterium"));
678  submit.insert(MakeUnculturedName(standard_taxname," archaeon"));
679  submit.insert(MakeUnculturedName(standard_taxname, " sp."));
680  submit.insert(MakeUnculturedName(standard_taxname));
681  }
682  }
683 
684 
685  vector<CRef<COrg_ref> > rq_list;
686  for (set<string>::iterator name = submit.begin(); name != submit.end(); ++name)
687  {
688  CRef<COrg_ref> org(new COrg_ref());
689  org->SetTaxname(*name);
690  rq_list.push_back(org);
691  }
692  CTaxon3 taxon3;
693  taxon3.Init();
694  CRef<CTaxon3_reply> reply;
695  reply = taxon3.SendOrgRefList(rq_list);
696  if (reply->IsSetReply())
697  {
698  unsigned int i=0;
699  for (CTaxon3_reply::TReply::const_iterator reply_it = reply->GetReply().begin(); reply_it != reply->GetReply().end(); ++reply_it)
700  {
701  // store the errors as well
702  m_ReplyCache[rq_list[i]->GetTaxname()] = *reply_it;
703  i++;
704  }
705  }
706 }
707 
708 void CUnculTaxTool::LookupSuggestions(vector<string> &suggestions, vector<bool> &verified)
709 {
710  PreloadCache();
711  suggestions.clear();
712  suggestions.resize(m_BioSource.size());
713  verified.clear();
714  verified.resize(m_BioSource.size(), true);
715  for (unsigned int i=0; i<m_BioSource.size(); i++)
716  {
717  const CBioSource& bsrc = m_BioSource[i].second->GetSource();
718  const string& orig_taxname = (bsrc.IsSetTaxname()) ? bsrc.GetTaxname() : kEmptyStr;
719  if (NStr::StartsWith(orig_taxname, "uncultured", NStr::eNocase)) {
720  CRef<CT3Reply> reply_orig = GetReply(bsrc, orig_taxname);
721  if (reply_orig && reply_orig->IsData()) {
722  suggestions[i] = reply_orig->GetData().GetOrg().GetTaxname();
723  continue;
724  }
725  }
726 
727  string standard_taxname = StandardFixes(bsrc);
728  if (standard_taxname.empty())
729  continue;
730  CRef<CT3Reply> reply = GetReply(bsrc, standard_taxname);
731  if (!reply)
732  continue;
733  if (IsAmbiguous(reply))
734  {
735  suggestions[i] = MakeUnculturedName(standard_taxname," bacterium");
736  if (CheckSuggestedFix(bsrc, suggestions[i]))
737  continue;
738  suggestions[i] = MakeUnculturedName(standard_taxname," archaeon");
739  if (CheckSuggestedFix(bsrc, suggestions[i]))
740  continue;
741  }
742 
743  suggestions[i] = TryRankFix(reply,i,standard_taxname);
744  if (CheckSuggestedFix(bsrc, suggestions[i]))
745  continue;
746 
747  suggestions[i] = MakeUnculturedName(standard_taxname);
748  if (CheckSuggestedFix(bsrc, suggestions[i]))
749  continue;
750  suggestions[i] = MakeUnculturedName(standard_taxname, " sp.");
751  if (CheckSuggestedFix(bsrc, suggestions[i]))
752  continue;
753  suggestions[i] = MakeUnculturedName(standard_taxname);
754  verified[i] = CheckSuggestedFix(bsrc, suggestions[i]);
755  }
756 }
757 
758 static const char* s_UntrimmableWords[] = { "sp.", "cf.", "aff.", "bacterium", "archaeon", NULL };
759 
760 void CUnculTaxTool::TrimSuggestions(wxCommandEvent& event )
761 {
762  CRef<CSeq_table> values_table = m_GridPanel->GetValuesTable();
763 
764  for (unsigned int i = 0; i < values_table->GetColumn("suggested").GetData().GetString().size(); i++)
765  {
766  string suggested = values_table->GetColumn("suggested").GetData().GetString()[i];
767  if (suggested.empty())
768  continue;
769  auto pos = NStr::FindNoCase(suggested," ");
770  if ( pos != NPOS && pos !=0 && !NStr::StartsWith(suggested,"uncultured "))
771  {
772  bool no_fix = false;
773  for (int j = 0; s_UntrimmableWords[j] != NULL && !no_fix; j++)
774  if ( NStr::EqualNocase(suggested,pos + 1, suggested.length()-pos-1,s_UntrimmableWords[j]))
775  no_fix = true;
776  if (!no_fix)
777  values_table->SetColumns()[3]->SetData().SetString()[i] = suggested.substr(0,pos);
778  }
779  }
780  int glyph_col = GetCollapsible();
781  if (glyph_col >= 0 && glyph_col+2 < m_Grid->GetNumberCols())
782  {
783  m_GridPanel->InitColumnCollapse(glyph_col+2);
784  }
785 }
786 
787 static const char* sUnfixable[] = {
788  "rickettsia",
789  "candidatus",
790  "endosymbiont",
791  "phytoplasma",
792  "wolbachia",
793  NULL
794 };
795 
796 
797 bool CUnculTaxTool::OkToTaxFix(const string& taxname)
798 {
799  int i=0;
800  while (sUnfixable[i])
801  {
802  if (NStr::FindNoCase(taxname,sUnfixable[i]) != NPOS)
803  return false;
804  i++;
805  }
806  return true;
807 }
808 
809 void CUnculTaxTool::AddBioSource(const CSeq_entry& seq, const CSeqdesc& desc) //const CBioSource& biosource)
810 {
811  const CBioSource& biosource = desc.GetSource();
812  if (biosource.IsSetTaxname() && !biosource.GetTaxname().empty() && OkToTaxFix(biosource.GetTaxname()) )
813  {
814  bool taxon_present = false;
815  if (biosource.IsSetOrg() && biosource.GetOrg().IsSetDb())
816  {
817  FOR_EACH_DBXREF_ON_ORGREF(db,biosource.GetOrg())
818  if ((*db)->IsSetDb() && NStr::Equal((*db)->GetDb(),"taxon",NStr::eNocase))
819  {
820  taxon_present = true;
821  break;
822  }
823  }
824  bool begins_with_uncultured = NStr::StartsWith(biosource.GetTaxname(), "uncultured", NStr::eNocase);
825 
826  if (!taxon_present || !begins_with_uncultured)
827  m_BioSource.emplace_back(ConstRef(&seq), ConstRef(&desc));
828  }
829 }
830 
832 {
834  if ((*it)->IsSource()) {
835  AddBioSource(se, **it);
836  }
837  }
838 
839  if (se.IsSet()) {
841  GetDesc (**it);
842  }
843  }
844 }
845 
846 
848 {
849  m_BioSource.clear();
852  if (!tse)
853  return;
854  GetDesc (*(tse.GetCompleteSeq_entry()));
855 }
856 
858 {
859  int col = -1;
860  for (int i=0; i < m_Grid->GetNumberCols(); i++)
861  if (m_Grid->GetColLabelValue(i) == _("Suggested Correction"))
862  {
863  col = i;
864  break;
865  }
866  return col;
867 }
868 
869 void CUnculTaxTool::OnAddBacterium( wxCommandEvent& event )
870 {
871  int col = GetColumn();
872  if (col == -1)
873  return;
874 
875  for (int i=0; i<m_Grid->GetNumberRows(); i++)
876  {
877  wxString value = m_Grid->GetCellValue(i,col);
878  m_Grid->SetCellValue(i,col,value+" bacterium");
879  }
880 }
881 
882 void CUnculTaxTool::OnAddSp( wxCommandEvent& event )
883 {
884  int col = GetColumn();
885  if (col == -1)
886  return;
887 
888  for (int i=0; i<m_Grid->GetNumberRows(); i++)
889  {
890  wxString value = m_Grid->GetCellValue(i,col);
891  m_Grid->SetCellValue(i,col,value+" sp.");
892  }
893 }
894 
895 void CUnculTaxTool::OnApply( wxCommandEvent& event )
896 {
897  if (!m_TopSeqEntry) return;
899  if (srv)
900  {
901  CRef<CGBWorkspace> ws = srv->GetGBWorkspace();
902  if (ws)
903  {
904  CGBDocument* doc = dynamic_cast<CGBDocument*>(ws->GetProjectFromScope(m_TopSeqEntry.GetScope()));
905  if (doc)
906  {
907  ICommandProccessor* cmdProccessor = &doc->GetUndoManager();
909  if (cmd)
910  {
912  {
913  SUnindexedObject& sobj = *iter;
914  sobj.actual_obj->Assign(*sobj.new_obj);
915  }
916  CRef<CCmdComposite> cleanup = CleanupCommand (m_TopSeqEntry, true, false); // extended, do_tax
918  {
919  SUnindexedObject& sobj = *iter;
920  sobj.actual_obj->Assign(*sobj.orig_obj);
921  }
922  if (cleanup)
923  {
924  cmd->AddCommand(*cleanup);
925  }
926  cmdProccessor->Execute(cmd);
927  }
928  }
929  }
930  }
931 }
932 
933 void CUnculTaxTool::OnCancel( wxCommandEvent& event )
934 {
935  Close();
936 }
937 
938 
939 void CUnculTaxTool::OnRefreshBtn( wxCommandEvent& event )
940 {
941  m_TopSeqEntry.Reset();
942  if (!m_Workbench) return;
943 
946  if (sel_srv)
947  {
948  sel_srv->GetActiveObjects(objects);
949  }
950  if (objects.empty())
951  {
953  }
954 
957  if (seh) {
958  m_TopSeqEntry = seh;
959  }
960  }
961 
963 
965  m_GridPanel->SetValuesTable(values_table);
967  CSeqTableGrid *gridAdapter = new CSeqTableGrid(values_table);
968  m_Grid->SetTable(gridAdapter, true);
969  m_Grid->AutoSizeColumns();
970  if (values_table->GetNum_rows() > 0)
971  {
972  int glyph_col = GetCollapsible();
973  if (glyph_col >= 0 && glyph_col+2 < m_Grid->GetNumberCols())
974  {
975  m_GridPanel->InitColumnCollapse(glyph_col+2);
976  }
977  }
978  Refresh();
979 }
980 
const string & GetTaxname(void) const
Definition: BioSource.cpp:340
bool IsSetTaxname(void) const
Definition: BioSource.cpp:335
void AddCommand(IEditCommand &command)
CGBDocument.
Definition: document.hpp:113
CUndoManager & GetUndoManager()
Definition: document.hpp:158
CProjectService - a service providing API for operations with Workspaces and Projects.
void MakeColumnReadOnly(int pos, bool val=true)
wxGrid * GetGrid(void)
void InitColumnCollapse(int col)
void SetValuesTable(CRef< objects::CSeq_table > table)
CRef< objects::CSeq_table > GetValuesTable()
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
const CSeqTable_column & GetColumn(CTempString column_name) const
Definition: Seq_table.cpp:65
virtual CRef< CTaxon3_reply > SendOrgRefList(const vector< CRef< COrg_ref > > &list, COrg_ref::fOrgref_parts result_parts=COrg_ref::eOrgref_default, fT3reply_parts t3result_parts=eT3reply_default)
Definition: taxon3.cpp:190
virtual void Init()
Definition: taxon3.cpp:74
void ConvertSpeciesSpecificNote(CBioSource &biosource, const string &new_note)
CRef< CCmdComposite > GetCommandFromValuesTable(CRef< CSeq_table > values_table)
void OnAddSp(wxCommandEvent &event)
vector< pair< CConstRef< objects::CSeq_entry >, CConstRef< objects::CSeqdesc > > > m_BioSource
string MakeUnculturedName(const string &taxname, const string suffix="")
void OnApply(wxCommandEvent &event)
IWorkbench * m_Workbench
void OnCancel(wxCommandEvent &event)
TUnindexedObjects m_UnindexedObjects
CRef< objects::CSeq_table > GetValuesTableFromSeqEntry()
bool CompareOrgnameLineage(CRef< CT3Reply > reply, const string &lineage)
bool CheckSuggestedFix(const CBioSource &biosource, string &suggestion)
void Init()
Initialises member variables.
void OnAddBacterium(wxCommandEvent &event)
CRef< CT3Reply > GetReply(const CBioSource &biosource, const string &standard_taxname)
bool IsReadOnlyColumn(string column_name)
bool OkToTaxFix(const string &taxname)
void GetDesc(const CSeq_entry &se)
wxIcon GetIconResource(const wxString &name)
Retrieves icon resources.
bool IsAmbiguous(CRef< CT3Reply > reply)
set< unsigned int > m_RemoveNote
set< unsigned int > m_ConvertNote
void CreateControls()
Creates the controls and sizers.
string GetRank(CRef< CT3Reply > reply)
void FindBioSource(objects::CSeq_entry_Handle tse)
CUnculTaxTool()
Constructors.
CRef< CCmdComposite > GetCommand()
string StandardFixes(const CBioSource &biosource)
bool Create(wxWindow *parent, wxWindowID id=11300, const wxString &caption=_("Uncultured Taxonomy Tool"), const wxPoint &pos=wxDefaultPosition, const wxSize &size=wxSize(1400, 840), long style=wxMINIMIZE_BOX|wxMAXIMIZE_BOX|wxCAPTION|wxRESIZE_BORDER|wxSYSTEM_MENU|wxCLOSE_BOX|wxTAB_TRAVERSAL)
Creation.
void AddBioSource(const objects::CSeq_entry &seq, const objects::CSeqdesc &desc)
CRef< objects::CSeq_table > GetChoices(CRef< objects::CSeq_table > values_table)
wxBitmap GetBitmapResource(const wxString &name)
Retrieves bitmap resources.
void TrimSuggestions(wxCommandEvent &event)
vector< SUnindexedObject > TUnindexedObjects
bool IsSpeciesSpecific(const CBioSource &biosource)
static bool ShowToolTips()
Should we show tooltips?
void LookupSuggestions(vector< string > &suggestions, vector< bool > &verified)
string TryRankFix(CRef< CT3Reply > reply, unsigned int i, string &name)
objects::CSeq_entry_Handle m_TopSeqEntry
~CUnculTaxTool()
Destructor.
map< string, CRef< CT3Reply > > m_ReplyCache
string GetSuggestion(CRef< CT3Reply > reply)
void OnRefreshBtn(wxCommandEvent &event)
CSeqTableGridPanel * m_GridPanel
Undo/Redo interface for editing operations.
virtual void Execute(IEditCommand *command, wxWindow *window=0)=0
IWorkbench is the central interface in the application framework.
Definition: workbench.hpp:113
const_iterator end() const
Definition: map.hpp:152
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
void clear()
Definition: set.hpp:153
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
char value[7]
Definition: config.c:431
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
static void cleanup(void)
Definition: ct_dynamic.c:30
#define _(proto)
Definition: ct_nlmzip_i.h:78
static void Init(void)
Definition: cursor6.c:76
#define wxFIXED_MINSIZE
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
const char * kSequenceIdColLabel
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
CIRef< T > GetServiceByType()
Retrieves a typed reference to a service; the name of the C++ type is used as the name of the service.
Definition: service.hpp:91
void CleanupForTaxnameChange(CObjectInfo oi)
Definition: macro_util.cpp:526
objects::CSeq_entry_Handle GetTopSeqEntryFromScopedObject(SConstScopedObject &obj)
Definition: utils.cpp:2542
vector< SConstScopedObject > TConstScopedObjects
Definition: objects.hpp:65
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
Definition: ncbiobj.hpp:2024
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define kEmptyStr
Definition: ncbistr.hpp:123
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2989
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
#define NPOS
Definition: ncbistr.hpp:133
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3550
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3401
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: BioSource_.hpp:527
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
void SetOrg(TOrg &value)
Assign a value to Org data member.
Definition: BioSource_.cpp:108
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
Definition: BioSource_.hpp:545
bool IsSetDb(void) const
IDs in taxonomic or culture databases. Check if a value has been assigned to Db data member.
Definition: Org_ref_.hpp:479
const TLineage & GetLineage(void) const
Get the Lineage member data.
Definition: OrgName_.hpp:864
bool IsSetLineage(void) const
Lineage with semicolon separators. Check if a value has been assigned to Lineage data member.
Definition: OrgName_.hpp:852
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
Definition: Org_ref_.hpp:372
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
Definition: Org_ref_.hpp:381
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
Definition: Org_ref_.hpp:529
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
Definition: Org_ref_.hpp:541
const TColumns & GetColumns(void) const
Get the Columns member data.
Definition: Seq_table_.hpp:433
void SetHeader(THeader &value)
Assign a value to Header data member.
vector< CRef< CSeqTable_column > > TColumns
Definition: Seq_table_.hpp:92
void SetData(TData &value)
Assign a value to Data data member.
TNum_rows GetNum_rows(void) const
Get the Num_rows member data.
Definition: Seq_table_.hpp:393
const TString & GetString(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
TColumns & SetColumns(void)
Assign a value to Columns data member.
Definition: Seq_table_.hpp:439
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
TSource & SetSource(void)
Select the variant.
Definition: Seqdesc_.cpp:572
bool IsData(void) const
Check if variant Data is selected.
Definition: T3Reply_.hpp:263
const TData & GetData(void) const
Get the variant data.
Definition: T3Reply_.cpp:124
bool IsSetStatus(void) const
Check if a value has been assigned to Status data member.
Definition: T3Data_.hpp:328
const TStatus & GetStatus(void) const
Get the Status member data.
Definition: T3Data_.hpp:340
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: T3Data_.hpp:273
bool IsError(void) const
Check if variant Error is selected.
Definition: T3Reply_.hpp:257
const TError & GetError(void) const
Get the variant data.
Definition: T3Reply_.cpp:102
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: T3Data_.hpp:285
const TMessage & GetMessage(void) const
Get the Message member data.
Definition: T3Error_.hpp:394
bool IsSetMessage(void) const
Check if a value has been assigned to Message data member.
Definition: T3Error_.hpp:382
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
END_EVENT_TABLE()
int i
void GetViewObjects(IWorkbench *workbench, TConstScopedObjects &objects)
#define wxT(x)
Definition: muParser.cpp:41
void ReportUsage(const wxString &dialog_name)
Report opening & accepting events in the editing package.
const struct ncbi::grid::netcache::search::fields::SIZE size
static char tmp[2048]
Definition: utf8.c:42
static const char * suffix[]
Definition: pcregrep.c:408
#define FOR_EACH_DBXREF_ON_ORGREF(Itr, Var)
FOR_EACH_DBXREF_ON_ORGREF EDIT_EACH_DBXREF_ON_ORGREF.
static static static wxID_ANY
#define FOR_EACH_SEQENTRY_ON_SEQSET(Itr, Var)
FOR_EACH_SEQENTRY_ON_SEQSET EDIT_EACH_SEQENTRY_ON_SEQSET.
#define FOR_EACH_SEQDESC_ON_SEQENTRY(Itr, Var)
FOR_EACH_SEQDESC_ON_SEQENTRY EDIT_EACH_SEQDESC_ON_SEQENTRY.
CConstRef< objects::CBioSource > orig_obj
CConstRef< objects::CBioSource > new_obj
CRef< objects::CBioSource > actual_obj
else result
Definition: token2.c:20
static const char * sUnfixable[]
static const char * sAmplifiedSpeciesSpecific[]
USING_SCOPE(ncbi::objects)
static const char * s_UntrimmableWords[]
#define ID_ADD_BACTERIUM_UNCUL_TAXTOOL
#define ID_APPLY_BTN
#define ID_ADD_SP_UNCUL_TAXTOOL
#define ID_REFRESH_BTN
#define ID_CANCEL_BTN
#define ID_TRIM_BTN
CRef< CCmdComposite > CleanupCommand(objects::CSeq_entry_Handle orig_seh, bool extended, bool do_tax)
wxString ToWxString(const string &s)
Definition: wx_utils.hpp:173
Modified on Wed Mar 27 11:25:14 2024 by modify_doxy.py rev. 669887