NCBI C++ ToolKit
entrez_search_tool.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: entrez_search_tool.cpp 39744 2017-10-31 21:12:13Z katargir $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrey Yazhuk
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include "entrez_search_tool.hpp"
36 
37 #include <gui/objutils/label.hpp>
38 
41 
43 
45 
46 #include <wx/sizer.h>
47 #include <wx/stattext.h>
48 #include <wx/choice.h>
49 #include <wx/srchctrl.h>
50 
52 
55 
56 #define ID_COMBOBOX 11003
57 #define ID_TEXT 11414
58 
59 ///////////////////////////////////////////////////////////////////////////////
60 /// CDocsumTableModel
61 
62 typedef pair<string,string> TStringPair;
63 
65 {
66 public:
67  CDocsumTableModel( const string& aDbName );
68 
69  virtual int GetNumExtraColumns() const;
70  virtual wxString GetExtraColumnName( int col ) const;
71  virtual wxVariant GetExtraValueAt( int row, int col ) const;
72 
73  virtual wxString GetImageAlias( int row ) const;
74 
75 protected:
76  string m_DbName;
77 
78  /// maps doc summary field names to columns
79  vector<TStringPair> m_Fields;
80 };
81 
82 
83 ///////////////////////////////////////////////////////////////////////////////
84 /// IDMSearchTool
86 {
87 }
88 
89 
91 {
92  return new CEntrezSearchTool();
93 }
94 
95 
97 {
98  static string s_name("Search NCBI Public Databases");
99  return s_name;
100 }
101 
102 
104 {
105  return "";
106 }
107 
108 
110 {
111  CIRef<IDMSearchForm> form(new CEntrezSearchForm(*this));
112  return form;
113 }
114 
115 
117 {
118  return false;
119 }
120 
121 
123 {
125  CEntrezSearchQuery* e_query = dynamic_cast<CEntrezSearchQuery*>(&query);
126  if(e_query) {
127  job.Reset(new CEntrezSearchJob(*e_query));
128  }
129  return job;
130 }
131 
133 {
134  return "search_tool::entrez_search_tool";
135 }
136 
138 {
139  return "Datamining Tool - Entrez Search search";
140 }
141 
142 
143 ///////////////////////////////////////////////////////////////////////////////
144 /// CEntrezSearchQuery
145 CEntrezSearchQuery::CEntrezSearchQuery(const string& terms, const string& db_name)
146 : m_Terms(terms),
147  m_DbName(db_name)
148 {
149 }
150 
151 
152 ///////////////////////////////////////////////////////////////////////////////
153 /// CEntrezSearchForm
154 
156 : m_Tool(&tool),
157  m_CurrDbName("Entrez Gene"),
158  m_DbCombo(NULL)
159 {
160 }
161 
162 
164 {
165 }
166 
167 
169 {
171 
172  m_DbNames.clear();
174  m_CurrDbName = m_DbNames.begin()->second;
175 }
176 
177 
178 
179 static const char* kDatabaseTag = "Database";
180 
181 
183 {
185 }
186 
187 
189 {
190  if (m_DbCombo)
191  view.Set(kDatabaseTag, ToStdString( m_DbCombo->GetStringSelection() ));
192 }
193 
194 
195 wxSizer* CEntrezSearchForm::GetWidget(wxWindow * parent)
196 {
197  if ( !m_Sizer) {
198  wxFlexGridSizer * sz = new wxFlexGridSizer(1, 3, 0, 0);
199  sz->AddGrowableCol(2);
200  m_Sizer = sz;
201 
202  m_Sizer->Add(new wxStaticText( parent, wxID_STATIC,
203  wxT("Select NCBI Database:"),
204  wxDefaultPosition, wxDefaultSize, 0 ),
205  0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
206 
207  m_DbCombo = new wxChoice(parent, ID_COMBOBOX,
208  wxDefaultPosition, wxDefaultSize,
209  0, (const wxString*)NULL);
210 
211  m_Sizer->Add(m_DbCombo,1, wxGROW|wxALIGN_CENTER_VERTICAL|wxALL, 5);
212 
213  m_Text = new CSearchControl(parent, ID_TEXT, wxT(""),
214  wxDefaultPosition, wxDefaultSize,
215  wxTE_PROCESS_ENTER );
216  m_Sizer->Add(m_Text,1, wxGROW|wxALIGN_CENTER_VERTICAL|wxALL, 5);
217  //m_Text->SetAutoOff();
218  }
219  return m_Sizer;
220 }
221 
223 {
224  m_DbCombo->Clear();
225 
226  m_DbNames.clear();
229  m_DbCombo->Append(ToWxString(it->second), (void*)it->first.c_str());
230  }
231 
232  if ( !m_CurrDbName.empty() ) {
233  m_DbCombo->SetStringSelection(ToWxString(m_CurrDbName));
234  int sel = m_DbCombo->GetSelection();
235  if (sel == wxNOT_FOUND) {
236  m_DbCombo->Select(0);
237  }
238  } else {
239  m_DbCombo->Select(0);
240  }
241  m_CurrDbName = ToStdString(m_DbCombo->GetStringSelection());
242 }
243 
245 {
247  // context - independent
249 }
250 
251 
253 {
254  const char * dbname =
255  (const char*)m_DbCombo->GetClientData(m_DbCombo->GetSelection());
256 
257  string q_s = ToStdString(m_Text->GetValue());
259  return ref;
260 }
261 
262 
263 ///////////////////////////////////////////////////////////////////////////////
264 /// CEntrezSearchJob
265 
267 : m_Query(&query)
268 {
269  string vis_db_name = CEntrezDB::GetVisibleName(m_Query->GetDbName());
270  m_Descr = "Query: " + m_Query->GetTerms()
271  + ", database = " + vis_db_name;
272 }
273 
274 
276 {
277  if(m_Query->GetTerms().empty()) {
278  m_Error = new CAppJobError("Invalid input parameters - no search terms specified.");
279  return false;
280  }
281  return true;
282 }
283 
284 
285 static int kMaxResults = 1000;
286 
288 {
289  // prepare search params and search
290  string terms(m_Query->GetTerms());
291  string db_name(m_Query->GetDbName());
292  bool assemblyDB(db_name == "assembly");
293  size_t total_uids = 0;
294 
295  xml::document docsums;
296  CEntrezDB::Query(db_name, terms, total_uids, docsums, kMaxResults);
297  m_ResultsCount = (int)total_uids;
299 
300 
302  CRef<CScope> scope(new CScope(*om));
303  scope->AddDefaults();
304 
305  if(total_uids && !IsCanceled()) {
306  // process results
307  CMutexGuard Guard(m_Mutex);
308 
309  CObjectList * obj_list = m_TempResult->GetObjectList();
310 
311  xml::node_set nodes ( docsums.get_root_node().run_xpath_query("//DocumentSummary") );
312  NON_CONST_ITERATE(xml::node_set, it, nodes) {
313  if (assemblyDB)
314  SetReleaseType(*it);
315  obj_list->AddRow(new CXmlNodeObject(*it, db_name), scope.GetPointer());
316  if(IsCanceled()) {
317  return eCanceled;
318  }
319  }
320  return eCompleted;
321  }
322  return eCanceled;
323 }
324 
326 {
327  return new CDocsumTableModel( m_Query->GetDbName() );
328 }
329 
331 {
332  xml::node::const_iterator itAccession = ds.find("AssemblyAccession");
333  if (itAccession == ds.end())
334  return;
335  if (NPOS != NStr::Find(itAccession->get_content(), "GCF_")) { // RefSeq Accession
336  xml::node releaseType("ReleaseType", "RefSeq");
337  ds.insert(releaseType);
338  }
339  else { // GenBank Accession
340  xml::node releaseType("ReleaseType", "GenBank");
341  ds.insert(releaseType);
342  }
343 }
344 
345 ///////////////////////////////////////////////////////////////////////////////
346 /// CDocsumTableModel
347 
348 static const TStringPair skGeneralFields[] = {
349  TStringPair("AccessionVersion", "Label"),
350  TStringPair("Title", "Description"),
351  TStringPair("Extra", "FASTA IDs"),
352  TStringPair("TaxId", "Taxonomic ID")
353 };
354 
355 static const TStringPair skAssemblyFields[] = {
356  TStringPair("AssemblyName", "Name"),
357  TStringPair("AssemblyAccession", "Accession"),
358  TStringPair("Organism", "Organism"),
359  TStringPair("AssemblyDescription", "Description"),
360  TStringPair("AssemblyClass", "Class"),
361  TStringPair("ReleaseType", "Release Type"),
362  TStringPair("NCBIReleaseDate", "Release Date"),
363 };
364 
365 static const TStringPair skGeneFields[] = {
366  TStringPair("Name", "Label"),
367  TStringPair("Description", "Description"),
368  // The path to the ScientificName (it is nested in Organism node)
369  TStringPair("Organism/ScientificName", "Organism"),
370  TStringPair("Chromosome", "Chromosome"),
371  TStringPair("OtherAliases", "Aliases"),
372  TStringPair("MapLocation", "Map Location")
373 };
374 
375 static const TStringPair skGenomeFields[] = {
376  TStringPair("Organism_Name", "Name"),
377  TStringPair("Organism_Kingdom", "Kingdom"),
378  TStringPair("Organism_Group", "Group"),
379  TStringPair("Organism_Subgroup", "Subgroup"),
380  TStringPair("Defline", "Defline"),
381  TStringPair("Assembly_Name", "Assembly Name"),
382  TStringPair("Assembly_Accession", "Assembly Accession")
383 };
384 
385 CDocsumTableModel::CDocsumTableModel( const string& aDbName )
386 : m_DbName( aDbName )
387 {
388  const TStringPair* ptr;
389  int size;
390 
391  if( m_DbName == "gene" ){
392  ptr = &skGeneFields[0];
393  size = sizeof(skGeneFields);
394  }
395  else if( m_DbName == "genome" ){
396  ptr = &skGenomeFields[0];
397  size = sizeof(skGenomeFields);
398  }
399  else if (m_DbName == "assembly"){
400  ptr = &skAssemblyFields[0];
401  size = sizeof(skAssemblyFields);
402  }
403  else {
404  ptr = &skGeneralFields[0];
405  size = sizeof(skGeneralFields);
406  }
407 
408  int num = size /sizeof(TStringPair);
409  for( int i = 0; ptr && i < num; i++ ){
410  m_Fields.push_back( *(ptr +i) );
411  }
412 }
413 
415 {
416  return (int)m_Fields.size();
417 }
418 
419 wxString CDocsumTableModel::GetExtraColumnName( int col ) const
420 {
421  if( col < 0 || col >= GetNumExtraColumns() ){
422  _ASSERT(false);
423  NCBI_THROW(CException, eUnknown, "Invalid extra column index");
424  }
425  return ToWxString(m_Fields[col].second);
426 }
427 
428 wxVariant CDocsumTableModel::GetExtraValueAt( int row, int col ) const
429 {
430  if( col < 0 || col >= GetNumExtraColumns() ){
431  _ASSERT(false);
432  NCBI_THROW(CException, eUnknown, "Invalid extra column index");
433  }
434 
435  const CObject* obj = m_ObjectList->GetObject( row );
436  const CXmlNodeObject* doc_sum = dynamic_cast<const CXmlNodeObject*>(obj);
437  if( !doc_sum )
438  return ToWxString( "" );
439 
440  const string& field = m_Fields[col].first;
441  xml::node::const_iterator field_node = doc_sum->GetNode().find(field.c_str());
442  if ((field_node == doc_sum->GetNode().end())) {
443  do {
444  if ((string::npos == field.find('/')))
445  break;
446 
447  // Search for a path (unfortunately XPath queries are supportted only for the root node)
448  vector<string> nodes;
449  NStr::Split(field, "/", nodes);
450  const xml::node* parent = &(doc_sum->GetNode());
451  size_t count = nodes.size();
452  size_t i;
453  for (i = 0; parent && (i<count); ++i) {
454  field_node = parent->find(nodes[i].c_str());
455  if ((field_node == parent->end()))
456  break;
457  parent = &(*field_node);
458  }
459 
460  if (!parent || (i != count))
461  break;
462 
463  return ToWxString(parent->get_content());
464  }
465  while (false);
466  return ToWxString("");
467  }
468 
469  return ToWxString( field_node->get_content() );
470 }
471 
472 wxString CDocsumTableModel::GetImageAlias( int row ) const
473 {
474  if( m_DbName == "gene" ) return wxT("symbol::feature");
475  if( m_DbName == "protein" ) return wxT("symbol::sequence_protein");
476  if( m_DbName == "nucleotide" ) return wxT("symbol::sequence_dna");
477  if( m_DbName == "assembly" ) return wxT("symbol::sequence");
478 
479  return wxT("");
480 }
481 
482 
CAppJobError Default implementation for IAppJobError - encapsulates a text error message.
CObjectList * GetObjectList()
virtual wxString GetExtraColumnName(int col) const
virtual int GetNumExtraColumns() const
virtual wxVariant GetExtraValueAt(int row, int col) const
CDocsumTableModel(const string &aDbName)
vector< TStringPair > m_Fields
maps doc summary field names to columns
virtual wxString GetImageAlias(int row) const
static void GetDbNames(vector< string > &names)
static string GetVisibleName(const string &db_name)
static void Query(const string &db_name, const string &terms, size_t &total_uids, xml::document &docsums, size_t max_return=0)
CEntrezSearchJob.
CEntrezSearchTool.
CEntrezSearchTool.
CObjectListTableModel.
CRef< CObjectList > m_ObjectList
CObjectList Data structure representing a list of CObjects with associated Scopes and other optional ...
Definition: object_list.hpp:63
int AddRow(CObject *obj, objects::CScope *scope)
CObject * GetObject(int row)
access to values (row, column)
CObject –.
Definition: ncbiobj.hpp:180
CRef –.
Definition: ncbiobj.hpp:618
class CRegistryReadView provides a nested hierarchical view at a particular key.
Definition: reg_view.hpp:58
string GetString(const string &key, const string &default_val=kEmptyStr) const
Definition: reg_view.cpp:246
void Set(const string &key, int val)
access a named key at this level, with no recursion
Definition: reg_view.cpp:533
CScope –.
Definition: scope.hpp:92
CSearchControl.
IDMSearchFormController * m_Controller
virtual void UpdateContexts()
updates m_ContextCombo
CSearchControl * m_Text
int m_MaxResultsCount
Max possible results count.
CRef< CDMSearchResult > m_TempResult
holds temporary results, guarded by Mutex
string m_Descr
human-readable description of the Job
int m_ResultsCount
total number of results
CMutex m_Mutex
synchronizes access to the Job members
CRef< CAppJobError > m_Error
const xml::node & GetNode() const
virtual void OnSearchEnabled(bool)
virtual void Init()
IDMSearchQuery - abstract data mining query.
IDataMiningContext IDataMiningContext represents an abstract context for a Search.
IUITool represents an abstract algorithm that is bound to a UI component.
Definition: ui_tool.hpp:59
The xml::document class is used to hold the XML tree and various bits of information about it.
Definition: document.hpp:80
const node & get_root_node(void) const
Get a reference to the root node of this document.
Definition: document.cpp:539
The xml::node::const_iterator provides a way to access children nodes similar to a standard C++ conta...
Definition: node.hpp:746
The xml::node_set class is used to store xpath query result set.
Definition: node_set.hpp:68
The xml::node class is used to hold information about one XML node.
Definition: node.hpp:106
iterator end(void)
Get an iterator that points one past the last child for this node.
Definition: node.hpp:835
iterator find(const char *name, const ns *nspace=NULL)
Find the first child node that has the given name and namespace.
Definition: node.cpp:1258
node_set run_xpath_query(const xpath_expression &expr)
Run the given XPath query.
Definition: node.cpp:1292
const char * get_content(void) const
Get the content for this text node.
Definition: node.cpp:797
iterator insert(const node &n)
Insert a new child node.
Definition: node.cpp:1463
size_type size(void) const
Returns the number of childer this nodes has.
Definition: node.cpp:1199
pair< string, string > TStringPair
CDocsumTableModel.
USING_SCOPE(objects)
static const TStringPair skGeneralFields[]
CDocsumTableModel.
static const TStringPair skGenomeFields[]
static const TStringPair skAssemblyFields[]
#define ID_TEXT
static int kMaxResults
static const char * kDatabaseTag
static const TStringPair skGeneFields[]
#define ID_COMBOBOX
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
virtual CObjectListTableModel * x_GetNewOLTModel() const
factory method creating new column handler for CObjectListWidget
CEntrezSearchQuery(const string &terms, const string &db_name)
CEntrezSearchTool.
virtual string GetDescription() const
returns a detailed description of the method that is used in UI
vector< TStrPair > TNamePairs
virtual void x_LoadSettings(const CRegistryReadView &view)
virtual string GetExtensionIdentifier() const
returns the unique human-readable identifier for the extension the id should use lowercase letters se...
string m_CurrDbName
Entrez db names.
string GetDbName() const
virtual CIRef< IDMSearchForm > CreateSearchForm()
factory method for creating a form representing the tool
virtual string GetExtensionLabel() const
returns a displayable label for this extension ( please capitalize the key words - "My Extension" )
CEntrezSearchTool()
IDMSearchTool.
CRef< CEntrezSearchQuery > m_Query
virtual wxSizer * GetWidget(wxWindow *parent)
return a widget associated with the form; the form controls the lifetime of the widget (do not delete...
wxChoice * m_DbCombo
techical name
virtual CIRef< IDMSearchQuery > ConstructQuery()
CEntrezSearchJob(CEntrezSearchQuery &query)
CEntrezSearchJob.
virtual void UpdateContexts()
updates m_ContextCombo
void SetReleaseType(xml::node &ds)
Adds an additional child node, indicating the release type (RefSeq or GenBank)
virtual IUITool * Clone() const
virtual bool IsCompatible(IDataMiningContext *context)
retuns true if the tool is compatible with the provided Search Context
virtual CRef< CSearchJobBase > x_CreateJob(IDMSearchQuery &query)
implementing CSearchToolBase pure virtual function
virtual string GetName() const
returns unique name of the method that is used in UI to identify it
virtual EJobState x_DoSearch()
performs searching, assuming that params are correct; Implement in derived classes
virtual void x_SaveSettings(CRegistryWriteView view) const
virtual bool x_ValidateParams()
returns true if Job params are correct, implement in derived classes
virtual bool IsCanceled() const override
EJobState
Job states (describe FSM)
Definition: app_job.hpp:86
@ eCanceled
Definition: app_job.hpp:91
@ eCompleted
Definition: app_job.hpp:89
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
#define NPOS
Definition: ncbistr.hpp:133
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2887
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
#define wxT(x)
Definition: muParser.cpp:41
const struct ncbi::grid::netcache::search::fields::SIZE size
The Object manager core.
CRef< objects::CObjectManager > om
static string query
#define _ASSERT
wxString ToWxString(const string &s)
Definition: wx_utils.hpp:173
string ToStdString(const wxString &s)
Definition: wx_utils.hpp:161
Modified on Sat Dec 09 04:50:00 2023 by modify_doxy.py rev. 669887