NCBI C++ ToolKit
component_search_tool.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: component_search_tool.cpp 36583 2016-10-12 15:11:14Z evgeniev $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrey Yazhuk
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
36 
37 #include <corelib/ncbitime.hpp>
38 
41 
42 #include <gui/objutils/label.hpp>
43 #include <gui/objutils/utils.hpp>
45 
47 
48 #include <objmgr/util/sequence.hpp>
49 #include <objmgr/seq_map_ci.hpp>
50 
52 
53 #include <wx/sizer.h>
54 #include <wx/stattext.h>
55 #include <wx/choice.h>
56 #include <wx/srchctrl.h>
57 
58 
59 #define ID_COMBOBOX 10003
60 #define ID_TEXT 11414
61 
62 
65 
66 ///////////////////////////////////////////////////////////////////////////////
67 /// IDMSearchTool
69 {
70 }
71 
72 
74 {
75  static string name("Component Search");
76  return name;
77 }
78 
79 
81 {
82  return new CComponentSearchTool();
83 }
84 
85 
87 {
88  return "";
89 }
90 
91 
93 {
95  return form;
96 }
97 
98 
100 {
101  ISeqLocSearchContext* sl_ctx =
102  dynamic_cast<ISeqLocSearchContext*>(context);
103  return sl_ctx != NULL;
104 }
105 
106 
108 {
110  CComponentSearchQuery* f_query = dynamic_cast<CComponentSearchQuery*>(&query);
111  if(f_query) {
112  job.Reset(new CComponentSearchJob(*f_query));
113  }
114  return job;
115 }
116 
117 
118 
120 {
121  return "search_tool::component_search_tool";
122 }
123 
125 {
126  return "Datamining Tool - Component search";
127 }
128 
129 
130 ///////////////////////////////////////////////////////////////////////////////
131 /// CComponentSearchQuery
132 
134 : CSearchQueryBase(locs),
135  m_Ids(ids)
136 {
137 }
138 
139 
140 ///////////////////////////////////////////////////////////////////////////////
141 /// CComponentSearchForm
142 
144 : m_Tool(&tool)
145 {
146 }
147 
148 
150 {
151  x_CreateWidgets();
152 }
153 
154 
156 {
158 
159 
160 
161  // x_UpdateData(eDDX_Set);
162 }
163 
164 
166 {
168 }
169 
170 
172 {
173 // CSearchFormBase::x_CreateWidgets(2, 3, true);
174 }
175 
176 
178 {
179  return m_Tool.GetPointer();
180 
181 }
182 
183 
184 wxSizer * CComponentSearchForm::GetWidget(wxWindow * parent)
185 {
186  if (!m_Sizer) {
187  wxFlexGridSizer * sz = new wxFlexGridSizer(1, 4, 0, 0);
188  sz->AddGrowableCol(3);
189  m_Sizer = sz;
190 
191  m_Sizer->Add(new wxStaticText( parent, wxID_STATIC, wxT("Search Context:"), wxDefaultPosition, wxDefaultSize, 0 ),
192  0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
193 
194  m_DbCombo = new wxChoice(parent, ID_COMBOBOX,
195  wxDefaultPosition, wxDefaultSize,
196  0, (const wxString*)NULL);
197 
198  m_Sizer->Add(m_DbCombo,1, wxGROW|wxALIGN_CENTER_VERTICAL|wxALL, 5);
199 
200  m_Sizer->Add(new wxStaticText( parent, wxID_STATIC, wxT("Search Expression:"), wxDefaultPosition, wxDefaultSize, 0 ),
201  0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
202 
203  m_Text = new CSearchControl(parent, ID_TEXT, wxT(""), wxDefaultPosition, wxDefaultSize, wxTE_PROCESS_ENTER );
204  m_Sizer->Add(m_Text,1, wxGROW|wxALIGN_CENTER_VERTICAL|wxALL, 5);
205 
206  }
207  return m_Sizer;
208 }
209 
211 {
213 }
214 
216 {
217 
219 
220  m_SeqLocContexts.clear();
221  for( size_t i = 0; i < m_Contexts.size(); i++ ) {
222  if (m_DbCombo->GetSelection() != 0 &&
223  m_DbCombo->GetSelection() != int(i + 1)) {
224  continue;
225  }
226  ISeqLocSearchContext* seq_ctx =
227  dynamic_cast<ISeqLocSearchContext*>(m_Contexts[i]);
228  m_SeqLocContexts.push_back(seq_ctx);
229  }
230 
231  _ASSERT(m_SeqLocContexts.size());
232 
233  /// accumulate locations from all selected contexts
234  TScopedLocs sc_locs;
235  for( size_t i = 0; i < m_SeqLocContexts.size(); i++ ) {
237  _ASSERT(sl_ctx);
238 
239  // tereshko: skip context if it can't participate in search anymore (selection loss, etc)
240  if (sl_ctx->GetSearchLoc().IsNull()) continue;
241 
242  CRef<CSeq_loc> loc(new CSeq_loc());
243  if (m_bRange) {
244  loc = sl_ctx->GetSearchLoc();
245  }
246  else {
247  CRef<CSeq_id> id(new CSeq_id());
248  id->Assign(*(sl_ctx->GetSearchLoc()->GetId()));
249  loc->SetWhole(*id);
250  }
251 
253  sl.m_Loc = loc;
254  sl.m_Scope = sl_ctx->GetSearchScope();
255  sl.m_ContextName = sl_ctx->GetDMContextName();
256  sc_locs.push_back(sl);
257  }
258 
259  ref = new CComponentSearchQuery(sc_locs, ToStdString(m_Text->GetValue()));
260  return ref;
261 }
262 
263 
264 ///////////////////////////////////////////////////////////////////////////////
265 /// CComponentSearchJob
266 
268 : m_Query(&query)
269 {
270  string s_locs, id;
271  static string sep(", ");
273  CLabel::GetLabel(*it->m_Loc, &id, CLabel::eDefault,
274  it->m_Scope.GetPointer());
275 
276  if ( !s_locs.empty() ) {
277  s_locs += sep;
278  }
279  s_locs += id;
280  }
281  m_Descr = "Search Components \"" + m_Query->GetIds() + "\" on " + s_locs;
282 }
283 
284 
286 {
287  m_Error.Reset();
288 
289  if( m_Query->GetScopedLocs().empty()) {
290  // this should never happen
291  m_Error = new CAppJobError("Invalid input parameters - search context is undefined.");
292  } else if(m_Query->GetIds().empty()) {
293  m_Error = new CAppJobError("No sequence ids are provided.");
294  }
295  return m_Error ? false : true;
296 }
297 
298 
300 {
301  obj_list.AddColumn(CObjectList::eString, "Component");
302  obj_list.AddColumn(CObjectList::eString, "Context");
303 }
304 
305 
306 // TODO: this function is too long
308 {
309  CStopWatch sw;
310  sw.Start();
311  {
312  CMutexGuard Guard(m_Mutex);
313  m_ProgressStr = "Loading data." ;
314  }
315 
316  TScopedLocs& scoped_locs = m_Query->GetScopedLocs();
317  string ids = m_Query->GetIds();
318 
319  /// for every Seq_loc
320  int count = 0;
321  int found = 0;
322  NON_CONST_ITERATE(TScopedLocs, it_sl, scoped_locs) {
323  if(IsCanceled()) {
324  break;
325  }
326  /// extract context data
327  TScopedLoc& sc_loc = *it_sl;
328  CSeq_loc& seq_loc = *sc_loc.m_Loc;
329  CScope& scope = *sc_loc.m_Scope;
330 
331  string loc_label;
332  CLabel::GetLabel(seq_loc, &loc_label, CLabel::eDefault, &scope);
333 
334  /// determine our list of seq-ids from the input query
335  list<CSeq_id_Handle> idhs;
336  list<string> toks;
337  NStr::Split(ids, " \t\n;:,", toks, NStr::fSplit_Tokenize);
338  ITERATE (list<string>, iter, toks) {
339  const string& s = *iter;
340  try {
341  CSeq_id id(s);
343 
344  // make sure seq_id's lifetime is long enough to ensure
345  // a valid tex_id
346  CConstRef<CSeq_id> seq_id = idh.GetSeqId();
347  const CTextseq_id* txt_id = seq_id->GetTextseq_Id();
348  if (txt_id && txt_id->IsSetVersion()) {
349  try {
351  if (gi) {
352  idh = gi;
353  }
354  }
355  catch (CException& e) {
356  LOG_POST(Info << "accession: " << s
357  << "not a gi: " << e.GetMsg());
358  }
359  }
360 
361  idhs.push_back(idh);
362  }
363  catch (CException& e) {
364  LOG_POST(Error << "error handling accession: " << s
365  << ": " << e.GetMsg());
366  }
367  }
368 
369  /// retrieve the sequence of interest and establish our search range
370  CSeq_id_Handle bsh_idh = sequence::GetIdHandle(seq_loc, &scope);
371  CBioseq_Handle bsh = scope.GetBioseqHandle(bsh_idh);
372  const CSeqMap& seqmap = bsh.GetSeqMap();
374  if (seq_loc.IsWhole()) {
375  range.SetFrom(0);
376  range.SetTo(bsh.GetBioseqLength());
377  } else {
378  range = seq_loc.GetTotalRange();
379  }
380 
381  /// scan up to five levels deep in the seq-map structure
382  for (size_t level = 0;
383  level < 5 && ! IsCanceled() && idhs.size(); ++level) {
384  CSeqMap_CI map_it =
385  seqmap.ResolvedRangeIterator(&scope, range.GetFrom(),
386  range.GetLength(), eNa_strand_plus,
387  level, CSeqMap::fFindRef);
388 
389  for ( ; idhs.size() && map_it && ! IsCanceled(); ++map_it) {
390  CSeq_id_Handle handle = map_it.GetRefSeqid();
391 
392  if (map_it.GetType() != CSeqMap::eSeqRef) {
393  continue;
394  }
395 
396  list<CSeq_id_Handle>::iterator id_it = idhs.begin();
397  for ( ; id_it != idhs.end(); ) {
398  if (CSeqUtils::Match(*id_it, handle, &scope)) {
399 
400  /// format the label for this entry
401  string seq_id_str;
402  CLabel::GetLabel(*handle.GetSeqId(), &seq_id_str,
403  CLabel::eDefault, &scope);
404 
405  /// get a location for display
406  CRef<CSeq_loc> loc(new CSeq_loc);
407  loc->SetInt().SetFrom(map_it.GetPosition());
408  loc->SetInt().SetTo (map_it.GetEndPosition() - 1);
409  loc->SetId(*bsh_idh.GetSeqId());
410  x_AddToResults(*loc, scope, seq_id_str,
411  sc_loc.m_ContextName);
412  idhs.erase(id_it++);
413  ++found;
414  } else {
415  ++id_it;
416  ++count;
417  if (count % 20 == 0) {
418  // time to update the Result
419  CMutexGuard Guard(m_Mutex);
420  m_ProgressStr =
422  " components searched, " +
424  " component";
425  if (found != 1) {
426  m_ProgressStr += "s";
427  }
428  m_ProgressStr += " found.";
429  }
430  }
431  }
432  }
433  }
434  }
435 
436  LOG_POST(Info << "CComponentSearchJob::x_DoSearch(): done: "
437  << sw.Elapsed() << " seconds. Found = " << found);
438  return IsCanceled() ? eCanceled : eCompleted;
439 }
440 
441 
443  CScope& scope,
444  const string& loc_name,
445  const string& ctx_name)
446 {
447  static const int kUpdateIncrement = 20;
448 
449  int row = m_AccList.AddRow(&obj, &scope);
450  m_AccList.SetString(0, row, loc_name);
451  m_AccList.SetString(1, row, ctx_name);
452 
453  int count = m_AccList.GetNumRows();
454  if(count >= kUpdateIncrement) {
455  // time to update the Result
456  CMutexGuard Guard(m_Mutex);
457 
458  /// transfer results from Accumulator to m_TempResult
461  }
462 }
463 
464 
#define false
Definition: bool.h:36
CAppJobError Default implementation for IAppJobError - encapsulates a text error message.
CBioseq_Handle –.
CComponentSearchJob.
CComponentSearchTool.
CComponentSearchTool.
CObjectList * GetObjectList()
CObjectList Data structure representing a list of CObjects with associated Scopes and other optional ...
Definition: object_list.hpp:63
int AddRow(CObject *obj, objects::CScope *scope)
void SetString(int col, int row, const string &val)
void Append(const CObjectList &list)
adds rows from the given list, the lists must have identical sets of columns
void ClearRows()
int AddColumn(EColumnType type, const string &name, int col=-1)
int GetNumRows() const
CObject –.
Definition: ncbiobj.hpp:180
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
CSearchControl.
virtual void UpdateContexts()
updates m_ContextCombo
CSearchControl * m_Text
void UpdateContextCombo(wxChoice *combo)
CRef< CDMSearchResult > m_TempResult
holds temporary results, guarded by Mutex
string m_Descr
human-readable description of the Job
CMutex m_Mutex
synchronizes access to the Job members
CRef< CAppJobError > m_Error
CObjectList m_AccList
accumulates found objects before they are transferred to m_ResultList
CSearchQueryBase.
TScopedLocs & GetScopedLocs()
vector< SScopedLoc > TScopedLocs
Iterator over CSeqMap.
Definition: seq_map_ci.hpp:252
CSeqMap –.
Definition: seq_map.hpp:93
CStopWatch –.
Definition: ncbitime.hpp:1938
virtual void Init()
IDMSearchQuery - abstract data mining query.
IDMSearchTool interface representing a single search tool in Data Mining Service.
IDataMiningContext IDataMiningContext represents an abstract context for a Search.
virtual string GetDMContextName()=0
returns Name of the context to be used in UI
ISeqlocSearchContext.
virtual CRef< objects::CScope > GetSearchScope()=0
virtual CRef< objects::CSeq_loc > GetSearchLoc()=0
IUITool represents an abstract algorithm that is bound to a UI component.
Definition: ui_tool.hpp:59
USING_SCOPE(objects)
#define ID_TEXT
#define ID_COMBOBOX
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
static bool Match(const objects::CSeq_id &id1, const objects::CSeq_id &id2, objects::CScope *scope=NULL)
check to see if two seq-ids are identical.
virtual CIRef< IDMSearchQuery > ConstructQuery()
CComponentSearchTool()
IDMSearchTool.
virtual string GetExtensionLabel() const
returns a displayable label for this extension ( please capitalize the key words - "My Extension" )
CRef< CComponentSearchTool > m_Tool
void x_AddToResults(CObject &obj, objects::CScope &scope, const string &loc_name, const string &ctx_name)
virtual string GetExtensionIdentifier() const
returns the unique human-readable identifier for the extension; the id should use lowercase letters se...
virtual wxSizer * GetWidget(wxWindow *parent)
return a widget associated with the form; the form controls the lifetime of the widget (do not delete...
virtual bool x_ValidateParams()
returns true if Job params are correct, implement in derived classes
virtual CRef< CSearchJobBase > x_CreateJob(IDMSearchQuery &query)
implementing CSearchToolBase pure virtual function
virtual IUITool * Clone() const
virtual EJobState x_DoSearch()
performs searching, assuming that params are correct; Implement in derived classes
virtual bool IsCompatible(IDataMiningContext *context)
returns true if the tool is compatible with the provided Search Context
CComponentSearchQuery(TScopedLocs &locs, const string &ids)
CComponentSearchQuery.
CRef< CComponentSearchQuery > m_Query
CComponentSearchQuery::TScopedLocs TScopedLocs
CComponentSearchJob(CComponentSearchQuery &query)
CComponentSearchJob.
virtual void x_SetupColumns(CObjectList &obj_list)
add custom columns if needed
virtual void UpdateContexts()
updates m_ContextCombo
virtual CIRef< IDMSearchForm > CreateSearchForm()
factory method for creating a form representing the tool
CComponentSearchForm(CComponentSearchTool &tool)
CComponentSearchForm.
virtual string GetName() const
returns unique name of the method that is used in UI to identify it
virtual IDMSearchTool * x_GetTool()
CComponentSearchQuery::TScopedLocs TScopedLocs
virtual string GetDescription() const
returns a detailed description of the method that is used in UI
vector< ISeqLocSearchContext * > m_SeqLocContexts
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
virtual bool IsCanceled() const override
EJobState
Job states (describe FSM)
Definition: app_job.hpp:86
@ eCanceled
Definition: app_job.hpp:91
@ eCompleted
Definition: app_job.hpp:89
@ eDefault
Definition: label.hpp:73
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
Definition: Seq_loc.cpp:3474
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
@ eGetId_ForceGi
return only a gi-based seq-id
Definition: sequence.hpp:99
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
TSeqPos GetBioseqLength(void) const
const CSeqMap & GetSeqMap(void) const
Get sequence map.
TSeqPos GetEndPosition(void) const
return end position of current segment in sequence (exclusive)
Definition: seq_map_ci.hpp:679
CSeqMap::ESegmentType GetType(void) const
Definition: seq_map_ci.hpp:651
CSeq_id_Handle GetRefSeqid(void) const
The following function makes sense only when the segment is a reference to another seq.
Definition: seq_map_ci.cpp:312
TSeqPos GetPosition(void) const
return position of current segment in sequence
Definition: seq_map_ci.hpp:665
CSeqMap_CI ResolvedRangeIterator(CScope *scope, TSeqPos from, TSeqPos length, ENa_strand strand=eNa_strand_plus, size_t maxResolve=size_t(-1), TFlags flags=fDefaultFlags) const
Iterate segments in the range with specified strand coordinates.
Definition: seq_map.cpp:868
@ fFindRef
Definition: seq_map.hpp:137
@ eSeqRef
reference to Bioseq
Definition: seq_map.hpp:100
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5083
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508
@ fWithCommas
Use commas as thousands separator.
Definition: ncbistr.hpp:254
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2776
void Start(void)
Start the timer.
Definition: ncbitime.hpp:2765
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
static CStopWatch sw
int i
#define wxT(x)
Definition: muParser.cpp:41
range(_Ty, _Ty) -> range< _Ty >
Defines: CTimeFormat - storage class for time format.
CRef< objects::CScope > m_Scope
CRef< objects::CSeq_loc > m_Loc
static string query
#define _ASSERT
string ToStdString(const wxString &s)
Definition: wx_utils.hpp:161
Modified on Wed Nov 29 02:18:59 2023 by modify_doxy.py rev. 669887