NCBI C++ ToolKit
sequence_search_tool.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: sequence_search_tool.cpp 47479 2023-05-02 13:24:02Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrey Yazhuk
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
35 #include "sequence_search_tool.hpp"
36 
37 #include <corelib/ncbiapp.hpp>
38 #include <corelib/ncbireg.hpp>
40 
43 
44 #include <gui/objutils/label.hpp>
45 #include <gui/objutils/utils.hpp>
47 
50 
51 #include <objmgr/util/sequence.hpp>
53 
54 #include <wx/sizer.h>
55 #include <wx/stattext.h>
56 #include <wx/choice.h>
57 #include <wx/srchctrl.h>
58 #include <wx/filename.h>
59 #include <wx/dir.h>
60 
61 #define ID_COMBOBOX 10003
62 #define ID_TEXT 11414
63 #define ID_COMBOBOX_UPDATE 10111
64 
67 
68 static const char* kSeqPattern = "SeqPattern";
69 
70 /*
71 class CSeqSearchTableModel : public CObjectListTableModel
72 {
73  virtual int GetNumExtraColumns() const;
74  virtual wxString GetExtraColumnName( int col ) const;
75 };
76 
77 int CSeqSearchTableModel::GetNumExtraColumns() const
78 {
79  // we can use just the first column from the set of standard ones
80  // ie "Label"
81  return 1;
82 }
83 
84 wxString CSeqSearchTableModel::GetExtraColumnName( int col ) const
85 {
86  if( col < 0 || col >= GetNumExtraColumns() ){
87  _ASSERT(false);
88  NCBI_THROW( CException, eUnknown, "Invalid extra column index" );
89  }
90 
91  return wxT("Location");
92 }
93 */
94 
95 ///////////////////////////////////////////////////////////////////////////////
96 /// CSequenceSearchTool
// NOTE(review): the signature line for this body (listing line 97) is not present in
// this extract; the class index at the end of the page suggests this is the
// CSequenceSearchTool constructor - confirm against the original file.
// Clears the conversion options and registers two strand filters, each stored as a
// (display label, filter expression) pair.
98 {
99  m_Conversions.options.clear();
// NOTE(review): listing lines 100-109 are absent from this extract.
110 
// The "Strand=..." expression text is what the strand-filter parsing in the
// sequence-search loop later in this file compares against ('+' / '-').
111  m_Filters.filters.push_back(pair<string,string>("Show only positive strand results",
112  "Strand='+'"));
113 
114  m_Filters.filters.push_back(pair<string,string>("Show only negative strand results",
115  "Strand='-'"));
116 }
117 
118 
120 {
121  static string name("Sequence Search");
122  return name;
123 }
124 
125 
127 {
128  return new CSequenceSearchTool();
129 }
130 
131 
133 {
134  return "";
135 }
136 
137 
139 {
140  CIRef<IDMSearchForm> form(new CSequenceSearchForm(*this));
141  return form;
142 }
143 
144 
// NOTE(review): the signature line (listing line 145) is missing from this extract;
// the body reads a pointer named `context`, matching
// IsCompatible(IDataMiningContext* context) in the index - confirm.
// Returns true only when `context` can be dynamic_cast to ISeqLocSearchContext
// (a null or unrelated context yields false).
146 {
147  ISeqLocSearchContext* sl_ctx =
148  dynamic_cast<ISeqLocSearchContext*>(context);
149  return sl_ctx != NULL;
150 }
151 
// NOTE(review): the signature (listing line 152) and the declaration of `job`
// (listing line 154) are missing from this extract; per the index this is presumably
// x_CreateJob(IDMSearchQuery& query) returning CRef<CSearchJobBase> - confirm.
// Creates a CSequenceSearchJob only when `query` really is a CSequenceSearchQuery;
// otherwise `job` is returned exactly as it was declared (presumably an empty CRef).
153 {
155  CSequenceSearchQuery* f_query = dynamic_cast<CSequenceSearchQuery*>(&query);
156  if(f_query) {
157  job.Reset(new CSequenceSearchJob(*f_query));
158  }
159  return job;
160 }
161 
162 
163 
165 {
166  return "search_tool::sequence_search_tool";
167 }
168 
170 {
171  return "Datamining Tool - Sequence Search search";
172 }
173 
175 {
177 }
178 
179 ///////////////////////////////////////////////////////////////////////////////
180 /// CSequenceSearchQuery
181 
183  const string& pattern,
184  TPatternType pt_type,
185  const string& file )
186 : CSearchQueryBase(locs),
187  m_Pattern(pattern),
188  m_PatternType(pt_type),
189  m_PatternFile(file)
190 {
191 }
192 
// NOTE(review): the signature (listing line 193) is missing from this extract; the
// index lists CSequenceSearchQuery::ToString() ("for logging and debugging") - confirm.
// Builds a human-readable description of the form
//   <prefix> <pattern> at '<pattern file>'
// where <prefix> comes from a statement that is not visible here (listing line 196).
194 {
195  string s;
// NOTE(review): listing line 196 (the first append, before the space) is absent.
197  s.append(" ");
198  s.append(m_Pattern);
199  s.append(" at '");
200  s.append(m_PatternFile);
201  s.append("'");
202  return s;
203 }
204 
205 
206 ///////////////////////////////////////////////////////////////////////////////
207 /// CSequenceSearchForm
208 
210 
212 : m_Tool(&tool),
213  m_TypeCombo(NULL)
214 {
215 }
216 
217 
219 {
220 }
221 
223 {
224 }
225 
227 {
230 }
231 
232 
234 {
236 }
237 
238 
// NOTE(review): the signature (listing line 239) is missing from this extract; the
// body writes through `view`, matching x_SaveSettings(CRegistryWriteView view) in the
// index - confirm.
// Persists the currently selected search-type label under the kSeqPattern key.
// Nothing is written if the widget has not been built yet (m_TypeCombo is NULL).
// GetWidget() restores the selection by label via SetStringSelection(m_SearchPattern).
240 {
241  if (m_TypeCombo)
242  view.Set(kSeqPattern, ToStdString( m_TypeCombo->GetStringSelection() ));
243 }
244 
245 
247 {
248  return m_Tool.GetPointer();
249 }
250 
251 
// Builds the form's controls on the first call and returns the owning sizer; later
// calls return the cached m_Sizer without touching `parent`.
//
// Layout created here:
//   row 1 (4-column flex grid): "Search Context:" + m_DbCombo,
//                               "Search Type:"    + m_TypeCombo
//   m_pSearchSizer  : "Search Expression:"    + m_Text (free-text pattern)
//   m_pPatternSizer : "Select Named Pattern:" + m_PatternCombo
// Both lower sizers are added to m_Sizer; the layout-toggle code further down shows
// only one of them depending on the selected search type.
//
// @param parent  window used as parent for every control created here
// @return        m_Sizer (created on first call)
252 wxSizer * CSequenceSearchForm::GetWidget(wxWindow * parent)
253 {
254  if (!m_Sizer) {
255  wxBoxSizer * vSz = new wxBoxSizer(wxVERTICAL);
256  m_Sizer = vSz;
257 
// Column index 3 is the fourth column, i.e. the one that receives m_TypeCombo below.
258  wxFlexGridSizer * sz = new wxFlexGridSizer(1, 4, 0, 0);
259  sz->AddGrowableCol(3);
260 
261  m_Sizer->Add(sz, 0, wxGROW|wxALL, 0);
262 
263 
264  sz->Add(new wxStaticText( parent, wxID_STATIC,
265  wxT("Search Context:"),
266  wxDefaultPosition, wxDefaultSize, 0 ),
267  0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
268 
// Created empty; no items are appended to m_DbCombo in this function.
// NOTE(review): m_DbCombo and m_PatternCombo (below) are both created with
// ID_COMBOBOX - confirm that sharing the id is intended.
269  m_DbCombo = new wxChoice(parent, ID_COMBOBOX,
270  wxDefaultPosition, wxDefaultSize,
271  0, (const wxString*)NULL);
272 
273  sz->Add(m_DbCombo,1, wxGROW|wxALIGN_CENTER_VERTICAL|wxALL, 5);
274 
275  // search type
276  sz->Add(new wxStaticText( parent, wxID_STATIC, wxT("Search Type:"),
277  wxDefaultPosition, wxDefaultSize, 0 ),
278  0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
279 
280 
// The order of these four entries matters: other code in this file tests the
// selection by index (e.g. GetSelection()==3 for "Named Pattern Sets", and the
// switch on GetSelection() when the query is constructed).
281  m_TypeCombo = new wxChoice(parent, ID_COMBOBOX_UPDATE,
282  wxDefaultPosition, wxDefaultSize,
283  0, (const wxString*)NULL);
284  m_TypeCombo->Append(wxT("Exact Match"));
285  m_TypeCombo->Append(wxT("Regular Expression"));
286  m_TypeCombo->Append(wxT("Wildcard"));
287  m_TypeCombo->Append(wxT("Named Pattern Sets"));
288 
289 
// Restore a previously stored search type by its label; default to the first entry.
290  if (!m_SearchPattern.empty()) {
291  m_TypeCombo->SetStringSelection(ToWxString(m_SearchPattern));
292  }
293  else {
294  m_TypeCombo->Select(0);
295  }
296 
297  sz->Add(m_TypeCombo,1, wxGROW|wxALIGN_CENTER_VERTICAL|wxALL, 5);
298 
299 
300  m_pSearchSizer = new wxFlexGridSizer(1, 2, 0, 0);
301  m_pSearchSizer->AddGrowableCol(1);
302  m_pSearchSizer->Add(new wxStaticText( parent, wxID_STATIC,
303  wxT("Search Expression:"),
304  wxDefaultPosition, wxDefaultSize, 0 ),
305  0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
306  m_Text = new CSearchControl(parent, ID_TEXT, wxT(""), wxDefaultPosition, wxDefaultSize, wxTE_PROCESS_ENTER );
307  m_pSearchSizer->Add(m_Text,1, wxGROW|wxALIGN_CENTER_VERTICAL|wxALL, 5);
308 
309  m_pPatternSizer = new wxFlexGridSizer(1, 2, 0, 0);
310  m_pPatternSizer->AddGrowableCol(1);
311 
312  m_pPatternSizer->Add(new wxStaticText( parent, wxID_STATIC, wxT("Select Named Pattern:"), wxDefaultPosition, wxDefaultSize, 0 ),
313  0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
314  m_PatternCombo = new wxChoice(parent, ID_COMBOBOX,
315  wxDefaultPosition, wxDefaultSize,
316  0, (const wxString*)NULL);
317 
// NOTE(review): the loop header (listing line 318) is missing from this extract; the
// body appends `it->first` for each element and the brace below closes that loop.
// Presumably it iterates the named pattern sets (m_FileList) - confirm.
319  m_PatternCombo->Append(ToWxString(it->first));
320  }
// NOTE(review): Select(0) runs even if the loop appended nothing - confirm that this
// is safe for an empty wxChoice.
321  m_PatternCombo->Select(0);
322  m_pPatternSizer->Add(m_PatternCombo,1, wxGROW|wxALIGN_CENTER_VERTICAL|wxALL, 5);
323 
324  m_Sizer->Add(m_pSearchSizer, 0, wxGROW|wxALL, 0);
325  m_Sizer->Add(m_pPatternSizer, 0, wxGROW|wxALL, 0);
326 
327  }
328  return m_Sizer;
329 }
330 
331 
// NOTE(review): the signature (listing line 332) and the first statement (listing
// line 334) are missing from this extract, so the name of this member function cannot
// be established from here.
// Visible behavior: when the type combo exists, shows exactly one of the two lower
// sizers - the named-pattern chooser when selection index 3 is active (the fourth
// entry appended in GetWidget, "Named Pattern Sets"), otherwise the free-text search
// field - and then re-runs the layout.
333 {
335 
336  if (m_TypeCombo) {
337  bool bPattern = (m_TypeCombo->GetSelection()==3);
338  m_Sizer->Show(m_pSearchSizer, !bPattern);
339  m_Sizer->Show(m_pPatternSizer, bPattern);
340  m_Sizer->Layout();
341  }
342 }
343 
344 
// NOTE(review): the signature (listing line 345) is missing from this extract; the
// body builds and returns a CSequenceSearchQuery, matching
// CIRef<IDMSearchQuery> ConstructQuery() in the index - confirm.
// Several interior lines are also absent (listing lines 363, 392, 403, 408, 415, 418);
// they are marked below where they would have been.
//
// Steps visible here:
//   1. pick the search contexts selected in m_DbCombo,
//   2. turn each context's search location into one or more scoped locations,
//   3. derive the pattern type from m_TypeCombo,
//   4. build the query from locations, search string and selected named pattern.
346 {
347  m_SeqLocContexts.clear();
// Selection 0 keeps every context; selection k (k > 0) keeps only context k-1.
348  for( size_t i = 0; i < m_Contexts.size(); i++ ) {
349  if (m_DbCombo->GetSelection() != 0 &&
350  m_DbCombo->GetSelection() != int(i + 1)) {
351  continue;
352  }
// NOTE(review): the dynamic_cast result is stored without a null check; the only
// guard visible is the _ASSERT(sl_ctx) in the next loop.
353  ISeqLocSearchContext* seq_ctx =
354  dynamic_cast<ISeqLocSearchContext*>(m_Contexts[i]);
355  m_SeqLocContexts.push_back(seq_ctx);
356  }
357 
358  _ASSERT(m_SeqLocContexts.size());
359 
360  /// accumulate locations from all selected contexts
361  TScopedLocs sc_locs;
362  for( size_t i = 0; i < m_SeqLocContexts.size(); i++ ) {
// NOTE(review): listing line 363 (presumably the declaration of `sl_ctx` from
// m_SeqLocContexts[i]) is absent from this extract.
364  _ASSERT(sl_ctx);
365 
366  // tereshko: skip context if it can't participate in search anymore (selection loss, etc)
367  CRef<CSeq_loc> search_loc = sl_ctx->GetSearchLoc();
368  if (search_loc.IsNull()) {
369  continue;
370  }
371 
372  vector<CRef<CSeq_loc> > locs;
// m_bRange set: search only the context's location - a packed-int location is split
// into one interval location per member, anything else is used as is.
// m_bRange clear: search the whole sequence identified by the location's id.
373  if (m_bRange) {
374  if (search_loc->IsPacked_int()) {
375  ITERATE (CSeq_loc::TPacked_int::Tdata, iter, search_loc->GetPacked_int().Get()) {
376  locs.push_back(CRef<CSeq_loc>(new CSeq_loc()));
377  CRef<CSeq_interval> ref = *iter;
378  locs.back()->SetInt(*ref);
379  }
380  }
381  else
382  locs.push_back(search_loc);
383  }
384  else {
// NOTE(review): per the index entry for CSeq_loc::GetId(), it returns NULL when the
// location has multiple ids or none; the result is dereferenced here unchecked.
385  CRef<CSeq_id> id(new CSeq_id());
386  id->Assign(*(search_loc->GetId()));
387  locs.push_back(CRef<CSeq_loc>(new CSeq_loc()));
388  locs.back()->SetWhole(*id);
389  }
390 
391  ITERATE(vector<CRef<CSeq_loc> >, it, locs) {
// NOTE(review): listing line 392 (presumably the declaration of the scoped-location
// record `sl`) is absent from this extract.
393  sl.m_Loc = *it;
394  sl.m_Scope = sl_ctx->GetSearchScope();
395  sl.m_ContextName = sl_ctx->GetDMContextName();
396  sc_locs.push_back(sl);
397  }
398  }
399 
400  string searchString = ToStdString(m_Text->GetValue());
401 
// Selection indices follow the order in which GetWidget() appended the entries:
// 1 = "Regular Expression", 2 = "Wildcard", 3 = "Named Pattern Sets".
// NOTE(review): listing line 403 (presumably the case for index 0) and listing
// line 408 (inside case 3) are absent; no default label is visible.
402  switch (m_TypeCombo->GetSelection()){
404  case 2: m_PatternType = CSearchToolBase::eWildcard; break;
405  case 1: m_PatternType = CSearchToolBase::eRegexp; break;
406  case 3:
407  {
// Placeholder text for named-pattern searches, where the free-text field is not
// used; presumably keeps the job's "pattern is empty" validation from failing.
409  searchString = "#undefined#";
410  break;
411  }
412  }
413 
414 
// NOTE(review): listing line 415 (declaration of `ref`) and listing line 418 (the
// third constructor argument, presumably the pattern type) are absent.
416  ref = new CSequenceSearchQuery(sc_locs,
417  searchString,
419  ToStdString(m_PatternCombo->GetStringSelection()));
420  return ref;
421 }
422 
424 {
426  /*
427  bool bEnable = false;
428 
429  ITERATE(TContexts, cont, m_Contexts) {
430  ISeqLocSearchContext* seq_ctx =
431  dynamic_cast<ISeqLocSearchContext*>(*cont);
432  if (seq_ctx && !seq_ctx->GetSearchLoc().Empty()){
433  bEnable=true;
434  break;
435  }
436  }
437  if (m_Controller) {
438  m_Controller->OnSearchEnabled(bEnable);
439  }
440  */
441 }
442 
443 
// NOTE(review): the signature (listing line 444) and the declaration of `app`
// (listing line 450) are missing from this extract, so the function's name cannot be
// established from here.
// Rebuilds m_FileList from pattern definition files:
//   - reads the directory list from registry key [Patterns] PatternPath, writing the
//     default "<std>, <home>" first if the key is empty,
//   - scans every existing directory for *.ini files,
//   - for each file, section "-" supplies the set name (key "name"); every other
//     section supplies one (description, pattern) pair for that set.
445 {
// NOTE(review): this `dir` (string) is shadowed by the `wxDir dir` declared inside
// the loop below.
446  string dir;
447 
448  m_FileList.clear();
449 
451  _ASSERT(app);
452  CNcbiRegistry& registry = app->GetConfig();
453 
454  if ( (dir = registry.Get("Patterns", "PatternPath")).empty() ) {
455  registry.Set("Patterns", "PatternPath", "<std>, <home>",
456  CNcbiRegistry::ePersistent, " default external_path");
457  }
458  dir = registry.Get("Patterns", "PatternPath");
459 
// Tokens are separated by commas and/or whitespace; empty tokens are dropped.
460  list<string> paths;
461  NStr::Split(dir, ", \t\n\r", paths, NStr::fSplit_Tokenize);
462 
463  ITERATE (list<string>, iter, paths) {
464  wxString dir_name;
// The two alias tokens are resolved relative to "etc/patterns"; any other token is
// resolved as given.
// NOTE(review): tokens go through wxString::FromAscii - confirm behavior for
// non-ASCII paths.
465  if (*iter == "<std>" || *iter == "<home>") {
466  dir_name = CSysPath::ResolvePath(wxString::FromAscii(iter->c_str()), wxT("etc/patterns"));
467  } else {
468  dir_name = CSysPath::ResolvePath(wxString::FromAscii(iter->c_str()), wxEmptyString);
469  }
470  if ( dir_name.empty() ) {
471  continue;
472  }
473 
474  wxFileName fname(dir_name, wxEmptyString);
475  if ( !fname.DirExists(dir_name) ) {
476  continue;
477  }
478 
479  wxDir dir(dir_name);
480  wxString filename;
481 
482  bool cont = dir.GetFirst(&filename, wxT("*.ini"), wxDIR_FILES);
483  while ( cont ) {
484  fname.SetFullName(filename);
485  wxString full_path = fname.GetFullPath();
486 
// Each pattern file is parsed as a registry (ini) stream of its own.
487  CNcbiIfstream reg_stream(full_path.fn_str());
488  CNcbiRegistry patterns(reg_stream);
489 
490  // iterate over patterns
491  list<string> pat_ids;
492  patterns.EnumerateSections(&pat_ids);
493  string fName="";
494 
// NOTE(review): patterns are recorded only once fName is non-empty, i.e. only for
// sections enumerated after the "-" section; a file without a "-" section (or with an
// empty "name") contributes nothing. Confirm the enumeration order guarantees "-"
// comes first.
495  ITERATE (list<string>, pat_id, pat_ids) {
496  if (*pat_id == "-") {
497  fName = patterns.Get("-", "name");
498  if (!fName.empty()) {
499  m_FileList[fName].first = full_path;
500  }
501  }
502  else {
503  string pattern = patterns.Get(*pat_id, "pattern");
504  string desc = patterns.Get(*pat_id, "description");
505  if (!fName.empty()) {
506  m_FileList[fName].second.push_back(TDescPattern(desc, pattern));
507  }
508  }
509  }
510  cont = dir.GetNext(&filename);
511  }
512  }
513 }
514 
515 
516 ///////////////////////////////////////////////////////////////////////////////
517 /// CSequenceSearchJob
518 
// NOTE(review): the first line of this constructor (listing line 519) and the loop
// header (listing line 525) are missing from this extract; the index lists
// CSequenceSearchJob(CSequenceSearchQuery& query).
// Stores the query and its pattern type, then builds the human-readable job
// description: Search Sequence for "<pattern>" on <label1>, <label2>, ...
520 : m_Query(&query),
521  m_PatternType(query.GetPatternType())
522 {
523  string s_locs, id;
524  static string sep(", ");
// NOTE(review): `id` is reused across iterations without being cleared; if
// CLabel::GetLabel appends to its output string rather than overwriting it, earlier
// labels would be repeated in later entries - confirm.
526  CLabel::GetLabel(*it->m_Loc, &id, CLabel::eDefault,
527  it->m_Scope.GetPointer());
528  s_locs += id;
529  s_locs += sep;
530  }
// Drop the separator that was appended after the last label.
531  if( ! s_locs.empty()) {
532  s_locs.resize(s_locs.size() - sep.size());
533  }
534  string s_pat = m_Query->GetPattern();
535  m_Descr = "Search Sequence for \"" + s_pat + "\" on " + s_locs;
536 }
537 
538 
// NOTE(review): the signature (listing line 539) is missing from this extract; the
// index lists x_ValidateParams() ("returns true if Job params are correct").
// Resets m_Error, then records an error when the query has no scoped locations or an
// empty pattern. The two checks are independent, so when both fail the
// "Search pattern is empty." error replaces the first one.
// Returns true when no error was recorded.
540 {
541  m_Error.Reset();
542 
543  if( m_Query->GetScopedLocs().empty()) {
544  m_Error = new CAppJobError("Invalid input parameters - no search context specified.");
545  }
546  if(m_Query->GetPattern().empty()) {
547  m_Error = new CAppJobError("Search pattern is empty.");
548  }
549  return m_Error ? false : true;
550 }
551 
552 
// NOTE(review): this body is heavily truncated in this extract - the signature
// (listing line 553), the condition that opens the first branch, the case labels and
// pattern constructions inside the switch (listing lines 555, 557-558, 561, 563, 565,
// 568-569) and the named-pattern loop header (listing line 586) are all absent. The
// index lists x_DoSearch() returning IAppJob::EJobState.
//
// Visible behavior:
//   - first branch: the switch prepares m_Pattern according to m_PatternType (NULL in
//     the default case), x_SearchSequence() runs once, and m_Pattern is deleted;
//   - else branch ("named pattern search"): for each `pat`, m_SearchStr is set to
//     pat->first, m_Pattern is compiled case-insensitively from pat->second, and
//     x_SearchSequence() runs; res_state keeps the value from the last call.
// NOTE(review): m_Pattern is a raw owning pointer freed manually after
// x_SearchSequence(); if that call throws, the delete is skipped - confirm.
554 {
556  TScopedLocs& scoped_locs = m_Query->GetScopedLocs();
559  EJobState res_state = IAppJob::eFailed;
560 
562  switch(m_PatternType) {
564  string pattern = CRegexp::WildcardToRegexp(m_SearchStr);
566  break;
567  }
570  break;
571  }
572  default:
573  m_Pattern = NULL;
574  break;
575  }
576 
577  res_state = x_SearchSequence(scoped_locs);
578 
579  if (m_Pattern) {
580  delete m_Pattern;
581  m_Pattern = NULL;
582  }
583  }
584  else {
585  // named pattern search
587  m_SearchStr = pat->first;
588  m_Pattern = new CRegexp(pat->second, CRegexp::fCompile_ignore_case);
589  res_state = x_SearchSequence(scoped_locs);
590  delete m_Pattern;
591  m_Pattern = NULL;
592  }
593  }
594 
595  return res_state;
596 }
597 
598 
// NOTE(review): the signature (listing line 599) and the condition guarding the two
// "Pattern ..." columns (listing line 608) are missing from this extract; the index
// lists x_SetupColumns(CObjectList& obj_list).
// Declares the result table layout: a "Location" object label followed by the columns
// Sequence, [Pattern Start, Pattern Name], Start, Stop, Strand, Context.
// The column order here has to stay in step with the order in which x_AddToResults()
// fills cells (it advances a running column index).
600 {
601  obj_list.ClearObjectLabels();
602  obj_list.AddObjectLabel( "Location", CLabel::eContent );
603 
604  obj_list.AddColumn(CObjectList::eString, "Sequence");
605 
606 
607  // special case for Kozak pattern search
609  obj_list.AddColumn(CObjectList::eInteger, "Pattern Start");
610  obj_list.AddColumn(CObjectList::eString, "Pattern Name");
611  }
612 
613  obj_list.AddColumn( CObjectList::eInteger, "Start" );
614  obj_list.AddColumn( CObjectList::eInteger, "Stop" );
615  obj_list.AddColumn( CObjectList::eString, "Strand" );
616 
617  obj_list.AddColumn(CObjectList::eString, "Context");
618 }
619 
620 
621 static const int kMaxResults = 20000;
622 
// NOTE(review): the signature (listing line 623) is missing from this extract; the
// index lists IAppJob::EJobState x_SearchSequence(TScopedLocs& scoped_locs).
// Listing line 731 (inside the reverse-strand loop) is also absent.
//
// Searches every scoped location for the current pattern on the forward and/or
// reverse strand and appends each hit to the results.
//   - The sequence is read in windows of `span` residues advanced by `step`
//     (10000 / 9000), so consecutive windows overlap by 1000 residues.
//   - Hits are gathered per strand in `coll`, then converted to CSeq_loc results.
//   - Returns eCanceled when cancellation is observed; returns eCompleted when all
//     locations were processed or as soon as kMaxResults results exist.
624 {
625  {
626  CMutexGuard Guard(m_Mutex);
627  m_ProgressStr = "Loading sequence.";
628  }
629 
630  bool searchForward = true, searchReverse = true;
631 
// Selected filters whose expression starts with "Strand=" restrict the strands:
// '+' disables the reverse search, '-' disables the forward search.
632  ITERATE(vector<int>, iflt, m_Filters.selected) {
633  string expr = m_Filters.filters[*iflt].second;
634  if (expr.find("Strand=") == 0 && expr.size() > 7) {
635  string value = expr.substr(7);
636  if (value == "'+'")
637  searchReverse = false;
638  else if (value == "'-'")
639  searchForward = false;
640  }
641  }
642 
// Both filters selected at once would exclude everything; treat that as "no filter".
643  if (!searchForward && !searchReverse)
644  searchForward = searchReverse = true;
645 
646  /// for every Seq_loc
647  NON_CONST_ITERATE(TScopedLocs, it_sl, scoped_locs) {
648  /// extract context data
649  TScopedLoc& sc_loc = *it_sl;
650  CSeq_loc& seq_loc = *sc_loc.m_Loc;
651  CScope& scope = *sc_loc.m_Scope;
652 
653  string loc_label;
654  CLabel::GetLabel(seq_loc, &loc_label, CLabel::eDefault, &scope);
655 
// Use the location's own id when it has a single one; otherwise ask
// sequence::GetId() to resolve it through the scope.
656  CSeq_id_Handle loc_id;
657  CConstRef<CSeq_id> id(seq_loc.GetId());
658  if (id) {
659  loc_id = CSeq_id_Handle::GetHandle(*id);
660  } else {
661  loc_id = CSeq_id_Handle::GetHandle(sequence::GetId(seq_loc, sc_loc.m_Scope));
662  id = loc_id.GetSeqId();
663  }
664 
665  // create an id that will be used in results
666  CRef<CSeq_id> res_id(new CSeq_id());
667  res_id->Assign(*id);
668 
669 
670  if(IsCanceled()) {
671  return eCanceled;
672  }
673 
674  CSeqVector vec(seq_loc, scope, CBioseq_Handle::eCoding_Iupac);
675 
// Window positions are relative to `vec`; for interval locations the interval start
// is added back when result locations are created. Other location kinds use 0.
676  TSeqPos seq_offset = 0;
677  if (seq_loc.IsInt())
678  seq_offset = seq_loc.GetInt().GetFrom();
679 
680  TRangeCollection coll;
681  TSeqPos len = vec.size();
682  TSeqPos span = 10000; //TODO
683  TSeqPos step = span - 1000;
684  string data;
685 
686  if (searchForward) {
687  for (TSeqPos start = 0; start < len && ! IsCanceled(); start += step) {
688  data.erase();
// Clamp the window to the end of the sequence.
689  span = min(span, TSeqPos(vec.size() - start));
690  vec.GetSeqData(start, start + span, data);
691 
692  x_GetMatches(data, start, coll, false);
// NOTE(review): the two checks below are identical; the second is redundant.
693  if(m_ResultsCount + (int) coll.size() >= kMaxResults) {
694  break; // end the loop and add the accumulated results
695  }
696 
697  if(m_ResultsCount + (int) coll.size() >= kMaxResults) {
698  break; // end the loop and add the accumulated results
699  }
700  }
701 
702  // create results from the collection
703  ITERATE (TRangeCollection, it, coll) {
704  CRange<TSeqPos> range(it->first, it->second);
705 
706  CRef<CSeq_loc> loc(new CSeq_loc(*res_id,
707  range.GetFrom() + seq_offset,
708  range.GetTo() + seq_offset));
709  string seq;
710  x_GetSequence(vec, range, &seq);
711 
712  x_AddToResults(*loc, scope, seq, loc_label, "+", sc_loc.m_ContextName);
713  m_ResultsCount++;
714  if(m_ResultsCount >= kMaxResults) {
715  return eCompleted;
716  }
717  }
718  }
719 
720  coll.clear();
721 
// Reset the window size, which the forward pass may have clamped.
722  span = 10000; //TODO
723  step = span - 1000;
724 
725  if (searchReverse) {
726  for (TSeqPos start = 0; start < len && ! IsCanceled(); start += step) {
727  data.erase();
728  span = min(span, TSeqPos(vec.size() - start));
729  vec.GetSeqData(start, start + span, data);
730 
// NOTE(review): listing line 731 is absent from this extract; presumably it
// reverse-complements the window before matching (the index lists
// ReverseComplement) - confirm against the original file.
732  x_GetMatches(data, start, coll, true);
733 
734  if(m_ResultsCount + (int) coll.size() >= kMaxResults) {
735  break; // end the loop and add the accumulated results
736  }
737  }
// NOTE(review): the label text for "-" hits is read from the same `vec` as for "+"
// hits - confirm that showing the forward-strand text is intended.
738  ITERATE (TRangeCollection, it, coll) {
739  CRange<TSeqPos> range(it->first, it->second);
740 
741  CRef<CSeq_loc> loc(new CSeq_loc(*res_id,
742  range.GetFrom() + seq_offset,
743  range.GetTo() + seq_offset));
744  string seq;
745  x_GetSequence(vec, range, &seq);
746 
747  x_AddToResults(*loc, scope, seq, loc_label, "-", sc_loc.m_ContextName);
748  m_ResultsCount++;
749  if(m_ResultsCount >= kMaxResults) {
750  return eCompleted;
751  }
752  }
753  }
754  }
755  return IsCanceled() ? eCanceled : eCompleted;
756 }
757 
758 
// Collects into `coll` the (from, to) sequence ranges at which the current pattern
// matches inside `data`; `to` is inclusive.
// With reverse == true, positions found in `data` are mirrored within the window
// (data.size() - position) before `offset` is added.
// At most kMaxResults - m_ResultsCount ranges are accumulated in `coll`.
// NOTE(review): the first signature line (listing line 761), the case labels (listing
// lines 768, 786-788) and the search call of the first case (listing line 775) are
// missing from this extract.
759 /// "offset" argument defines the sequence position corresponding to the first
760 /// character in "data"
762  TRangeCollection& coll, bool reverse)
763 {
764  // calculate the upper limit on the expected number of matches
765  int max_coll_size = kMaxResults - m_ResultsCount;
766 
767  switch(m_PatternType) {
// First case: plain substring search for m_SearchStr (case label not visible).
769  {
770  SIZE_TYPE pos = 0; // start of a match
771  SIZE_TYPE occurrence = 0; // search position
772  SIZE_TYPE len = m_SearchStr.size();
773 
774  while((int) coll.size() < max_coll_size) {
// NOTE(review): listing line 775 (presumably the NStr::Find call that sets `pos` and
// advances `occurrence`) is absent from this extract.
776  if(pos != NPOS) {
777  TSeqPos from = (int)(reverse ? offset + data.size() - pos - len : offset + pos);
778  TSeqPos to = static_cast<TSeqPos>(from + len - 1);
779  coll.insert(make_pair(from, to));
780  } else {
781  break;
782  }
783  }
784  break;
785  }
// Second case group: matching through the compiled m_Pattern (case labels not
// visible).
789  {
790  int start_pos = 0, num = 0;
// NOTE(review): this loop ends only when NumFound() is negative (it assumes a
// negative value means "no match"). If the inner loop body is skipped while num > 0
// (IsCanceled() is true, or max_coll_size <= 0), or a match is zero-length
// (p[1] == p[0]), start_pos does not advance and the same match would be found
// again - confirm this cannot spin.
791  while(num >= 0) {
792  m_Pattern->GetMatch(data, start_pos, 0, CRegexp::fMatch_default, true);
793  num = m_Pattern->NumFound();
794 
// Per the index, NumFound() counts the pattern plus its subpatterns, so captured
// subpatterns are inserted as ranges of their own.
795  for( int i = 0; i < num && max_coll_size > 0 && ! IsCanceled(); ++i ) {
796  const auto* p = m_Pattern->GetResults(i);
797  int p0, p1;
798  if (reverse) {
799  p0 = (int)(data.size() - p[1]);
800  p1 = (int)(data.size() - p[0]);
801  }
802  else {
803  p0 = p[0];
804  p1 = p[1];
805  }
806 
807  TSeqPos from = p0 + offset;
808  TSeqPos to = p1 + offset - 1;
809 
810  coll.insert(make_pair(from, to));
811 
// The next search resumes at the end of the (sub)match processed last.
812  start_pos = p[1];
813  if((int) coll.size() >= max_coll_size) {
814  return;
815  }
816  }
817  }
818  break;
819  }
820  default:
821  _ASSERT(false);
822  break;
823  }
824 }
825 
// NOTE(review): the first signature line (listing line 827) is missing from this
// extract; the index lists
// x_GetSequence(objects::CSeqVector& vec, const CRange<TSeqPos>& range, string* s).
// Does nothing when `s` is NULL. Ranges no longer than kMaxLen (40) are copied in
// full; longer ranges are abbreviated as "<head>...<tail>".
826 // generate sequence label to be displayed in UI
828  const CRange<TSeqPos>& range,
829  string* s)
830 {
831  static const TSeqPos kMaxLen = 40;
832  if(s) {
833  if (range.GetLength() > kMaxLen) {
834  string s1, s2;
// GetSeqData takes a half-open interval [start, stop): the head below is 20 residues,
// the tail 21 (from GetTo()-20 through GetTo() inclusive).
// NOTE(review): confirm the 20/21 asymmetry is intended.
835  vec.GetSeqData(range.GetFrom(), range.GetFrom() + kMaxLen / 2, s1);
836  vec.GetSeqData(range.GetTo() - kMaxLen / 2, range.GetTo() + 1, s2);
837  *s = s1 + "..." + s2;
838  } else {
839  vec.GetSeqData(range.GetFrom(), range.GetTo() + 1, *s);
840  }
841  }
842 }
843 
844 
// NOTE(review): the first signature line (listing line 845) is missing from this
// extract, as are listing lines 862, 864, 879-880 and 883; the index lists
// x_AddToResults(CObject& obj, objects::CScope& scope, const string& sequence,
//                const string& loc_name, const string& strand, const string& ctx_name).
// Adds one result row to the accumulator list and fills its cells in the column order
// declared by x_SetupColumns(). Once the accumulator holds kUpdateIncrement (250)
// rows, the guarded section below publishes them and refreshes the progress text.
// `loc_name` is not used by any visible statement (its only use is commented out).
846  const string& sequence,
847  const string& loc_name,
848  const string& strand,
849  const string& ctx_name)
850 {
851  static const int kUpdateIncrement = 250;
852 
853  /// adding the result to the Accumulator
854  int row = m_AccList.AddRow(&obj, &scope);
855 
// NOTE(review): the cast result is dereferenced below without a null check; visible
// callers always pass a CSeq_loc.
856  CSeq_loc * loc = dynamic_cast<CSeq_loc*>(&obj);
857 
858  int col = 0;
859  //m_AccList.SetString(col++, row, loc_name);
860  m_AccList.SetString(col++, row, sequence);
861 
// NOTE(review): the condition opening this block (listing line 862) and the statement
// at listing line 864 are absent; presumably these fill the optional
// "Pattern Start" / "Pattern Name" columns - confirm.
863  m_AccList.SetInteger(col++, row, (int)loc->GetTotalRange().GetFrom() + 4);
865  }
866 
// Start/Stop are written as range position + 1 (presumably 0-based to 1-based).
867  m_AccList.SetInteger(col++, row, (int)loc->GetTotalRange().GetFrom()+1);
868  m_AccList.SetInteger(col++, row, (int)loc->GetTotalRange().GetTo()+1);
869  m_AccList.SetString(col++, row, strand);
870  m_AccList.SetString(col++, row, ctx_name);
871 
872 
873  int count = m_AccList.GetNumRows();
874  if(count >= kUpdateIncrement) {
875  // time to update the Result
876  CMutexGuard Guard(m_Mutex);
877 
878  /// transfer results from Accumulator to m_TempResult
// NOTE(review): the transfer statements (listing lines 879-880) and the statement
// that starts the progress string (listing line 883) are absent from this extract.
881 
882  /// update progress string
884  if (count != 1) {
885  m_ProgressStr += "es";
886  }
887  m_ProgressStr += " found.";
888  }
889 }
890 
// NOTE(review): the signature (listing line 891) is missing from this extract; the
// index lists x_GetNewOLTModel() const ("factory method creating new column handler
// for CObjectListWidget").
// Returns a newly allocated CObjectListTableModel; the caller receives a raw pointer.
892 {
893  return new CObjectListTableModel();
894 }
895 
CAppJobError Default implementation for IAppJobError - encapsulates a text error message.
CObjectList * GetObjectList()
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
CNcbiRegistry –.
Definition: ncbireg.hpp:913
CObjectListTableModel.
CObjectList Data structure representing a list of CObjects with associated Scopes and other optional ...
Definition: object_list.hpp:63
int AddRow(CObject *obj, objects::CScope *scope)
void AddObjectLabel(const string &name, CLabel::ELabelType type)
void SetString(int col, int row, const string &val)
void ClearObjectLabels()
void SetInteger(int col, int row, int val)
void Append(const CObjectList &list)
adds rows from the given list, the lists must have identical sets of columns
void ClearRows()
int AddColumn(EColumnType type, const string &name, int col=-1)
int GetNumRows() const
CObject –.
Definition: ncbiobj.hpp:180
CRef –.
Definition: ncbiobj.hpp:618
CRegexp –.
Definition: regexp.hpp:70
class CRegistryReadView provides a nested hierarchical view at a particular key.
Definition: reg_view.hpp:58
string GetString(const string &key, const string &default_val=kEmptyStr) const
Definition: reg_view.cpp:246
void Set(const string &key, int val)
access a named key at this level, with no recursion
Definition: reg_view.cpp:533
CScope –.
Definition: scope.hpp:92
CSearchControl.
virtual void UpdateContexts()
updates m_ContextCombo
CSearchControl * m_Text
void UpdateContextCombo(wxChoice *combo)
CRef< CDMSearchResult > m_TempResult
holds temporary results, guarded by Mutex
string m_Descr
human-readable description of the Job
int m_ResultsCount
total number of results
IDMSearchTool::TFilters m_Filters
holds the final results
CMutex m_Mutex
synchronizes access to the Job members
CRef< CAppJobError > m_Error
CObjectList m_AccList
accumulates found objects before they are transferred to m_ResultList
CSearchQueryBase.
TScopedLocs & GetScopedLocs()
vector< SScopedLoc > TScopedLocs
IDMSearchTool::TFilters m_Filters
static string GetPatternTypeStr(EPatternType ptype)
String representation of pattern type.
IDMSearchTool::TConversions m_Conversions
static SIZE_TYPE ReverseComplement(const string &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst)
@ e_Iupacna
Definition: sequtil.hpp:47
CSeqVector –.
Definition: seq_vector.hpp:65
CSequenceSearchJob.
CSequenceSearchTool.
CSequenceSearchTool.
static wxString ResolvePath(const wxString &path, const wxString &rel_name)
Utility function to hide the platform specifics of locating our standard directories and files.
Definition: sys_path.cpp:106
virtual void Init()
IDMSearchQuery - abstract data mining query.
IDMSearchTool interface representing a single search tool in Data Mining Service.
IDataMiningContext IDataMiningContext represents an abstract context for a Search.
virtual string GetDMContextName()=0
returns Name of the context to be used in UI
ISeqlocSearchContext.
virtual CRef< objects::CScope > GetSearchScope()=0
virtual CRef< objects::CSeq_loc > GetSearchLoc()=0
IUITool represents an abstract algorithm that is bound to a UI component.
Definition: ui_tool.hpp:59
void clear()
Definition: map.hpp:169
Definition: map.hpp:338
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
void clear()
Definition: set.hpp:153
size_type size() const
Definition: set.hpp:132
static CMemoryRegistry registry
Definition: cn3d_tools.cpp:81
#define false
Definition: bool.h:36
int offset
Definition: replacements.h:160
char data[12]
Definition: iconv.c:80
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
virtual IAppJob::EJobState x_DoSearch()
performs searching, assuming that params are correct; Implement in derived classes
CRef< CSequenceSearchTool > m_Tool
void x_GetSequence(objects::CSeqVector &vec, const CRange< TSeqPos > &range, string *s)
virtual CObjectListTableModel * x_GetNewOLTModel() const
factory method creating new column handler for CObjectListWidget
virtual IDMSearchTool * x_GetTool()
virtual CIRef< IDMSearchForm > CreateSearchForm()
factory method for creating a form representing the tool
virtual IDMSearchTool::TUIToolFlags GetFlags(void)
special flags
CSequenceSearchQuery::TScopedLocs TScopedLocs
CSequenceSearchQuery(TScopedLocs &locs, const string &pattern, TPatternType pt_type, const string &file)
CSequenceSearchQuery.
wxFlexGridSizer * m_pSearchSizer
CRegexp * m_Pattern
search pattern for "Exact Match" and "Wildcard" modes
virtual void x_LoadSettings(const CRegistryReadView &view)
virtual string GetName() const
returns unique name of the method that is used in UI to identify it
CSequenceSearchJob(CSequenceSearchQuery &query)
CSequenceSearchJob.
virtual string GetExtensionLabel() const
returns a displayable label for this extension ( please capitalize the key words - "My Extension" )
virtual string ToString() const
Prepare a string representation of a query (for logging and debugging)
virtual bool IsCompatible(IDataMiningContext *context)
returns true if the tool is compatible with the provided Search Context
CRef< CSequenceSearchQuery > m_Query
virtual void UpdateContexts()
updates m_ContextCombo
void x_AddToResults(CObject &obj, objects::CScope &scope, const string &sequence, const string &loc_name, const string &strand, const string &ctx_name)
virtual wxSizer * GetWidget(wxWindow *parent)
return a widget associated with the form; the form controls the lifetime of the widget (do not delete...
vector< ISeqLocSearchContext * > m_SeqLocContexts
virtual CIRef< IDMSearchQuery > ConstructQuery()
virtual IUITool * Clone() const
void x_GetMatches(const string &data, int start, TRangeCollection &coll, bool reverse)
"offset" argument defines the sequence position corresponding to the first character in "data"
CSequenceSearchForm(CSequenceSearchTool &tool)
list< TDescPattern > TDescPatList
wxFlexGridSizer * m_pPatternSizer
virtual void x_SaveSettings(CRegistryWriteView view) const
pair< string, string > TDescPattern
IAppJob::EJobState x_SearchSequence(TScopedLocs &scoped_locs)
virtual void x_SetupColumns(CObjectList &obj_list)
add custom columns if needed
virtual bool x_ValidateParams()
returns true if Job params are correct, implement in derived classes
string m_PatternFile
regular expression to apply
virtual string GetExtensionIdentifier() const
returns the unique human-readable identifier for the extension the id should use lowercase letters se...
virtual CRef< CSearchJobBase > x_CreateJob(IDMSearchQuery &query)
implementing CSearchToolBase pure virtual function
CSequenceSearchQuery::TScopedLocs TScopedLocs
CSequenceSearchTool()
CSequenceSearchTool.
virtual string GetDescription() const
returns a detailed description of the method that is used in UI
static TFileList m_FileList
CSequenceSearchForm.
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
virtual bool IsCanceled() const override
EJobState
Job states (describe FSM)
Definition: app_job.hpp:86
@ eCanceled
Definition: app_job.hpp:91
@ eCompleted
Definition: app_job.hpp:89
@ eFailed
Definition: app_job.hpp:90
@ eDefault
Definition: label.hpp:73
@ eContent
Definition: label.hpp:62
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
const TOffset * GetResults(size_t idx) const
Get location of pattern/subpattern for the last GetMatch().
Definition: regexp.hpp:569
CTempString GetMatch(CTempString str, size_t offset=0, size_t idx=0, TMatch flags=fMatch_default, bool noreturn=false)
Get matching pattern and subpatterns.
Definition: regexp.cpp:182
int NumFound() const
Get number of patterns + subpatterns.
Definition: regexp.hpp:562
static string WildcardToRegexp(CTempString mask)
Convert wildcard mask to regular expression.
Definition: regexp.cpp:228
@ fCompile_ignore_case
Definition: regexp.hpp:103
@ fMatch_default
Definition: regexp.hpp:127
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
Definition: ncbireg.cpp:262
bool Set(const string &section, const string &name, const string &value, TFlags flags=0, const string &comment=kEmptyStr)
Set the configuration parameter value.
Definition: ncbireg.cpp:826
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
#define NPOS
Definition: ncbistr.hpp:133
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508
@ eForwardSearch
Search in a forward direction.
Definition: ncbistr.hpp:1946
@ fWithCommas
Use commas as thousands separator.
Definition: ncbistr.hpp:254
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
list< CRef< CSeq_interval > > Tdata
const Tdata & Get(void) const
Get the member data.
TFrom GetFrom(void) const
Get the From member data.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
Definition: Seq_loc_.hpp:534
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
const TPacked_int & GetPacked_int(void) const
Get the variant data.
Definition: Seq_loc_.cpp:216
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
FILE * file
int i
int len
#define wxT(x)
Definition: muParser.cpp:41
range(_Ty, _Ty) -> range< _Ty >
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Process information in the NCBI Registry, including working with configuration files.
T min(T x_, T y_)
static patstr * patterns
Definition: pcregrep.c:259
USING_SCOPE(objects)
#define ID_COMBOBOX_UPDATE
#define ID_TEXT
static const int kMaxResults
static const char * kSeqPattern
#define ID_COMBOBOX
#define row(bind, expected)
Definition: string_bind.c:73
CRef< objects::CScope > m_Scope
CRef< objects::CSeq_loc > m_Loc
objects::CSeqFeatData::ESubtype selected
vector< objects::CSeqFeatData::ESubtype > options
vector< TFilter > filters
static string query
#define _ASSERT
static CS_CONTEXT * context
Definition: will_convert.c:21
wxString ToWxString(const string &s)
Definition: wx_utils.hpp:173
string ToStdString(const wxString &s)
Definition: wx_utils.hpp:161
Modified on Thu Apr 25 08:17:11 2024 by modify_doxy.py rev. 669887