NCBI C++ ToolKit
windowmasker_tool.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: windowmasker_tool.cpp 47080 2022-07-22 18:11:54Z asztalos $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Vladimir Tereshkov
27 *
28 */
29 
30 #include <ncbi_pch.hpp>
31 
34 
37 
39 
40 #include <serial/typeinfo.hpp>
41 
43 
45 #include <gui/objutils/label.hpp>
46 #include <gui/objutils/utils.hpp>
47 
48 #include <objmgr/seq_vector.hpp>
49 #include <objmgr/util/sequence.hpp>
52 
56 
58 
59 #include <wx/filename.h>
60 #include <wx/dir.h>
61 
64 
65 ///////////////////////////////////////////////////////////////////////////////
66 /// CWindowMaskerTool
68 : CAlgoToolManagerBase("Search/Find Repetitive Sequences with WindowMasker",
69  "",
70  "Search/Find Repetitive Sequences with WindowMasker",
71  "Find low-complexity sequences using WindowMasker "
72  "data rearrangements and clarifications",
73  "https://www.ncbi.nlm.nih.gov/tools/gbench/tutorial2/",
74  "Edit"),
75  m_Panel()
76 {
77 }
78 
80 {
81  return "windowmasker_tool";
82 }
83 
84 
86 {
87  return "Windowmasker Tool";
88 }
89 
91 {
93  m_Panel = NULL;
94  m_Objects.clear();
95 }
96 
98 {
99  m_Panel = NULL;
101  m_Objects.clear();
102 }
103 
105 {
106  if (m_Panel == NULL) {
108 
109  m_Panel = new CWindowMaskerPanel();
110  m_Panel->Hide(); // to reduce flicker
114 
115  m_Panel->SetRegistryPath(m_RegPath + ".ParamsPanel");
117  }
118  return true;
119 }
120 
121 wxString s_GetPath(int taxId) {
122  if (taxId <= 0)
123  return wxString();
124 
126  wxFileName fname(storage.GetPath(), wxEmptyString);
127  fname.AppendDir(wxString::Format(wxT("%u"), taxId));
128 
129  wxDir dir(fname.GetPath());
130  if (dir.IsOpened()) {
131  wxString dirName;
132  for (
133  bool cont = dir.GetFirst(&dirName, wxEmptyString, wxDIR_DIRS);
134  cont;
135  cont = dir.GetNext(&dirName)
136  ) {
137  fname.AppendDir(dirName);
138  break;
139  }
140  }
141 
142  return fname.GetPath();
143 }
144 
146 {
147  if( !m_Panel ){
148  return true;
149  }
150 
151  m_Params = m_Panel->GetData();
152 
153  wxString path = s_GetPath(m_Params.GetTaxId());
154 
155  if ( !wxFileName::FileExists(path) ) {
156  static const wxChar* sc_Files[] = {
157  wxT("wmasker.obinary"),
158  wxT("wmasker.oascii"),
159  NULL
160  };
161 
162  for (const wxChar** p = sc_Files; p && *p; ++p) {
163  wxString filepath = wxFileName(path, *p).GetFullPath();
164  if (wxFileName::FileExists(filepath)) {
165  path = filepath;
166  break;
167  }
168  }
169 
170  if ( !wxFileName::FileExists(path) ) {
172  wxT("Failed to find WindowMasker statistics file\nat ") + path,
174  wxT("WindowMasker Run Tool")
175  );
176 
177  return false;
178  }
179  }
180 
181  return true;
182 }
183 
185 {
187  x_ConvertInputObjects(CSeq_loc::GetTypeInfo(), objects);
188 
189  for (auto& o : objects) {
190  if (!o.scope) continue;
191  const CSeq_loc* seqLoc = dynamic_cast<const CSeq_loc*>(o.object.GetPointerOrNull());
192  if (!seqLoc) continue;
193  CBioseq_Handle h = o.scope->GetBioseqHandle(*seqLoc->GetId());
194  if (!h) continue;
195  if (h.IsNa()) m_Objects.push_back(o);
196  }
197 }
198 
200 {
201  return m_Panel;
202 }
203 
205 {
206  return &m_Params;
207 }
208 
209 ///////////////////////////////////////////////////////////////////////////////
210 /// CWindowMaskerJob
212 {
213 public:
214  CWindowMaskerJob (const CWindowMaskerParams& params);
215 
216 protected:
217  virtual void x_CreateProjectItems(); // overriding virtual function
218 
219  virtual void RequestCancel();
220 
221 protected:
223 
224 private:
226 };
227 
229 {
230  if( m_Panel ){
231  m_Params = m_Panel->GetData();
232  }
234  return job;
235 }
236 
238  #if defined(NCBI_OS_UNIX)
239  (pid_t)-1
240  #elif defined(NCBI_OS_MSWIN)
242  #else
243  -1
244  #endif
245 ;
246 
248  : m_Params( params )
249  , m_ProcHandle( kInvalidProcHandle )
250 {
251  CFastMutexGuard lock(m_Mutex);
252 
253  m_Descr = "Windowmasker Job"; //TODO
254 }
255 
257 {
260  }
261 
263 }
264 
/// Masking results gathered for one input location: a display name plus the
/// masked intervals found for it. Filled in by the job and later turned into
/// project items.
265 struct SMaskResult {
266  string name; ///< label of the input location followed by ": WindowMasker Repeat Regions"
267  vector< CRef<CSeq_loc> > mask_locs; ///< masked locations as returned by CSeqUtils::RemapChildToParent()
268 };
269 
271 {
272  wxString path = s_GetPath(m_Params.GetTaxId());
273 
274  if ( !wxFileName::FileExists(path) ) {
275  static const wxChar* sc_Files[] = {
276  wxT("wmasker.obinary"),
277  wxT("wmasker.oascii"),
278  NULL
279  };
280 
281  for (const wxChar** p = sc_Files; p && *p; ++p) {
282  wxString filepath = wxFileName(path, *p).GetFullPath();
283  if (wxFileName::FileExists(filepath)) {
284  path = filepath;
285  break;
286  }
287  }
288 
289  if ( !wxFileName::FileExists(path) ) {
290  NCBI_THROW(CException, eUnknown, "Failed to find WindowMasker statistics file.");
291  }
292  }
293 
294  vector<SMaskResult> mask_res;
295 
296 
297  //vector< CRef<CSeq_loc> > mask_locs;
298  if (m_Params.GetStandalone()) {
299  wxString prog_name = wxT("windowmasker");
300 #ifdef NCBI_OS_MSWIN
301  prog_name += wxT(".exe");
302 #endif
303 
304  wxString agent_path = CSysPath::GetStdPath();
305 
306  wxFileName fname(agent_path, wxEmptyString);
307 #if !defined( NCBI_OS_DARWIN ) || defined( _DEBUG )
308  fname.AppendDir(wxT("bin"));
309 #endif
310  fname.SetFullName(prog_name);
311 
312  string agent_path_utf8(fname.GetFullPath().ToUTF8());
313 
314  vector<string> args;
315  args.push_back("-ustat");
316  string lstat_name(path.ToUTF8());
317  args.push_back(lstat_name);
318  args.push_back("-outfmt");
319  args.push_back("seqloc_asn1_text");
320  args.push_back("-parse_seqids");
321  //args.push_back("-break");
322 
323  string agent_args;
324  ITERATE(vector<string>, arg, args){
325  agent_args += *arg + " ";
326  }
327 
328  for (const auto& o : m_Params.GetObjects()) {
329  try {
330  ERR_POST(Trace << "We are about to start program on path '" << agent_path_utf8 << "'");
331  ERR_POST(Trace << "with parameters '" << agent_args << "'");
332 
333  if (IsCanceled())
334  break;
335 
336  const CSeq_loc& loc = dynamic_cast<const CSeq_loc&>(o.object.GetObject());
337  CScope& scope = const_cast<CScope&>(o.scope.GetObject());
338 
339  string name;
340  CLabel::GetLabel(loc, &name, CLabel::eDefault, &scope);
341  name += ": WindowMasker Repeat Regions";
342 
343  CBioseq_Handle handle = scope.GetBioseqHandle(loc);
344 
345  CConn_PipeStream ps(agent_path_utf8, args, CPipe::fStdErr_Share);
346  CFastaOstream fastm(ps);
348 
349  fastm.Write(handle, &loc);
350  ps.flush();
352 
353  if (IsCanceled())
354  break;
355 
356  CRef<CSeq_loc> res_loc(new CSeq_loc());
357  ps >> MSerial_AsnText >> *res_loc;
358 
359  int exitcode;
360  EIO_Status status = ps.GetPipe().Close(&exitcode);
361  ERR_POST(
362  Info << "Command completed with status "
363  << IO_StatusStr(status) << " and exitcode " << exitcode
364  );
366 
367  SMaskResult rv;
368 
369  switch (res_loc->Which()){
371 
372  rv.name = name;
373 
374  NON_CONST_ITERATE(list< CRef<CSeq_interval> >, int_iter, res_loc->SetPacked_int().Set()){
375  CRef<CSeq_loc> pair_loc;
376  pair_loc.Reset(new CSeq_loc());
377  pair_loc->SetInt(**int_iter);
378  pair_loc->SetInt().SetId().Assign(*loc.GetId());
379 
380  rv.mask_locs.push_back(CSeqUtils::RemapChildToParent(loc, *pair_loc));
381  }
382  mask_res.push_back(rv);
383 
384  break;
385 
386  case CSeq_loc::e_Int:
387 
388  rv.name = name;
389 
390  res_loc->SetInt().SetId().Assign(*loc.GetId());
391 
392  rv.mask_locs.push_back(CSeqUtils::RemapChildToParent(loc, *res_loc));
393  mask_res.push_back(rv);
394  break;
395 
396  default:
397  continue;
398  }
399  } catch (const CException& ex) {
400  if (!IsCanceled()) ERR_POST("Failed to run windowmasker: " << ex.GetMsg());
401  }
402  }
403  }
404  else {
405  // Starting tool locally
406  string lstat_name(path.ToUTF8());
407  CSeqMasker masker(lstat_name,
408  0 /*!cmd["window"].IsEmpty() ? cmd["window"].AsInteger() : 0*/,
409  1 /* window step */,
410  1 /* unit step */,
411  0 /* !cmd["t_extend"].IsEmpty() ? cmd["t_extend"].AsInteger() : 0*/,
412  0 /*!cmd["t_thres"].IsEmpty() ? cmd["t_thres"].AsInteger() : 0*/,
413  0 /*!cmd["t_high"].IsEmpty() ? cmd["t_high"].AsInteger() : 0*/,
414  0 /*!cmd["t_low"].IsEmpty() ? cmd["t_low"].AsInteger() : 0*/,
415  0 /*!cmd["set_t_high"].IsEmpty() ? cmd["set_t_high"].AsInteger() : 0*/,
416  0 /*!cmd["set_t_low"].IsEmpty() ? cmd["set_t_low"].AsInteger() : 0*/,
417  false /* merge pass */,
418  0 /* merge cutoff */,
419  0 /* abs merge cutoff dist */,
420  0 /* mean merge cutoff dist */ ,
421  1 /* merge unit step */,
422  "mean" /* trigger */,
423  0 /* tmin count */,
424  false /* discontig */,
425  0 /* pattern */,
426  true /* use ba ?? */
427  );
428 
429  size_t count_failures = 0;
430  for(const auto& o : m_Params.GetObjects()) {
431  const CSeq_loc& loc = dynamic_cast<const CSeq_loc&>(o.object.GetObject());
432  CScope& scope = const_cast<CScope&>(o.scope.GetObject());
433 
434  if (IsCanceled())
435  break;
436 
437  string name;
438  CLabel::GetLabel( loc, &name, CLabel::eDefault, &scope );
439  name += ": WindowMasker Repeat Regions";
440 
441  CSeqVector vec( loc, scope, CBioseq_Handle::eCoding_Iupac );
442 
443  // Do the window masking
444  unique_ptr<CSeqMasker::TMaskList> mask_list( masker( vec ) );
445 
446  SMaskResult rv;
447  rv.name = name;
448 
449  // Make sequence locations (remapped)
450  CSeq_loc tmp_loc;
451  tmp_loc.SetInt().SetId().Assign( *loc.GetId() );
452  for(
453  CSeqMasker::TMaskList::const_iterator it = mask_list->begin();
454  it != mask_list->end();
455  ++it
456  ){
457  tmp_loc.SetInt().SetFrom( it->first );
458  tmp_loc.SetInt().SetTo( it->second );
459  rv.mask_locs.push_back( CSeqUtils::RemapChildToParent( loc, tmp_loc ) );
460  }
461 
462  if( rv.mask_locs.empty() ){
463  ++count_failures;
464  continue;
465  }
466  mask_res.push_back( rv );
467  }
468  }
469 
470  int output_type = m_Params.GetOutputType();
471 
472  if (output_type == 0 || output_type == 2) {
473 
474  NON_CONST_ITERATE(vector<SMaskResult>, mritr, mask_res){
475 
476  // Make corresponding features
477  CRef<CSeq_annot> annot(new CSeq_annot());
478 
479  annot->SetNameDesc(mritr->name);
481 
482  NON_CONST_ITERATE(vector<CRef<CSeq_loc> >, locitr, mritr->mask_locs) {
483  CRef<CSeq_feat> feat(new CSeq_feat());
484  feat->SetData().SetImp().SetKey("repeat_region");
485  feat->SetLocation(**locitr);
486  annot->SetData().SetFtable().push_back(feat);
487  }
488  //reply.AddObject(*annot, &project);
489 
490  /// now create a Project Item for the data
491  CRef<CProjectItem> item(new CProjectItem());
492  item->SetItem().SetAnnot(*annot);
493 
494  // TODO we need to generate title properly
495  item->SetLabel(mritr->name);
496  AddProjectItem(*item);
497  }
498  }
499 
500 
501  if (output_type == 1 || output_type == 2) {
502 
503  NON_CONST_ITERATE(vector<SMaskResult>, mritr, mask_res){
504 
505  // Make one big loc
506  CRef<CSeq_loc> big_mask_loc(new CSeq_loc());
507  NON_CONST_ITERATE(vector<CRef<CSeq_loc> >, locitr, mritr->mask_locs) {
508  big_mask_loc->Add(**locitr);
509  }
510  //reply.AddObject(*big_mask_loc, &project);
511  CRef<CProjectItem> item(new CProjectItem());
512  CSeq_id & idref = const_cast<CSeq_id &>(*big_mask_loc->GetId());
513  item->SetItem().SetId(idref);
514 
515  // TODO we need to generate title properly
516  item->SetLabel(mritr->name);
517  AddProjectItem(*item);
518  }
519  }
520 }
521 
User-defined methods of the data storage class.
CAlgoToolManagerBase This is base class for simple algorithmic tool managers.
virtual void InitUI()
override this function in a derived class and initialize extra members
string m_RegPath
registry path to the settings
wxWindow * m_ParentWindow
a window that will serve as a parent for our panels
virtual void CleanUI()
override this function in a derived class and clean extra members
void x_ConvertInputObjects(const CTypeInfo *typeInfo, map< string, TConstScopedObjects > &results)
CAlgoToolManagerParamsPanel.
virtual void SetRegistryPath(const string &reg_path)
CAlgoToolManagerParamsPanel.
CBioseq_Handle –.
CConn_PipeStream for command piping.
CDataLoadingAppJob - a base class for Jobs loading data into projects.
void AddProjectItem(objects::CProjectItem &item)
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
Definition: sequence.hpp:770
CProcess –.
CScope –.
Definition: scope.hpp:92
Main interface to window based masker functionality.
Definition: seq_masker.hpp:53
CSeqVector –.
Definition: seq_vector.hpp:65
void SetNameDesc(const string &name)
Definition: Seq_annot.cpp:66
void SetCreateDate(const CTime &dt)
Definition: Seq_annot.cpp:121
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
static wxString GetStdPath(void)
the application installation directory.
Definition: sys_path.cpp:159
CTime –.
Definition: ncbitime.hpp:296
wxString GetPath() const
static CWinMaskerFileStorage & GetInstance()
access the application-wide singleton
CWindowMaskerJob.
CWindowMaskerJob(const CWindowMaskerParams &params)
TProcessHandle m_ProcHandle
virtual void RequestCancel()
RequestCancel() is called to notify the Job that it shall exit Run() function ASAP.
CWindowMaskerParams m_Params
virtual void x_CreateProjectItems()
override this function in derived classes and populate m_Items.
void SetObjects(TConstScopedObjects *objects)
CWindowMaskerParams & GetData()
Data access.
void SetData(const CWindowMaskerParams &data)
bool Create(wxWindow *parent, wxWindowID id=ID_CWINDOWMASKERPANEL, const wxPoint &pos=wxDefaultPosition, const wxSize &size=wxSize(400, 300), long style=wxTAB_TRAVERSAL)
Creation.
virtual void LoadSettings()
const TConstScopedObjects & GetObjects() const
virtual CDataLoadingAppJob * x_CreateLoadingJob()
factory method for creating the job that executes the tool algorithm; override in derived classes
virtual bool x_CreateParamsPanelIfNeeded()
returns / creates the Parameters panel; override in derived classes (see the cpp file for an example)
virtual void InitUI()
override this function in a derived class and initialize extra members
virtual bool x_ValidateParams()
validates user input in Parameters panel, report errors if any
CWindowMaskerParams m_Params
CWindowMaskerTool()
CWindowMaskerTool.
virtual void CleanUI()
override this function in a derived class and clean extra members
virtual string GetExtensionLabel() const
returns a displayable label for this extension ( please capitalize the key words - "My Extension" )
virtual IRegSettings * x_GetParamsAsRegSetting()
return a pointer to Parameters object as IRegSettings interface
virtual CAlgoToolManagerParamsPanel * x_GetParamsPanel()
returns a pointer to the parameters panel, override in derived classes
virtual string GetExtensionIdentifier() const
returns the unique human-readable identifier for the extension the id should use lowercase letters se...
TConstScopedObjects m_Objects
CWindowMaskerPanel * m_Panel
virtual void x_SelectCompatibleInputObjects()
IRegSettings An interface for objects that save / restore settings using CGuiRegistry.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
CPipe & GetPipe(void)
Return an underlying CPipe; it's valid for as long as the stream exists.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
void Trace(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1179
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
static CRef< objects::CSeq_loc > RemapChildToParent(const objects::CSeq_loc &parent, const objects::CSeq_loc &child, objects::CScope *scope=NULL)
remap a location to a parent location.
Definition: utils.cpp:498
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
string m_Descr
mutex to sync our internals
EDialogReturnValue NcbiMessageBoxW(const wxString &message, TDialogType type=eDialog_Ok, EDialogIcon icon=eIcon_Exclamation, const wxString &title=wxT("Error"), EDialogTextMode text_mode=eRaw)
virtual bool IsCanceled() const override
CFastMutex m_Mutex
virtual void RequestCancel() override
RequestCancel() is called to notify the Job that it shall exit Run() function ASAP.
vector< SConstScopedObject > TConstScopedObjects
Definition: objects.hpp:65
@ eDefault
Definition: label.hpp:73
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:984
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
Definition: Seq_loc.cpp:3875
const CSeq_id * GetId(void) const
Get the id of the location; returns NULL if it has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
Definition: sequence.cpp:2727
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
bool IsNa(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TProcessHandle GetProcessHandle(void) const
Get the process handle for the piped child.
Definition: ncbi_pipe.cpp:2126
EIO_Status CloseHandle(EChildIOHandle handle)
Close the specified child's pipe handle (even for CPipe opened with OpenSelf()).
Definition: ncbi_pipe.cpp:1974
EIO_Status Close(int *exitcode=0)
Close pipe.
Definition: ncbi_pipe.cpp:1964
@ eStdIn
Definition: ncbi_pipe.hpp:119
@ fStdErr_Share
Keep stderr (share it with child)
Definition: ncbi_pipe.hpp:91
TPid TProcessHandle
bool Kill(unsigned long timeout=kDefaultKillTimeout)
Terminate process.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
@ eCurrent
Use current time. See also CCurrentTime.
Definition: ncbitime.hpp:300
EIO_Status
I/O status.
Definition: ncbi_core.h:132
const char * IO_StatusStr(EIO_Status status)
Get the text form of an enum status value.
Definition: ncbi_core.c:56
#define INVALID_HANDLE_VALUE
A value for an invalid file handle.
Definition: mdb.c:389
void SetLabel(const TLabel &value)
Assign a value to Label data member.
void SetItem(TItem &value)
Assign a value to Item data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
@ e_Int
from to
Definition: Seq_loc_.hpp:101
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
@ eIcon_Stop
Definition: types.hpp:66
@ eDialog_Ok
Definition: types.hpp:47
#define wxT(x)
Definition: muParser.cpp:41
#define NCBI_OS_MSWIN
Definition: ncbiconf_msvc.h:19
Format
Definition: njn_ioutil.hpp:52
The Object manager core.
vector< CRef< CSeq_loc > > mask_locs
static const TProcessHandle kInvalidProcHandle
USING_SCOPE(ncbi::objects)
wxString s_GetPath(int taxId)
Modified on Tue Jul 16 13:17:09 2024 by modify_doxy.py rev. 669887