NCBI C++ ToolKit
create_needleman_wunsch_tool_manager.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: create_needleman_wunsch_tool_manager.cpp 47485 2023-05-02 14:46:59Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Mike DiCuccio
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
34 
35 #include <gui/objutils/label.hpp>
37 
38 #include <objects/general/Date.hpp>
45 
47 
51 
52 #include <objmgr/seq_vector.hpp>
53 #include <objmgr/util/sequence.hpp>
55 
57 
62 
63 
66 
67 
// Constructor of the tool manager (its declarator line is absent from this
// listing). Forwards the tool's descriptive strings to CAlgoToolManagerBase and
// starts with no parameters panel.
// NOTE(review): the six strings are presumably name, icon alias, tooltip,
// description, help URL and category -- confirm against CAlgoToolManagerBase.
69 : CAlgoToolManagerBase("Needleman-Wunsch Alignment",
70  "",
71  "Create a Needleman-Wunsch Alignment",
72  "Generate a global alignment using the "
73  "Needleman-Wunsch algorithm",
74  "https://www.ncbi.nlm.nih.gov/tools/gbench/",
75  "Alignment Creation"),
76  m_Panel(NULL)
77 {
78 }
79 
80 
// Returns the fixed identifier string of this extension.
// NOTE(review): the declarator line is absent from this listing; presumably
// this is GetExtensionIdentifier() const -- confirm.
82 {
83  return "create_needleman_wunsch_tool_manager";
84 }
85 
86 
// Returns the fixed display label of this extension.
// NOTE(review): the declarator line is absent from this listing; presumably
// this is GetExtensionLabel() const -- confirm.
88 {
89  return "Needleman-Wunsch Tool";
90 }
91 
92 
// Clears the cached parameters-panel pointer.
// NOTE(review): the declarator and the first statement of this body are absent
// from this listing; presumably this is InitUI() and the missing statement is
// the call to the base-class implementation -- confirm.
94 {
96 
97  m_Panel = NULL;
98 }
99 
100 
// Clears the cached parameters-panel pointer.
// NOTE(review): the declarator and the last statement of this body are absent
// from this listing; presumably this is CleanUI() and the missing statement is
// the call to the base-class implementation -- confirm.
102 {
103  m_Panel = NULL;
104 
106 }
107 
108 
// Sets up the parameters panel the first time it is needed (m_Panel == NULL).
// The visible code always returns true.
// NOTE(review): the declarator and several statements inside the `if` are
// absent from this listing (presumably the panel allocation, its Create() call
// and the hand-over of m_Params / input objects) -- confirm against the real
// source before editing. Presumably this is x_CreateParamsPanelIfNeeded().
110 {
111  if(m_Panel == NULL) {
113 
115  m_Panel->Hide(); // to reduce flicker
117 
120 
    // Panel settings are kept under a ".ParamsPanel" sub-path of the tool's
    // own registry path.
121  m_Panel->SetRegistryPath(m_RegPath + ".ParamsPanel");
123  }
124  return true;
125 }
126 
127 
128 /// select only Seq-locs: input objects are converted to CSeq_loc and the
/// result is stored in m_Objects
// NOTE(review): the declarator line is absent from this listing.
130 {
131  x_ConvertInputObjects(CSeq_loc::GetTypeInfo(), m_Objects);
132 }
133 
134 
// Returns the cached parameters panel; this is NULL while no panel is set.
// NOTE(review): the declarator line is absent from this listing; presumably
// this is x_GetParamsPanel() -- confirm.
136 {
137  return m_Panel;
138 }
139 
140 
// Returns the address of the m_Params member.
// NOTE(review): the declarator line is absent from this listing; presumably
// this is x_GetParamsAsRegSetting() returning IRegSettings* -- confirm.
142 {
143  return &m_Params;
144 }
145 
146 
// Refreshes m_Params from the panel (when a panel exists) and returns `job`.
// NOTE(review): the declarator and the statement that creates `job` are absent
// from this listing; presumably this is x_CreateLoadingJob() and `job` is a new
// CCreateNeedlemanWunschJob built from m_Params -- confirm.
148 {
149  if (m_Panel) m_Params = m_Panel->GetData();
151  return job;
152 }
153 
154 
155 
156 
157 ///////////////////////////////////////////////////////////////////////////////
158 /// CCreateNeedlemanWunschJob
159 
// Job constructor (its declarator line is absent from this listing): stores
// the parameters it is given, starts with m_Done at 0 and sets the job
// description text.
161 : m_Params(params), m_Done(0)
162 {
163  m_Descr = "Creating alignments"; //TODO
164 }
165 
// Turns the iteration counters in `info` into a percentage and a
// "NN % completed" message, then passes both to the job found in info->m_data.
// Returns false when that pointer is null, otherwise whatever
// ProgressCallback() returns.
// NOTE(review): the declarator and the declaration of `job` (the left-hand side
// of the cast below) are absent from this listing; presumably this is the
// static s_ProgressCallback(CNWAligner::SProgressInfo*) -- confirm.
167 {
169  reinterpret_cast<CCreateNeedlemanWunschJob*>(info->m_data);
170  if ( !job ) {
171  return false;
172  }
173 
174  char buf[128];
    // NOTE(review): info->m_iter_total is not checked for zero before the
    // division -- confirm the caller never reports progress with a zero total.
175  float pct_done = (100.0f / info->m_iter_total) * info->m_iter_done;
    // NOTE(review): sprintf does not bound the write to sizeof(buf); snprintf
    // would make the limit explicit.
176  sprintf( buf, "%2.0lf %% completed", pct_done);
177 
178  return job->ProgressCallback(buf, pct_done);
179 }
180 
181 bool CCreateNeedlemanWunschJob::ProgressCallback(const string& status, float done)
182 {
183  if (IsCanceled())
184  return true;
185 
186  CFastMutexGuard lock(m_Mutex);
187 
188  m_Status = status;
189  m_Done = done;
190  return false;
191 }
192 
// Returns `pr` after taking m_Mutex, the same mutex ProgressCallback() holds
// while it writes m_Status and m_Done.
// NOTE(review): the declarator and the statement that creates `pr` are absent
// from this listing; presumably this is GetProgress() and `pr` is a progress
// object built from m_Done and m_Status -- confirm.
194 {
195  CFastMutexGuard lock(m_Mutex);
197  return pr;
198 }
199 
// Aligns two input Seq-locs with a Needleman-Wunsch style aligner and adds the
// resulting alignment to the project as a Seq-annot wrapped in a CProjectItem.
// Steps visible here: fetch both sequences, choose the aligner variant, run
// it, map the alignment back onto the input locations, label and store it.
// NOTE(review): the declarator and several statements are absent from this
// listing (the declaration of `locs`, the throw statement that owns the message
// below, the end-space setup and one loop header). Presumably this is
// CCreateNeedlemanWunschJob::x_CreateProjectItems() -- confirm against the real
// source before changing any code.
201 {
    // NOTE(review): the message on the next line contains the typo "acepts".
203  if (locs.size() != 2) {
205  "Algorithm acepts exactly two sequences");
206  }
207 
208  // make sure that the sequences are of a known type; fetch the sequences
209  CRef<CScope> new_scope;
210  vector<string> seqs;
211  vector<string> seq_labels;
212 
213  typedef vector< CConstRef<CSeq_id> > TIds;
214  TIds seq_ids;
    // scoremat stays 0 for nucleotide input; it is reassigned on every
    // iteration, so the last processed sequence decides its final value.
215  const SNCBIPackedScoreMatrix* scoremat = 0;
216 
217  NON_CONST_ITERATE (TConstScopedObjects, loc_iter, locs) {
218  const CSeq_loc& loc = dynamic_cast<const CSeq_loc&>(loc_iter->object.GetObject());
219  CScope& scope = loc_iter->scope.GetObject();
    // new_scope remembers the scope of the first location; no later use of it
    // is visible in this listing.
220  if ( !new_scope ) {
221  new_scope.Reset(&scope);
222  }
223 
    // Locations that span more than one bioseq are logged and skipped.
    // NOTE(review): a skipped location leaves seqs / seq_ids with fewer than
    // two entries, but seqs[0], seqs[1] and the seq_ids iterator are used
    // below without re-checking the count.
224  if ( !sequence::IsOneBioseq(loc, &scope) ) {
225  string str;
226  CLabel::GetLabel(loc, &str, CLabel::eDefault, &scope);
227  LOG_POST(Info << "CAlgoPlugin_NeedlemanWunsch: "
228  "location on multiple bioseqs ignored: " << str);
229  continue;
230  }
231 
232  CBioseq_Handle handle =
233  scope.GetBioseqHandle(sequence::GetId(loc, &scope));
234 
235  CSeqVector vec(loc, scope, CBioseq_Handle::eCoding_Iupac);
236 
237  // save our sequence
238  seqs.push_back(string());
239  vec.GetSeqData(0, vec.size(), seqs.back());
240  NStr::ToUpper(seqs.back());
241 
242  // save a label for this sequence
243  string loc_str;
244  CLabel::GetLabel(loc, &loc_str, CLabel::eDefault, &scope);
245  seq_labels.push_back(loc_str);
246 
247  // save the gi for this sequence
248  seq_ids.push_back(CConstRef<CSeq_id>(handle.GetSeqId()));
249 
250  scoremat = vec.IsNucleotide() ? 0: &NCBISM_Blosum62;
251  }
252 
    // With no band requested (band == 0), pick Myers-Miller when the product
    // of the two sequence lengths exceeds nw_limit (200*1024*1024).
253  const size_t band = m_Params.GetBandSize();
254  bool use_myers_miller = false;
255  if(band == 0) {
256  const size_t nw_limit = 200*1024*1024;
257  vector<string>::const_iterator iter_seqs = seqs.begin();
258  double dim_square = (iter_seqs++)->length();
259  dim_square *= iter_seqs->length();
260  use_myers_miller = dim_square > nw_limit;
261  }
262 
263  //
264  // main algorithm
265  //
    // NOTE(review): `output` is not used anywhere in the visible code.
266  string output;
267 
268  const char* seq1 = seqs[0].c_str(), * seq2 = seqs[1].c_str();
269  size_t dim1 = seqs[0].size(), dim2 = seqs[1].size();
270 
    // Aligner choice: banded when band > 0, otherwise Myers-Miller or the
    // plain aligner depending on use_myers_miller.
271  unique_ptr<CNWAligner> aligner;
272  if(band > 0) {
273 
274  aligner.reset(new CBandAligner (seq1, dim1, seq2, dim2,
275  scoremat, band));
276  }
277  else {
278 
279  if(use_myers_miller) {
280 
281  aligner.reset(new CMMAligner (seq1, dim1, seq2, dim2,
282  scoremat));
283 
284  LOG_POST( Info << "CAlgoPlugin_NeedlemanWunsch: "
285  "Using Myers-Miller method");
286 
287  }
288  else {
289 
290  aligner.reset(new CNWAligner (seq1, dim1, seq2, dim2,
291  scoremat));
292  }
293  }
294 
    // Without a score matrix, match / mismatch costs come from the tool
    // parameters.
295  if(scoremat == 0) {
296  aligner->SetWm (m_Params.GetMatchCost());
297  aligner->SetWms(m_Params.GetMismatchCost());
298  aligner->SetScoreMatrix(NULL);
299  }
300 
301  aligner->SetWg (m_Params.GetGapOpenCost());
302  aligner->SetWs (m_Params.GetGapExtendCost());
303 
304  // end-space free alignment setup
    // NOTE(review): left1 / right1 / left2 / right2 are defined on lines that
    // are absent from this listing.
313  aligner->SetEndSpaceFree(left1, right1, left2, right2);
314 
    // `this` is passed as callback data; the callback casts it back to the job
    // to report progress.
315  aligner->SetProgressCallback(s_ProgressCallback, this);
316  aligner->Run();
317 
318  if (IsCanceled())
319  return;
320 
321  // create a seq-align structure for our alignment
322  CNWFormatter formatter (*aligner);
323  CRef<CSeq_align> align =
324  formatter.AsSeqAlign(0, eNa_strand_plus, 0, eNa_strand_plus);
325 
326  // we need to set the IDs correctly, and remap our alignment
    // For each input, build the source location used for remapping: the input
    // itself when it is a whole-sequence location, otherwise an interval
    // starting at 0 on the same id.
327  list< CRef<CSeq_loc> > from_locs;
328  ITERATE (TConstScopedObjects, loc_iter, locs) {
329  const CSeq_loc& loc = dynamic_cast<const CSeq_loc&>(loc_iter->object.GetObject());
330 
331  CRef<CSeq_loc> ref(new CSeq_loc());
332  switch (loc.Which()) {
333  case CSeq_loc::e_Whole:
334  ref->Assign(loc);
335  break;
336 
337  default:
338  ref->SetInt().SetFrom(0);
    // NOTE(review): `to` is set to the full length rather than length - 1;
    // presumably interval ends are inclusive, which would make this one
    // position too long -- confirm.
339  ref->SetInt().SetTo(loc.GetTotalRange().GetLength());
    // NOTE(review): GetId is called with a NULL scope here, unlike the
    // fetch loop above which passes &scope.
340  ref->SetId(sequence::GetId(loc, NULL));
341  break;
342  }
343  from_locs.push_back(ref);
344  }
345 
    // Overwrite the ids in the alignment's Dense-seg with the ids collected
    // from the input sequences (the loop header line is absent from this
    // listing).
    // NOTE(review): seq_id_iter is advanced without a check against
    // seq_ids.end().
346  TIds::iterator seq_id_iter = seq_ids.begin();
348  align->SetSegs().SetDenseg().SetIds()) {
349  (*iter)->Assign(**seq_id_iter++);
350 
351  }
352 
353  /// remap the alignment
    // Whole-sequence inputs need no remapping; each other input is mapped from
    // its 0-based interval (from_locs) onto the original location.
354  list< CRef<CSeq_loc> >::const_iterator iter1 = from_locs.begin();
355  NON_CONST_ITERATE (TConstScopedObjects, loc_iter, locs) {
356  const CSeq_loc& loc = dynamic_cast<const CSeq_loc&>(loc_iter->object.GetObject());
357  CScope& scope = loc_iter->scope.GetObject();
358 
359  CRef<CSeq_loc> ref = *iter1++;
360  if (loc.IsWhole()) {
361  continue;
362  }
363 
364  CSeq_loc_Mapper mapper(*ref, loc, &scope);
365  align = mapper.Map(*align);
366  }
367 
368  // pack the alignment in a Seq-annot and label it appropriately
369  CRef<CSeq_annot> annot(new CSeq_annot());
370  annot->SetData().SetAlign().push_back(align);
371 
372  // prepare a title
    // The title is the sequence labels joined with " + ", prefixed below with
    // "Global alignment of ".
373  string title;
374  ITERATE (vector<string>, iter, seq_labels) {
375  if ( !title.empty() ) {
376  title += " + ";
377  }
378  title += *iter;
379  }
380 
381  title = "Global alignment of " + title;
382  CTime time(CTime::eCurrent);
383  annot->SetNameDesc(title);
384  annot->SetCreateDate(time);
385 
386  string str;
387  str = "This alignment was produced on ";
388  str += time.AsString();
389  str += " using the Needleman-Wunsch alignment algorithm";
390  annot->AddComment(str);
391 
    // A second create-date descriptor is appended by hand, this time as a
    // string, in addition to SetCreateDate() above.
392  CRef<CAnnotdesc> desc(new CAnnotdesc());
393  desc->SetCreate_date().SetStr(time.AsString());
394  annot->SetDesc().Set().push_back(desc);
395 
396  CRef<CProjectItem> item(new CProjectItem());
397  item->SetItem().SetAnnot(*annot);
398  item->SetLabel(title);
399  AddProjectItem(*item);
400 }
401 
403 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CAlgoToolManagerBase This is base class for simple algorithmic tool managers.
virtual void InitUI()
override this function in a derived class and initialize extra members
string m_RegPath
registry path to the settings
wxWindow * m_ParentWindow
a window that will serve as a parent for our panels
virtual void CleanUI()
override this function in a derived class and clean extra members
void x_ConvertInputObjects(const CTypeInfo *typeInfo, map< string, TConstScopedObjects > &results)
CAlgoToolManagerParamsPanel.
void SetObjects(TConstScopedObjects *objects)
CNeedlemanWunschToolParams & GetData()
virtual void SetRegistryPath(const string &path)
CAlgoToolManagerParamsPanel.
void SetData(const CNeedlemanWunschToolParams &data)
bool Create(wxWindow *parent, wxWindowID id=ID_CALIGNNEEDLEMANWUNSCHPANEL, const wxPoint &pos=wxDefaultPosition, const wxSize &size=wxSize(400, 300), long style=wxTAB_TRAVERSAL)
CAnnotdesc –.
Definition: Annotdesc.hpp:66
CAppJobProgress Default implementation for IAppJobProgress - the class encapsulates a text message an...
CBioseq_Handle –.
CDataLoadingAppJob - a base class for Jobs loading data into projects.
void AddProjectItem(objects::CProjectItem &item)
CScope –.
Definition: scope.hpp:92
CSeqVector –.
Definition: seq_vector.hpp:65
void SetNameDesc(const string &name)
Definition: Seq_annot.cpp:66
void SetCreateDate(const CTime &dt)
Definition: Seq_annot.cpp:121
void AddComment(const string &comment)
Definition: Seq_annot.cpp:113
CSeq_loc_Mapper –.
CTime –.
Definition: ncbitime.hpp:296
IRegSettings An interface for objects that save / restore settings using CGuiRegistry.
USING_SCOPE(objects)
static bool s_ProgressCallback(CNWAligner::SProgressInfo *info)
static SQLCHAR output[256]
Definition: print.c:5
static const char * str(char *buf, int n)
Definition: stats.c:84
CRef< objects::CSeq_align > AsSeqAlign(TSeqPos query_start=0, objects::ENa_strand query_strand=objects::eNa_strand_plus, TSeqPos subj_start=0, objects::ENa_strand subj_strand=objects::eNa_strand_plus, int SAFF_flags=eSAFF_None) const
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
virtual IRegSettings * x_GetParamsAsRegSetting()
return a pointer to Parameters object as IRegSettings interface
virtual CConstIRef< IAppJobProgress > GetProgress()
implement Run() in derived classes, make sure that exceptions are handled and m_StopRequested flag is...
virtual string GetExtensionIdentifier() const
returns the unique human-readable identifier for the extension the id should use lowercase letters se...
virtual CAlgoToolManagerParamsPanel * x_GetParamsPanel()
returns a pointer to the parameters panel, override in derived classes
virtual bool x_CreateParamsPanelIfNeeded()
returns / creates Parameters panel, override in derived classes see cpp file for example
bool ProgressCallback(const string &status, float done)
virtual CDataLoadingAppJob * x_CreateLoadingJob()
factory method for creating the job that executes the tool algorithm override in derived classes
CCreateNeedlemanWunschJob(const CNeedlemanWunschToolParams &params)
CCreateNeedlemanWunschJob.
virtual void InitUI()
override this function in a derived class and initialize extra members
virtual string GetExtensionLabel() const
returns a displayable label for this extension ( please capitalize the key words - "My Extension" )
virtual void x_CreateProjectItems()
override this function in derived classes and populate m_Items.
virtual void CleanUI()
override this function in a derived class and clean extra members
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
string m_Descr
mutex to sync our internals
virtual bool IsCanceled() const override
CFastMutex m_Mutex
string m_Status
Current status of the Job.
vector< SConstScopedObject > TConstScopedObjects
Definition: objects.hpp:65
@ eDefault
Definition: label.hpp:73
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
Definition: Seq_loc.cpp:3474
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
bool IsOneBioseq(const CSeq_loc &loc, CScope *scope)
Returns true if all embedded CSeq_ids represent the same CBioseq, else false.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
bool IsNucleotide(void) const
Definition: seq_vector.hpp:357
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
position_type GetLength(void) const
Definition: range.hpp:158
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const
Transform time to string.
Definition: ncbitime.cpp:1512
@ eCurrent
Use current time. See also CCurrentTime.
Definition: ncbitime.hpp:300
void SetLabel(const TLabel &value)
Assign a value to Label data member.
void SetItem(TItem &value)
Assign a value to Item data member.
TStr & SetStr(void)
Select the variant.
Definition: Date_.hpp:313
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
vector< CRef< CSeq_id > > TIds
Definition: Dense_seg_.hpp:106
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ e_Whole
whole sequence
Definition: Seq_loc_.hpp:100
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
void SetDesc(TDesc &value)
Assign a value to Desc data member.
Definition: Seq_annot_.cpp:223
TCreate_date & SetCreate_date(void)
Select the variant.
Definition: Annotdesc_.cpp:212
char * buf
static MDB_envinfo info
Definition: mdb_load.c:37
const SNCBIPackedScoreMatrix NCBISM_Blosum62
Definition: sm_blosum62.c:92
done
Definition: token1.c:1
Modified on Fri May 10 16:24:39 2024 by modify_doxy.py rev. 669887