NCBI C++ ToolKit
merge_alignments_tool_manager.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: merge_alignments_tool_manager.cpp 47080 2022-07-22 18:11:54Z asztalos $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrey Yazhuk
27  *
28  */
29 
30 #include <ncbi_pch.hpp>
31 
33 
35 
37 
39 
40 #include <gui/objutils/label.hpp>
41 
43 
46 
55 
57 
60 
61 ///////////////////////////////////////////////////////////////////////////////
62 /// CMergeAlignmentsToolManager
64 : CAlgoToolManagerBase("Merge Alignments",
65  "",
66  "Combine alignments using many possible strategies",
67  "Create a pseudo-multiple alignment using many possible strategies",
68  "https://www.ncbi.nlm.nih.gov/tools/gbench/",
69  "Alignment Creation"),
70  m_ParamsPanel(NULL)
71 {
72 }
73 
74 
76 {
77  return "merge_alignments_tool_manager";
78 }
79 
80 
82 {
83  return "Merge Alignments Tool";
84 }
85 
86 
88 {
90 
92 }
93 
94 
96 {
98 
100 }
101 
102 
104 {
105  if(m_ParamsPanel == NULL) {
107 
109  m_ParamsPanel->Hide(); // to reduce flicker
112 
113  m_ParamsPanel->SetRegistryPath(m_RegPath + ".ParamsPanel");
115  }
116  return true;
117 }
118 
119 
121 {
122  if( m_Params.m_Alignments.size() < 1 ){
123  string err = "Please select at least one alignment!";
124 
126  return false;
127  }
128 
129  return true;
130 }
131 
132 
133 /// select only Seq-aligns
135 {
136  m_Params.m_Alignments.clear();
137  m_Alignments.clear();
138  ITERATE(vector<TConstScopedObjects>, it, m_InputObjects) {
139  ITERATE(TConstScopedObjects, it2, *it) {
140  const CSeq_align* align = dynamic_cast<const CSeq_align*>(it2->object.GetPointerOrNull());
141  if (align) m_Alignments.push_back(*it2);
142  }
143  }
144 
145  if (m_Alignments.empty()) {
146  x_ConvertInputObjects(CSeq_align::GetTypeInfo(), m_Alignments);
147  }
148 }
149 
150 
152 {
153  return m_ParamsPanel;
154 }
155 
156 
158 {
159  return &m_Params;
160 }
161 
162 
164 {
166  return job;
167 }
168 
169 
171 {
174 }
175 
176 
177 ///////////////////////////////////////////////////////////////////////////////
178 /// CMergeAlignmentsJob
180 : m_Params(params)
181 {
182  m_Descr = "Merging alignments"; //TODO
183 }
184 
185 
187 {
189 
190  ///
191  /// assure we're all in one scope
192  ///
193  CRef<CScope> scope;
194  {{
195  ITERATE (TConstScopedObjects, iter, aligns) {
196  if ( !scope ) {
197  scope.Reset(const_cast<CScope*>(&*iter->scope));
198  } else if (scope != &*iter->scope) {
199  scope.Reset();
201  "All alignments must be within the same project");
202  }
203  }
204  }}
205 
206  ///
207  /// do our work
208  ///
209 
210  CAlnContainer aln_container;
211 
212  ///
213  /// step 1: add to alignment container
214  ///
215  int count = 0;
216  int count_invalid = 0;
217  //bool all_pairwise = true;
218  ITERATE (TConstScopedObjects, iter, aligns) {
219 
220  try {
221  ++count;
222  CConstRef<CSeq_align> aln(dynamic_cast<const CSeq_align*>(iter->object.GetPointer()));
223 
224  ///
225  /// validation is optional!
226  aln->Validate(true);
227 
228  // if (aln->GetSegs().IsDenseg() &&
229  // aln->GetSegs().GetDenseg().GetDim() != 2) {
230  // all_pairwise = false;
231  // }
232 
233  aln_container.insert(*aln);
234  }
235  catch (CException& e) {
237  << "CMergeAlignmentsJob::x_CreateProjectItems(): "
238  << "failed to validate: " << e.GetMsg());
239  ++count_invalid;
240  }
241  }
242 
243  if (count_invalid) {
244  string msg;
245  msg += NStr::IntToString(count_invalid);
246  msg += "/";
247  msg += NStr::IntToString(count);
248  msg += " alignments failed validation.";
249  if (count_invalid == count) {
251  } else {
252  LOG_POST(Warning << msg);
253  }
254  }
255 
256  /// Types we use here:
257  typedef CSeq_align::TDim TDim;
258 
259  /// Create a vector of seq-ids per seq-align
260  TIdExtract id_extract;
261  TAlnIdMap aln_id_map(id_extract, aln_container.size());
262  size_t count_accepted = 0;
263  ITERATE(CAlnContainer, aln_it, aln_container) {
264  try {
265  aln_id_map.push_back(**aln_it);
266  ++count_accepted;
267  }
268  catch (CAlnException& e) {
270  << "CMergeAlignmentsJob::x_CreateProjectItems(): "
271  << "failed to extract IDs: " << e.GetMsg());
272  }
273  }
274 
275  if (count_accepted != aln_container.size()) {
276  if (count_accepted == 0) {
278  "No valid alignments found");
279  return;
280  }
281 
283  << count_accepted << "/" << aln_container.size()
284  << " alignments had no IDs to extract.");
285  }
286 
287 
288  ///
289  /// gather statistics about our alignment
290  ///
291  TAlnStats aln_stats(aln_id_map);
292 
293 
294  // auto-detect self-alignments
295  // if the input set of sequences correspond to one and only one sequence,
296  // force row preservation
297  // bool preserve_rows = false;
298  {{
300  ITERATE (TAlnStats::TIdVec, i, aln_stats.GetIdVec()) {
301  CSeq_id_Handle idh = CSeq_id_Handle::GetHandle((*i)->GetSeqId());
302  ids.insert(idh);
303  }
304  // if (ids.size() == 1) {
305  // preserve_rows = true;
306  // }
307  }}
308 
309  CAlnUserOptions opts;
310 
311 
312  /// always merge both directions
315 
316  ///
317  /// create a set of anchored alignments
318  ///
319  TAnchoredAlnVec anchored_aln_vec;
320  CreateAnchoredAlnVec(aln_stats, anchored_aln_vec, opts);
321 
324 
326 
327  opts.SetMergeFlags(flags, true);
328 
329  ///
330  /// now, build
331  ///
332  CAnchoredAln out_anchored_aln;
333  BuildAln(anchored_aln_vec, out_anchored_aln, opts);
334 
335  vector< CRef<CSeq_align> > ds_aligns;
337  (out_anchored_aln.GetPairwiseAlns(), out_anchored_aln.GetAnchorRow(),
338  ds_aligns, CSeq_align::TSegs::e_Denseg);
339 
340  typedef list< CRef<CSeq_align> > TAligns;
341  TAligns aligns_out;
342 
343  NON_CONST_ITERATE (vector< CRef<CSeq_align> >, it, ds_aligns) {
344  (*it)->SetType(CSeq_align::eType_partial);
345  aligns_out.push_back(*it);
346  }
347 
348  /// fill unaligned regions
350  NON_CONST_ITERATE (TAligns, align_iter, aligns_out) {
351  CRef<CDense_seg> ds = (*align_iter)->SetSegs().SetDenseg().FillUnaligned();
352  (*align_iter)->SetSegs().SetDenseg(*ds);
353  }
354  }
355 
356  if (aligns_out.size() == 0)
357  return;
358 
359  string annot_base_name("Merged Alignment: ");
360  CAlignGroup::TAnnotList annot_list;
361 
362  CAlignGroup align_group_sorter;
363  align_group_sorter.GroupByStrand(aligns_out,
364  annot_list,
365  annot_base_name,
366  *scope);
367 
368  // now create a Project Item for the data
369 
370  ITERATE(CAlignGroup::TAnnotList, iter, annot_list) {
371  CRef<objects::CSeq_annot> annot = *iter;
372 
373  annot->SetCreateDate(CTime(CTime::eCurrent));
374 
375  // encode the name correctly
376  // we previously used the 'name' not for a temporary computation
377  // we make this the real 'name' that the object manager will understand
378 
379  string name("Merged Alignment: ");
380  CLabel::GetLabel(*annot, &name, CLabel::eDefault, &*scope);
381  if ( !name.empty() ) {
382  annot->SetNameDesc(name);
383  annot->SetTitleDesc(name);
384  }
385 
386  CRef<CProjectItem> pitem(new CProjectItem());
387 
388  pitem->SetItem().SetAnnot(*annot);
389  pitem->SetLabel(name);
390 
391  AddProjectItem(*pitem);
392  }
393 }
394 
395 
User-defined methods of the data storage class.
void BuildAln(TAnchoredAlnVec &in_alns, CAnchoredAln &out_aln, const CAlnUserOptions &options, TAlnSeqIdIRef pseudo_seqid=TAlnSeqIdIRef())
Build anchored alignment from a set of alignments.
void CreateAnchoredAlnVec(_TAlnStats &aln_stats, TAnchoredAlnVec &out_vec, const CAlnUserOptions &options)
Create anchored alignment from each seq-align in the stats.
CAnchoredAln::TDim TDim
void CreateSeqAlignFromEachPairwiseAln(const CAnchoredAln::TPairwiseAlnVector pairwises, CAnchoredAln::TDim anchor, vector< CRef< CSeq_align > > &out_seqaligns, CSeq_align::TSegs::E_Choice choice, CScope *scope=NULL)
Create seq-align from each of the pairwise alignments vs the selected anchor row.
CAlgoToolManagerBase This is base class for simple algorithmic tool managers.
CUIObject m_Descriptor
describes the Manager's UI properties
virtual void InitUI()
override this function in a derived class and initialize extra members
string m_RegPath
registry path to the settings
wxWindow * m_ParentWindow
a window that will serve as a parent for our panels
virtual void CleanUI()
override this function in a derived class and clean extra members
void x_ConvertInputObjects(const CTypeInfo *typeInfo, map< string, TConstScopedObjects > &results)
vector< TConstScopedObjects > m_InputObjects
original input objects, the tool needs to select a subset of objects that can serve as valid input
SProjectSelectorParams m_ProjectParams
CAlgoToolManagerParamsPanel.
void GroupByStrand(const TAlignList &aligns, TAnnotList &align_groups, const string &annot_base_name, objects::CScope &scope)
Group alignments into bins for each set of strands.
list< CRef< objects::CSeq_annot > > TAnnotList
Definition: align_group.hpp:56
CSeq_align container.
const_iterator insert(const CSeq_align &seq_align)
Insert new CSeq_align into the list.
size_type size(void) const
Container mapping seq-aligns to vectors of participating seq-ids.
Definition: aln_tests.hpp:56
void push_back(const CSeq_align &aln)
Adding an alignment.
Definition: aln_tests.hpp:87
IAlnSeqId extracting functor.
Helper class which collects seq-align statistics: seq-ids participating in alignments and rows,...
Definition: aln_stats.hpp:57
_TAlnIdVec::TIdVec TIdVec
Vector of ids used in all alignments.
Definition: aln_stats.hpp:70
const TIdVec & GetIdVec(void) const
Get vector of all ids from all alignments.
Definition: aln_stats.hpp:241
Options for different alignment manager operations.
EMergeAlgo m_MergeAlgo
EDirection m_Direction
@ eMergeAllSeqs
Merge all sequences (greedy algo).
@ eQuerySeqMergeOnly
Only put the query seq on same row (input order is not significant).
@ eBothDirections
No filtering: use both direct and reverse sequences.
@ eReverse
Use only sequences whose strand is opposite to that of the anchor.
void SetMergeFlags(TMergeFlags flags, bool set)
Set/clear merge flags.
Query-anchored alignment can be 2- or multi-dimensional.
const TPairwiseAlnVector & GetPairwiseAlns(void) const
The vector of pairwise alns.
TDim GetAnchorRow(void) const
Which is the anchor row?
CDataLoadingAppJob - a base class for Jobs loading data into projects.
void AddProjectItem(objects::CProjectItem &item)
CRef< CDense_seg > FillUnaligned() const
Create a new dense-seg with all unaligned pieces (implicit inserts) added, if any,...
Definition: Dense_seg.cpp:1108
CProjectService - a service providing API for operations with Workspaces and Projects.
CScope –.
Definition: scope.hpp:92
void Validate(bool full_test=false) const
Definition: Seq_align.cpp:649
CTime –.
Definition: ncbitime.hpp:296
IRegSettings An interface for objects that save / restore settings using CGuiRegistry.
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
static uch flags
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
CIRef< T > GetServiceByType()
retrieves a typed reference to a service, the name of C++ type is used as the name of the service.
Definition: service.hpp:91
bool Create(wxWindow *parent, wxWindowID id=ID_CBLASTSEARCHOPTIONSPANEL, const wxPoint &pos=wxDefaultPosition, const wxSize &size=wxSize(400, 300), long style=wxTAB_TRAVERSAL)
virtual CDataLoadingAppJob * x_CreateLoadingJob()
factory method for creating the job that executes the tool algorithm; override in derived classes
virtual bool x_CreateParamsPanelIfNeeded()
returns / creates Parameters panel; override in derived classes, see cpp file for example
virtual string GetExtensionIdentifier() const
returns the unique human-readable identifier for the extension the id should use lowercase letters se...
virtual void x_CreateProjectItems()
override this function in derived classes and populate m_Items.
virtual void x_InitProjectParams()
init m_ProjectParams, in particular can select target project based on the tool input
virtual void SetRegistryPath(const string &path)
CAlgoToolManagerParamsPanel.
void SetParams(SMergeAlignmentsParams *params, TConstScopedObjects *objects)
virtual void CleanUI()
override this function in a derived class and clean extra members
virtual string GetExtensionLabel() const
returns a displayable label for this extension ( please capitalize the key words - "My Extension" )
virtual IRegSettings * x_GetParamsAsRegSetting()
return a pointer to Parameters object as IRegSettings interface
CMergeAlignmentsJob(const SMergeAlignmentsParams &params)
CMergeAlignmentsJob.
CMergeAlignmentsToolManager()
CMergeAlignmentsToolManager.
CMergeAlignmentsParamsPanel * m_ParamsPanel
virtual bool x_ValidateParams()
validates user input in Parameters panel, report errors if any
void x_SelectCompatibleInputObjects()
select only Seq-aligns
virtual CAlgoToolManagerParamsPanel * x_GetParamsPanel()
returns a pointer to the parameters panel, override in derived classes
virtual void InitUI()
override this function in a derived class and initialize extra members
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
string m_Descr
mutex to sync our internals
void NcbiErrorBox(const string &message, const string &title="Error")
specialized Message Box function for reporting critical errors
virtual const string & GetLabel() const
Definition: ui_object.cpp:124
vector< SConstScopedObject > TConstScopedObjects
Definition: objects.hpp:65
@ eDefault
Definition: label.hpp:73
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
@ eCurrent
Use current time. See also CCurrentTime.
Definition: ncbitime.hpp:300
void SetLabel(const TLabel &value)
Assign a value to Label data member.
void SetItem(TItem &value)
Assign a value to Item data member.
@ eType_partial
mapping pieces together
Definition: Seq_align_.hpp:103
int i
USING_SCOPE(objects)
vector< CRef< CAnchoredAln > > TAnchoredAlnVec
Collection of anchored alignments.
void SelectProjectByObjects(TConstScopedObjects &objects, CProjectService *srv)
if all objects belong to the same project - selects the project
Modified on Wed Mar 27 11:16:50 2024 by modify_doxy.py rev. 669887