NCBI C++ ToolKit
build_alnvec_job.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: build_alnvec_job.cpp 44958 2020-04-28 18:04:11Z shkeda $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrey Yazhuk
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
35 
37 
39 
40 
43 
44 /*
45 CBuildAlnVecTask::CBuildAlnVecTask()
46 : m_StopRequested(false)
47 {
48 }
49 
50 
51 CBuildAlnVecTask::~CBuildAlnVecTask()
52 {
53 }
54 
55 
56 void CBuildAlnVecTask::Reset(CScope& scope)
57 {
58  CMutexGuard lock(m_Mutex);
59 
60  if(! IsRunning()) {
61  m_Scope.Reset(&scope);
62 
63  m_Aligns.clear();
64  m_AlnVec.Reset();
65 
66  m_ShadowAligns.clear();
67  m_ShadowAlnVec.Reset();
68  } else {
69  ERR_POST("CBuildAlnVecTask::Reset() - ignored, the task is running");
70  }
71 }
72 
73 
74 void CBuildAlnVecTask::SetInput(const TAligns& aligns)
75 {
76  m_Aligns = aligns;
77 }
78 
79 
80 bool CBuildAlnVecTask::IsEmpty() const
81 {
82  return ! m_AlnVec;
83 }
84 
85 
86 string CBuildAlnVecTask::GetStatusMessage()
87 {
88  return m_StatusMessage;
89 }
90 
91 
92 CRef<CAlnVec> CBuildAlnVecTask::GetResults()
93 {
94  return m_AlnVec;
95 }
96 
97 // this is a quick-and-dirty way to support synchronous processing
98 bool CBuildAlnVecTask::RunSync()
99 {
100  x_InitWorkContext();
101  bool ok = (x_Run() == eFinished);
102  if(ok) {
103  x_TransferResults();
104  }
105  x_ClearWorkContext();
106  return ok;
107 }
108 
109 
110 void CBuildAlnVecTask::SetTaskName(const string& name)
111 {
112  CFastMutexGuard lock(m_Mutex);
113  m_TaskName = name;
114 }
115 
116 
117 void CBuildAlnVecTask::SetTaskCompleted (int completed)
118 {
119  CFastMutexGuard lock(m_Mutex);
120  m_TaskCompleted = completed;
121 }
122 
123 
124 void CBuildAlnVecTask::SetTaskTotal(int total)
125 {
126  CFastMutexGuard lock(m_Mutex);
127  m_TaskTotal = total;
128 }
129 
130 
131 bool CBuildAlnVecTask::InterruptTask()
132 {
133  return m_StopRequested;
134 }
135 
136 
137 string CBuildAlnVecTask::GetProgressMessage()
138 {
139  CFastMutexGuard lock(m_Mutex);
140 
141  string s = m_TaskName + " ";
142  s += NStr::IntToString(m_TaskCompleted) + " of ";
143  s += NStr::IntToString(m_TaskTotal);
144  return s;
145 }
146 
147 
148 CUIAsyncTask::EStatus CBuildAlnVecTask::x_Run()
149 {
150  bool ok = false;
151  if(m_ShadowAligns.size()) {
152 
153  CAlnMix mix(*m_Scope);
154 
155  size_t n_align = m_ShadowAligns.size();
156  {
157  CFastMutexGuard lock(m_Mutex);
158  m_TaskName = "Adding Seq-aligns to the alignment";
159  m_TaskTotal = n_align;
160  }
161  //LOG_POST(Info << "Adding " << n_align << " alignments to CAlnMix");
162 
163  // add all Seq-aligns to the Alignment
164  for( size_t i = 0; i < n_align; i++ ) {
165  if(m_StopRequested) {
166  return eCanceled;
167  }
168  //LOG_POST(Info << "Adding " << m_ShadowAligns[i]);
169 
170  mix.Add(*m_ShadowAligns[i]);
171  SetTaskCompleted(i + 1);
172 
173  //LOG_POST("CBuildAlnVecTask::x_Run() - added Seq-align");
174  }
175 
176  // Merge alignment
177  try {
178  //LOG_POST("CBuildAlnVecTask::x_Run() - merging alignments...");
179  mix.Merge(CAlnMix::fGapJoin);
180 
181  //LOG_POST("CBuildAlnVecTask::x_Run() - Merged");
182  m_ShadowAlnVec.Reset(new CAlnVec(mix.GetDenseg(), mix.GetScope()));
183  ok = true;
184  //LOG_POST("CBuildAlnVecTask::x_Run() - replaced Shadow AlnVec");
185  }
186  catch (CException& e) {
187  m_StatusMessage = e.GetMsg();
188  ERR_POST(Error << "CAlignDataSource::x_BuildShadowAlnVec() "
189  << e.what());
190  }
191  }
192  return ok ? eFinished : eFailed;
193 }
194 
195 
196 void CBuildAlnVecTask::x_InitWorkContext()
197 {
198  m_ShadowAligns = m_Aligns;
199  m_ShadowAlnVec.Reset();
200 }
201 
202 
203 void CBuildAlnVecTask::x_TransferResults()
204 {
205  m_AlnVec = m_ShadowAlnVec;
206 }
207 
208 
209 void CBuildAlnVecTask::x_ClearWorkContext()
210 {
211  m_ShadowAligns.clear();
212  m_ShadowAlnVec.Reset();
213 }
214 
215 
216 void CBuildAlnVecTask::x_Cancel()
217 {
218  CMutexGuard lock(m_Mutex);
219  m_StopRequested = true;
220 }
221 */
222 
223 ///////////////////////////////////////////////////////////////////////////////
224 /// CBuildAlnVecJob
225 CBuildAlnVecJob::CBuildAlnVecJob(const TAligns& aligns, CScope& scope, bool select_anchor)
226 : m_Descr("Build CAlnVec-based alignment"),
227  m_Scope(&scope),
228  m_Aligns(aligns),
229  m_TaskCompleted(0),
230  m_TaskTotal(0),
231  m_SelectAnchor(select_anchor)
232 {
233 }
234 
235 
237 {
238 }
239 
240 
// Run(): entry point of the job (the member index lists it as
// `virtual EJobState Run()`).
//
// Clears any previous result and error, calls Build(), and turns whatever
// Build() throws into an error message:
//   - CAlnException: the message depends on the error code (see the switch);
//   - CException / std::exception: the message is logged and kept.
// Returns eCanceled if IsCanceled() is true, eFailed if an error message was
// recorded (m_Error is then set), and eCompleted otherwise.
//
// NOTE(review): the signature line of this definition and the `case` labels of
// the switch below are not present in this text -- confirm both against the
// repository copy before relying on the branch descriptions.
242 {
243  //LOG_POST(Info << "CBuildAlnVecJob::Run() Started " << m_Descr);
244 
245  m_Result.Reset();
246  m_Error.Reset();
247 
 // err_msg stays empty unless an exception is caught below.
248  string err_msg, logMsg = "CBuildAlnVecJob - exception in Run() ";
249 
250  try {
251  Build();
252 
253  } catch( CAlnException& aex ){
254  switch( aex.GetErrCode() ){
 // First branch: user-facing message; the details go to the log as a warning.
260  err_msg = "Sorry, merging of multiple alignments has failed. Alignments of this type are not supported.";
261  LOG_POST( Warning << logMsg << "\n " << aex.GetMsg() );
262  break;
264  // HACK
265  // This error often happens because some resource is not deallocated
266  // properly and persists until the node restarts. There are not many
267  // ways to convey this information back to the worker node, so we
268  // put ShutDown as the first word of the error message to be
269  // handled in src/internal/gbench/app/sviewer/graphic_cgi_lib/graphic_cgi.cpp
270  err_msg = "ShutDown " + aex.GetMsg();
271  break;
272  default:
273  err_msg = logMsg + GetDescr() + ". " +aex.GetMsg();
274  LOG_POST(Error << err_msg);
275  LOG_POST(Error << aex.ReportAll());
276  break;
277  }
278 
279  } catch( CException& e ){
280  err_msg = logMsg + GetDescr() + ". " + e.GetMsg();
281  LOG_POST(Error << err_msg);
282  LOG_POST(Error << e.ReportAll());
283  } catch (std::exception& e) {
284  err_msg = logMsg + GetDescr() + ". " + e.what();
285  LOG_POST(Error << err_msg);
286  }
287 
 // Cancellation is tested before the error message, so a canceled job
 // reports eCanceled even when Build() also failed.
288  if (IsCanceled())
289  return eCanceled;
290 
291  if (!err_msg.empty()) {
292  m_Error.Reset(new CAppJobError(err_msg));
293  return eFailed;
294  }
295 
296  //LOG_POST(Info << "CBuildAlnVecJob::Run() Finished " << m_Descr);
297  return eCompleted;
298 }
299 
300 
301 
302 
304 {
305  CAppJobProgress* prg = new CAppJobProgress();
306 
307  CFastMutexGuard lock(m_Mutex);
308 
309  string s = m_TaskName + " ";
310  s += NStr::IntToString(m_TaskCompleted) + " of ";
312  prg->SetText(s);
313  prg->SetNormDone( static_cast<float>( m_TaskCompleted ) / m_TaskTotal );
314 
315  return CConstIRef<IAppJobProgress>(prg);
316 }
317 
318 
320 {
322 }
323 
324 
326 {
328 }
329 
330 
332 {
333  return m_Descr;
334 }
335 
336 
337 void CBuildAlnVecJob::SetTaskName(const string& name)
338 {
339  CFastMutexGuard lock(m_Mutex);
340  m_TaskName = name;
341 }
342 
343 
345 {
346  CFastMutexGuard lock(m_Mutex);
347  m_TaskCompleted = completed;
348 }
349 
350 
352 {
353  CFastMutexGuard lock(m_Mutex);
354  m_TaskTotal = total;
355 }
356 
357 
359 {
360  return IsCanceled();
361 }
362 
363 
365 {
366  if (!m_Aligns.size())
367  return;
368 
369  CRef<CAlnVec> aln_vec;
370  CConstRef<CSeq_id> anchor_id;
371 
372  size_t n_align = m_Aligns.size();
373 
374  if (n_align == 1 && m_Aligns[0]->GetSegs().IsDenseg()) {
375  aln_vec.Reset(new CAlnVec(m_Aligns[0]->GetSegs().GetDenseg(), *m_Scope));
376  }
377  else {
378  CAlnMix mix(*m_Scope);
379  mix.SetTaskProgressCallback(this);
380 
381  {
382  CFastMutexGuard lock(m_Mutex);
383  m_TaskName = "Adding Seq-aligns to the alignment";
384  m_TaskTotal = (int)n_align;
385  }
386 
387  // add all Seq-aligns to the Alignment
388  CConstRef<CSeq_id> ids[2];
389  int anchor_index = -1;
390  bool check_anchor = m_SelectAnchor;
391  for (size_t i = 0; i < n_align; i++) {
392 
393  if(IsCanceled())
394  return;
395  const auto& aln = *m_Aligns[i];
396  check_anchor = check_anchor && aln.CheckNumRows() == 2;
397  if (check_anchor) {
398  if (i == 0) {
399  ids[0] = ConstRef(&aln.GetSeq_id(0));
400  ids[1] = ConstRef(&aln.GetSeq_id(1));
401  } else if (anchor_index == -1) {
402  if (ids[0]->Match(aln.GetSeq_id(0))) {
403  anchor_index = 0;
404  }
405  if (ids[1]->Match(aln.GetSeq_id(1))) {
406  anchor_index = (anchor_index == 0) ? -1 : 1;
407  }
408  } else if (ids[anchor_index]->Match(aln.GetSeq_id(anchor_index)) == false) {
409  check_anchor = false;
410  anchor_index = -1;
411  }
412  }
413  mix.Add(aln);
414  SetTaskCompleted((int)(i + 1));
415  }
416 
417  if (check_anchor && anchor_index != -1)
418  swap(anchor_id, ids[anchor_index]);
419  // Merge alignments
421  aln_vec.Reset(new CAlnVec(mix.GetDenseg(), mix.GetScope()));
422  }
423 
424  {
425  CFastMutexGuard lock(m_Mutex);
427  m_Result->m_AlnVec = aln_vec;
428  }
429 
430  // preload the sequences
431  const int row_num = aln_vec->GetNumRows();
432  SetTaskName("Loading sequences... ");
433  SetTaskTotal(row_num);
434  int anchor_row = -1;
435  for (int row = 0; row < row_num; ++row) {
436  if(IsCanceled())
437  return;
438  try {
439  if (anchor_id && anchor_id->Match(aln_vec->GetSeqId(row))) {
440  if (anchor_row == -1) {
441  anchor_row = row;
442  } else {
443  // Multiple rows with the same anchor id
444  // anchor can't be calcualted
445  // reset anchor index
446  anchor_id.Reset();
447  anchor_row = -1;
448  }
449  }
450  aln_vec->GetBioseqHandle(row);
451  } catch (std::exception&) {
452  // Preloading is just for performance consideration.
453  // we simply ignore any errors if a sequence fails to load.
454  }
455 
456  SetTaskCompleted(row + 1);
457  }
458 
459  // preset the segment type to avoid blocking the main thread
460  SetTaskName("Initializing alignment segment types ... ");
461  SetTaskTotal(row_num);
462  for (int row = 0; row < row_num; ++row) {
463  if(IsCanceled())
464  return;
465  try {
466  aln_vec->GetSegType(row, 0);
467  } catch (std::exception&) {
468  // Preloading is just for performance consideration.
469  // we simply ignore any errors if any failure
470  }
471 
472  SetTaskCompleted(row + 1);
473  }
474  if (anchor_row != -1)
475  m_Result->m_AlnVec->SetAnchor(anchor_row);
476 
477 }
478 
479 
static CRef< CScope > m_Scope
USING_SCOPE(ncbi::objects)
TSegTypeFlags GetSegType(TNumrow row, TNumseg seg, int offset=0) const
Definition: alnmap.hpp:503
const CSeq_id & GetSeqId(TNumrow row) const
Definition: alnmap.hpp:645
TDim GetNumRows(void) const
Definition: alnmap.hpp:517
void Add(const CDense_seg &ds, TAddFlags flags=0)
Definition: alnmix.cpp:120
@ fGapJoin
Definition: alnmix.hpp:103
void Merge(TMergeFlags flags=0)
Definition: alnmix.cpp:273
const CDense_seg & GetDenseg(void) const
Definition: alnmix.cpp:295
CScope & GetScope(void) const
Definition: alnmix.hpp:167
const CBioseq_Handle & GetBioseqHandle(TNumrow row) const
Definition: alnvec.cpp:86
CAppJobError Default implementation for IAppJobError - encapsulates a text error message.
CAppJobProgress Default implementation for IAppJobProgress - the class encapsulates a text message an...
virtual void Build()
virtual CRef< CObject > GetResult()
Returns the Job Result.
vector< CConstRef< objects::CSeq_align > > TAligns
CBuildAlnVecJob(const TAligns &aligns, objects::CScope &scope, bool select_anchor=false)
CBuildAlnVecJob.
CRef< CBuildAlnVecResult > m_Result
virtual ~CBuildAlnVecJob()
virtual CConstIRef< IAppJobProgress > GetProgress()
return progress object, the function shall be synchronized internally.
virtual void SetTaskCompleted(int completed)
virtual void SetTaskTotal(int total)
virtual CConstIRef< IAppJobError > GetError()
Returns IAppJobError object describing internal error that caused the Job to fail.
virtual string GetDescr() const
Returns a human readable description of the Job (optional)
CRef< objects::CScope > m_Scope
virtual bool InterruptTask()
virtual void SetTaskName(const string &name)
virtual EJobState Run()
Function that does all the useful work, called by the Engine.
CRef< CAppJobError > m_Error
CBuildAlnVecResult.
CRef< objects::CAlnVec > m_AlnVec
CScope –.
Definition: scope.hpp:92
void SetTaskProgressCallback(ITaskProgressCallback *callback)
Hook a callback to a task.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
Definition: ncbiexpt.cpp:370
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
virtual bool IsCanceled() const override
EJobState
Job states (describe FSM)
Definition: app_job.hpp:86
void SetNormDone(float done)
void SetText(const string &text)
@ eCanceled
Definition: app_job.hpp:91
@ eCompleted
Definition: app_job.hpp:89
@ eFailed
Definition: app_job.hpp:90
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1065
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
Definition: ncbiobj.hpp:2024
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
#define row(bind, expected)
Definition: string_bind.c:73
Modified on Sat Apr 13 11:47:23 2024 by modify_doxy.py rev. 669887