1 /* $Id: msa_tool_job.cpp 47464 2023-04-20 00:19:10Z evgeniev $
2 * ===========================================================================
3 *
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Roman Katargin, Vladislav Evgeniev
27 */
30 #include <ncbi_pch.hpp>
32 #include <corelib/ncbifile.hpp>
33 #include <corelib/ncbiexec.hpp>
34 #include <objmgr/seq_vector.hpp>
35 #include <objmgr/util/sequence.hpp>
38 #include <serial/iterator.hpp>
45 #include <gui/objutils/label.hpp>
52 #include <wx/filename.h>
53 #include <wx/utils.h>
54 #include <wx/msgdlg.h>
59 /*
60 TODO:
61 IdMap is currently used only for tree remapping.
62 It seems more effective to store labels there, not SeqLocs.
63 It seems most effective to retrieve this mapping
64 from CMappingRanges or from CSeq_loc_Mapper. Probably
65 CRef<CSeq_loc> Map(const CSeq_loc& src_loc) can be used for that.
66 */
73 {
    /// Tree-traversal functor body: replaces a node's label with a label
    /// generated from the sequence location registered under that label in
    /// `idmap`. Nodes whose label is empty or is not a key of the map are left
    /// unchanged. Traversal always continues (eTreeTraverse is returned on
    /// every path).
    /// NOTE(review): the declaration lines of this type, of its constructor and
    /// of its call operator are not visible in this listing -- presumably this
    /// is the STreeNodeMapper used by x_AddTreeProjectItem(); confirm against
    /// the full file.
76  : idmap(NULL)
77  {
78  }
81  {
    // The map has to be attached by the caller before traversal starts.
82  _ASSERT(idmap);
    // Mutable reference: the label is rewritten in place further down.
83  string& label = node.GetValue().SetLabel();
84  if (label.empty()) {
    // Nothing to translate for an unlabeled node.
85  return eTreeTraverse;
86  }
87  TIdMap::iterator id_iter = idmap->find(label);
88  if (id_iter == idmap->end()) {
    // Label is not a key of the map; keep it as it is.
89  return eTreeTraverse;
90  }
    // p.first is the location, p.second the scope used to resolve it.
92  TLocPair p = id_iter->second;
93  CScope& scope = *p.second;
94  const CSeq_id& real_id =
95  sequence::GetId(*p.first, &scope);
    // Clear the old label and let CLabel write the label for the resolved id.
96  label.erase();
97  CLabel::GetLabel(real_id, &label, CLabel::eDefault, &scope);
98  return eTreeTraverse;
99  }
100 };
103  CNcbiOstream& ostr,
104  TConstScopedObjects& locations,
105  TSeqTypeMap& seq_types,
106  CMappingRanges& ranges,
107  map<string, TLocPair>& idmap
108  ){
    /// Writes every entry of `locations` to `ostr` as a FASTA-style record
    /// (">lcl|seq-N" line followed by the sequence data) and fills the
    /// bookkeeping containers used later to map results back:
    ///  - idmap:     "lcl|seq-N" -> (location, scope) pair
    ///  - seq_types: IsNucleotide() flag, stored both for the generated local
    ///               id and for the location's own id
    ///  - ranges:    a conversion from the generated local id to the
    ///               location's own id
    /// NOTE(review): the line carrying this function's name and return type is
    /// not visible in this listing.
109  int dump_count = 0;
111  NON_CONST_ITERATE(TConstScopedObjects, iter, locations){
    // NOTE(review): the dynamic_cast result is dereferenced below without a
    // null check -- presumably callers pass only CSeq_loc objects; confirm.
112  const CSeq_loc* loc = dynamic_cast<const CSeq_loc*>(iter->object.GetPointer());
113  CScope* scope = iter->scope;
    // Generated ids are 1-based: "seq-1", "seq-2", ...
115  string loc_id = "seq-" + NStr::SizetToString(++dump_count);
116  idmap["lcl|" + loc_id] = make_pair(CConstRef<CSeq_loc>(loc), CRef<CScope>(scope));
    // Definition line of the record.
118  ostr << ">" << "lcl|" << loc_id << endl;
    // Sequence data for the location, requested in IUPAC coding.
120  CSeqVector vec(*loc, *scope, CBioseq_Handle::eCoding_Iupac);
121  string data;
122  vec.GetSeqData(0, vec.size(), data);
123  ostr << data << endl;
125  CBioseq_Handle handle = scope->GetBioseqHandle(*loc->GetId());
    // Record the sequence type under the generated local id ...
127  CRef<CSeq_id> seq_id(new CSeq_id(CSeq_id::e_Local, loc_id));
128  CSeq_id_Handle local_sihd = CSeq_id_Handle::GetHandle(*seq_id);
129  seq_types[local_sihd] = handle.IsNucleotide();
    // ... and under the id of the original location.
131  CSeq_id_Handle base_sihd = CSeq_id_Handle::GetHandle(*loc->GetId());
132  seq_types[base_sihd] = handle.IsNucleotide();
    // Coordinate multiplier: 1 for nucleotide handles, 3 otherwise.
134  int trcf = handle.IsNucleotide() ? 1 : 3;
136  // prepare for final mapping
137  ranges.AddConversion(
138  local_sihd,
139  0 * trcf, sequence::GetLength(*loc, scope) *trcf,
141  base_sihd,
142  sequence::GetStart(*loc, scope) *trcf,
    // NOTE(review): the remaining argument(s) of this call are not visible in
    // this listing.
144  );
145  }
146 }
149 ///////////////////////////////////////////////////////////////////////////////
150 /// CMSAToolJob
/// Creates a job for the tool called `tool_name`.
/// m_PId starts at 0; other code in this file treats a value <= 0 as
/// "process not launched".
151 CMSAToolJob::CMSAToolJob(const wxString &tool_name)
152  : m_PId(0),
153  m_ToolName(tool_name)
154 {
156 }
159 {
    /// Creates the temporary files, checks that the collected sequence types
    /// are uniform and starts the external tool process.
    /// Returns false (after showing an error box) when m_SeqTypes does not
    /// hold one uniform type; otherwise execution reaches `return true`.
    /// NOTE(review): this method's signature line and several interior lines
    /// are not visible in this listing -- presumably the ones that write the
    /// input file, build m_CmdLine and consume the `error` string; confirm
    /// against the full file.
160  /// we serialize this through a set of temporary files
161  /// (m_TmpIn is opened for writing below; m_TmpOut is only created here)
162  m_TmpIn = wxFileName::CreateTempFileName(wxT("in"));
163  m_TmpOut = wxFileName::CreateTempFileName(wxT("out"));
    // NOTE(review): is_nuc is assigned below but not read in the visible lines.
165  bool is_nuc = true;
167  /// scoped to make sure we flush and close our file before we start!
168  {{
169  CNcbiOfstream ostr(m_TmpIn.fn_str());
    // NOTE(review): the beginning of the call that ends on the next line is
    // not visible in this listing.
172  wxT("Preparing data for alignment..."));
    // ors  becomes true if at least one entry is flagged true;
    // ands stays true only if every entry is flagged true.
    // The flags stored in this map elsewhere in the file are IsNucleotide()
    // results.
174  bool ors = false, ands = true;
175  ITERATE( TSeqTypeMap, iter, m_SeqTypes ){
176  ors = ors || iter->second;
177  ands = ands && iter->second;
178  }
    // NOTE(review): an empty m_SeqTypes also takes this branch (ors stays
    // false, ands stays true).
180  if( ands != ors ){
181  // we have mix, quit
182  wxMessageBox(
183  m_ToolName + wxT(" tool: All input sequences must be either DNA or protein."),
184  wxT("Error"), wxOK | wxICON_ERROR
185  );
186  return false;
187  }
189  is_nuc = ors;
190  }}
    // NOTE(review): "executabe" in the log text below is a typo for
    // "executable".
193  LOG_POST(Info << "Launching " << m_ToolName << " executabe:");
194  LOG_POST(Info << m_CmdLine);
    // Without a working directory wxExecute() is called with its default
    // flags; otherwise the directory is passed through wxExecuteEnv::cwd.
196  wxString working_dir = x_GetWorkingDirectory();
197  if (working_dir.IsEmpty()) {
198  m_PId = ::wxExecute(m_CmdLine);
199  }
200  else {
201  wxExecuteEnv env;
202  env.cwd = working_dir;
203  m_PId = ::wxExecute(m_CmdLine, wxEXEC_ASYNC, nullptr, &env);
204  }
    // A result <= 0 is treated as a failed launch.
205  if (m_PId <= 0) {
206  string error("Failed to launch ");
207  error += m_ToolName;
208  error += " executable.";
    // NOTE(review): the statement that uses `error` is not visible in this
    // listing.
210  }
212  return true;
213 }
216 {
    /// Waits for the launched tool process to exit, then turns its output
    /// file into project items: one alignment annotation and, when
    /// m_TmpTreeOut is set, a tree item via x_AddTreeProjectItem().
    /// Throws CException (eUnknown) when the process was never launched, when
    /// the output file is empty or its size cannot be obtained, or when
    /// reading/mapping the alignment raises an exception.
    /// Returns without creating items, after killing the process, when the
    /// job is canceled.
    /// NOTE(review): this method's signature line is not visible in this
    /// listing; the log text below suggests it is
    /// CMSAToolJob::x_CreateProjectItems() -- confirm.
217  if (m_PId <= 0) {
218  NCBI_THROW(CException, eUnknown, string(m_ToolName + " not launched!"));
219  }
    // Poll loop: sleep, then check for cancellation, then check whether the
    // process is still alive.
221  while (true) {
222  ::wxSleep(3);
224  if (IsCanceled()) {
225  ::wxKill(m_PId, wxSIGKILL);
226  return;
227  }
229  ncbi::CProcess proc(static_cast<int>(m_PId), ncbi::CProcess::ePid);
230  if (!proc.IsAlive()) {
231  break;
232  }
233  }
    // An empty or unreadable output file is reported as a tool failure.
    // NOTE(review): "executabale" in the message below is a typo for
    // "executable".
235  wxULongLong size = wxFileName::GetSize(m_TmpOut);
236  if (size == 0 || size == wxInvalidSize) {
237  NCBI_THROW(CException, eUnknown, string(m_ToolName + " executabale failed to produce results."));
238  }
    // Failures inside the try block are stored here and rethrown after it.
240  string err_msg;
241  try {
242  CNcbiIfstream istr(m_TmpOut.fn_str());
243  CStreamLineReader stream_line(istr);
244  CFastaReader fasta_reader(stream_line, CFastaReader::fParseGaps);
246  /// -1 = multiple alignment
247  CRef<CSeq_entry> entry = fasta_reader.ReadAlignedSet(-1);
248  CRef<CSeq_align> align;
    // Keep the first CSeq_align found in the entry; log if there are more.
249  CTypeIterator<CSeq_align> iter(*entry);
250  if (iter) {
251  align.Reset(&*iter);
252  ++iter;
253  if (iter) {
254  LOG_POST(Error << "CMSAToolJob::x_CreateProjectItems(): more than one alignment!");
255  }
256  }
258  ///
259  /// perform any necessary remappings
260  ///
261  CSeq_loc_Mapper mapper( &m_Ranges );
262  ITERATE( TSeqTypeMap, st_itr, m_SeqTypes ){
263  mapper.SetSeqTypeById(
264  st_itr->first,
    // NOTE(review): the second argument of this call is not visible in this
    // listing.
266  );
267  }
269  //- translate locals to proper ids
270  //- translate coords to proper offsets
    // NOTE(review): `align` is still empty here if the entry held no
    // CSeq_align; presumably dereferencing the empty CRef throws and is
    // reported through the catch blocks below -- confirm.
271  align = mapper.Map( *align );
273  ///
274  /// create an annotation to hold our results
275  CRef<CSeq_annot> annot(new CSeq_annot());
276  annot->SetData().SetAlign().push_back(align);
278  string title;
    // NOTE(review): the line that would fill `title` is not visible in this
    // listing.
280  string align_title(m_ToolName + " alignment: ");
281  align_title += title;
282  annot->SetNameDesc(align_title);
    // The description records the tool name and the exact command line used.
285  string comment = "Generated by ";
286  comment += m_ToolName;
287  comment += " tool with command line : \n";
288  comment += m_CmdLine.ToUTF8();
    // `annot` was created and dereferenced above, so this check always passes.
290  if (annot) {
291  CRef<CProjectItem> item(new CProjectItem());
292  item->SetItem().SetAnnot(*annot);
293  item->SetLabel(align_title);
294  CRef<CAnnotdesc> descr(new CAnnotdesc());
295  descr->SetComment(comment);
296  item->SetDescr().push_back(descr);
297  AddProjectItem(*item);
298  }
    // The tree item is optional and only attempted when a tree file is set.
300  if (!m_TmpTreeOut.empty())
301  x_AddTreeProjectItem(title, comment);
303  }
304  catch (CException& e) {
305  err_msg = "Failed to generate alignment:\n" + e.GetMsg();
306  }
307  catch (std::exception& e) {
308  err_msg = "Failed to generate alignment:\n" + string(e.what());
309  }
    // Rethrow any collected failure as a single CException.
311  if (!err_msg.empty())
312  NCBI_THROW(CException, eUnknown, err_msg);
313 }
/// Reads the Newick tree stored in the file named by m_TmpTreeOut, relabels
/// its nodes through m_IdMap and adds the result to the project as a tree
/// item.
/// @param title    appended to "Phylogenetic tree: " to form the tree type
///                 and the item label
/// @param comment  stored as the description comment of the new item
/// Does nothing when m_TmpTreeOut is empty. When reading or converting the
/// tree raises an exception, the container is reset, m_TmpTreeOut is cleared
/// and no item is added.
/// NOTE(review): several interior lines are not visible in this listing --
/// the declaration of `btc`, the header of the inner feature loop and the
/// beginning of the two message statements in the catch blocks.
315 void CMSAToolJob::x_AddTreeProjectItem(const string &title, const string &comment)
316 {
317  if (m_TmpTreeOut.empty())
318  return;
320  ///
321  /// read the tree, if we created one
322  ///
325  try {
326  CNcbiIfstream tree_istr(m_TmpTreeOut.fn_str());
327  unique_ptr<TPhyTreeNode> tree(ReadNewickTree(tree_istr));
329  /// we need to convert the labels to the correct seq-id strings
330  STreeNodeMapper mapper;
331  mapper.idmap = &m_IdMap;
332  TreeDepthFirstTraverse(*tree, mapper);
334  /// convert to the serializable form
335  btc = MakeBioTreeContainer(tree.get());
337  /// adding seq-id prop
    // NOTE(review): feature id 0 is taken to be the label feature --
    // presumably that is what MakeBioTreeContainer assigns; confirm.
338  const int kLabelId = 0;
339  const int kSeqIdId = 2;
    // Register the extra "seq-id" feature in the feature dictionary.
341  CRef<CFeatureDescr> feat_descr(new CFeatureDescr);
342  feat_descr->SetId(kSeqIdId);
343  feat_descr->SetName("seq-id");
344  btc->SetFdict().Set().push_back(feat_descr);
346  NON_CONST_ITERATE(CNodeSet::Tdata, node, btc->SetNodes().Set()) {
347  if (!(*node)->CanGetFeatures()) {
348  continue;
349  }
    // For every label feature of the node, append a "seq-id" feature that
    // carries the same value.
    // NOTE(review): the push_back below appends to the container that is
    // being iterated; this relies on the container keeping iterators valid
    // on insertion -- confirm the underlying Tdata type.
351  (*node)->SetFeatures().Set()) {
352  if ((*node_feature)->GetFeatureid() == kLabelId) {
353  CRef<CNodeFeature> id_node_feature(new CNodeFeature);
354  id_node_feature->SetFeatureid(kSeqIdId);
355  id_node_feature->SetValue((*node_feature)->GetValue());
356  (*node)->SetFeatures().Set().push_back(id_node_feature);
357  }
358  }
359  }
360  }
361  catch (const CException& e) {
363  << "Failed to read phylogenetic tree:\n"
364  << e.GetMsg()
365  << "\nPhylogenetic tree output will not be available.");
    // Drop the partial result and forget the tree file name.
366  btc.Reset();
367  m_TmpTreeOut.erase();
368  }
369  catch (const std::exception& e) {
371  << "Failed to read phylogenetic tree:\n"
372  << e.what()
373  << "\nPhylogenetic tree output will not be available.");
374  btc.Reset();
375  m_TmpTreeOut.erase();
376  }
    // No container means the tree could not be produced; nothing to add.
378  if (!btc)
379  return;
381  string tree_title = "Phylogenetic tree: " + title;
382  btc->SetTreetype(tree_title);
    // Wrap the container in a project item labeled with the tree title.
384  CRef<CProjectItem> item(new CProjectItem());
385  item->SetItem().SetOther().Set(*btc);
386  item->SetLabel(tree_title);
387  CRef<CAnnotdesc> descr(new CAnnotdesc());
388  descr->SetComment(comment);
389  item->SetDescr().push_back(descr);
390  AddProjectItem(*item);
391 }
394 {
    /// Removes each of the three temporary files (input, output, tree output)
    /// whose stored file name is not empty.
    /// NOTE(review): this method's signature line is not visible in this
    /// listing.
395  if (!m_TmpIn.empty())
396  ::wxRemoveFile(m_TmpIn);
397  if (!m_TmpOut.empty())
398  ::wxRemoveFile(m_TmpOut);
399  if (!m_TmpTreeOut.empty())
400  ::wxRemoveFile(m_TmpTreeOut);
401 }
