NCBI C++ ToolKit
update_seq_worker.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: update_seq_worker.cpp 44179 2019-11-12 20:55:18Z asztalos $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrea Asztalos
27  */
28 
29 
30 #include <ncbi_pch.hpp>
32 #include <objmgr/bioseq_handle.hpp>
41 
45 #include <wx/msgdlg.h>
46 
47 
50 
// Constructor fragment: the line carrying the constructor's name is absent from this listing.
// The only visible effect is initializing m_Type to eUpdateNotSet; the body is empty.
52  : m_Type(CSeqUpdateWorker::eUpdateNotSet)
53 {
54 }
55 
56 static string s_WarnAboutIgnoredResidues(ILineErrorListener& msg_listener)
57 {
58  string message;
59  for (size_t i = 0; i < msg_listener.Count(); ++i) {
60  const ILineError& line_err = msg_listener.GetError(i);
61  if (line_err.Problem() == ILineError::eProblem_IgnoredResidue) {
62  if (line_err.GetSeverity() == eDiag_Warning
63  && NStr::Find(line_err.ErrorMessage(), "Hyphens") != NPOS) {
64  message += ("Hyphens in the update sequence have been ignored.\n");
65  }
66  }
67  else if (line_err.Problem() == ILineError::eProblem_InvalidResidue) {
68  if (NStr::Find(line_err.ErrorMessage(), "Ignoring invalid residues") != NPOS) {
69  SIZE_TYPE pos = NStr::Find(line_err.ErrorMessage(), "On line");
70  if (pos != NPOS) {
71  message += ("Invalid character was found (and ignored):");
72  message += line_err.ErrorMessage().substr(pos + 2, NPOS);
73  message += "\n";
74  }
75  }
76  }
77  }
78 
79  return message;
80 }
81 
// Body of a single-sequence update entry point. The signature line is absent from this
// listing (presumably CSeqUpdateWorker::UpdateSingleSequence - confirm against the header);
// `workbench`, `seh`, `cmdProcessor` and `params` are declared on lines not visible here.
//
// Flow: set the old bioseq and scope, read one update sequence from the source selected by
// m_Type, ask for confirmation when more than one bioseq was read, show any ignored-residue
// warning, then hand over to x_LaunchSingleUpdateSequence.
83 {
84  CUpdateSeq_Input updseq_in;
86  bool input_ok = false;
87  string warn_message;
88  try {
89  input_ok = updseq_in.SetOldBioseqAndScope(workbench, seh);
90  switch (m_Type) {
91  case eSingle_File:
92  {
93  CMessageListenerLenient msg_container;
  // `input_ok &&` short-circuits: nothing is read if setting the old bioseq failed
94  input_ok = input_ok && updseq_in.ReadSingleUpdateSequence(&msg_container);
95  warn_message = s_WarnAboutIgnoredResidues(msg_container);
96  break;
97  }
98  case eSingle_Clipboard:
99  {
100  CMessageListenerLenient msg_container;
101  input_ok = input_ok && updseq_in.ReadUpdateSequenceFromClipboard(&msg_container);
102  warn_message = s_WarnAboutIgnoredResidues(msg_container);
103  break;
104  }
105  case eSingle_Accession:
  // no message listener is passed here, so warn_message stays empty on this path
106  input_ok = input_ok && updseq_in.ReadSingleAccession();
108  break;
109  default:
  // any other update type is treated as failed input
110  input_ok = false;
111  break;
112  }
113  // sequences are read at this point, but no alignment is generated for now
114 
115  if (!input_ok)
116  return;
117  }
  // CSeqUpdateException is shown to the user; any other CException is only posted via ERR_POST
118  catch (const CSeqUpdateException& e) {
119  NcbiMessageBox(e.GetMsg());
120  return;
121  }
122  catch (const CException& e) {
123  ERR_POST(e.what());
124  return;
125  }
126 
127  unsigned int count_bioseqs = updseq_in.CountBioseqs();
128  if (count_bioseqs > 1) {
129  string msg("You selected update single sequence but the update file contains "
130  + NStr::UIntToString(count_bioseqs) +" bioseqs. Do you want to continue and use the first record only?");
  // anything other than an explicit Yes aborts the update
131  if (eYes != NcbiMessageBox(msg, eDialog_YesNo, eIcon_Question, "Confirm")) {
132  return;
133  }
134  }
135  if (!warn_message.empty()) {
136  NcbiWarningBox(warn_message);
137  }
138 
139  x_LaunchSingleUpdateSequence(updseq_in, params, seh, cmdProcessor);
140 }
141 
// Body of a second single-sequence update entry point. The signature line is absent from
// this listing (presumably CSeqUpdateWorker::UpdateSingleSequence_Ext - confirm);
// `workbench`, `seh`, `cmdProcessor`, `params` and `res` are declared on lines not visible here.
//
// Flow: set the old bioseq and scope once, then read the update sequence inside a loop.
// When more than one bioseq was read and the user answers No, the update sequence is reset
// and the loop repeats. Afterwards any warning is shown and x_LaunchSingleUpdateSequence runs.
143 {
144  CUpdateSeq_Input updseq_in;
146  string warn_message;
147 
148  try {
149  bool input_ok = updseq_in.SetOldBioseqAndScope(workbench, seh);
150  if (!input_ok) return;
151  }
152  catch (const CSeqUpdateException& e) {
153  NcbiMessageBox(e.GetMsg());
154  return;
155  }
156  catch (const CException& e) {
157  ERR_POST(e.what());
158  return;
159  }
160 
  // NOTE(review): `res` is only assigned further down when count_bioseqs > 1. After a No
  // answer, a repeated read that yields a single bioseq leaves res == eNo, so the loop
  // would run again - confirm whether res should be reset at the top of each iteration.
162  do {
163  try {
  // only the file source reads anything in this loop; other m_Type values skip the read
164  if (m_Type == eSingle_File) {
165  CMessageListenerLenient msg_container;
166  bool input_ok = updseq_in.ReadSingleUpdateSequence(&msg_container);
167  warn_message = s_WarnAboutIgnoredResidues(msg_container);
168  if (!input_ok) return;
169  }
170  // sequences are read at this point, but no alignment is generated for now
171  }
172  catch (const CSeqUpdateException& e) {
173  NcbiMessageBox(e.GetMsg());
174  return;
175  }
176  catch (const CException& e) {
177  ERR_POST(e.what());
178  return;
179  }
180 
181  auto count_bioseqs = updseq_in.CountBioseqs();
182  if (count_bioseqs > 1) {
183  string msg("You selected update single sequence but the update file contains "
184  + NStr::UIntToString(count_bioseqs) + " bioseqs. Do you want to continue and use the first record only?");
185 
186  res = NcbiMessageBox(msg, eDialog_YesNo, eIcon_Question, "Confirm");
187  if (res == eNo) {
  // discard what was read, including its warnings, before the next attempt
188  updseq_in.ResetUpdateSequence();
189  warn_message.clear();
190  }
191  }
192 
193  } while (res == eNo);
194 
195  if (!warn_message.empty()) {
196  NcbiWarningBox(warn_message);
197  }
198 
199  x_LaunchSingleUpdateSequence(updseq_in, params, seh, cmdProcessor);
200 }
201 
 // Parameter list and body of x_LaunchSingleUpdateSequence. The line with the function name
 // and the two `if` header lines (listing lines 210 and 217) are absent from this listing.
 //
 // Shows up to two confirmation prompts, runs CUpdateSeq_Dlg modally and, when the dialog
 // returns wxID_OK, validates the chosen parameters, builds the update command with
 // CSequenceUpdater, executes it and shows the resulting reports.
 //
 // updseq_in     supplies the old and the update bioseq handles and is passed to the dialog
 // params        passed to the dialog via SetData(); overwritten with GetData() on wxID_OK
 // seh           its top-level entry is passed to edit::IsGeneralIdProtPresent
 // cmdProcessor  executes the resulting command; dereferenced without a null check
203  CUpdateSeq_Input& updseq_in,
204  SUpdateSeqParams& params,
205  const CSeq_entry_Handle& seh,
206  ICommandProccessor* cmdProcessor) const
207 {
208  const CBioseq_Handle& oldBsh = updseq_in.GetOldBioseq();
209  const CBioseq_Handle& updBsh = updseq_in.GetUpdateBioseq();
  // first half of this condition is on a line missing from the listing; the visible half
  // requires the update bioseq to have a raw representation
211  && updBsh.IsSetInst_Repr() && (updBsh.GetInst_Repr() == CSeq_inst::eRepr_raw)) {
212  string msg("You are about to update a delta sequence with a raw sequence, which\n will convert the delta sequence to raw. Do you want to continue?");
213  if (eYes != NcbiMessageBox(msg, eDialog_YesNo, eIcon_Question, "Confirm")) {
214  return;
215  }
216  }
  // the `if` header guarding this prompt is on a line missing from the listing
218  string msg("The update sequence is a delta sequence with far pointers. Do you want to continue?");
219  if (eYes != NcbiMessageBox(msg, eDialog_YesNo, eIcon_Question, "Confirm")) {
220  return;
221  }
222  }
223 
224  try {
225  CUpdateSeq_Dlg dlg(NULL, updseq_in);
226  dlg.SetData(params);
227 
228  if (dlg.ShowModal() == wxID_OK) {
229  params = dlg.GetData();
230  if (!params.AreUpdateParamsValid()) {
231  NcbiMessageBox("Invalid options selected!");
232  return;
233  }
234  CSequenceUpdater updater(updseq_in, params);
235  bool create_general_only = edit::IsGeneralIdProtPresent(seh.GetTopLevelEntry());
236  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
  // nothing is executed or reported when Update() returns an empty reference
237  if (cmd) {
238  cmdProcessor->Execute(cmd);
239  const string& citsub_msg = updater.GetCitSubMessage();
240  if (!citsub_msg.empty()) {
241  NcbiInfoBox(citsub_msg);
242  }
243 
  // report = not-imported-features text (if any) followed by the reverse-complement text
244  string report;
245  CNcbiOstrstream oss;
246  updater.GetNotImportedFeatsReport(oss);
247  if (!IsOssEmpty(oss)) {
248  report = string(CNcbiOstrstreamToString(oss));
249  report.append("\n");
250  }
251  report.append(updater.GetRevCompReport());
252 
253  if (!report.empty()) {
  // NOTE(review): allocated with `new` and never deleted here - presumably the window
  // manages its own lifetime; confirm
254  CGenericReportDlg* reportdlg = new CGenericReportDlg(NULL);
255  reportdlg->SetTitle("Update Sequence Log");
256  reportdlg->SetText(ToWxString(report));
257  reportdlg->Show(true);
258  }
259  }
260  }
261  }
262  catch (const CSeqUpdateException& e) {
263  NcbiMessageBox(e.GetMsg());
264  }
265  catch (const CException& e) {
  // other toolkit exceptions are logged and replaced by a generic message for the user
266  LOG_POST(Error << e.what());
267  NcbiMessageBox("Could not update the sequence");
268  }
269 }
270 
// Body of the multiple-sequence update entry point. The signature line is absent from this
// listing (presumably CSeqUpdateWorker::UpdateMultipleSequences - confirm); `seh` and
// `cmdProcessor` are declared there. Listing lines 308 and 323 are also missing.
//
// Flow: read the update sequences from a file or the clipboard depending on m_Type, report
// ignored residues and duplicate ids, run CUpdateMultiSeq_Dlg modally and, if anything was
// updated, execute the accumulated command and show the reports.
272 {
273  CUpdateMultipleSeq_Input multiupdseq_in;
  // NOTE(review): the result of SetOldEntryAndScope is not checked here - confirm that
  // a failure can be ignored
274  multiupdseq_in.SetOldEntryAndScope(seh);
275  bool input_ok = false;
276 
277  try {
278  switch (m_Type) {
279  case eMultiple_File:
280  {
281  CMessageListenerLenient msg_container;
282  input_ok = multiupdseq_in.ReadUpdSeqs_FromFile(&msg_container);
  // called regardless of input_ok, so collected reader messages are shown even on failure
283  x_GetSeqsWithIgnoredResidues(msg_container);
284  break;
285  }
286  case eMultiple_Clipboard:
287  {
288  CMessageListenerLenient msg_container;
289  input_ok = multiupdseq_in.ReadUpdSeqs_FromClipboard(&msg_container);
290  x_GetSeqsWithIgnoredResidues(msg_container);
291  break;
292  }
293  default:
294  input_ok = false;
295  break;
296  }
297 
298  if (!input_ok)
299  return;
300  }
301  catch (const CSeqUpdateException& e) {
302  NcbiMessageBox(e.GetMsg());
303  return;
304  }
305  catch (const CException& e) {
306  string msg = e.GetMsg();
  // upper-cases the first character; the line that uses `msg` afterwards is missing from the listing
307  msg[0] = toupper(msg[0]);
309  return;
310  }
311 
312  try {
313  CRef<CCmdComposite> update_cmd(new CCmdComposite("Update multiple sequences"));
314  CUpdateMultiSeq_Dlg dlg(NULL, multiupdseq_in, update_cmd);
315  int retcode = dlg.ShowModal();
  // NOTE(review): compared against wxOK / wxCLOSE here, whereas the single-sequence dialog
  // in this file compares ShowModal() against wxID_OK - confirm which constants are intended
316  if (retcode == wxOK || retcode == wxCLOSE) {
317  // execute the command of updates
318  if (update_cmd && dlg.AnyUpdated()) {
319  cmdProcessor->Execute(update_cmd);
320  dlg.ReportStats();
321  string msg = dlg.GetRevCompReport();
322  if (!msg.empty()) {
  // the declaration of `report` is on a line missing from the listing
324  report->SetTitle("Update Sequence Log");
325  report->SetText(ToWxString(msg));
326  report->Show(true);
327  }
328  }
329  }
330  }
331  catch (const CSeqUpdateException& e) {
332  NcbiMessageBox(e.GetMsg());
333  }
334  catch (const CException& e) {
335  LOG_POST(Error << e.what());
336  NcbiMessageBox("Could not update the sequences");
337  }
338 }
339 
// Body of the helper that summarizes reader messages for a multi-sequence update. The
// signature line is absent from this listing (presumably x_GetSeqsWithIgnoredResidues, with
// `msg_listener` as its parameter - confirm), as are the three branch headers at listing
// lines 351, 362 and 374.
//
// Collects three lists from the listener's messages: seq-ids with ignored hyphens, seq-ids
// with ignored invalid residues (plus the location text), and duplicate seq-ids. The first
// two are shown together in a warning box; duplicates are shown in a CGenericReportDlg.
341 {
342  string hyphens_msg;
343  string invalidRes_msg;
344  string duplicateids_msg;
345 
346  for (size_t i = 0; i < msg_listener.Count(); ++i) {
347  const ILineError& line_err = msg_listener.GetError(i);
  // `type` is not used on any visible line; the missing branch headers presumably test it
348  auto type = line_err.Problem();
  // NOTE(review): `auto` makes a copy of the message string on every iteration
349  auto msg = line_err.ErrorMessage();
350 
352  if (line_err.GetSeverity() == eDiag_Warning
353  && NStr::Find(msg, "Hyphens") != NPOS) {
  // NOTE(review): substring search on the accumulated list - an id that is a substring of
  // an already listed id is treated as present; confirm this is acceptable
354  if (NStr::Find(hyphens_msg, line_err.SeqId()) == NPOS) {
355  if (!hyphens_msg.empty()) {
356  hyphens_msg += ", ";
357  }
358  hyphens_msg += line_err.SeqId();
359  }
360  }
361  }
363  if (NStr::Find(msg, "Ignoring invalid residues") != NPOS) {
364  SIZE_TYPE pos = NStr::Find(msg, "On line");
365  if (pos != NPOS) {
366  if (!invalidRes_msg.empty()) {
367  invalidRes_msg += "\n";
368  }
369  invalidRes_msg += line_err.SeqId();
  // pos + 2 skips the two characters of "On", so the appended text starts with " line"
370  invalidRes_msg += msg.substr(pos + 2, NPOS);
371  }
372  }
373  }
375  if (msg.find("Seq-id") != NPOS && msg.find("is a duplicate") != NPOS) {
  // same substring-based de-duplication as for the hyphen list
376  if (duplicateids_msg.find(line_err.SeqId()) == NPOS) {
377  if (!duplicateids_msg.empty()) {
378  duplicateids_msg += "\n";
379  }
380  duplicateids_msg += line_err.SeqId();
381  }
382  }
383  }
384  }
385 
  // hyphen and invalid-residue findings share one warning box
386  string message;
387  if (!hyphens_msg.empty()) {
388  message += "Hyphens have been ignored in the following update sequences:\n";
389  message += hyphens_msg;
390  }
391 
392  if (!invalidRes_msg.empty()) {
393  if (!message.empty()) {
394  message += "\n";
395  }
396  message += "Invalid residues were found and ignored in the following update sequences:\n";
397  message += invalidRes_msg;
398  }
399 
400  if (!message.empty()) {
401  NcbiWarningBox(message);
402  }
403 
  // duplicate ids are reported separately in their own dialog
404  if (!duplicateids_msg.empty()) {
405  duplicateids_msg = "Duplicate sequence ids were found in the update sequence file:\n" + duplicateids_msg;
406  CGenericReportDlg* reportdlg = new CGenericReportDlg(NULL);
407  reportdlg->SetTitle("Duplicate seq-ids");
408  reportdlg->SetText(ToWxString(duplicateids_msg));
409  reportdlg->Show(true);
410  }
411 }
412 
413 
bool IsGeneralIdProtPresent(objects::CSeq_entry_Handle tse)
CBioseq_Handle –.
void SetText(const wxString &text)
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
Sequence update exception class.
void UpdateSingleSequence_Ext(IWorkbench *workbench, const objects::CSeq_entry_Handle &tse, ICommandProccessor *cmdProcessor) const
void x_LaunchSingleUpdateSequence(CUpdateSeq_Input &updseq_in, SUpdateSeqParams &params, const objects::CSeq_entry_Handle &tse, ICommandProccessor *cmdProcessor) const
void UpdateMultipleSequences(IWorkbench *workbench, const objects::CSeq_entry_Handle &tse, ICommandProccessor *cmdProcessor)
void UpdateSingleSequence(IWorkbench *workbench, const objects::CSeq_entry_Handle &tse, ICommandProccessor *cmdProcessor) const
ESeqUpdateType m_Type
void x_GetSeqsWithIgnoredResidues(objects::ILineErrorListener &msg_listener)
CSeq_entry_Handle –.
Class responsible for executing the sequence update for one old-update sequence pair.
Definition: update_seq.hpp:57
string GetRevCompReport() const
const string & GetCitSubMessage() const
Definition: update_seq.hpp:94
void GetNotImportedFeatsReport(CNcbiOstream &out) const
CRef< CCmdComposite > Update(bool create_general_only)
Main function responsible to update the old sequence with the update sequence.
Definition: update_seq.cpp:564
const string & GetRevCompReport() const
bool ReadUpdSeqs_FromClipboard(objects::ILineErrorListener *msg_listener)
bool SetOldEntryAndScope(const objects::CSeq_entry_Handle &tse)
bool ReadUpdSeqs_FromFile(objects::ILineErrorListener *msg_listener)
void SetData(const SUpdateSeqParams &params)
SUpdateSeqParams GetData() const
Sets up the old and the update sequences, and generates the alignment between them.
bool SetOldBioseqAndScope(IWorkbench *workbench, const objects::CSeq_entry_Handle &tse)
unsigned int CountBioseqs()
bool ReadSingleAccession(void)
Reads single update sequence identified by an accession.
bool ReadSingleUpdateSequence(objects::ILineErrorListener *msg_listener)
Reads (ASN.1 or FASTA) single update sequence from a file.
static bool s_IsDeltaWithFarPointers(const objects::CBioseq &bseq)
const objects::CBioseq_Handle & GetUpdateBioseq(void) const
bool ReadUpdateSequenceFromClipboard(objects::ILineErrorListener *msg_listener)
Reads single update sequence from the clipboard. It supports FASTA and text ASN formats.
const objects::CBioseq_Handle & GetOldBioseq(void) const
static bool s_IsDeltaWithNoGaps(const objects::CBioseq &bseq)
Undo/Redo interface for editing operations.
virtual void Execute(IEditCommand *command, wxWindow *window=0)=0
virtual size_t Count() const =0
virtual const ILineError & GetError(size_t) const =0
0-based error retrieval.
@ eProblem_InvalidResidue
Definition: line_error.hpp:79
@ eProblem_IgnoredResidue
Definition: line_error.hpp:88
@ eProblem_GeneralParsingError
Definition: line_error.hpp:105
virtual EProblem Problem(void) const =0
virtual const string & SeqId(void) const =0
virtual const string & ErrorMessage() const
Definition: line_error.cpp:140
virtual EDiagSev GetSeverity(void) const
Definition: line_error.hpp:150
IWorkbench is the central interface in the application framework.
Definition: workbench.hpp:113
Stores parameters regarding the type of sequence update, on how to handle existing features and on ho...
ESequenceUpdateType m_SeqUpdateOption
bool AreUpdateParamsValid(void)
@ eSeqUpdateReplace
do not change the old sequence
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
EDialogReturnValue NcbiInfoBox(const string &message, const string &title="Info")
specialized Message Box function for reporting general information messages
void NcbiWarningBox(const string &message, const string &title="Warning")
specialized Message Box function for reporting non-critical errors
EDialogReturnValue NcbiMessageBox(const string &message, TDialogType type=eDialog_Ok, EDialogIcon icon=eIcon_Exclamation, const string &title="Error", EDialogTextMode text_mode=eRaw)
Definition: message_box.cpp:48
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
bool IsSetInst_Repr(void) const
TInst_Repr GetInst_Repr(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
bool IsOssEmpty(CNcbiOstrstream &oss)
Definition: ncbistre.hpp:831
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define NPOS
Definition: ncbistr.hpp:133
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2882
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:5111
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ eIcon_Question
Definition: types.hpp:64
EDialogReturnValue
enumerated return values for dialog boxes, starting from 1 to undermine attempts to cast it to "bool"
Definition: types.hpp:71
@ eCancel
Definition: types.hpp:72
@ eDialog_YesNo
Definition: types.hpp:49
int i
int toupper(Uchar c)
Definition: ncbictype.hpp:73
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Definition: type.c:6
static string s_WarnAboutIgnoredResidues(ILineErrorListener &msg_listener)
USING_SCOPE(objects)
wxString ToWxString(const string &s)
Definition: wx_utils.hpp:173
Modified on Wed Sep 04 15:03:11 2024 by modify_doxy.py rev. 669887