NCBI C++ ToolKit
splign_tool.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: splign_tool.cpp 47479 2023-05-02 13:24:02Z ucko $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Roman Katargin
27 *
28 */
29 
30 #include <ncbi_pch.hpp>
31 
34 
35 
37 
38 #include <serial/typeinfo.hpp>
39 
41 #include <objmgr/seqdesc_ci.hpp>
42 #include <objects/seq/MolInfo.hpp>
43 
45 #include <gui/objutils/label.hpp>
46 
50 
51 #include <wx/msgdlg.h>
52 
53 //#include <algo/align/splign/splign_simple.hpp>
54 
57 USING_SCOPE(blast);
58 
59 
60 ///////////////////////////////////////////////////////////////////////////////
61 /// CSplignTool
63 : CAlgoToolManagerBase("SPLIGN (SPliced aLIGNment)",
64  "",
65  "SPLIGN (SPliced aLIGNment)",
66  "Compute cDNA-to-genomic spliced alignments",
67  "https://www.ncbi.nlm.nih.gov/tools/gbench/tutorial2/",
68  "Alignment Creation"),
69  m_Panel()
70 {
71 }
72 
74 {
75  return "splign_tool";
76 }
77 
78 
80 {
81  return "SPLIGN Tool";
82 }
83 
85 {
87 
88  m_Panel = NULL;
89 }
90 
92 {
93  m_Panel = NULL;
95 }
96 
98 {
99  if (m_Panel == NULL) {
102  // canceled, or no input objects were found
103  return false;
104  }
105 
107  wxDefaultPosition, wxSize(0, 0), SYMBOL_CSPLIGNPANEL_STYLE, false);
110 
111  m_Panel->SetRegistryPath(m_RegPath + ".ParamsPanel");
113  }
114  return true;
115 }
116 
118 {
119  return true;
120 }
121 
123 {
125  m_NAObjects.clear();
126 
127  TObjects results;
128  x_ConvertInputObjects (CSeq_loc::GetTypeInfo(), results);
129  if (results.empty()) {
130  return;
131  }
132  NON_CONST_ITERATE(TObjects, it, results) {
133  TConstScopedObjects genomic, na;
134  NON_CONST_ITERATE(TConstScopedObjects, it2, it->second) {
135  const CSeq_loc* loc = dynamic_cast<const CSeq_loc*>(it2->object.GetPointer());
136  if( !loc ) continue;
137 
138  if( !loc->IsInt() && !loc->IsWhole() ) continue;
139 
140  CBioseq_Handle handle = it2->scope->GetBioseqHandle(*loc->GetId());
141 
142  const CMolInfo* molInfo = 0;
143  CSeqdesc_CI desc_iter(handle, CSeqdesc::e_Molinfo);
144  if (desc_iter) molInfo = &desc_iter->GetMolinfo();
145  if (molInfo) {
146  CMolInfo::TBiomol biomol = molInfo->GetBiomol();
147  switch(biomol) {
149  genomic.push_back(*it2);
150  break;
152  genomic.push_back(*it2);
153  na.push_back(*it2);
154  break;
156  break;
157  default :
158  na.push_back(*it2);
159  break;
160  }
161  }
162  else {
163  genomic.push_back(*it2);
164  na.push_back(*it2);
165  }
166  }
167 
168  if (!genomic.empty()) {
169  TConstScopedObjects& vec = m_GenomicObjects[it->first];
170  copy(genomic.begin(), genomic.end(), back_inserter(vec));
171  }
172  if (!na.empty()) {
173  TConstScopedObjects& vec = m_NAObjects[it->first];
174  copy(na.begin(), na.end(), back_inserter(vec));
175  }
176  }
177 
178 }
179 
181 {
182  CSplignPanel* panel = new CSplignPanel();
183  panel->Create(m_ParentWindow);
184  return panel;
185 }
186 
188 {
189  return m_Panel;
190 }
191 
193 {
194  return &m_Params;
195 }
196 
197 ///////////////////////////////////////////////////////////////////////////////
198 /// CSplignJob
200 {
201 public:
202  CSplignJob (const CSplignParams& params,
203  const TConstScopedObjects& genomic,
204  const TConstScopedObjects& cdna);
207 
208 protected:
209  virtual void x_CreateProjectItems(); // overriding virtual function
210 
211 protected:
215 };
216 
218 {
219  if (prog && prog->m_data) {
220  CSplignJob* job =
221  reinterpret_cast<CSplignJob*>(prog->m_data);
222  if(job->IsCanceled()) {
223  return true;
224  }
225  }
226  return false;
227 }
228 
230 {
231  if (prog && prog->user_data) {
232  CSplignJob* job =
233  reinterpret_cast<CSplignJob*>(prog->user_data);
234  if(job->IsCanceled()) {
235  return TRUE;
236  }
237  }
238  return FALSE;
239 }
240 
241 
243 {
244  TConstScopedObjects genomic, cdna;
245 
246  if (m_Panel) {
247  m_Params = m_Panel->GetData();
248  cdna = m_Panel->GetCDNASeqs();;
249  genomic.push_back(m_Panel->GetGenomicSeq());
250  }
251  else {
253  TConstScopedObjects& vec = m_GenomicObjects[it->first];
254  copy(vec.begin(), vec.end(), back_inserter(genomic));
255  }
257  TConstScopedObjects& vec = m_NAObjects[it->first];
258  copy(vec.begin(), vec.end(), back_inserter(cdna));
259  }
260  }
261 
262  #ifdef _DEBUG
263  m_Params.DebugDumpText( cerr, "Splign Options", 0 );
264  #endif
265 
266  CSplignJob* job = new CSplignJob(m_Params, genomic, cdna);
267  return job;
268 }
269 
271 {
273 
274  if (m_GenomicObjects.empty()) {
275  wxMessageBox(wxT("No genomic objects sequences can be proccessed were selected."),
276  wxT("SPLIGN Tool Error"), wxOK | wxICON_ERROR);
277  return 0;
278  }
279 
280  if (m_NAObjects.empty()) {
281  wxMessageBox(wxT("No cDNA/Transcript sequences that can be proccessed were selected."),
282  wxT("SPLIGN Tool Error"), wxOK | wxICON_ERROR);
283  return 0;
284  }
285 
287 
288  vector<CProjectService::TProjectId> projects;
289 
290  CProjectService* prj_srv =
292  {{
295  TConstScopedObjects& vec = m_GenomicObjects[it->first];
296  copy(vec.begin(), vec.end(), back_inserter(objects));
297  }
299  TConstScopedObjects& vec = m_NAObjects[it->first];
300  copy(vec.begin(), vec.end(), back_inserter(objects));
301  }
302 
303  prj_srv->GetObjProjects(objects, projects);
304  }}
305 
306  CSelectProjectOptions options;
307  if (projects.empty())
308  options.Set_CreateNewProject();
309  else
310  options.Set_AddToExistingProject(projects.front());
311 
312  CRef<CDataLoadingAppTask> task(new CDataLoadingAppTask(prj_srv, options, *job));
313  return task.Release();
314 }
315 
316 ///////////////////////////////////////////////////////////////////////////////
317 /// CSplignJob
319  const TConstScopedObjects& genomic,
320  const TConstScopedObjects& cdna)
321  : m_Params(params), m_Genomic(genomic), m_cDNA(cdna)
322 {
323  m_Descr = "SPLIGN Job"; //TODO
324 }
325 
327 {
329  const CSeq_loc& seqloc_genomic =
330  dynamic_cast<const CSeq_loc&>(it->object.GetObject());
331 
332  vector<string> not_found_labels;
333 
335  if (IsCanceled()) return;
336 
337  const CSeq_loc& seqloc_cdna = dynamic_cast<const CSeq_loc&>(iter->object.GetObject());
338  CScope& scope = *iter->scope;
339 
340  CBl2Seq blast(
341  SSeqLoc(seqloc_cdna, scope),
342  SSeqLoc(seqloc_genomic, scope),
343  eMegablast
344  );
345 
346  CConstRef<objects::CSeq_id> transcript_id( &sequence::GetId( seqloc_cdna, &scope ) );
347  CConstRef<objects::CSeq_id> genomic_id( &sequence::GetId( seqloc_genomic, &scope ) );
348 
349  //true for EST, false for mrna
350  const bool query_low_quality = ( m_Params.GetDiscType() == 0 );
351 
352  CSplign splign;
353  splign.SetAligner() = CSplign::s_CreateDefaultAligner( query_low_quality );
354  splign.SetAligner()->SetSpaceLimit( numeric_limits<Uint4>::max() );
355  splign.SetScope().Reset(&scope);
356  splign.PreserveScope();
357 
360  splign.SetStrand( m_Params.GetCDNAStrand() != 1 );
364  splign.SetMaxIntron( m_Params.GetMaxIntron() );
365 
366  /// NB: these two values should be the same, if min singleton identity is not
367  /// exposed for user modification
370 
371  // set progress interrupt callbacks for Blast and NWAligner
372  {
373  splign.SetAligner()->SetProgressCallback( s_SplignInterruptCallback, this );
375  }
376 
377  //splign_simple.Run();
378 
379  TSeqAlignVector blres( blast.Run() );
380 
381  CSplign::THitRefs hitrefs;
382  ITERATE( TSeqAlignVector, ii, blres ){
383  if( (*ii)->IsSet() ){
384  const CSeq_align_set::Tdata &sas0 = (*ii)->Get();
385  ITERATE( CSeq_align_set::Tdata, sa_iter, sas0 ){
386  CSplign::THitRef hitref( new CSplign::THit(**sa_iter) );
387  if( hitref->GetQueryStrand() == false ){
388  hitref->FlipStrands();
389  }
390  hitrefs.push_back( hitref );
391  }
392  }
393  }
394 
395  if( hitrefs.size() ){
396  splign.Run( &hitrefs );
397  }
398 
399  CSplignFormatter sf( splign );
400  sf.SetSeqIds( transcript_id, genomic_id );
401  CRef<CSeq_align_set> seqaln_set(
403  );
404 
405  if( m_Params.GetCDNAStrand() == 2 ){
406  splign.SetStrand( false );
407 
408  if( hitrefs.size() ){
409  splign.Run( &hitrefs );
410  }
411 
412  CSplignFormatter sf( splign );
413  sf.SetSeqIds( transcript_id, genomic_id );
414  CRef<CSeq_align_set> seqaln_set_neg(
416  );
417 
418  if( !seqaln_set_neg->IsEmpty() ){
419  seqaln_set->Set().merge( seqaln_set_neg->Set() );
420  }
421  }
422 
423  typedef CSeq_annot::TData::TAlign TAlign;
424 
425  if( !seqaln_set->Get().empty() ){
426  // pack the alignment in a Seq-annot and label it appropriately
427  CRef<CSeq_annot> annot(new CSeq_annot());
428  TAlign& seqannot_align = annot->SetData().SetAlign();
429  ITERATE( CSeq_align_set::Tdata, align, seqaln_set->Get() ){
430  seqannot_align.push_back(*align);
431  }
432 
433  // prepare a title
434  string str;
435  CLabel::GetLabel(seqloc_genomic, &str, CLabel::eDefault, &scope);
436  str += " x ";
437  CLabel::GetLabel(seqloc_cdna, &str, CLabel::eDefault, &scope);
438  str += ": SPLIGN alignment";
439  annot->SetNameDesc(str);
440 
441  annot->SetCreateDate(CurrentTime());
442 
443  /// now create a Project Item for the data
444  CRef<CProjectItem> item(new CProjectItem());
445  item->SetItem().SetAnnot(*annot);
446 
447  // TODO we need to generate title properly
448  item->SetLabel(str);
449  AddProjectItem(*item);
450  }
451  else {
452  // no alignments - save cDNA label for info message
453  string label;
454  CLabel::GetLabel(seqloc_cdna, &label, CLabel::eDefault, &scope);
455  not_found_labels.push_back(label);
456  }
457  }
458  }
459 }
460 
461 
Declares the CBl2Seq (BLAST 2 Sequences) class.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
@ eMegablast
Nucl-Nucl (traditional megablast)
Definition: blast_types.hpp:65
CAlgoToolManagerBase This is base class for simple algorithmic tool managers.
virtual void InitUI()
override this function in a derived class and initialize extra members
string m_RegPath
registry path to the settings
wxWindow * m_ParentWindow
a window that will serve as a parent for our panels
virtual void CleanUI()
override this function in a derived class and clean extra members
void x_ConvertInputObjects(const CTypeInfo *typeInfo, map< string, TConstScopedObjects > &results)
CAlgoToolManagerParamsPanel.
CBioseq_Handle –.
Runs the BLAST algorithm between 2 sequences.
Definition: bl2seq.hpp:58
CDataLoadingAppJob - a base class for Jobs loading data into projects.
void AddProjectItem(objects::CProjectItem &item)
CDataLoadingAppTask - a task that executes CDataLoadingAppJob.
void DebugDumpText(ostream &out, const string &bundle, unsigned int depth) const
Definition: ddumpable.cpp:56
CProjectService - a service providing API for operations with Workspaces and Projects.
void GetObjProjects(TConstScopedObjects &objects, vector< TProjectId > &ids)
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
CProjectSelectOptions - describes how new Project Items shall be added to a workspace.
void Set_CreateNewProject(const string &folder=kEmptyStr)
void Set_AddToExistingProject(TProjectId &project_id, const string &folder=kEmptyStr)
bool IsEmpty() const
void SetNameDesc(const string &name)
Definition: Seq_annot.cpp:66
void SetCreateDate(const CTime &dt)
Definition: Seq_annot.cpp:121
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
CRef< objects::CSeq_align_set > AsSeqAlignSet(const CSplign::TResults *results=0, int flags=eAF_SplicedSegWithParts) const
Format alignment as a seq-align-set.
void SetSeqIds(CConstRef< objects::CSeq_id > id1, CConstRef< objects::CSeq_id > id2)
CSplignJob.
static Boolean s_BlastInterruptCallback(SBlastProgress *prog)
TConstScopedObjects m_cDNA
virtual void x_CreateProjectItems()
override this function in derived classes and populate m_Items.
CSplignJob(const CSplignParams &params, const TConstScopedObjects &genomic, const TConstScopedObjects &cdna)
CSplignJob.
CSplignParams m_Params
static bool s_SplignInterruptCallback(CNWAligner::SProgressInfo *prog)
TConstScopedObjects m_Genomic
CSplign is the central library object for computing spliced cDNA-to-genomic alignments.
Definition: splign.hpp:74
void SetPolyaDetection(bool on)
Definition: splign.cpp:374
void Run(THitRefs *hitrefs)
Definition: splign.cpp:1162
void SetCompartmentPenalty(double penalty)
Definition: splign.cpp:588
void SetMinSingletonIdentity(double idty)
Definition: splign.cpp:432
CRef< objects::CScope > & SetScope(void)
Definition: splign.cpp:576
void SetMaxIntron(size_t max_intron)
Definition: splign.cpp:465
void SetMinExonIdentity(double idty)
Definition: splign.cpp:390
void SetStrand(bool strand)
Definition: splign.cpp:382
void PreserveScope(bool preserve=true)
Controls whether to clean the scope object's cache on a new sequence.
Definition: splign.cpp:582
vector< THitRef > THitRefs
Definition: splign.hpp:295
void SetMaxGenomicExtent(size_t mge)
Definition: splign.cpp:453
CRef< TAligner > & SetAligner(void)
Access the spliced aligner core object.
Definition: splign.cpp:213
void SetMinCompartmentIdentity(double idty)
Definition: splign.cpp:422
static CRef< CSplicedAligner > s_CreateDefaultAligner(void)
Definition: splign.cpp:235
void SetEndGapDetection(bool on)
Definition: splign.cpp:366
IAppTask.
Definition: app_task.hpp:83
IRegSettings An interface for objects that save / restore settings using CGuiRegistry.
bool empty() const
Definition: map.hpp:149
void clear()
Definition: map.hpp:169
static const char * str(char *buf, int n)
Definition: stats.c:84
TInterruptFnPtr SetInterruptCallback(TInterruptFnPtr fnptr, void *user_data=NULL)
Set a function callback to be invoked by the CORE of BLAST to allow interrupting a BLAST search in pr...
Definition: bl2seq.hpp:302
virtual TSeqAlignVector Run()
Perform BLAST search Assuming N queries and M subjects, the structure of the returned vector is as fo...
Definition: bl2seq.cpp:173
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
CIRef< T > GetServiceByType()
retrieves a typed reference to a service, the name of C++ type is used as the name of the service.
Definition: service.hpp:91
bool Create(wxWindow *parent, wxWindowID id=ID_CSPLIGNPANEL, const wxPoint &pos=wxDefaultPosition, const wxSize &size=wxSize(400, 300), long style=wxCLIP_CHILDREN|wxTAB_TRAVERSAL, bool visibale=true)
virtual void LoadSettings()
CSplignTool()
CSplignTool.
Definition: splign_tool.cpp:62
virtual string GetExtensionIdentifier() const
returns the unique human-readable identifier for the extension the id should use lowercase letters se...
Definition: splign_tool.cpp:73
TObjects m_NAObjects
Definition: splign_tool.hpp:88
virtual void CleanUI()
override this function in a derived class and clean extra members
Definition: splign_tool.cpp:91
virtual CDataLoadingAppJob * x_CreateLoadingJob()
factory method for creating the job that executes the tool algorithm override in derived classes
virtual void x_SelectCompatibleInputObjects()
virtual CAlgoToolManagerParamsPanel * x_GetParamsPanel()
returns a pointer to the parameters panel, override in derived classes
virtual bool x_ValidateParams()
validates user input in Parameters panel, report errors if any
CSplignParams m_Params
Definition: splign_tool.hpp:89
CSplignPanel * m_Panel
Definition: splign_tool.hpp:90
virtual IAppTask * QuickLaunch()
double GetCompPenalty() const
long GetMaxGenomicExtent() const
CSplignParams & GetData()
TConstScopedObjects GetCDNASeqs() const
bool GetPolyADetect() const
TObjects m_GenomicObjects
Definition: splign_tool.hpp:87
double GetMinExonIdentity() const
virtual wxPanel * GetMaxPanel()
Return the panel that occupies max size on display, to calculate dialog window size.
virtual IRegSettings * x_GetParamsAsRegSetting()
return a pointer to Parameters object as IRegSettings interface
int GetDiscType() const
bool GetEndGapDetect() const
virtual void SetRegistryPath(const string &path)
CAlgoToolManagerParamsPanel.
long GetMaxIntron() const
SConstScopedObject GetGenomicSeq() const
virtual void InitUI()
override this function in a derived class and initialize extra members
Definition: splign_tool.cpp:84
double GetMinCompIdentity() const
virtual bool x_CreateParamsPanelIfNeeded()
returns / creates Parameters panel, override in derived classes see cpp file for example
Definition: splign_tool.cpp:97
void SetData(const CSplignParams &data)
void SetObjects(map< string, TConstScopedObjects > *genomic, map< string, TConstScopedObjects > *na)
int GetCDNAStrand() const
virtual string GetExtensionLabel() const
returns a displayable label for this extension ( please capitalize the key words - "My Extension" )
Definition: splign_tool.cpp:79
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
string m_Descr
mutex to sync our internals
virtual bool IsCanceled() const override
vector< SConstScopedObject > TConstScopedObjects
Definition: objects.hpp:65
@ eDefault
Definition: label.hpp:73
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TObjectType * Release(void)
Release a reference to the object and return a pointer to the object.
Definition: ncbiobj.hpp:846
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
CTime CurrentTime(CTime::ETimeZone tz=CTime::eLocal, CTime::ETimeZonePrecision tzp=CTime::eTZPrecisionDefault)
Definition: ncbitime.hpp:2185
static const char label[]
void SetLabel(const TLabel &value)
Assign a value to Label data member.
void SetItem(TItem &value)
Assign a value to Item data member.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
list< CRef< CSeq_align > > TAlign
Definition: Seq_annot_.hpp:194
TBiomol GetBiomol(void) const
Get the Biomol member data.
Definition: MolInfo_.hpp:447
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
static char * prog
Definition: mdb_load.c:33
#define wxT(x)
Definition: muParser.cpp:41
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
#define TRUE
bool replacement for C indicating true.
Definition: ncbi_std.h:97
#define FALSE
bool replacement for C indicating false.
Definition: ncbi_std.h:101
T max(T x_, T y_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
The Object manager core.
static static static wxID_ANY
#define SYMBOL_CSPLIGNPANEL_STYLE
USING_SCOPE(objects)
Progress monitoring structure.
Definition: blast_def.h:341
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
Modified on Wed Apr 17 13:10:54 2024 by modify_doxy.py rev. 669887