NCBI C++ ToolKit
alnmulti_ds_builder.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: alnmulti_ds_builder.cpp 44958 2020-04-28 18:04:11Z shkeda $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrey Yazhuk
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
35 
37 
40 
41 #include <gui/objutils/utils.hpp>
45 
46 #include <corelib/ncbitime.hpp>
47 #include <objmgr/align_ci.hpp>
48 #include <objmgr/bioseq_handle.hpp>
49 
52 
53 
56 
57 
59  : m_CreateSparse( true )
60  , m_SyncCreate( false )
61  , m_Listener(0)
62 {
63 }
64 
65 
67 {
68  x_Clear();
69 }
70 
71 
72 void CAlnMultiDSBuilder::Init(objects::CScope& scope, const objects::CSeq_align& align)
73 {
74  x_Clear();
75 
76  m_Scope.Reset(&scope);
77  m_OrigAligns.push_back(CConstRef<CSeq_align>(&align));
78 }
79 
80 
/// Re-initializes the builder for a Seq-annot.
///
/// Discards any previous state via x_Clear() and binds the builder to `scope`.
///
/// NOTE(review): the statement on listing line 86 is absent from this copy of
/// the file, so `annot` is not referenced by the visible code. Presumably the
/// missing line collects the alignments of `annot` into m_OrigAligns (the
/// Seq-entry overload does that with ExtractSeqAligns) -- confirm against the
/// repository version.
///
/// @param scope  scope the builder keeps a reference to (m_Scope)
/// @param annot  annotation to take the alignments from
81 void CAlnMultiDSBuilder::Init(objects::CScope& scope, const objects::CSeq_annot& annot)
82 {
83  x_Clear();
84 
85  m_Scope.Reset(&scope);
87 }
88 
89 
/// Re-initializes the builder for a Bioseq.
///
/// Discards any previous state via x_Clear() and binds the builder to `scope`.
///
/// NOTE(review): the statement on listing line 95 is absent from this copy of
/// the file, so `bioseq` is not referenced by the visible code. Presumably the
/// missing line collects the alignments found in `bioseq` into m_OrigAligns --
/// confirm against the repository version.
///
/// @param scope   scope the builder keeps a reference to (m_Scope)
/// @param bioseq  sequence object to take the alignments from
90 void CAlnMultiDSBuilder::Init(objects::CScope& scope, const objects::CBioseq& bioseq)
91 {
92  x_Clear();
93 
94  m_Scope.Reset(&scope);
96 }
97 
/// Re-initializes the builder for a Seq-entry.
///
/// Discards any previous state via x_Clear(), binds the builder to `scope`,
/// fills m_OrigAligns with ExtractSeqAligns(seq_entry, ...) and then adds every
/// object visited by the iterator `it` to the scope with AddBioseq().
///
/// NOTE(review): the declaration of `it` (listing line 105) is absent from
/// this copy of the file; presumably it iterates over the Bioseqs contained in
/// `seq_entry` -- confirm against the repository version.
///
/// @param scope      scope the builder keeps a reference to; it is also
///                   modified here through AddBioseq()
/// @param seq_entry  entry to take the alignments from
98 void CAlnMultiDSBuilder::Init(objects::CScope& scope, const objects::CSeq_entry& seq_entry)
99 {
100  x_Clear();
101 
102  m_Scope.Reset(&scope);
103  ExtractSeqAligns(seq_entry, m_OrigAligns);
104 
 // Register each object produced by `it` with the scope.
106  while(it) {
107  m_Scope->AddBioseq(*it);
108  ++it;
109  }
110 }
111 
/// Re-initializes the builder from the alignments reachable through a
/// Bioseq handle.
///
/// Discards any previous state via x_Clear(), binds the builder to `scope` and
/// appends the alignments produced by CAlign_CI(handle, sel) to m_OrigAligns.
/// Collection stops early: at most 22 alignments are stored (see the loop).
///
/// NOTE(review): the declaration of the selector `sel` (listing line 118) is
/// absent from this copy of the file -- confirm its flags against the
/// repository version.
///
/// @param scope   scope the builder keeps a reference to (m_Scope)
/// @param handle  sequence whose alignments are iterated
112 void CAlnMultiDSBuilder::Init(objects::CScope& scope, const objects::CBioseq_Handle& handle)
113 {
114  x_Clear();
115 
116  m_Scope.Reset(&scope);
117 
119  CAlign_CI it(handle, sel);
120  int i = 0;
121  for ( ; it; ++it) {
122  m_OrigAligns.push_back(CConstRef<CSeq_align>(&*it));
 // The test uses the value of i *before* the increment, so it first
 // succeeds on the 22nd pass (i == 21), after that alignment was stored.
 // NOTE(review): if the intended cap is 20, this is off by two -- confirm.
123  if(i++ > 20)
124  return;
125  }
126 }
127 
128 
129 void CAlnMultiDSBuilder::Init(objects::CScope& scope, TAlignVector& aligns)
130 {
131  x_Clear();
132 
133  m_Scope.Reset(&scope);
134  m_OrigAligns = aligns;
135 }
136 
137 
138 /// Initial data set from which an alignment will be built.
///
/// Discards any previous state via x_Clear(), binds the builder to `scope` and
/// walks `annots`, acting only on annotations whose data is of the Align
/// variant (GetData().IsAlign()).
///
/// NOTE(review): the body of the `if` (listing line 148) is absent from this
/// copy of the file; presumably it collects the alignments of `annot` into
/// m_OrigAligns -- confirm against the repository version.
///
/// @param scope   scope the builder keeps a reference to (m_Scope)
/// @param annots  annotations to scan for alignments
139 void CAlnMultiDSBuilder::Init(CScope& scope, TAnnotVector& annots)
140 {
141  x_Clear();
142 
143  m_Scope.Reset(&scope);
144 
145  ITERATE(TAnnotVector, it_annot, annots) {
146  const CSeq_annot& annot = **it_annot;
 // Annotations that do not carry alignments are ignored.
147  if(annot.GetData().IsAlign() ) {
149  }
150  }
151 }
152 
153 
155 {
156  m_Scope.Reset();
157  m_OrigAligns.clear();
158  m_AlnStats.Reset();
159  m_AnchoredAlns.clear();
160  m_MasterId.Reset();
161  m_CreateSparse = true;
162 }
163 
164 
166 {
167  m_CreateSparse = sparse;
168  if(m_CreateSparse) {
170  }
171 }
172 
173 
176 {
177  _TRACE( (unsigned long)m_OrigAligns.size() << " m_OrigAligns");
178 
180 
181  if(m_CreateSparse) {
182  return x_CreateSparseDataSource();
183  } else {
184  return x_CreateAlnVecDataSource();
185  }
186 }
187 
188 
189 void CAlnMultiDSBuilder::GetBioseqHandles(vector<CBioseq_Handle>& handles)
190 {
191  if(m_AlnStats) {
192  ITERATE (TAlnStats::TIdVec, it, m_AlnStats->GetIdVec()) {
193  const CSeq_id& sid = (*it)->GetSeqId();
194  const CBioseq_Handle h = m_Scope->GetBioseqHandle(sid);
195  handles.push_back(h);
196  }
197  }
198 }
199 
200 
202 {
203  m_AlnStats.Reset();
204  m_AnchoredAlns.clear();
205 
206  if(! m_OrigAligns.empty()) {
207  TIdExtract extractor;
208  TAlnIdMap aln_id_map(extractor, m_OrigAligns.size());
210  aln_id_map.push_back(**it);
211  }
212 
213  /// Create align statistics object
214  m_AlnStats.Reset(new TAlnStats(aln_id_map));
215  //TAlnStats aln_stats(aln_vector, ,
216  // m_SeqIdAlnBitmap->GetAnchorRows(),
217  // m_SeqIdAlnBitmap->GetBaseWidths());
218 
219  /// Construct a vector of anchored alignments
221 
222  //TODO use aln_stats to init m_Options
223  }
224 }
225 
226 
229 {
230  CStopWatch sw;
231  sw.Start();
232 
234  if(m_MasterId.GetPointer() && ! m_OrigAligns.empty()) {
235  _TRACE("Creating CSparseMultiDataSource");
237  if (m_Listener) sp_ds->SetListener(m_Listener);
238  ds.Reset(sp_ds);
240  }
241  _TRACE( 1000 * sw.Elapsed() << " ms" );
242  return ds;
243 }
244 
245 
248 {
250  if (m_Listener) ds->SetListener(m_Listener);
253 }
254 
255 
256 /// Analyzes m_OrigAligns and decides how to build an alignment from it
258 {
259  TAlignVector good_aligns;
260  x_GetLinearAlignments(good_aligns);
261 
262  // build alignment map
264  typedef map<CSeq_id_Handle, TAlignSet> TIDToAligns;
265  TIDToAligns align_map;
266 
267  ITERATE(TAlignVector, it_al, good_aligns) {
268  const CSeq_align& al = **it_al;
269 
270  for ( CTypeConstIterator<CSeq_id> it_id(al); it_id; ++it_id) {
272 
273  TIDToAligns::const_iterator it = align_map.find(idh);
274  if(it == align_map.end()) {
275  it = align_map.insert(TIDToAligns::value_type(idh, TAlignSet())).first;
276  }
277  TAlignSet& aln_set = const_cast<TAlignSet&>(it->second);
278  aln_set.insert(&al);
279  }
280  }
281 
282  /// select the ID that exists in the max number of alignments
283  size_t max_al = 0; /// max number of alignments for a single ID
284  CSeq_id_Handle max_h;
285  ITERATE(TIDToAligns, it_map, align_map) {
286  const TAlignSet& aln_set = it_map->second;
287  if(aln_set.size() > max_al) {
288  max_al = aln_set.size();
289  max_h = it_map->first;
290  }
291  }
292 
293  if(max_h) {
294  m_MasterId = max_h.GetSeqId();
295  }
296  string s_id = m_MasterId ? m_MasterId->GetSeqIdString() : "NULL";
297  _TRACE("Master ID " << s_id);
298 }
299 
300 
301 // selects alignments that have the same length on all sequences
303 {
304  typedef CSeq_align::C_Segs TSegs;
305  aligns.reserve(m_OrigAligns.size());
306 
307  // test every CSeq-align
309  const CSeq_align& align = **it;
310  const TSegs& segs = align.GetSegs();
311  bool linear = true;
312 
313  switch(segs.Which()) {
314  case TSegs::e_Denseg:
315  case TSegs::e_Dendiag:
316  break;
317  case TSegs::e_Std: {
318  ITERATE(TSegs::TStd, it_s, segs.GetStd()) {
319  const CStd_seg& std_seg = **it_s;
320  if(! x_IsLinear(std_seg)) {
321  linear = false;
322  break;
323  }
324  }
325  break;
326  }
327  case TSegs::e_Sparse:
328  case TSegs::e_Spliced:
329  break;
330  default:
331  linear = false; // other types currently not supported
332  break;
333  }
334  if(linear) {
335  aligns.push_back(*it);
336  }
337  }
338 }
339 
340 /// returns true if CStd_seg is linear
342 {
343  return false;
344 }
345 
346 
void CreateAnchoredAlnVec(_TAlnStats &aln_stats, TAnchoredAlnVec &out_vec, const CAlnUserOptions &options)
Create anchored alignment from each seq-align in the stats.
USING_SCOPE(ncbi::objects)
CAlign_CI –.
Definition: align_ci.hpp:63
Container mapping seq-aligns to vectors of participating seq-ids.
Definition: aln_tests.hpp:56
void push_back(const CSeq_align &aln)
Adding an alignment.
Definition: aln_tests.hpp:87
CConstRef< objects::CSeq_id > m_MasterId
CDataChangeNotifier::IListener * m_Listener
Try to set anchor automatically.
void GetBioseqHandles(vector< CBioseq_Handle > &handles)
bool m_CreateSparse
control alignment building
TAnchoredAlnVector m_AnchoredAlns
bool x_IsLinear(const objects::CStd_seg &seg)
returns true if CStd_seg is linear
void x_GetLinearAlignments(TAlignVector &aligns)
vector< CConstRef< objects::CSeq_align > > TAlignVector
void Init(objects::CScope &scope, const objects::CSeq_align &align)
initial data set from which an alignment will be built
CAlnStats< TAlnIdMap > TAlnStats
CRef< objects::CScope > m_Scope
CRef< IAlnMultiDataSource > x_CreateSparseDataSource()
vector< CConstRef< objects::CSeq_annot > > TAnnotVector
CRef< IAlnMultiDataSource > CreateDataSource()
CRef< IAlnMultiDataSource > x_CreateAlnVecDataSource()
CRef< TAlnStats > m_AlnStats
void x_TestAlignments()
Analyzes m_OrigAligns and decides how to build an alignment from it.
void PreCreateDataSource(bool sparse)
IAlnSeqId extracting functor.
_TAlnIdVec::TIdVec TIdVec
Vector of ids used in all alignments.
Definition: aln_stats.hpp:70
CAlnVecMultiDataSource - implementation of IAlnMultiDataSource for CAlnVec-based alignments.
CBioseq_Handle –.
virtual void SetListener(IListener *pListener)
Subscribe a new listener.
CScope –.
Definition: scope.hpp:92
virtual void Init(CSparseAln &align)
CStopWatch –.
Definition: ncbitime.hpp:1937
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
Definition: map.hpp:338
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
size_type size() const
Definition: set.hpp:132
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define _TRACE(message)
Definition: ncbidbg.hpp:122
static objects::SAnnotSelector GetAnnotSelector(TAnnotFlags flags=0)
request an annotation selector for a given type
Definition: utils.cpp:167
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:1684
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2775
void Start(void)
Start the timer.
Definition: ncbitime.hpp:2764
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsAlign(void) const
Check if variant Align is selected.
Definition: Seq_annot_.hpp:635
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
static CStopWatch sw
int i
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
Defines: CTimeFormat - storage class for time format.
set< CGeneModel, SAlignOrder > TAlignSet
Definition: score.cpp:195
void ExtractSeqAligns(const T &obj, vector< CConstRef< objects::CSeq_align > > &aligns)
This is the place where elements of the new Alignment Library will be collected.
SAnnotSelector –.
Modified on Sat May 25 14:22:07 2024 by modify_doxy.py rev. 669887