32 #include <ncbi_pch.hpp>
36 #include <corelib/ncbitime.hpp>
46 // Conversion function CSparse_align -> SAlignedSeq
47 SAlignedSeq* CreateAlignRow(const CSparse_align& align, bool master_first)
48 {
49  unique_ptr<SAlignedSeq> aln_seq(new SAlignedSeq());
50  aln_seq->m_SeqId.Reset(master_first ? &align.GetSecond_id()
51  : &align.GetFirst_id());
52  SAlignedSeq::TSignedRange& range = aln_seq->m_SecondRange;
54  // get references to the containers inside CSparse_align
55  const CSparse_align::TFirst_starts& starts_1 = align.GetFirst_starts();
56  const CSparse_align::TSecond_starts& starts_2 = align.GetSecond_starts();
57  const CSparse_align::TLens& lens = align.GetLens();
58  const CSparse_align::TSecond_strands* strands =
59  align.IsSetSecond_strands() ? &align.GetSecond_strands() : 0;
61  // create a new Align Collection
63  range.SetFrom(0).SetLength(0);
64  SAlignedSeq::TPos aln_from = -1, from = -1;
66  // iterate on Sparse-seg elements
67  typedef CSparse_align::TNumseg TNumseg;
68  for( TNumseg i = 0; i < align.GetNumseg(); i++ ) {
69  aln_from = master_first ? starts_1[i] : starts_2[i];
70  from = master_first ? starts_2[i] : starts_1[i];
71  SAlignedSeq::TPos len = lens[i];
72  bool dir = strands ? ((*strands)[i] == eNa_strand_plus) : true;
74  // update range
75  if(coll->empty()) {
76  range.SetFrom(aln_from);
77  range.SetLength(len);
78  } else {
79  range.SetFrom(min(range.GetFrom(), aln_from));
80  range.SetToOpen(max(range.GetToOpen(), aln_from + len));
81  }
83  coll->insert(SAlignedSeq::TAlignRange(aln_from, from, len, dir));
84  }
85  aln_seq->m_AlignColl = coll;
87  int dir = (coll->GetFlags() & SAlignedSeq::TAlignColl::fMixedDir);
89  // incorrect - do not return anything
90  return NULL;
91  } else if(dir == SAlignedSeq::TAlignColl::fReversed) {
92  aln_seq->m_NegativeStrand = true;
93  }
94  return aln_seq.release();
95 }
97 /// Converter
98 bool ConvertToPairwise(const CSeq_align& align,
99  const CSeq_id& master_id,
100  vector<SAlignedSeq*>& aln_seqs)
101 {
102  typedef CSeq_align::TSegs TSegs;
103  const TSegs& segs = align.GetSegs();
105  switch(segs.Which()) {
106  case TSegs::e_Denseg: {
107  const CDense_seg& dense_seg = segs.GetDenseg();
108  // find the row corresponding to master_id
109  const CDense_seg::TIds& ids = dense_seg.GetIds();
110  for( CDense_seg::TDim row = 0; row < dense_seg.GetDim(); row++ ) {
111  if(ids[row]->Equals(master_id)) {
112  return ConvertToPairwise(dense_seg, row, aln_seqs);
113  }
114  }
115  return false;
116  }
117  /// add code to support other types of Segs
118  default:
119  return false;
120  }
121 }
124 /// Converter
/// Runs the single-alignment ConvertToPairwise() on every element of
/// `aligns`, appending the produced rows to `aln_seqs`, and posts the
/// elapsed time through LOG_POST.
/// Returns true if at least one of the per-alignment calls returned true.
/// NOTE(review): the first line of this signature (return type, function
/// name and the `aligns` parameter) is not present in this listing -
/// restore it from the original file.
126  const CSeq_id& master_id,
127  vector<SAlignedSeq*>& aln_seqs)
128 {
129  CStopWatch timer;
130  timer.Start();
// stays false until one of the conversions below reports success
132  bool ok = false;
133  for( size_t i = 0; i < aligns.size(); i++ ) {
134  const CSeq_align& align = *aligns[i];
135  bool res = ConvertToPairwise(align, master_id, aln_seqs);
136  ok |= res;
137  }
// elapsed time is multiplied by 1000 and labelled "ms"
// NOTE(review): presumably Elapsed() reports seconds - confirm
138  LOG_POST("ConvertToPairwise( vector of CSeq_align) " << 1000 * timer.Elapsed() << " ms");
139  return ok;
140 }
143 /// Converter CSparse_seg -> SAlignedSeq-s
144 bool ConvertToPairwise(const CSparse_seg& sparse_seg, vector<SAlignedSeq*>& aln_seqs)
145 {
146  CConstRef<objects::CSeq_id> master_id(&sparse_seg.GetMaster_id());
148  typedef CSparse_seg::TRows TRows;
149  const TRows& rows = sparse_seg.GetRows();
150  TRows::const_iterator it = rows.begin();
152  // convert pairwise alignment to TAlignColl objects
153  for( ; it != rows.end(); ++it ) {
154  const CSparse_align& align = **it;
156  int master_index = -1;
157  if(master_id->Compare(align.GetFirst_id()) == CSeq_id::e_YES) {
158  master_index = 0;
159  } else if(master_id->Compare(align.GetSecond_id()) == CSeq_id::e_YES) {
160  master_index = 1;
161  }
163  if(master_index != -1) { // create an alignment row from this CSparse_align
164  SAlignedSeq* aln_seq = CreateAlignRow(align, master_index == 0);
165  if(aln_seq) {
166  aln_seqs.push_back(aln_seq);
167  }
168  } else {
169  LOG_POST(Error << "CreateAlignRow() - a CSparse_align is"
170  << "invalid, neither of its CSeq_ids match master id");
171  }
172  }
173  return true; // handle errors
174 }
177 bool ConvertToPairwise(const CDense_seg& dense_seg,
178  CDense_seg::TDim anchor_row,
179  vector<SAlignedSeq*>& aln_seqs)
180 {
181  typedef CDense_seg::TDim TDim;
183  for(TDim row = 0; row < dense_seg.GetDim(); row++ ) {
184  if(row != anchor_row) {
185  SAlignedSeq* aln_seq = CreateAlignRow(dense_seg, anchor_row, row);
186  if(aln_seq) {
187  aln_seqs.push_back(aln_seq);
188  }
189  }
190  }
191  return true;
192 }
195 /// Builder function
/// When aln_seqs is not empty, allocates a CSparseAlignment with `new`,
/// initializes it with Init(master_id, aln_seqs, scope) and returns it;
/// otherwise returns NULL.
/// NOTE(review): the returned object is a raw pointer - presumably the
/// caller takes ownership; confirm.
/// NOTE(review): the first line of this signature (return type, function
/// name and the `master_id` parameter) is not present in this listing -
/// restore it from the original file.
197  vector<SAlignedSeq*>& aln_seqs,
198  objects::CScope& scope)
199 {
200  if(! aln_seqs.empty()) {
201  CSparseAlignment* aln = new CSparseAlignment();
202  aln->Init(master_id, aln_seqs, scope);
203  return aln;
204  }
// nothing to build from an empty set of rows
205  return NULL;
206 }
209 /// Builder function
/// As written, this function performs no work: the branch taken for a
/// non-empty aln_seqs is empty and NULL is returned in every case.
/// NOTE(review): looks like an unfinished stub - confirm intent.
/// NOTE(review): the first line of this signature (return type, function
/// name and first parameter) is not present in this listing - restore it
/// from the original file.
211  vector<SAlignedSeq*>& aln_seqs,
212  objects::CScope& scope)
213 {
214  if(! aln_seqs.empty()) {
// intentionally or accidentally empty - see the note above
215  }
216  return NULL;
217 }
220 /// Converter Helper function
221 /// Creates an Align Collection from the two rows of a CDense_seg
223  CDense_seg::TDim row_1,
224  CDense_seg::TDim row_2)
225 {
226  _ASSERT(row_1 >=0 && row_1 < dense_seg.GetDim());
227  _ASSERT(row_2 >=0 && row_2 < dense_seg.GetDim());
229  unique_ptr<SAlignedSeq> aln_seq(new SAlignedSeq());
230  aln_seq->m_SeqId.Reset(dense_seg.GetIds()[row_2]);
231  SAlignedSeq::TSignedRange& range = aln_seq->m_SecondRange;
233  aln_seq->m_AlignColl = new SAlignedSeq::TAlignColl();
234  SAlignedSeq::TAlignColl& coll = *aln_seq->m_AlignColl;
236  typedef CDense_seg::TDim TDim;
237  typedef CDense_seg::TNumseg TNum;
239  const CDense_seg::TStarts& starts = dense_seg.GetStarts();
240  const CDense_seg::TLens& lens = dense_seg.GetLens();
241  const CDense_seg::TStrands* strands =
242  dense_seg.IsSetStrands() ? &dense_seg.GetStrands() : NULL;
244  // iterate by segements and add aligned segments to the collection
245  TDim n_rows = dense_seg.GetDim();
246  TNum n_seg = dense_seg.GetNumseg();
247  for( TNum i = 0; i < n_seg; i++ ) {
248  int offset = i * n_rows;
249  int from_1 = starts[row_1 + offset];
250  int from_2 = starts[row_2 + offset];
252  if(from_1 != -1 && from_2 != -1) { // not a gap
253  int len = lens[i];
254  bool direct = true;
255  if(strands) {
256  bool minus_1 = (*strands)[row_1 + offset] == eNa_strand_minus;
257  bool minus_2 = (*strands)[row_2 + offset] == eNa_strand_minus;
258  direct = (! minus_1 && ! minus_2) || (minus_1 == minus_2);
259  }
260  coll.insert(SAlignTools::TAlignRange(from_1, from_2, len, direct));
262  // update range
263  if(coll.empty()) {
264  range.SetFrom(from_1);
265  range.SetLength(len);
266  } else {
267  range.SetFrom(min(range.GetFrom(), from_1));
268  range.SetToOpen(max(range.GetToOpen(), from_1 + len));
269  }
270  }
271  }
272  //LOG_POST("GetAlignColl() rows [" << row_1 << ", " << row_2 << "]" << ", segments " << coll.size());
275  return aln_seq.release();
276 }
279 /// Creates Align Collection from a CSparse_seg
280 void GetAlignColl(const CSparse_align& sparse_align,
281  const CSeq_id& master_id,
283 {
284  coll.clear();
286  int index = -1;
287  if(master_id.Compare(sparse_align.GetFirst_id()) == CSeq_id::e_YES) {
288  index = 0;
289  } else if(master_id.Compare(sparse_align.GetSecond_id()) == CSeq_id::e_YES) {
290  index = 1;
291  }
292  if(index != -1) {
293  bool first = (index == 0);
294  const CSparse_align::TFirst_starts& starts_1 = sparse_align.GetFirst_starts();
295  const CSparse_align::TFirst_starts& starts_2 = sparse_align.GetSecond_starts();
296  const CSparse_align::TLens& lens = sparse_align.GetLens();
297  const CSparse_align::TSecond_strands* strands =
298  sparse_align.IsSetSecond_strands() ? &sparse_align.GetSecond_strands() : 0;
300  typedef CSparse_align::TNumseg TNumseg;
301  TNumseg n_seg = sparse_align.GetNumseg();
302  for( TNumseg i = 0; i < n_seg; i++ ) {
303  int from_1 = first ? starts_1[i] : starts_2[i];
304  int from_2 = first ? starts_2[i] : starts_1[i];
305  int len = lens[i];
306  bool direct = strands && ((*strands)[i] == eNa_strand_minus);
308  coll.insert(SAlignTools::TAlignRange(from_1, from_2, len, direct));
309  }
310  }
311 }
314 /// Reverse Converter
315 /// Converts Align Collection into a CSparse_align
/// The new CSparse_align receives copies of id_1 and id_2 and one segment
/// (first start, second start, length) per range in coll, in iteration order.
/// NOTE(review): the first line of this signature (return type, function
/// name and the `id_1` parameter) is not present in this listing - restore
/// it from the original file.
317  const CSeq_id& id_2,
318  const SAlignTools::TAlignColl& coll)
319 {
320  CRef<CSparse_align> align(new CSparse_align());
// the ids are copied with Assign() instead of sharing the caller's objects
322  CRef<CSeq_id> rid_1(new CSeq_id());
323  rid_1->Assign(id_1);
324  align->SetFirst_id(*rid_1);
326  CRef<CSeq_id> rid_2(new CSeq_id());
327  rid_2->Assign(id_2);
328  align->SetSecond_id(*rid_2);
330  // initialize containers
331  typedef CSparse_align::TNumseg TNumseg;
332  TNumseg n_seg = (TNumseg)coll.size();
333  align->SetNumseg(n_seg);
// every container gets exactly one slot per range in the collection
335  CSparse_align::TFirst_starts& starts_1 = align->SetFirst_starts();
336  starts_1.resize(n_seg);
337  CSparse_align::TFirst_starts& starts_2 = align->SetSecond_starts();
338  starts_2.resize(n_seg);
339  CSparse_align::TLens& lens = align->SetLens();
340  lens.resize(n_seg);
// NOTE(review): the declaration of `strands` and the condition that opens
// the block closed below are not present in this listing - restore them
// from the original file.
344  // there are reversed segments in the collection - need to fill "Strands"
345  strands = &align->SetSecond_strands();
346  strands->resize(n_seg);
347  }
349  // move data to the containers
350  TNumseg i = 0;
351  ITERATE(SAlignTools::TAlignColl, it, coll) {
352  const SAlignTools::TAlignRange& r = *it;
354  starts_1[i] = r.GetFirstFrom();
355  starts_2[i] = r.GetSecondFrom();
356  lens[i] = r.GetLength();
// strands are written only when the strands container was set up above
357  if(strands) {
358  (*strands)[i] = r.IsDirect() ? eNa_strand_plus : eNa_strand_minus;
359  }
360  i++;
361  }
363  return align;
364 }
