NCBI C++ ToolKit
cuBlock.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuBlock.cpp 87319 2019-08-19 16:00:07Z lanczyck $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  */
24 
25 #include <ncbi_pch.hpp>
30 
32 BEGIN_SCOPE(cd_utils)
33 
34 //Block //////////////////////////////////
35 
36 Block::Block (int start, int len, int id)
37  :m_len(len), m_start(start), m_id(id)
38 {
39 }
40 
41 Block::Block (int start, int len)
42  :m_len(len), m_start(start), m_id(-1)
43 {
44 }
45 
47  :m_len(-1), m_start(-1), m_id(-1)
48 {}
49 
50 Block::Block(const Block& rhs)
51 :m_len(rhs.m_len), m_start(rhs.m_start), m_id(rhs.m_id)
52 {
53 }
54 
55 const Block& Block::operator=(const Block& rhs)
56 {
57  m_start = rhs.m_start;
58  m_len = rhs.m_len;
59  m_id = rhs.m_id;
60  return *this;
61 }
62 
63 bool Block::operator==(const Block& rhs) const
64 {
65  return (m_start == rhs.m_start) && (m_len == rhs.m_len);
66 }
67 
68 bool Block::operator!=(const Block& rhs) const
69 {
70  return !((*this) == rhs);
71 }
72 
73 bool Block::contain(const Block& rhs) const
74 {
75  return (m_start <= rhs.getStart()) && (getEnd() >= rhs.getEnd());
76 }
77 
78 bool Block::isIntersecting(const Block& rhs)const
79 {
80  int x0 = m_start;
81  int x1 = getEnd();
82  int y0 = rhs.m_start;
83  int y1 = rhs.getEnd();
84  for (int y = y0; y <= y1; y++)
85  {
86  if ( y >= x0 && y <= x1)
87  return true;
88  }
89  return false;
90 }
91 
92 //shrink rhs to the intersection of rhs with this
93 bool Block::intersect(Block& rhs)const
94 {
95  int x0 = m_start;
96  int x1 = getEnd();
97  int y0 = rhs.m_start;
98  int y1 = rhs.getEnd();
99  int yStart = -1;
100  //int yEnd = -1;
101  bool intersected = false;
102  int y = y0;
103  for (; y <= y1; y++)
104  {
105  if (!intersected)
106  {
107  if ( y >= x0 && y <= x1)
108  {
109  intersected = true;
110  yStart = y;
111  }
112  }
113  else
114  {
115  if ( y < x0 || y > x1)
116  {
117  //yEnd = y - 1;
118  break;
119  }
120  }
121  }
122  if (intersected)
123  {
124  rhs.m_start = yStart;
125  rhs.setEnd(y-1);
126  }
127  return intersected;
128 }
129 
130 //return this + deltaBlock
131 //Block Block::applyDelta(const DeltaBlock& delta) const
133 {
134  return Block(m_start + delta.deltaStart, m_len + delta.deltaLen, delta.subjectBlockID);
135 }
136 
137 /* DeltaBlock:
138  int subjectBlockID;
139  int objectBlockID;
140  int deltaStart;
141  int deltaLen;
142 */
143 //return this-object
144 //DeltaBlock Block::getDelta (const Block& object) const
145 DeltaBlock Block::operator-(const Block& object)const
146 {
147  DeltaBlock delta = {m_id, object.m_id, m_start - object.m_start, m_len - object.m_len};
148  return delta;
149 }
150 
151 Block Block::extend (int nExt, int cExt) const
152 {
153  int start = m_start + nExt;
154  int len = m_len + cExt - nExt;
155  return Block(start, len, m_id);
156 }
157 
158 void Block::extendSelf (int nExt, int cExt)
159 {
160  m_start = m_start + nExt;
161  m_len = m_len + cExt - nExt;
162 }
163 
164 void Block::addOffset(int nExt)
165 {
166  m_start = m_start + nExt;
167 }
168 
170 {
171  if (blocks.size() ==0)
172  return false;
173  SortedBlocks::const_iterator sit = blocks.begin();
174  comBlock.m_id = sit->m_id;
175  comBlock.m_start = sit->m_start;
176  comBlock.m_len = sit->m_len;
177  sit++;
178  for (; sit != blocks.end(); ++sit)
179  {
180  if( (comBlock.getEnd() + 1) != sit->m_start)
181  //there is a gap between two adjacent blocks, can't join them
182  return false;
183  else
184  comBlock.m_len += sit->m_len;
185  }
186  return true;
187 }
188 
189 //BlockModel-------------------------------------------------
190 
192  : m_blocks(), m_seqId()
193 {
194 }
195 
196 BlockModel::BlockModel(const CRef< CSeq_align> seqAlign, bool forSlave)
197  : m_blocks(), m_seqId()
198 {
199  GetSeqID(seqAlign, m_seqId, forSlave);
200  vector<int> lens, starts;
201  GetBlockLengths(seqAlign, lens);
202  GetBlockStarts(seqAlign, starts, !forSlave);
203  assert(lens.size() == starts.size());
204  for (unsigned int i = 0; i < lens.size(); i++)
205  {
206  m_blocks.push_back(Block(starts[i], lens[i], i));
207  }
208 }
209 
210 BlockModel::BlockModel(CRef< CSeq_id > seqId, bool withOneBlock)
211  : m_seqId(seqId)
212 {
213  if (withOneBlock)
214  {
215  Block block(0,1,0);
216  m_blocks.push_back(block);
217  }
218 }
219 
221  : m_blocks(rhs.m_blocks), m_seqId(rhs.m_seqId)
222 {
223 }
224 
226 {
227  block.setId(m_blocks.size());
228  m_blocks.push_back(block);
229 }
230 
232 {
233  m_seqId = rhs.m_seqId;
234  m_blocks.assign(rhs.m_blocks.begin(), rhs.m_blocks.end());
235  return *this;
236 }
237 
238 bool BlockModel::isAlike(const BlockModel& rhs) const
239 {
240  if( SeqIdsMatch(m_seqId, rhs.m_seqId) &&( m_blocks.size() == rhs.m_blocks.size()) )
241  return true;
242  else
243  return false;
244 }
245 
246 
247 bool BlockModel::operator==(const BlockModel& rhs) const
248 {
249  if (!isAlike(rhs))
250  return false;
251  for ( unsigned int i = 0; i < m_blocks.size(); i++)
252  {
253  if (m_blocks[i] != rhs.m_blocks[i])
254  return false;
255  }
256  return true;
257 }
258 
259 bool BlockModel::blockMatch(const BlockModel& rhs) const
260 {
261  if (m_blocks.size() != rhs.m_blocks.size())
262  return false;
263  for ( unsigned int i = 0; i < m_blocks.size(); i++)
264  {
265  if (m_blocks[i].getLen() != rhs.m_blocks[i].getLen())
266  return false;
267  }
268  return true;
269 }
270 
271 bool BlockModel::completeModelExtendsIntoUnallowedGappedRegion(const BlockModel& completeModel, int sequenceLength, const vector<int>* commonBlockExt) const
272 {
273  unsigned int nBlocks = m_blocks.size(), commonBlockExtSize = (commonBlockExt) ? commonBlockExt->size() : 0;
274  unsigned int i, j, k;
275  int seqLen = -1;
276  int thisStart, thisLen, thisContiguousLen;
277  int commonNTermBlockZeroExt, nTermShift;
278  int completeBlockStart, completeBlockLen;
279  int prevCTermBlockExt, allowedCTermBlockExt, lastCTermBlockExt;
280  int blockNumberOnThisOfCompleteBlockStart;
281  vector<int> commonCTermBlockExt, gapSizeAfterBlockInThis;
282  bool useInputBlockExtensions = (commonBlockExt && commonBlockExtSize == nBlocks + 1);
283  bool completeExtendsIntoGappedRegionInThis = false;
284  BlockModel thisCopy(*this); // so can use non-const methods
285  BlockModel completeCopy(completeModel); // so can use non-const methods
286 
287 // string thisRowBlockStr = toString();
288 // string completeModelStr = completeModel.toString();
289 
290  commonNTermBlockZeroExt = (useInputBlockExtensions) ? (*commonBlockExt)[0] : 0;
291  for (i = 0; i < nBlocks; ++i)
292  {
293  if (i == nBlocks - 1) {
294  seqLen = sequenceLength;
295  }
296  gapSizeAfterBlockInThis.push_back(getGapToCTerminal(i, seqLen));
297  commonCTermBlockExt.push_back((useInputBlockExtensions) ? (*commonBlockExt)[i+1] : 0);
298  }
299 
300  // Make sure that there are enough residues for the complete model to
301  // not introduce a shift into the mapped row. Require that each complete block
302  // length not extend into more gaps than are common to all rows in the original block model.
303  prevCTermBlockExt = 0;
304  for (j = 0; j < completeCopy.getBlocks().size() && !completeExtendsIntoGappedRegionInThis; ++j) {
305  completeBlockStart = completeCopy.getBlock(j).getStart();
306  completeBlockLen = completeCopy.getBlock(j).getLen();
307  blockNumberOnThisOfCompleteBlockStart = thisCopy.getBlockNumber(completeBlockStart);
308 
309  // If the completeBlock starts on an unaligned residue of this, getBlockNumber returns -1.
310  // In that case, find first aligned residue in the complete block and how many residues into
311  // N-terminal gap are needed for the mapping.
312  nTermShift = 0;
313  while (blockNumberOnThisOfCompleteBlockStart < 0 && nTermShift+1 < completeBlockLen) {
314  ++nTermShift;
315  blockNumberOnThisOfCompleteBlockStart = thisCopy.getBlockNumber(completeBlockStart + nTermShift);
316  }
317  // make sure didn't previously use all residues on C-term extension; if not enough
318  // to do N-terminal extension, set nTermShift to zero.
319  if (nTermShift > 0 && blockNumberOnThisOfCompleteBlockStart >= 0) {
320  if (blockNumberOnThisOfCompleteBlockStart == 0) {
321  if (commonNTermBlockZeroExt - nTermShift < 0) {
322  nTermShift = 0;
323  }
324  } else {
325  if (gapSizeAfterBlockInThis[blockNumberOnThisOfCompleteBlockStart - 1] - prevCTermBlockExt < nTermShift) {
326  nTermShift = 0;
327  }
328  }
329  } else if (blockNumberOnThisOfCompleteBlockStart < 0) {
330  nTermShift = 0;
331  }
332 
333  allowedCTermBlockExt = commonCTermBlockExt[blockNumberOnThisOfCompleteBlockStart];
334 
335  thisStart = thisCopy.getBlock(blockNumberOnThisOfCompleteBlockStart).getStart();
336  thisLen = thisCopy.getBlock(blockNumberOnThisOfCompleteBlockStart).getLen();
337 
338  /*
339  // There's no gap after this block; find next gap and total number of residues to it;
340  // an adjacent block is only possible if there is no allowed extension in the block.
341  if (allowedCTermBlockExt == 0) {
342  k = (unsigned) blockNumberOnThisOfCompleteBlockStart;
343  while (k + 1 < gapSizeAfterBlockInThis.size() && gapSizeAfterBlockInThis[k] == 0) {
344  ++k;
345  thisLen += thisCopy.getBlock(k).getLen();
346  }
347  }
348  */
349 
350  // If we've determined there aren't enough N-terminal residues to pad out this to
351  // conform to the complete model, we've done an illegal extension into gapped region.
352  if (thisStart - completeBlockStart > nTermShift) {
353  completeExtendsIntoGappedRegionInThis = true;
354  }
355 
356  // There are enough aligned residues from the start position on child to
357  // grow the block completely w/o extending into disallowed gap regions.
358  else if (thisLen + allowedCTermBlockExt - (completeBlockStart - thisStart) >= completeBlockLen) {
359  prevCTermBlockExt = completeBlockLen - thisLen + (completeBlockStart - thisStart);
360  if (prevCTermBlockExt < 0) prevCTermBlockExt = 0;
361 // prevCTermBlockExt = allowedCTermBlockExt;
362  }
363  else {
364 
365  // When complete block is large and covers several blocks in this, see if
366  // there is a long enough contiguous length so can safely map all blocks
367  // into the single large one. Stops being contiguous when any gap in this is
368  // larger than the specified common gap.
369  k = (unsigned) blockNumberOnThisOfCompleteBlockStart;
370  thisContiguousLen = thisLen + allowedCTermBlockExt;
371  lastCTermBlockExt = allowedCTermBlockExt;
372  while (k + 1 < gapSizeAfterBlockInThis.size() && gapSizeAfterBlockInThis[k] == commonCTermBlockExt[k]) {
373  ++k;
374  lastCTermBlockExt = gapSizeAfterBlockInThis[k];
375  thisContiguousLen += thisCopy.getBlock(k).getLen() + lastCTermBlockExt;
376  }
377 
378 
379  if (thisContiguousLen - (completeBlockStart - thisStart) >= completeBlockLen) {
380  // this is case where have to extend a contiguous range in this to cover complete block;
381  // since it's contiguous, only need an n-terminal extension in next block if used the
382  // final gap in the contiguous length; otherwise, prevCTermBlockExt set to zero.
383  // prevCTermBlockExt = completeBlockLen - thisContiguousLen + (completeBlockStart - thisStart);
384  prevCTermBlockExt = thisContiguousLen - completeBlockLen - (completeBlockStart - thisStart);
385  if (prevCTermBlockExt > lastCTermBlockExt || prevCTermBlockExt < 0) {
386  prevCTermBlockExt = 0;
387  }
388  } else {
389  completeExtendsIntoGappedRegionInThis = true;
390  }
391  }
392 
393  }
394 
395  return completeExtendsIntoGappedRegionInThis;
396 }
397 
398 bool BlockModel::contain(const BlockModel& rhs) const
399 {
400  if (!isAlike(rhs))
401  return false;
402  for ( unsigned int i = 0; i < m_blocks.size(); i++)
403  {
404  if (!m_blocks[i].contain(rhs.m_blocks[i]))
405  return false;
406  }
407  return true;
408 }
409 //delta = bm - this
410 //bool BlockModel::getCastingDelta(const BlockModel& bm, DeltaBlockModel& delta) const
411 //return (this-delta), status). status = true if complete
412 //complete means every block in this should be accounted for for at least once
413 pair<DeltaBlockModel*, bool> BlockModel::operator-(const BlockModel& bm) const
414 {
416  set<DeltaBlock> uniqueDelta;
417  for (unsigned int i = 0; i < bm.m_blocks.size(); i++)
418  {
419  minusOneBlock(bm.m_blocks[i], *delta);
420  }
421  for (DeltaBlockModel::iterator dit = delta->begin(); dit != delta->end(); dit++)
422  {
423  uniqueDelta.insert(*dit);
424  }
425  delta->clear();
426  for (set<DeltaBlock>::iterator sit = uniqueDelta.begin(); sit != uniqueDelta.end(); sit++)
427  delta->insert(*sit);
428  return pair<DeltaBlockModel*, bool>(delta, delta->size() == m_blocks.size());
429 }
430 
431 pair<DeltaBlockModel*, bool> BlockModel::intersect(const BlockModel& bm) const
432 {
434 
435  for (unsigned int i = 0; i < bm.m_blocks.size(); i++)
436  {
437  intersectOneBlock(bm.m_blocks[i], *delta);
438  }
439  return pair<DeltaBlockModel*, bool>(delta, delta->size() == m_blocks.size());
440 }
441 
442 
443 //return(this + delta, complete)
444 pair<BlockModel*, bool> BlockModel::operator+(const DeltaBlockModel& delta) const
445 {
446  BlockModel* result = new BlockModel();
448  result->m_seqId = m_seqId;
449  int resultBlockID = 0;
450  for (; dt != delta.end(); ++dt)
451  {
452  //const DeltaBlock& db = *dt;
453  if (dt->objectBlockID < 0 || dt->objectBlockID >= (int) m_blocks.size())
454  {
455  delete result;
456  return pair<BlockModel*, bool>(nullptr, false);
457  }
458  const Block& srcBlock = m_blocks[dt->objectBlockID];
459  Block block = srcBlock + (*dt);
460  if (block.getLen() > 0 && block.getStart() >=0)
461  result->m_blocks.push_back(block);
462  resultBlockID++;
463  }
464  //int eb;
465  return pair<BlockModel*, bool>(result, (result->getBlocks().size() == delta.size()) );
466 }
467 
468 //cast this to target
470 {
471  pair<DeltaBlockModel*, bool> deltaStatus = target - (*this);
472 // string dbmStr = DeltaBlockModelToString(*deltaStatus.first);
473  if (!(deltaStatus.second)) //not complete
474  {
475  delete deltaStatus.first;
476  return 0;
477  }
478  pair<BlockModel*, bool> bmStatus = (*this) + (*deltaStatus.first);
479 // string bmStr = (*bmStatus.first).toString();
480  delete deltaStatus.first;
481  if (bmStatus.second)
482  {
483  if (target.contain(*(bmStatus.first)))
484  return bmStatus.first;
485  }
486  //all other conditions are considered fail
487  delete bmStatus.first;
488  return 0;
489 
490 }
491 
492 //this - aBlock
494 {
495  //const Block& myBlock = m_blocks[myBlockNum];
496  vector<int> blockIds;
497  findIntersectingBlocks(aBlock, blockIds);
498  if (blockIds.size() <= 0)
499  return false;
500  for (unsigned int j = 0; j < blockIds.size(); j++)
501  {
502  delta.insert(m_blocks[blockIds[j]] - aBlock);
503  }
504  return true;
505 }
506 
507 //this intersect aBlock
509 {
510  //const Block& myBlock = m_blocks[myBlockNum];
511  vector<int> blockIds;
512  findIntersectingBlocks(aBlock, blockIds);
513  if (blockIds.size() <= 0)
514  return false;
515  for (unsigned int j = 0; j < blockIds.size(); j++)
516  {
517  Block intersectedBlock(aBlock);
518  if (m_blocks[blockIds[j]].intersect(intersectedBlock))
519  delta.insert(intersectedBlock - aBlock);
520  }
521  return true;
522 }
523 
524 void BlockModel::findIntersectingBlocks(const Block& target, vector<int>& result) const
525 {
526  for (unsigned int i = 0; i < m_blocks.size(); i++)
527  {
528  if(target.isIntersecting(m_blocks[i]))
529  result.push_back(i);
530  }
531 }
532 /*
533 const BlockModel& BlockModel::operator+=(const BlockModel& delta)
534 {
535  if (!isAlike(delta))
536  return *this;
537  for ( int i = 0; i < m_blocks.size(); i++)
538  {
539  m_blocks[i] += delta.m_blocks[i];
540  }
541  return *this;
542 }
543 
544 BlockModel BlockModel::operator-(const BlockModel& rhs)
545 {
546  if(!isAlike(rhs))
547  return *this;
548  BlockModel delta;
549  for ( int i = 0; i < m_blocks.size(); i++)
550  {
551  delta.m_blocks.push_back(m_blocks[i] - rhs.m_blocks[i]);
552  }
553  return delta;
554 }
555 */
556 
558 {
559  CRef<CSeq_align> sa;
560  int eb;
561  if (!master.isValid(-1, eb))
562  return sa;
563  if (!isValid(-1, eb))
564  return sa;
565  if (!blockMatch(master))
566  return sa;
567 
568  sa = new CSeq_align();
569  sa->Reset();
571  sa->SetDim(2);
572  TDendiag& ddList = sa->SetSegs().SetDendiag();
573  for (unsigned int i = 0; i < m_blocks.size(); i++)
574  {
576  dd->SetDim(2);
577  vector< CRef< CSeq_id > >& seqIds = dd->SetIds();
578 
579  CRef< CSeq_id > masterCopy(new CSeq_id), slaveCopy(new CSeq_id);
580  masterCopy->Assign(*(master.getSeqId()));
581  slaveCopy->Assign(*getSeqId());
582  seqIds.push_back(masterCopy);
583  seqIds.push_back(slaveCopy);
584 
585  CDense_diag::TStarts& starts = dd->SetStarts();
586  starts.push_back(master.m_blocks[i].getStart());
587  starts.push_back(m_blocks[i].getStart());
588  dd->SetLen(m_blocks[i].getLen());
589 
590  ddList.push_back(dd);
591  }
592 
593  return sa;
594 }
595 
597 {
598  const Block& lastBlock = *(m_blocks.rbegin());
599  return lastBlock.getEnd();
600 }
601 
603 {
604  const Block& firstBlock = *(m_blocks.begin());
605  return firstBlock.getStart();
606 }
607 
609 {
610  int len = 0;
611  for (unsigned int i = 0; i < m_blocks.size(); i++)
612  {
613  len += m_blocks[i].getLen();
614  }
615  return len;
616 }
617 
619 {
620  int gap = 0;
621  if (bn == 0)
622  gap = m_blocks[bn].getStart();
623  else
624  {
625  int delta = m_blocks[bn].getStart() - m_blocks[bn - 1].getEnd() - 1;
626  if (delta >= 0)
627  gap = delta;
628  }
629  return gap;
630 }
631 
632 int BlockModel::getGapToCTerminal(int bn, int len)const
633 {
634  int gap = 0;
635  if (bn == (int) (m_blocks.size() - 1)) //last blast
636  {
637  if (len > 0)
638  gap = (len - 1) - m_blocks[bn].getEnd();
639  }
640  else
641  {
642  int delta = m_blocks[bn + 1].getStart() - m_blocks[bn].getEnd() - 1;
643  if (delta >= 0)
644  gap = delta;
645  }
646  return gap;
647 }
648 
649 void BlockModel::addOffset(int nExt)
650 {
651  for (unsigned int i = 1; i < m_blocks.size(); i++)
652  {
653  m_blocks[i].addOffset(nExt);
654  }
655 }
656 
657 bool BlockModel::isValid(int seqLen, int& errBlock) const
658 {
659  if (m_blocks.size() == 0)
660  return false;
661  if (seqLen > 1 && getLastAlignedPosition() >= seqLen)
662  {
663  errBlock = m_blocks.size() - 1;
664  return false;
665  }
666  if (!m_blocks[0].isValid())
667  {
668  errBlock = 0;
669  return false;
670  }
671  for (unsigned int i = 1; i < m_blocks.size(); i++)
672  {
673  if (!m_blocks[i].isValid())
674  {
675  errBlock = (int) i;
676  return false;
677  }
678  if (m_blocks[i-1].getEnd() >= m_blocks[i].getStart())
679  {
680  errBlock = (int) i - 1;
681  return false;
682  }
683  }
684  return true;
685 }
686 
687 
689 {
690  if (!SeqIdsMatch(m_seqId, bm.m_seqId))
691  return false;
692  int bmLo = bm.getFirstAlignedPosition();
693  int bmHi = bm.getLastAlignedPosition();
694  int lo = getFirstAlignedPosition();
695  int hi = getLastAlignedPosition();
696  if (lo <= bmLo)
697  return hi >= bmLo;
698  else
699  return lo <= bmHi;
700 }
701 
702 //return -1 if pos is unaligned
703 int BlockModel::getBlockNumber(int pos) const
704 {
705  int i = 0;
706  for (; i < (int) m_blocks.size(); i++)
707  {
708  if (pos >= m_blocks[i].getStart() && pos <= m_blocks[i].getEnd())
709  break;
710  }
711  if (i >= (int) m_blocks.size())
712  return -1;
713  else
714  return i;
715 }
716 
717 bool BlockModel::mask(const BlockModel& maskBlockModel)
718 {
719  bool result = (SeqIdsMatch(getSeqId(), maskBlockModel.getSeqId()));
720  if (result) {
721  result = mask(maskBlockModel.getBlocks());
722  }
723  return result;
724 }
725 
726 // Returns true if this object was modified; false if there was no effect.
727 bool BlockModel::mask(const vector<Block>& maskBlocks)
728 {
729  vector<Block> maskedBlocks;
730  vector<Block>& originalBlocks = getBlocks();
731  unsigned int i, nOrigBlocks = originalBlocks.size();
732  unsigned int nMaskBlocks = maskBlocks.size();
733  int origTotalLength = getTotalBlockLength();
734  int newBlockId = 0;
735  int pos, start, len;
736  int origStart, origEnd;
737  int j, maskBlockStart, maskBlockEnd, maskFirst, maskLast;
738  bool hasEffect;
739 
740  if (nOrigBlocks == 0 || nMaskBlocks == 0) return false;
741 
742  // Collect all mask positions to simplify search code.
743  set<int> maskPositions;
744  set<int>::iterator maskPosEnd;
745  for (i = 0; i < nMaskBlocks; ++i) {
746  maskBlockStart = maskBlocks[i].getStart();
747  maskBlockEnd = maskBlocks[i].getEnd();
748  for (j = maskBlockStart; j <= maskBlockEnd; ++j) maskPositions.insert(j);
749  }
750  maskPosEnd = maskPositions.end();
751 
752  maskFirst = maskBlocks[0].getStart();
753  maskLast = maskBlocks.back().getEnd();
754 
755  for (i = 0; i < nOrigBlocks; ++i) {
756  origStart = originalBlocks[i].getStart();
757  origEnd = originalBlocks[i].getEnd();
758 
759  // If origBlock does not intersects the maskBlocks footprint, it is unmasked and can be directly copied.
760  if (origEnd < maskFirst || origStart > maskLast) {
761  maskedBlocks.push_back(originalBlocks[i]);
762  maskedBlocks.back().setId(newBlockId);
763  ++newBlockId;
764  continue;
765  }
766 
767  start = -1;
768  len = 0;
769  for (pos = origStart; pos <= origEnd; ++pos) {
770 
771  // If position is masked; end current block.
772  if (maskPositions.find(pos) != maskPosEnd) {
773  if (len > 0) {
774  maskedBlocks.push_back(Block(start, len, newBlockId));
775  ++newBlockId;
776  }
777  len = 0;
778  start = -1;
779  }
780  else
781  {
782  // Found the first position in a new block.
783  if (len == 0) {
784  start = pos;
785  }
786  ++len;
787  }
788 
789  } // end loop on original block positions
790 
791  // Make sure to include the block at the end...
792  if (len > 0) {
793  maskedBlocks.push_back(Block(start, len, newBlockId));
794  }
795 
796  } // end loop on original blocks
797 
798  _ASSERT(getTotalBlockLength() <= origTotalLength);
799  hasEffect = (getTotalBlockLength() != origTotalLength);
800  if (hasEffect) {
801  originalBlocks.clear();
802  originalBlocks = maskedBlocks;
803  }
804 
805  return hasEffect;
806 }
807 
808 void BlockModel::clipToRange(unsigned int min, unsigned max)
809 {
810  unsigned int nBlocks = getBlocks().size();
811  if (nBlocks == 0) return;
812 
813  vector<Block> maskBlocks;
814  int firstAligned = getBlocks().front().getStart();
815  int lastAligned = getBlocks().back().getEnd();
816 
817  // Add a masking block for all residues < min.
818  if (firstAligned < (int) min) {
819  maskBlocks.push_back(Block(firstAligned, min - firstAligned));
820  }
821  // Add a masking block for all residues > max.
822  if (lastAligned > (int) max) {
823  maskBlocks.push_back(Block(max + 1, lastAligned - max));
824  }
825 
826  mask(maskBlocks);
827 }
828 
829 
830 string BlockModel::toString() const
831 {
832  string blockModelStr, tmp;
833  unsigned int nBlocks = m_blocks.size();
834 
835  if (m_seqId.NotEmpty()) {
836  blockModelStr = "Sequence: " + GetSeqIDStr(m_seqId) + "\n";
837  }
838 
839  for (unsigned int i = 0; i < nBlocks; ++i) {
840  tmp = " Block Id = " + NStr::IntToString(m_blocks[i].getId());
841  tmp += "; Block Range = [" + NStr::IntToString(m_blocks[i].getStart());
842  tmp += ", " + NStr::IntToString(m_blocks[i].getEnd());
843  tmp += "] (Length = " + NStr::IntToString(m_blocks[i].getLen()) + ")\n";
844  blockModelStr += tmp;
845  }
846  return blockModelStr;
847 }
848 
850 {
851  string deltaBlockModelStr, tmp;
852  DeltaBlockModel::const_iterator dbm_cit = dbm.begin(), dbm_citEnd = dbm.end();
853 
854  for (; dbm_cit != dbm_citEnd; ++dbm_cit) {
855  tmp = " Delta Block Subject Id = " + NStr::IntToString(dbm_cit->subjectBlockID);
856  tmp += "; Delta Block Object Id = " + NStr::IntToString(dbm_cit->objectBlockID);
857  tmp += "; Delta Block Start = " + NStr::IntToString(dbm_cit->deltaStart);
858  tmp += "; Delta Block Len = " + NStr::IntToString(dbm_cit->deltaLen);
859  tmp += "\n";
860  deltaBlockModelStr += tmp;
861  }
862  return deltaBlockModelStr;
863 }
864 
866  return bm.toString();
867 }
868 
869 //implement BlockModel Pair
870 
872 m_master(0), m_slave(0)
873 {
874  m_master = new BlockModel();
875  m_slave = new BlockModel();
876 }
877 
879 {
880  m_master = new BlockModel(seqAlign, false);
881  m_slave = new BlockModel(seqAlign, true);
882 }
883 
884 //deep copy
886 {
887  if (rhs.m_master) {
888  m_master = new BlockModel(*rhs.m_master);
889  }
890  if (rhs.m_slave) {
891  m_slave = new BlockModel(*rhs.m_slave);
892  }
893 }
894 
896 {
897  // The copy can fail if rhs has null pointers.
898  delete m_master;
899  delete m_slave;
900  m_master = NULL;
901  m_slave = NULL;
902  if (rhs.m_master) {
903  m_master = new BlockModel(*rhs.m_master);
904  }
905  if (rhs.m_slave) {
906  m_slave = new BlockModel(*rhs.m_slave);
907  }
908  return *this;
909 }
910 
912 {
913  delete m_master;
914  delete m_slave;
915 }
916 
918 {
919  delete m_master;
920  delete m_slave;
921  m_master = new BlockModel();
922  m_slave = new BlockModel();
923 }
924 
926 {
927  return *m_master;
928 }
929 
931 {
932  return *m_master;
933 }
934 
936 {
937  return *m_slave;
938 }
939 
941 {
942  return *m_slave;
943 }
944 
946 {
948  int num = m_master->getBlocks().size();
949  for (int i = 0; i < num; i++)
950  {
951  extendMidway(i);
952  }
953 }
954 
956 {
957  int ngap = m_master->getGapToNTerminal(blockNum);
958  if (blockNum == 0) //do N-extend the first block
959  ngap = 0;
960  int ngapSlave = m_slave->getGapToNTerminal(blockNum);
961  if (ngap > ngapSlave)
962  ngap = ngapSlave;
963  int cgap = m_master->getGapToCTerminal(blockNum);
964  int cgapSlave = m_slave->getGapToCTerminal(blockNum);
965  if (cgap > cgapSlave)
966  cgap = cgapSlave;
967 
968  //c-ext of last block has taken half of the gap, so take what's left here
969  // negative for moving to N-end.
970  int nExt = -ngap;
971  int cExt = 0;
972  if (blockNum != (int) (m_master->getBlocks().size()-1) ) //do not C-extend the last block
973  {
974  if ((cgap % 2) == 0)
975  cExt = cgap/2;
976  else
977  cExt = cgap/2 + 1;
978  }
979  m_master->getBlock(blockNum).extendSelf(nExt, cExt);
980  m_slave->getBlock(blockNum).extendSelf(nExt, cExt);
981 }
982 
984 {
985  return m_slave->toSeqAlign(*m_master);
986 }
987 
988 
989 int BlockModelPair::mapToMaster(int slavePos) const
990 {
991  int bn = m_slave->getBlockNumber(slavePos);
992  if (bn < 0)
993  return -1;
994  return m_master->getBlock(bn).getStart() + (slavePos - m_slave->getBlock(bn).getStart());
995 }
996 
997 int BlockModelPair::mapToSlave(int masterPos) const
998 {
999  int bn = m_master->getBlockNumber(masterPos);
1000  if (bn < 0)
1001  return -1;
1002  return m_slave->getBlock(bn).getStart() + (masterPos - m_master->getBlock(bn).getStart());
1003 }
1004 
1006 {
1007  return m_master->blockMatch(*m_slave);
1008 }
1009 
1010 //assume this.master is the same as guide.master
1011 //change this.master to guide.slave
1012 //
1014 {
1015  if (!SeqIdsMatch(getMaster().getSeqId(),guide.getMaster().getSeqId()))
1016  return 0;
1017  //convert guide.slave to the intersection of this.master and guide.master
1018  pair<DeltaBlockModel*, bool> deltaGuideToThis = m_master->intersect(guide.getMaster());
1019  pair<BlockModel*, bool> intersectedGuideSlave = guide.getSlave() + *(deltaGuideToThis.first);
1020  //convert this.slave to the intersection of this.master and guide.master
1021  pair<DeltaBlockModel*, bool> deltaThisToGuide = guide.getMaster().intersect(*m_master);
1022  pair<BlockModel*, bool> intersectedThisSlave = (*m_slave) + *(deltaThisToGuide.first);
1023  assert((intersectedGuideSlave.first)->blockMatch(*(intersectedThisSlave.first)));
1024  delete m_master;
1025  delete m_slave;
1026  m_master = intersectedGuideSlave.first;
1027  m_slave = intersectedThisSlave.first;
1028  return m_master->getTotalBlockLength();
1029 }
1030 
1031 bool BlockModelPair::mask(const vector<Block>& maskBlocks, bool maskBasedOnMaster)
1032 {
1033  if (!m_master || !m_slave || !isValid()) return false;
1034 
1035  bool hasEffect = false;
1036  BlockModel& maskedBm = (maskBasedOnMaster) ? getMaster() : getSlave();
1037  vector<Block>& newBlocks = maskedBm.getBlocks();
1038  unsigned int i, nBlocks = newBlocks.size();
1039  int pos, mappedPos;
1040 
1041  // First, mask the primary BlockModel in the pair.
1042  if (maskBlocks.size() > 0 && nBlocks > 0 && maskedBm.mask(maskBlocks)) {
1043 
1044  hasEffect = true;
1045 
1046  // Then, fix up the other BlockModel in the pair to match the new masked primary BlockModel.
1047  vector<Block>& newOtherBlocks = (maskBasedOnMaster) ? getSlave().getBlocks() : getMaster().getBlocks();
1048  newOtherBlocks.clear();
1049  for (i = 0; i < nBlocks; ++i) {
1050  Block& b = newBlocks[i];
1051  pos = b.getStart();
1052  mappedPos = (maskBasedOnMaster) ? mapToSlave(pos) : mapToMaster(pos);
1053  if (mappedPos >= 0) { // should never fail if bmp.isValid is true.
1054  newOtherBlocks.push_back(Block(mappedPos, b.getLen(), b.getId()));
1055  }
1056  }
1057  }
1058  return hasEffect;
1059 }
1060 
1061 //reverse the master vs slave
1063 {
1064  BlockModel* bm = m_master;
1065  m_master = m_slave;
1066  m_slave = bm;
1067 }
1068 
1069 END_SCOPE(cd_utils)
void degap()
Definition: cuBlock.cpp:945
BlockModel & getMaster()
Definition: cuBlock.cpp:925
BlockModel & getSlave()
Definition: cuBlock.cpp:935
int mapToSlave(int masterPos) const
Definition: cuBlock.cpp:997
BlockModel * m_master
Definition: cuBlock.hpp:184
BlockModelPair & operator=(const BlockModelPair &rhs)
Definition: cuBlock.cpp:895
BlockModel * m_slave
Definition: cuBlock.hpp:185
CRef< CSeq_align > toSeqAlign() const
Definition: cuBlock.cpp:983
void reset()
Definition: cuBlock.cpp:917
int mapToMaster(int slavePos) const
Definition: cuBlock.cpp:989
int remaster(const BlockModelPair &guide)
Definition: cuBlock.cpp:1013
bool isValid() const
Definition: cuBlock.cpp:1005
void reverse()
Definition: cuBlock.cpp:1062
void extendMidway(int blockNum)
Definition: cuBlock.cpp:955
bool mask(const vector< Block > &maskBlocks, bool maskBasedOnMaster)
Definition: cuBlock.cpp:1031
Block & getBlock(int bn)
Definition: cuBlock.hpp:98
int getTotalBlockLength() const
Definition: cuBlock.cpp:608
bool mask(const BlockModel &maskBlockModel)
Definition: cuBlock.cpp:717
BlockModel * completeCastTo(const BlockModel &target) const
Definition: cuBlock.cpp:469
CRef< CSeq_id > getSeqId() const
Definition: cuBlock.hpp:102
void addOffset(int nExt)
Definition: cuBlock.cpp:649
int getLastAlignedPosition() const
Definition: cuBlock.cpp:596
bool isValid(int seqLen, int &errBlock) const
Definition: cuBlock.cpp:657
bool contain(const BlockModel &rhs) const
Definition: cuBlock.cpp:398
CRef< CSeq_align > toSeqAlign(const BlockModel &master) const
Definition: cuBlock.cpp:557
int getFirstAlignedPosition() const
Definition: cuBlock.cpp:602
bool completeModelExtendsIntoUnallowedGappedRegion(const BlockModel &completeModel, int sequenceLength, const vector< int > *commonBlockExt=NULL) const
Definition: cuBlock.cpp:271
void addBlock(Block &block)
Definition: cuBlock.cpp:225
int getGapToNTerminal(int bn) const
Definition: cuBlock.cpp:618
int getGapToCTerminal(int bn, int len=-1) const
Definition: cuBlock.cpp:632
vector< Block > m_blocks
Definition: cuBlock.hpp:137
pair< DeltaBlockModel *, bool > intersect(const BlockModel &bm) const
Definition: cuBlock.cpp:431
void findIntersectingBlocks(const Block &target, vector< int > &result) const
Definition: cuBlock.cpp:524
vector< Block > & getBlocks()
Definition: cuBlock.hpp:97
void clipToRange(unsigned int min, unsigned max)
Definition: cuBlock.cpp:808
pair< DeltaBlockModel *, bool > operator-(const BlockModel &bm) const
Definition: cuBlock.cpp:413
bool overlap(const BlockModel &bm) const
Definition: cuBlock.cpp:688
BlockModel & operator=(const BlockModel &rhs)
Definition: cuBlock.cpp:231
bool blockMatch(const BlockModel &bm) const
Definition: cuBlock.cpp:259
string toString() const
Definition: cuBlock.cpp:830
bool minusOneBlock(const Block &aBlock, DeltaBlockModel &delta) const
Definition: cuBlock.cpp:493
int getBlockNumber(int pos) const
Definition: cuBlock.cpp:703
pair< BlockModel *, bool > operator+(const DeltaBlockModel &delta) const
Definition: cuBlock.cpp:444
bool isAlike(const BlockModel &rhs) const
Definition: cuBlock.cpp:238
bool intersectOneBlock(const Block &aBlock, DeltaBlockModel &delta) const
Definition: cuBlock.cpp:508
CRef< CSeq_id > m_seqId
Definition: cuBlock.hpp:138
bool operator==(const BlockModel &rhs) const
Definition: cuBlock.cpp:247
DeltaBlock operator-(const Block &object) const
Definition: cuBlock.cpp:145
bool intersect(Block &rhs) const
Definition: cuBlock.cpp:93
bool isIntersecting(const Block &rhs) const
Definition: cuBlock.cpp:78
static bool concatenate(const SortedBlocks &blocks, Block &comBlock)
Definition: cuBlock.cpp:169
Block()
Definition: cuBlock.cpp:46
void setId(int id)
Definition: cuBlock.hpp:46
int getLen() const
Definition: cuBlock.hpp:41
bool contain(const Block &rhs) const
Definition: cuBlock.cpp:73
Block extend(int nExt, int cExt) const
Definition: cuBlock.cpp:151
bool operator!=(const Block &rhs) const
Definition: cuBlock.cpp:68
Block operator+(const DeltaBlock &deltaBlock) const
Definition: cuBlock.cpp:132
void setEnd(int end)
Definition: cuBlock.hpp:44
int m_id
Definition: cuBlock.hpp:59
const Block & operator=(const Block &rhs)
Definition: cuBlock.cpp:55
void extendSelf(int nExt, int cExt)
Definition: cuBlock.cpp:158
int m_start
Definition: cuBlock.hpp:58
void addOffset(int nExt)
Definition: cuBlock.cpp:164
int m_len
Definition: cuBlock.hpp:57
int getEnd() const
Definition: cuBlock.hpp:43
bool operator==(const Block &rhs) const
Definition: cuBlock.cpp:63
int getStart() const
Definition: cuBlock.hpp:42
const_iterator begin() const
Definition: set.hpp:286
const_iterator end() const
Definition: set.hpp:287
parent_type::iterator iterator
Definition: set.hpp:198
parent_type::const_iterator const_iterator
Definition: set.hpp:197
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
parent_type::iterator iterator
Definition: set.hpp:80
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
parent_type::const_iterator const_iterator
Definition: set.hpp:79
CSeq_align::C_Segs::TDendiag TDendiag
Definition: cuAlign.hpp:48
int GetBlockLengths(const CRef< CSeq_align > &seqAlign, vector< int > &lengths)
Definition: cuAlign.cpp:391
bool GetSeqID(const CRef< CSeq_align > &seqAlign, CRef< CSeq_id > &SeqID, bool getSlave=true)
Definition: cuAlign.cpp:55
int GetBlockStarts(const CRef< CSeq_align > &seqAlign, vector< int > &starts, bool onMaster)
Definition: cuAlign.cpp:418
string DeltaBlockModelToString(const DeltaBlockModel &dbm)
Definition: cuBlock.cpp:849
multiset< DeltaBlock > DeltaBlockModel
Definition: cuBlock.hpp:63
bool SeqIdsMatch(const CRef< CSeq_id > &id1, const CRef< CSeq_id > &id2)
Definition: cuSequence.cpp:70
string GetSeqIDStr(const CRef< CSeq_id > &SeqID)
Definition: cuUtils.cpp:145
static char tmp[3200]
Definition: utf8.c:42
#define NULL
Definition: ncbistd.hpp:225
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Seq_align_.hpp:865
vector< TSeqPos > TStarts
Definition: Dense_diag_.hpp:94
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
virtual void Reset(void)
Reset the whole object.
Definition: Seq_align_.cpp:333
@ eType_partial
mapping pieces together
Definition: Seq_align_.hpp:103
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
int len
#include<zmmintrin.h>
Definition: bm.h:78
T max(T x_, T y_)
T min(T x_, T y_)
Int4 delta(size_t dimension_, const Int4 *score_)
#define assert(x)
Definition: srv_diag.hpp:58
static DP_BlockInfo * blocks
#define _ASSERT
else result
Definition: token2.c:20
Modified on Tue Apr 23 07:37:09 2024 by modify_doxy.py rev. 669887