NCBI C++ ToolKit
cuAlign.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuAlign.cpp 87139 2019-07-30 13:43:56Z lanczyck $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Adapted from CDTree-1 code by Chris Lanczycki
27  *
28  * File Description:
29  *
30  * Utility routines for manipulating alignments.
31  *
32  * ===========================================================================
33  */
34 #include <ncbi_pch.hpp>
36 
45 
49 
50 #include <stdio.h>
51 
53 BEGIN_SCOPE(cd_utils)
54 
55 bool GetSeqID(const CRef< CSeq_align >& seqAlign, CRef< CSeq_id >& SeqID, bool getSlave)
56 {
57  //-------------------------------------------------------------------------
58  // get a SeqID.
59  // first get the row'th DenDiag, then the Slave/Master's SeqID.
60  //-------------------------------------------------------------------------
61  CRef< CDense_diag > DenDiag;
62  CDense_diag::TIds IdsSet;
63  CDense_diag::TIds::iterator i;
64 
65  if (seqAlign.NotEmpty()) {
66  if (seqAlign->GetSegs().IsDendiag() && GetFirstOrLastDenDiag(seqAlign, true, DenDiag)) {
67  IdsSet = DenDiag->GetIds();
68  } else if (seqAlign->GetSegs().IsDenseg()) {
69  IdsSet = seqAlign->GetSegs().GetDenseg().GetIds();
70  }
71  i = IdsSet.begin();
72  if (getSlave)
73  {
74  i++;
75  }
76  SeqID = (*i);
77  return(true);
78  }
79  return(false);
80 }
81 
82 // Assumes CDD-style seq-align using Dendiags with dimension 2.
83 bool ChangeSeqIdInSeqAlign(CRef< CSeq_align>& sa, const CRef< CSeq_id >& newSeqId, bool onMaster)
84 {
85  bool result = (sa->SetSegs().IsDendiag() && sa->SetSegs().SetDendiag().size() > 0);
86  TDendiag_it ddIt, ddEnd;
87  unsigned int index = (onMaster) ? 0 : 1;
88 
89  // Sanity check the dendiag...
90  if (result) {
91  ddIt = sa->SetSegs().SetDendiag().begin();
92  ddEnd = sa->SetSegs().SetDendiag().end();
93  for (; ddIt != ddEnd; ++ddIt) {
94  if ((*ddIt)->GetDim() != 2 || (*ddIt)->GetIds().size() != 2) {
95  result = false;
96  break;
97  }
98  }
99  }
100 
101  if (result) {
102  ddIt = sa->SetSegs().SetDendiag().begin();
103  ddEnd = sa->SetSegs().SetDendiag().end();
104  CDense_diag::TIds ids;
105  for (; ddIt != ddEnd; ++ddIt) {
106  ids = (*ddIt)->SetIds();
107  ids[index]->Assign(*newSeqId);
108  }
109  }
110 
111  return result;
112 }
113 
114 
115 // convenience function
116 int MapPositionToMaster(int childPos, const CSeq_align& align) {
117 
118  return MapPosition(align, childPos, CHILD_TO_MASTER);
119 }
120 
121 // convenience function
122 int MapPositionToChild(int masterPos, const CSeq_align& align) {
123 
124  return MapPosition(align, masterPos, MASTER_TO_CHILD);
125 }
126 
127 /* RENAME */
128 // Was CCdCore::GetSeqPosition(const TDendiag* ddlist, int Position, bool OnMasterRow) {
129 int MapPosition(const CSeq_align& seqAlign, int Position, CoordMapDir mapDir) {
130 //---------------------------------------------------------------------------
131 // If mapDir = MASTER_TO_CHILD, then get position on slave
132 // row that corresponds to Position on master row. Otherwise (i.e. CHILD_TO_MASTER),
133 // get position on master row that corresponds to Position on slave row.
134 // Assumes the Seq_align is a standard CD alignment of two sequences.
135 //---------------------------------------------------------------------------
136  TDendiag_cit i, ddend;
137  CDense_diag::TStarts::const_iterator k;
138  int Start, Len, OtherStart;
139 
140  const TDendiag* ddlist; // = new TDendiag;
141  if (GetDDSetFromSeqAlign(seqAlign, ddlist)) {
142 
143  ddend = ddlist->end();
144  for (i=ddlist->begin(); i!=ddend; i++) {
145  k = (*i)->GetStarts().begin();
146  Len = (*i)->GetLen();
147  Start = (mapDir == MASTER_TO_CHILD) ? *k : *(++k);
148 // Start = OnMasterRow ? *k : *(++k);
149  k = (*i)->GetStarts().begin();
150  OtherStart = (mapDir == MASTER_TO_CHILD) ? *(++k) : *k;
151 // OtherStart = OnMasterRow ? *(++k) : *k;
152  if ((Position >= Start) && (Position < (Start+Len))) {
153  return(OtherStart + (Position-Start));
154  }
155  }
156  }
157 // delete ddlist;
158  return(INVALID_POSITION);
159 }
160 
161 
162 
163 /* ADDED */
164 bool IsPositionAligned(const CSeq_align& seqAlign, int Position, bool onMaster) {
165  bool result = false;
166 
167  if (Position == INVALID_POSITION) {
168  return result;
169  }
170 
171  const TDendiag* pDenDiagSet; // = new TDendiag;
172  if (GetDDSetFromSeqAlign(seqAlign, pDenDiagSet)) {
173  result = IsPositionAligned(pDenDiagSet, Position, onMaster);
174  }
175  return result;
176 }
177 
178 /* ADDED */
179 bool IsPositionAligned(const TDendiag*& pDenDiagSet, int Position, bool onMaster) {
180  bool result = false;
181  int start, stop;
182  TDendiag_cit i, iend;
183 
184  if (Position == INVALID_POSITION) {
185  return result;
186  }
187 
188  // for each block, check if Position is in range
189  if (pDenDiagSet) {
190  iend = pDenDiagSet->end();
191  for (i=pDenDiagSet->begin(); i!=iend; i++) {
192  start = (onMaster) ? (*i)->GetStarts().front() : (*i)->GetStarts().back();
193  stop = start + (*i)->GetLen() - 1;
194  if (Position >= start && Position <= stop) {
195  result = true;
196  break;
197  }
198  }
199  }
200  return result;
201 }
202 
203 // Return the number of positions of align1 also aligned on align2.
204 int GetAlignedPositions(const CRef< CSeq_align >& align1, const CRef< CSeq_align >& align2, vector<int>& alignedPositions, bool onMaster) {
205 
206  int nBlocks, position;
207  CRef< CSeq_id > align1Id, align2Id;
208  vector<int> align1Blocks, align1Starts;
209 
210  alignedPositions.clear();
211 
212  if (align1.NotEmpty() && align2.NotEmpty()) {
213 
214  // The sequences need to be the same to check common positions.
215  if (GetSeqID(align1, align1Id, !onMaster) && GetSeqID(align2, align2Id, !onMaster) &&
216  SeqIdsMatch(align1Id, align2Id)) {
217 
218  GetBlockLengths(align1, align1Blocks);
219  GetBlockStarts(align1, align1Starts, onMaster);
220 
221  // Look for residues from align1 aligned on align2.
222  nBlocks = align1Blocks.size();
223  for (int i = 0; i < nBlocks; ++i) {
224  position = align1Starts[i];
225  for (int j = 0; j < align1Blocks[i]; ++j) {
226  if (IsPositionAligned(*align2, position, onMaster)) {
227  alignedPositions.push_back(position);
228  }
229  ++position;
230  }
231  }
232  }
233  }
234  return alignedPositions.size();
235 }
236 
237 
239 
240  TDendiag_cit i;
241  int Len=0;
242 
243  if (seqAlign.Empty()) {
244  return Len;
245  }
246 
247  // get den-diags for master row; sum lengths
248  const TDendiag* pDenDiagSet; // = new TDendiag;
249  if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
250  for (i=pDenDiagSet->begin(); i!=pDenDiagSet->end(); i++) {
251  Len += (*i)->GetLen();
252  }
253  }
254  return(Len);
255 
256 }
257 
258 int GetLowerBound(const CRef< CSeq_align >& seqAlign, bool onMaster) {
259 
260  int lowerBound = -1;
261  if (seqAlign.Empty()) {
262  return lowerBound;
263  }
264 
265  const TDendiag* pDenDiagSet; // = new TDendiag;
266  if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
267  lowerBound = (onMaster) ? pDenDiagSet->front()->GetStarts().front() : pDenDiagSet->front()->GetStarts().back();
268  }
269  return(lowerBound);
270 
271 }
272 
273 int GetUpperBound(const CRef< CSeq_align >& seqAlign, bool onMaster) {
274  int upperBound = -1;
275  if (seqAlign.Empty()) {
276  return upperBound;
277  }
278 
279  const TDendiag* pDenDiagSet; // = new TDendiag;
280  if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
281  upperBound = (onMaster) ? pDenDiagSet->back()->GetStarts().front() : pDenDiagSet->back()->GetStarts().back();
282  upperBound += pDenDiagSet->back()->GetLen() - 1;
283  }
284  return(upperBound);
285 
286 }
287 
288 // Use the alignment to extract as a single string those residues that are aligned.
289 // If pAlignedRes hasn't been allocated, do so.
290 void SetAlignedResiduesOnSequence(const CRef< CSeq_align >& align, const string& sequenceString, char*& pAlignedRes, bool isMaster) {
291 
292  int length;
293  int alignedResCtr = 0;
294  int start = -1, stop = -1;
295  CRef< CDense_diag > ddFirst, ddLast;
296 
297  if (align.Empty() || sequenceString.size() < 1) {
298  return;
299  }
300 
301  length = GetNumAlignedResidues(align);
302  if (length < 1 || (int) sequenceString.size() < length) {
303  return;
304  } else {
305  // Allocate space for pAlignedRes if not already done
306  if (!pAlignedRes) {
307  pAlignedRes = new char[length];
308  if (!pAlignedRes) return;
309  }
310  }
311 
312  if (GetFirstOrLastDenDiag(align, true, ddFirst) && GetFirstOrLastDenDiag(align, false, ddLast)) {
313  if (ddFirst.NotEmpty() && ddLast.NotEmpty()) {
314  start = (isMaster) ? ddFirst->GetStarts().front() : ddFirst->GetStarts().back();
315  stop = (isMaster) ? ddLast->GetStarts().front() : ddLast->GetStarts().back();
316  stop += ddLast->GetLen() - 1;
317  }
318  }
319 
320  alignedResCtr = 0;
321  const TDendiag* pDenDiagSet; // = new TDendiag;
322  if (GetDDSetFromSeqAlign(*align, pDenDiagSet)) {
323 // if (start >=0 && start < length && stop >=0 && stop < length) {
324  if (start >=0 && start <= stop && stop < (int) sequenceString.size()) {
325  for (int i = start; i <= stop; ++i) {
326  if (IsPositionAligned(pDenDiagSet, i, isMaster) && alignedResCtr < length) {
327  //ASSERT(alignedResCtr < length);
328  pAlignedRes[alignedResCtr] = sequenceString[i];
329  ++alignedResCtr;
330  }
331  }
332  }
333  }
334 
335  // problem if alignedResCtr != length; return null pointer
336  if (alignedResCtr != length) {
337  delete pAlignedRes;
338  pAlignedRes = NULL;
339  }
340 
341 }
342 
343 
344 //===========================================
345 // Queries on block structure of alignment
346 //===========================================
347 
348 /* ADDED 10/28/03 */
349 // return block number containing residue, or -1 if not aligned or out of range.
350 int GetBlockNumberForResidue(int residue, const CRef< CSeq_align >& seqAlign, bool onMaster,
351  vector<int>* starts, vector<int>* lengths) {
352  int i = 0;
353  int result = -1, nBlocks;
354  vector<int> vstarts, vlengths;
355 
356  if (residue >= 0 && GetBlockLengths(seqAlign, vlengths) > 0 && GetBlockStarts(seqAlign, vstarts, onMaster) > 0) {
357  if (vlengths.size() == vstarts.size()) {
358  nBlocks = vstarts.size();
359  while (i < nBlocks && result < 0) {
360  if (residue >= vstarts[i] && residue < vstarts[i] + vlengths[i]) {
361  result = i;
362  }
363  ++i;
364  }
365  if (starts != NULL) {
366  starts->insert(starts->begin(), vstarts.begin(), vstarts.end());
367  }
368  if (lengths != NULL) {
369  lengths->insert(lengths->begin(), vlengths.begin(), vlengths.end());
370  }
371  }
372  }
373  return result;
374 }
375 
376 /* ADDED */
377 // return number of blocks in alignment (0 if no alignment, or not a Dense_diag)
378 int GetBlockCount(const CRef< CSeq_align >& seqAlign) {
379  int nBlocks = 0;
380  if (seqAlign.Empty()) {
381  return nBlocks;
382  }
383  if (seqAlign->GetSegs().IsDendiag()) {
384  nBlocks = seqAlign->GetSegs().GetDendiag().size();
385  }
386  return nBlocks;
387 }
388 
389 
390 // return number of blocks on success; return 0 on error
391 int GetBlockLengths(const CRef< CSeq_align >& seqAlign, vector<int>& lengths) {
392  int count = 0;
393  int nBlocks = GetBlockCount(seqAlign);
394  const TDendiag* pDenDiagSet = NULL;
395  TDendiag_cit cit;
396 
397  if (seqAlign.NotEmpty() && nBlocks > 0) {
398  lengths.clear();
399  if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
400  for (cit = pDenDiagSet->begin(); cit != pDenDiagSet->end(); ++cit) {
401  lengths.push_back((*cit)->GetLen());
402  count++;
403  }
404  }
405  }
406  count = (count == nBlocks) ? count: 0;
407  return count;
408 }
409 
410 
411 // convenience method; return number of blocks on success; return 0 on error
412 int GetBlockStartsForMaster(const CRef< CSeq_align >& seqAlign, vector<int>& starts) {
413  return GetBlockStarts(seqAlign, starts, true);
414 }
415 
416 
417 // return number of blocks on success; return 0 on error
418 int GetBlockStarts(const CRef< CSeq_align >& seqAlign, vector<int>& starts, bool onMaster) {
419  int start;
420  int count = 0;
421  int nBlocks = GetBlockCount(seqAlign);
422  const TDendiag* pDenDiagSet = NULL;
423  TDendiag_cit cit;
424 
425  if (seqAlign.NotEmpty() && nBlocks > 0) {
426  starts.clear();
427  if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
428  for (cit = pDenDiagSet->begin(); cit != pDenDiagSet->end(); ++cit) {
429  start = (onMaster) ? (*cit)->GetStarts().front() : (*cit)->GetStarts().back();
430  starts.push_back(start);
431  count++;
432  }
433  }
434  }
435  count = (count == nBlocks) ? count: 0;
436  return count;
437 }
438 
439 bool GetDDSetFromSeqAlign(const CSeq_align& align, const TDendiag*& dd) {
440  if (align.GetSegs().IsDendiag()) {
441  dd = &(align.GetSegs().GetDendiag());
442  return true;
443  }
444  return false;
445 }
446 
447 
449  if (align.SetSegs().IsDendiag()) {
450  dd = &(align.SetSegs().SetDendiag());
451  return true;
452  }
453  return false;
454 }
455 
456 
457 bool GetFirstOrLastDenDiag(const CRef< CSeq_align >& seqAlign, bool First, CRef< CDense_diag >& DenDiag) {
458 //-------------------------------------------------------------------------
459 // get either the first or last dense-diag of the seqAlign
460 //-------------------------------------------------------------------------
461  const TDendiag* pDenDiagSet; // (TDendiag = list<CRef<CDense_diag>>)
462  TDendiag_cit k;
463 
464  if (seqAlign.NotEmpty() && GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
465 
466  if (First) {
467  k = pDenDiagSet->begin();
468  }
469  else {
470  k = pDenDiagSet->end();
471  k--;
472  }
473  DenDiag = (*k);
474  return(true);
475  }
476  return(false);
477 }
478 
479 bool CheckSeqIdInDD(const CRef< CSeq_align >& seqAlign)
480 {
481  int iii;
482  const TDendiag* pDenDiagSet; // (TDendiag = list<CRef<CDense_diag>>)
483  TDendiag_cit k;
484  CDense_diag::TIds IdsSet;
485  CDense_diag::TIds::iterator i;
486  CRef< CSeq_id > master, slave, master2, slave2;
487  if (seqAlign.NotEmpty() && GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet))
488  {
489  iii=0;
490  k = pDenDiagSet->begin();
491  IdsSet = (*k)->GetIds();
492  i = IdsSet.begin();
493  master = *i;
494  i++;
495  slave = *i;
496  k++;iii++;
497  for (; k != pDenDiagSet->end(); k++, iii++)
498  {
499  IdsSet = (*k)->GetIds();
500  i = IdsSet.begin();
501  master2 = *i;
502  i++;
503  slave2 = *i;
504  if (!(SeqIdsMatch(master, master2)) || !SeqIdsMatch(slave, slave2))
505  return false;
506  }
507  }
508  return true;
509 }
510 
511 /*
512 _/_/_/_/_/_/_/_/_/_/_/_/_/_/
513 _/
514 _/ DD<->SeqLoc transfer Functions
515 _/
516 _/_/_/_/_/_/_/_/_/_/_/_/_/_/
517 */
518 
519 
520 void MakeDDFromSeqLoc(CSeq_loc * pAl,TDendiag * pDD ) {
521  int from, to;
522 
523  if (!pAl) return;
524  // make a DD from AlignAnnot
525  //if (pAl->GetLocation().IsInt()) {
526  if (pDD && pAl->IsInt()) {
527  //CSeq_interval& interval = pAl->SetLocation().SetInt();
528  CSeq_interval& interval = pAl->SetInt();
529  from=interval.SetFrom();
530  to=interval.SetTo();
531  CRef< CSeq_id > RefID(new CSeq_id);
532  RefID = &interval.SetId();
533  AddIntervalToDD(pDD,RefID,RefID,from,from,to-from+1);
534  //} else if( pAl->GetLocation().IsPacked_int() ) {
535  } else if(pDD && pAl->IsPacked_int() ) {
536  CPacked_seqint::Tdata::iterator s;
537  for (s=pAl->SetPacked_int().Set().begin(); s!=pAl->SetPacked_int().Set().end(); s++) {
538  //CSeq_interval& interval = (*s);
539  from=(*s)->GetFrom();
540  to=(*s)->GetTo();
541  CRef< CSeq_id > RefID(new CSeq_id);
542  RefID = &((*s)->SetId());
543  AddIntervalToDD(pDD,RefID,RefID ,from,from,to-from+1);
544  }
545  }
546 }
547 
548 
549 void MakeSeqLocFromDD(const TDendiag * pDD, CSeq_loc * pAl) {
550  TDendiag_cit pp;
551  int iDst;
552  CDense_diag::TStarts::const_iterator pos;
553  vector < CRef< CSeq_id > >::const_iterator pid;
554 
555  for (iDst=0,pp=pDD->begin(); pp!=pDD->end(); pp++,iDst++){
556  pos=(*pp)->GetStarts().begin();
557  TSeqPos len=((*pp)->GetLen());
558  TSeqPos posStart=*pos;
559  pid=(*pp)->GetIds().begin();
560  //CRef<CSeq_id> SeqID=*(++pid);
561  CRef<CSeq_id> SeqID=*(pid);
562 
563  if(pDD->size()==1){
564  pAl->SetInt().SetFrom(posStart);
565  pAl->SetInt().SetTo(posStart+len-1);
566  pAl->SetInt().SetId(*SeqID);
567  }else {
568  //CSeq_interval * intrvl = new CSeq_interval();
569  //CRef< CSeq_interval > intrvl = new CSeq_interval();
571  intrvl->SetFrom(posStart);
572  intrvl->SetTo(posStart+len-1);
573  intrvl->SetId(*SeqID);
574  pAl->SetPacked_int().Set().push_back(intrvl);
575  }
576  }
577 }
578 
580 // Fake it
581 {
582  CRef< CSeq_id > idMaster(new CSeq_id);
583  idMaster.Reset(seqID1);
584  CRef< CSeq_id > idSeq(new CSeq_id);
585  idSeq.Reset(seqID2);
586 
587  CRef<CDense_diag> newDD(new CDense_diag);
588  newDD->SetDim(2);
589  //newDD->SetIds().push_back(seqID1);
590  //newDD->SetIds().push_back(seqID2);
591  newDD->SetIds().push_back(idMaster);
592  newDD->SetIds().push_back(idSeq);
593  newDD->SetStarts().push_back(st1);
594  newDD->SetStarts().push_back(st2);
595  newDD->SetLen()=lll;
596  pDD->push_back(newDD); // apend to the DensDiag List
597 }
598 
599 
600 bool GetDenDiagSet(const CRef< CSeq_annot >& seqAnnot, int Row, const TDendiag*& pDenDiagSet) {
601 //-------------------------------------------------------------------------
602 // the same as SetDenDiagSet, but insure that the returned
603 // den-diag-set is const.
604 //-------------------------------------------------------------------------
605 // TDendiag* pTempDenDiagSet;
606 // bool RetVal;
607 // RetVal = SetDenDiagSet(seqAnnot, Row, pTempDenDiagSet);
608 // pDenDiagSet = pTempDenDiagSet;
609 // return(RetVal);
610  list< CRef< CSeq_align > >::const_iterator j;
611 
612  if (seqAnnot->GetData().IsAlign()) {
613  // figure out which dense-diag set to get (based on Row)
614  if (Row == 0) j = seqAnnot->GetData().GetAlign().begin();
615  else {
616  int Count = 0;
617  for (j= seqAnnot->GetData().GetAlign().begin();
618  j!= seqAnnot->GetData().GetAlign().end(); j++) {
619  if (++Count == Row) break;
620  }
621  }
622  if ((*j)->GetSegs().IsDendiag()) {
623  // get the dense-diag set
624  pDenDiagSet = &((*j)->GetSegs().GetDendiag());
625  return(true);
626  }
627  }
628  return(false);
629 }
630 
631 bool SetDenDiagSet(CRef< CSeq_annot >& seqAnnot, int Row, TDendiag*& pDenDiagSet) {
632 //-------------------------------------------------------------------------
633 // get a set of dense-diag's. this is dense-diag info for a row.
634 // for Row = 0, and Row = 1, return the same DenDiagSet.
635 //-------------------------------------------------------------------------
636  list< CRef< CSeq_align > >::iterator j;
637 
638  if (seqAnnot->GetData().IsAlign()) {
639  // figure out which dense-diag set to get (based on Row)
640  if (Row == 0) j = seqAnnot->SetData().SetAlign().begin();
641  else {
642  int Count = 0;
643  for (j= seqAnnot->SetData().SetAlign().begin();
644  j!= seqAnnot->SetData().SetAlign().end(); j++) {
645  if (++Count == Row) break;
646  }
647  }
648  if ((*j)->SetSegs().IsDendiag()) {
649  // get the dense-diag set
650  pDenDiagSet = &((*j)->SetSegs().SetDendiag());
651  return(true);
652  }
653  }
654  return(false);
655 }
656 
657 
658 bool EraseRow(CRef< CSeq_annot >& seqAnnot, int RowIndex) {
659 //-------------------------------------------------------------------------
660 // Erase the RowIndex-1 seq-align. don't erase RowIndex 0.
661 //-------------------------------------------------------------------------
662  list< CRef< CSeq_align > >::iterator j, jend;
663  int RowCount;
664 
665  if (RowIndex == 0) return(false);
666 
667  if (seqAnnot->GetData().IsAlign()) {
668  RowCount = 1;
669  jend = seqAnnot->SetData().SetAlign().end();
670  for (j= seqAnnot->SetData().SetAlign().begin(); j != jend; j++) {
671  if (RowCount == RowIndex) {
672  seqAnnot->SetData().SetAlign().erase(j);
673  return(true);
674  }
675  RowCount++;
676  if (RowCount > RowIndex) break;
677  }
678  }
679  return(false);
680 }
681 
682 //input seqAlign may actually contain CSeq_align_set
684 {
685  if (seqAlign.Empty())
686  return seqAlign;
687  if (!seqAlign->GetSegs().IsDisc())
688  return seqAlign;
689  if (seqAlign->GetSegs().GetDisc().CanGet())
690  {
691  const list< CRef< CSeq_align > >& saList = seqAlign->GetSegs().GetDisc().Get();
692  if (saList.begin() != saList.end())
693  return ExtractFirstSeqAlign(*saList.begin());
694  }
695  CRef< CSeq_align > nullRef;
696  return nullRef;
697 }
698 
699 //===========================================
700 // Functions to manipulate Dense_segs
701 //===========================================
702 
704 {
705  CRef<CSeq_align> newSa(new CSeq_align);
706  newSa->Assign(*denseSegSeqAlign);
707 
708  if (denseSegSeqAlign.NotEmpty() && denseSegSeqAlign->GetSegs().IsDenseg()) {
709  TDendiag ddList;
710  Denseg2DenseDiagList(denseSegSeqAlign->GetSegs().GetDenseg(), ddList);
711  newSa->SetSegs().SetDendiag() = ddList;
712  }
713 
714  return newSa;
715 }
716 
717 // Function written by: Kamen Todorov, NCBI
718 // Part of the objtools/alnmgr project; forked to here to avoid
719 // adding extra library dependencies.
720 
722 {
723  const CDense_seg::TIds& ids = ds.GetIds();
724  const CDense_seg::TStarts& starts = ds.GetStarts();
725  const CDense_seg::TStrands& strands = ds.GetStrands();
726  const CDense_seg::TLens& lens = ds.GetLens();
727  const CDense_seg::TScores& scores = ds.GetScores();
728  const CDense_seg::TNumseg& numsegs = ds.GetNumseg();
729  const CDense_seg::TDim& numrows = ds.GetDim();
730  int total = numrows * numsegs;
731  int pos = 0;
732 
733  int rows_per_seg;
734 
735  bool strands_exist = ((int) strands.size() == total);
736  bool scores_exist = ((int) scores.size() == total);
737 
738  for (CDense_seg::TNumseg seg = 0; seg < numsegs; seg++) {
739  rows_per_seg = 0;
741  dd->SetLen(lens[seg]);
742  for (CDense_seg::TDim row = 0; row < numrows; row++) {
743  const TSignedSeqPos& start = starts[pos];
744  if (start >=0) {
745  rows_per_seg++;
746  dd->SetIds().push_back(ids[row]);
747  dd->SetStarts().push_back(start);
748  if (strands_exist) {
749  dd->SetStrands().push_back(strands[pos]);
750  }
751  if (scores_exist) {
752  dd->SetScores().push_back(scores[pos]);
753  }
754  }
755  pos++;
756  }
757  if (rows_per_seg >= 2) {
758  dd->SetDim(rows_per_seg);
759  ddl.push_back(dd);
760  }
761  }
762 }
763 // simple and easy : added by Vahan to avoid usage of bunch of algAlignment... classes
764 bool GetPendingSeqId(CCdCore * pCD,int irow,CRef <CSeq_id> & seqID)
765 {
766  int i ;
767  list <CRef <CUpdate_align> > ::iterator pPen;
768  for(i=0,pPen=pCD->SetPending().begin();pPen!=pCD->SetPending().end();pPen++,i++){
769  if(i<irow)
770  continue;
771  CSeq_align * pAl = *((*pPen)->SetSeqannot().SetData().SetAlign().begin());
772  CDense_diag * pDDPen=*(pAl->SetSegs().SetDendiag().begin());
773  vector < CRef< CSeq_id > >::const_iterator pid=pDDPen->GetIds().begin();
774  seqID=*(++pid);
775  return true;
776  }
777  return false;
778 }
779 
780 
781 // Assumes that the Seq_align passed is a pairwise (dim = 2) alignment of
782 // a sequence to a pssm, where the pssm is the second Id. Such alignments
783 // are obtained via RPSBlast and provided by the CDart API.
784 int GetPssmIdFromSeqAlign(const CRef<CSeq_align >& seqAlign, string& err) {
785 
786  int pssmId = 0;
787 
788  err.erase();
789  if (seqAlign.Empty()) {
790  err = "GetPssmIdFromSeqAlign: Empty Seq_align.\n";
791  } else if (seqAlign->IsSetDim() && seqAlign->GetDim() != 2) {
792  err = "GetPssmIdFromSeqAlign: Only Seq_aligns with dim = 2 supported.\n";
793  } else if (seqAlign->GetSegs().IsDenseg()) {
794  const CRef< CSeq_id >& pssmSeqId = seqAlign->GetSegs().GetDenseg().GetIds().back();
795  pssmId = GetCDDPssmIdFromSeqId(pssmSeqId);
796  } else if (seqAlign->GetSegs().IsDendiag()) {
797  err = "GetPssmIdFromSeqAlign: Dense_diags not currently supported.\n";
798  } else {
799  err.append("GetPssmIdFromSeqAlign: Seq_align is an unsupported type (%d).\n", seqAlign->GetType());
800  }
801  return pssmId;
802 }
803 
804 // Return the GI of the master sequence of the Seq_align. If not a GI,
805 // or for other error, return 0.
806 TGi GetMasterGIFromSeqAlign(const CRef< CSeq_align >& seqAlign, string& err) {
807 
808  TGi gi = ZERO_GI;
809 
810  err.erase();
811  if (seqAlign.Empty()) {
812  err = "GetMasterGIFromSeqAlign: Empty Seq_align.\n";
813  } else if (seqAlign->GetSegs().IsDenseg()) {
814  const CRef< CSeq_id >& seqId = seqAlign->GetSegs().GetDenseg().GetIds().front();
815  if (seqId.NotEmpty() && seqId->IsGi()) {
816  gi = seqId->GetGi();
817  } else {
818  err = "GetMasterGIFromSeqAlign: Dense_seg's master sequence is empty or not of type 'GI'.\n";
819  }
820  } else if (seqAlign->GetSegs().IsDendiag()) {
821  const CRef< CSeq_id >& seqId = seqAlign->GetSegs().GetDendiag().front()->GetIds().front();
822  if (seqId.NotEmpty() && seqId->IsGi()) {
823  gi = seqId->GetGi();
824  } else {
825  err = "GetMasterGIFromSeqAlign: Dense_diag's master sequence is empty or not of type 'GI'.\n";
826  }
827  } else {
828  err.append("GetMasterGIFromSeqAlign: Seq_align is an unsupported type (%d).\n", seqAlign->GetType());
829  }
830  return gi;
831 }
832 
833 
834 END_SCOPE(cd_utils) // namespace ncbi::objects::
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CRef –.
Definition: ncbiobj.hpp:618
#define Len
int MapPositionToMaster(int childPos, const CSeq_align &align)
Definition: cuAlign.cpp:116
void MakeDDFromSeqLoc(CSeq_loc *pAl, TDendiag *pDD)
Definition: cuAlign.cpp:520
void MakeSeqLocFromDD(const TDendiag *pDD, CSeq_loc *pAl)
Definition: cuAlign.cpp:549
TGi GetMasterGIFromSeqAlign(const CRef< CSeq_align > &seqAlign, string &err)
Definition: cuAlign.cpp:806
int GetLowerBound(const CRef< CSeq_align > &seqAlign, bool onMaster)
Definition: cuAlign.cpp:258
CRef< CSeq_align > ExtractFirstSeqAlign(CRef< CSeq_align > seqAlign)
Definition: cuAlign.cpp:683
bool GetDenDiagSet(const CRef< CSeq_annot > &seqAnnot, int Row, const TDendiag *&pDenDiagSet)
Definition: cuAlign.cpp:600
bool CheckSeqIdInDD(const CRef< CSeq_align > &seqAlign)
Definition: cuAlign.cpp:479
bool ChangeSeqIdInSeqAlign(CRef< CSeq_align > &sa, const CRef< CSeq_id > &newSeqId, bool onMaster)
Definition: cuAlign.cpp:83
int GetAlignedPositions(const CRef< CSeq_align > &align1, const CRef< CSeq_align > &align2, vector< int > &alignedPositions, bool onMaster)
Definition: cuAlign.cpp:204
int GetBlockStartsForMaster(const CRef< CSeq_align > &seqAlign, vector< int > &starts)
Definition: cuAlign.cpp:412
CRef< CSeq_align > Denseg2DenseDiagList(const CRef< CSeq_align > &denseSegSeqAlign)
Definition: cuAlign.cpp:703
int GetBlockNumberForResidue(int residue, const CRef< CSeq_align > &seqAlign, bool onMaster, vector< int > *starts, vector< int > *lengths)
Definition: cuAlign.cpp:350
int GetBlockCount(const CRef< CSeq_align > &seqAlign)
Definition: cuAlign.cpp:378
bool GetSeqID(const CRef< CSeq_align > &seqAlign, CRef< CSeq_id > &SeqID, bool getSlave)
Definition: cuAlign.cpp:55
bool SetDenDiagSet(CRef< CSeq_annot > &seqAnnot, int Row, TDendiag *&pDenDiagSet)
Definition: cuAlign.cpp:631
int MapPosition(const CSeq_align &seqAlign, int Position, CoordMapDir mapDir)
Definition: cuAlign.cpp:129
void SetAlignedResiduesOnSequence(const CRef< CSeq_align > &align, const string &sequenceString, char *&pAlignedRes, bool isMaster)
Definition: cuAlign.cpp:290
bool IsPositionAligned(const CSeq_align &seqAlign, int Position, bool onMaster)
Definition: cuAlign.cpp:164
void AddIntervalToDD(TDendiag *pDD, CRef< CSeq_id > seqID1, CRef< CSeq_id > seqID2, TSeqPos st1, TSeqPos st2, TSeqPos lll)
Definition: cuAlign.cpp:579
int GetNumAlignedResidues(const CRef< CSeq_align > &seqAlign)
Definition: cuAlign.cpp:238
int GetBlockLengths(const CRef< CSeq_align > &seqAlign, vector< int > &lengths)
Definition: cuAlign.cpp:391
bool GetFirstOrLastDenDiag(const CRef< CSeq_align > &seqAlign, bool First, CRef< CDense_diag > &DenDiag)
Definition: cuAlign.cpp:457
bool GetPendingSeqId(CCdCore *pCD, int irow, CRef< CSeq_id > &seqID)
Definition: cuAlign.cpp:764
int MapPositionToChild(int masterPos, const CSeq_align &align)
Definition: cuAlign.cpp:122
int GetBlockStarts(const CRef< CSeq_align > &seqAlign, vector< int > &starts, bool onMaster)
Definition: cuAlign.cpp:418
int GetUpperBound(const CRef< CSeq_align > &seqAlign, bool onMaster)
Definition: cuAlign.cpp:273
int GetPssmIdFromSeqAlign(const CRef< CSeq_align > &seqAlign, string &err)
Definition: cuAlign.cpp:784
bool GetDDSetFromSeqAlign(const CSeq_align &align, const TDendiag *&dd)
Definition: cuAlign.cpp:439
bool EraseRow(CRef< CSeq_annot > &seqAnnot, int RowIndex)
Definition: cuAlign.cpp:658
TDendiag::const_iterator TDendiag_cit
Definition: cuAlign.hpp:50
CSeq_align::C_Segs::TDendiag TDendiag
Definition: cuAlign.hpp:48
TDendiag::iterator TDendiag_it
Definition: cuAlign.hpp:49
const int INVALID_POSITION
CoordMapDir
@ MASTER_TO_CHILD
@ CHILD_TO_MASTER
bool SeqIdsMatch(const CRef< CSeq_id > &id1, const CRef< CSeq_id > &id2)
Definition: cuSequence.cpp:70
int GetCDDPssmIdFromSeqId(const CRef< CSeq_id > &id)
Definition: cuSequence.cpp:97
#define bool
Definition: bool.h:34
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define ZERO_GI
Definition: ncbimisc.hpp:1088
#define NULL
Definition: ncbistd.hpp:225
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:984
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
TPending & SetPending(void)
Assign a value to Pending data member.
Definition: Cdd_.hpp:1484
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
bool CanGet(void) const
Check if it is safe to call Get method.
vector< TSeqPos > TLens
Definition: Dense_seg_.hpp:108
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
TDim GetDim(void) const
Get the Dim member data.
Definition: Seq_align_.hpp:856
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
vector< ENa_strand > TStrands
Definition: Dense_seg_.hpp:109
const TLens & GetLens(void) const
Get the Lens member data.
Definition: Dense_seg_.hpp:555
vector< TSignedSeqPos > TStarts
Definition: Dense_seg_.hpp:107
vector< CRef< CSeq_id > > TIds
Definition: Dense_seg_.hpp:106
vector< CRef< CSeq_id > > TIds
Definition: Dense_diag_.hpp:93
const TIds & GetIds(void) const
Get the Ids member data.
bool IsDendiag(void) const
Check if variant Dendiag is selected.
Definition: Seq_align_.hpp:720
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
const TDendiag & GetDendiag(void) const
Get the variant data.
Definition: Seq_align_.hpp:726
bool IsDisc(void) const
Check if variant Disc is selected.
Definition: Seq_align_.hpp:772
TType GetType(void) const
Get the Type member data.
Definition: Seq_align_.hpp:809
bool IsSetDim(void) const
dimensionality Check if a value has been assigned to Dim data member.
Definition: Seq_align_.hpp:837
const TIds & GetIds(void) const
Get the Ids member data.
Definition: Dense_seg_.hpp:505
vector< CRef< CScore > > TScores
Definition: Dense_seg_.hpp:110
TNumseg GetNumseg(void) const
Get the Numseg member data.
Definition: Dense_seg_.hpp:465
const TScores & GetScores(void) const
Get the Scores member data.
Definition: Dense_seg_.hpp:605
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
const TStrands & GetStrands(void) const
Get the Strands member data.
Definition: Dense_seg_.hpp:580
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
void SetTo(TTo value)
Assign a value to To data member.
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
Definition: Seq_loc_.hpp:534
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
bool IsAlign(void) const
Check if variant Align is selected.
Definition: Seq_annot_.hpp:635
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
const TAlign & GetAlign(void) const
Get the variant data.
Definition: Seq_annot_.hpp:641
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
int len
#define row(bind, expected)
Definition: string_bind.c:73
else result
Definition: token2.c:20
#define const
Definition: zconf.h:232
Modified on Wed May 01 14:20:18 2024 by modify_doxy.py rev. 669887