NCBI C++ ToolKit
su_block_multiple_alignment.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: su_block_multiple_alignment.hpp 44452 2010-01-14 15:26:51Z satskyse $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Paul Thiessen
27 *
28 * File Description:
29 * Classes to hold alignment data
30 *
31 * ===========================================================================
32 */
33 
34 #ifndef SU_BLOCK_MULTIPLE_ALIGNMENT__HPP
35 #define SU_BLOCK_MULTIPLE_ALIGNMENT__HPP
36 
37 #include <corelib/ncbistl.hpp>
38 #include <corelib/ncbiobj.hpp>
39 
41 
42 #include <list>
43 #include <vector>
44 #include <map>
45 
46 
47 BEGIN_SCOPE(struct_util)
48 
49 class Sequence;
50 class Block;
52 class UnalignedBlock;
53 class BLAST_Matrix;
54 
55 class NCBI_STRUCTUTIL_EXPORT BlockMultipleAlignment : public ncbi::CObject
56 {
57 public:
58  enum {
60  };
61 
62  typedef std::vector < const Sequence * > SequenceList;
63  BlockMultipleAlignment(const SequenceList& sequenceList); // first sequence is master
65 
66  // add a new aligned block - will be "owned" and deallocated by BlockMultipleAlignment
67  bool AddAlignedBlockAtEnd(UngappedAlignedBlock *newBlock);
68 
69  // these two should be called after all aligned blocks have been added; fills out
70  // unaligned blocks inbetween aligned blocks (and at ends). Also sets length.
71  bool AddUnalignedBlocks(void);
72 
73  // Fills out the BlockMap for mapping alignment column -> block+column, special colors,
74  // and sets up conservation colors (although they're not calculated until needed).
75  bool UpdateBlockMap(bool clearRowInfo = true);
76 
77  // find out if a residue is aligned, by row
78  bool IsAligned(unsigned int row, unsigned int seqIndex) const;
79 
80  // stuff regarding master sequence
81  const Sequence * GetMaster(void) const { return m_sequences[0]; }
82  bool IsMaster(const Sequence *sequence) const { return (sequence == m_sequences[0]); }
83 
84  // return sequence for given row
85  const Sequence * GetSequenceOfRow(unsigned int row) const
86  {
87  if (row < m_sequences.size())
88  return m_sequences[row];
89  else
90  return NULL;
91  }
92 
93  // will be used to control padding of unaligned blocks
98  eSplit
99  };
100 
101  // return alignment position of left side first aligned block (eUndefined if no aligned blocks)
102  unsigned int GetFirstAlignedBlockPosition(void) const;
103 
104  // makes a new copy of itself
105  BlockMultipleAlignment * Clone(void) const;
106 
107  // character query interface - "column" must be in alignment range [0 .. AlignmentWidth()-1]
108  bool GetCharacterAt(unsigned int alignmentColumn, unsigned int row,
109  eUnalignedJustification justification, char *character) const;
110 
111  // get sequence and index (if any) at given position, and whether that residue is aligned
112  bool GetSequenceAndIndexAt(unsigned int alignmentColumn, unsigned int row,
113  eUnalignedJustification justification,
114  const Sequence **sequence, unsigned int *index, bool *isAligned) const;
115 
116  // given row and sequence index, return alignment index; not the most efficient function - use sparingly
117  unsigned int GetAlignmentIndex(unsigned int row, unsigned int seqIndex, eUnalignedJustification justification);
118 
119  // get a vector of all Blocks
120  typedef std::vector < ncbi::CConstRef < Block > > ConstBlockList;
121  void GetBlockList(ConstBlockList& cbl) const;
122 
123  // fill in a vector of UngappedAlignedBlocks
124  typedef std::vector < const UngappedAlignedBlock * > UngappedAlignedBlockList;
125  void GetUngappedAlignedBlocks(UngappedAlignedBlockList *blocks) const;
126  typedef std::vector < UngappedAlignedBlock * > ModifiableUngappedAlignedBlockList;
127  void GetModifiableUngappedAlignedBlocks(ModifiableUngappedAlignedBlockList *blocks);
128 
129  // PSSM for this alignment (cached)
130  const BLAST_Matrix * GetPSSM(void) const;
131  void RemovePSSM(void) const;
132 
133  // NULL if block before is aligned; if NULL passed, retrieves last block (if unaligned; else NULL)
134  const UnalignedBlock * GetUnalignedBlockBefore(const UngappedAlignedBlock *aBlock) const;
135 
136  unsigned int NBlocks(void) const { return m_blocks.size(); }
137  unsigned int NAlignedBlocks(void) const;
138  unsigned int NRows(void) const { return m_sequences.size(); }
139  unsigned int AlignmentWidth(void) const { return m_totalWidth; }
140 
141  // return a number from 1..n for aligned blocks, eUndefined for unaligned
142  unsigned int GetAlignedBlockNumber(unsigned int alignmentIndex) const
143  { return m_blockMap[alignmentIndex].alignedBlockNum; }
144 
145  // for storing/querying info
146  double GetRowDouble(unsigned int row) const { return m_rowDoubles[row]; }
147  void SetRowDouble(unsigned int row, double value) const { m_rowDoubles[row] = value; }
148  const std::string& GetRowStatusLine(unsigned int row) const { return m_rowStrings[row]; }
149  void SetRowStatusLine(unsigned int row, const std::string& value) const { m_rowStrings[row] = value; }
150 
151  // empties all rows' infos
152  void ClearRowInfo(void) const
153  {
154  for (unsigned int r=0; r<NRows(); ++r) {
155  m_rowDoubles[r] = 0.0;
156  m_rowStrings[r].erase();
157  }
158  }
159 
160 
161  ///// editing functions /////
162 
163  // if in an aligned block, give block column and width of that position; otherwise eUndefined
164  void GetAlignedBlockPosition(unsigned int alignmentIndex,
165  unsigned int *blockColumn, unsigned int *blockWidth) const;
166 
167  // get seqIndex of slave aligned to the given master seqIndex; eUndefined if master residue unaligned
168  unsigned int GetAlignedSlaveIndex(unsigned int masterSeqIndex, unsigned int slaveRow) const;
169 
170  // returns true if any boundary shift actually occurred
171  bool MoveBlockBoundary(unsigned int columnFrom, unsigned int columnTo);
172 
173  // splits a block such that alignmentIndex is the first column of the new block;
174  // returns false if no split occurred (e.g. if index is not inside aligned block)
175  bool SplitBlock(unsigned int alignmentIndex);
176 
177  // merges all blocks that overlap specified range - assuming no unaligned blocks
178  // in that range. Returns true if any merge(s) occurred, false otherwise.
179  bool MergeBlocks(unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex);
180 
181  // creates a block, if given region of an unaligned block in which no gaps
182  // are present. Returns true if a block is created.
183  bool CreateBlock(unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex,
184  eUnalignedJustification justification);
185 
186  // deletes the block containing this index; returns true if deletion occurred.
187  bool DeleteBlock(unsigned int alignmentIndex);
188 
189  // deletes all blocks; returns true if there were any blocks to delete
190  bool DeleteAllBlocks(void);
191 
192  // shifts (horizontally) the residues in and immediately surrounding an
193  // aligned block; returns true if any shift occurs.
194  bool ShiftRow(unsigned int row, unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex,
195  eUnalignedJustification justification);
196 
197  // delete a row; returns true if successful
198  bool DeleteRow(unsigned int row);
199 
200  // this function does two things: it extracts from a multiple alignment all slave rows listed for
201  // removal; and if pairwiseAlignments!=NULL, for each slave removed creates a new BlockMultipleAlignment
202  // that contains the alignment of just that slave with the master, as it was in the original multiple
203  // (i.e., not according to the corresponding pre-IBM MasterSlaveAlignment)
204  typedef std::list < BlockMultipleAlignment * > AlignmentList;
205  bool ExtractRows(const std::vector < unsigned int >& slavesToRemove, AlignmentList *pairwiseAlignments);
206 
207  // merge in the contents of the given alignment (assuming same master, compatible block structure),
208  // addings its rows to the end of this alignment; returns true if merge successful. Does not change
209  // block structure - just adds the part of new alignment's aligned blocks that intersect with this
210  // object's aligned blocks
211  bool MergeAlignment(const BlockMultipleAlignment *newAlignment);
212 
213  // reorder rows according to newOrder; returns true on success
214  bool ReorderRows(const std::vector < unsigned int >& newOrder);
215 
216 private:
218 
219  typedef std::list < ncbi::CRef < Block > > BlockList;
221 
222  struct BlockInfo {
224  int blockColumn, alignedBlockNum;
225  };
226  typedef std::vector < BlockInfo > BlockMap;
228 
229  unsigned int m_totalWidth;
230 
231  bool CheckAlignedBlock(const Block *newBlock) const;
232  UnalignedBlock * CreateNewUnalignedBlockBetween(const Block *left, const Block *right);
233  Block * GetBlockBefore(const Block *block);
234  Block * GetBlockAfter(const Block *block);
235  const Block * GetBlockBefore(const Block *block) const;
236  const Block * GetBlockAfter(const Block *block) const;
237  void InsertBlockBefore(Block *newBlock, const Block *insertAt);
238  void InsertBlockAfter(const Block *insertAt, Block *newBlock);
239  void RemoveBlock(Block *block);
240 
241  // for cacheing of residue->block lookups
242  mutable unsigned int m_cachePrevRow;
243  mutable const Block *m_cachePrevBlock;
244  mutable BlockList::const_iterator m_cacheBlockIterator;
245  void InitCache(void);
246 
247  // given a row and seqIndex, find block that contains that residue
248  const Block * GetBlock(unsigned int row, unsigned int seqIndex) const;
249 
250  // intended for volatile storage of row-associated info (e.g. for alignment scores, etc.)
251  mutable std::vector < double > m_rowDoubles;
252  mutable std::vector < std::string > m_rowStrings;
253 
254  // associated PSSM
256 };
257 
258 
259 // static function to create Seq-aligns out of multiple
261 ncbi::objects::CSeq_align * CreatePairwiseSeqAlignFromMultipleRow(const BlockMultipleAlignment *multiple,
262  const BlockMultipleAlignment::UngappedAlignedBlockList& blocks, unsigned int slaveRow);
263 
264 
265 // base class for Block - BlockMultipleAlignment is made up of a list of these
266 class NCBI_STRUCTUTIL_EXPORT Block : public ncbi::CObject
267 {
268 public:
269  virtual ~Block(void) { } // virtual destructor for base class
270 
271  // makes a new copy of itself
272  virtual Block * Clone(const BlockMultipleAlignment *newMultiple) const = 0;
273 
274  unsigned int m_width;
275 
276  virtual bool IsAligned(void) const = 0;
277 
278  // get sequence index for a column, which must be in block range (0 ... width-1)
279  virtual unsigned int GetIndexAt(unsigned int blockColumn, unsigned int row,
280  BlockMultipleAlignment::eUnalignedJustification justification) const = 0;
281 
282  // delete a row
283  virtual void DeleteRow(unsigned int row) = 0;
284  virtual void DeleteRows(std::vector < bool >& removeRows, unsigned int nToRemove) = 0;
285 
286  bool IsFrom(const BlockMultipleAlignment *alignment) const { return (alignment == m_parentAlignment); }
287 
288  // given a row number (from 0 ... nSequences-1), give the sequence range covered by this block
289  struct Range {
290  int from, to;
291  };
292 
293  const Range* GetRangeOfRow(int row) const { return &(m_ranges[row]); }
294  void SetRangeOfRow(unsigned int row, int from, int to)
295  {
296  m_ranges[row].from = from;
297  m_ranges[row].to = to;
298  }
299 
300  // resize - add new (empty) rows at end
301  void AddRows(unsigned int nNewRows) { m_ranges.resize(m_ranges.size() + nNewRows); }
302 
303  unsigned int NSequences(void) const { return m_ranges.size(); }
304 
305  // reorder rows according to newOrder; returns true on success
306  bool ReorderRows(const std::vector < unsigned int >& newOrder);
307 
308 protected:
309  Block(const BlockMultipleAlignment *multiple) :
310  m_parentAlignment(multiple), m_ranges(multiple->NRows()) { }
311 
313 
314  typedef std::vector < Range > RangeList;
316 };
317 
318 
319 // a gapless aligned block; width must be >= 1
321 {
322 public:
323  UngappedAlignedBlock(const BlockMultipleAlignment *multiple) : Block(multiple) { }
324 
325  bool IsAligned(void) const { return true; }
326 
327  unsigned int GetIndexAt(unsigned int blockColumn, unsigned int row,
329  BlockMultipleAlignment::eCenter) const // justification ignored for aligned block
330  { return (GetRangeOfRow(row)->from + blockColumn); }
331 
332  char GetCharacterAt(unsigned int blockColumn, unsigned int row) const;
333 
334  void DeleteRow(unsigned int row);
335  void DeleteRows(std::vector < bool >& removeRows, unsigned int nToRemove);
336 
337  Block * Clone(const BlockMultipleAlignment *newMultiple) const;
338 };
339 
340 
341 // an unaligned block; max width of block must be >=1. But range over any given
342 // sequence can be length 0 (but not <0); if length 0, "to" is the residue before
343 // the block, and "from" (= to+1) is the residue after.
345 {
346 public:
347  UnalignedBlock(const BlockMultipleAlignment *multiple) : Block(multiple) { }
348 
349  void Resize(void); // recompute block width from current ranges
350 
351  bool IsAligned(void) const { return false; }
352 
353  unsigned int GetIndexAt(unsigned int blockColumn, unsigned int row,
355 
356  void DeleteRow(unsigned int row);
357  void DeleteRows(std::vector < bool >& removeRows, unsigned int nToRemove);
358 
359  Block * Clone(const BlockMultipleAlignment *newMultiple) const;
360 };
361 
362 END_SCOPE(struct_util)
363 
364 #endif // SU_BLOCK_MULTIPLE_ALIGNMENT__HPP
bool IsAligned(char ch)
std::list< BlockMultipleAlignment * > AlignmentList
std::vector< const Sequence * > SequenceList
bool IsMaster(const Sequence *sequence) const
unsigned int AlignmentWidth(void) const
BlockList::const_iterator m_cacheBlockIterator
void SetRowDouble(unsigned int row, double value) const
void SetRowStatusLine(unsigned int row, const std::string &value) const
const Sequence * GetMaster(void) const
std::vector< const UngappedAlignedBlock * > UngappedAlignedBlockList
const Sequence * GetSequenceOfRow(unsigned int row) const
unsigned int GetAlignedBlockNumber(unsigned int alignmentIndex) const
std::list< ncbi::CRef< Block > > BlockList
unsigned int NBlocks(void) const
const std::string & GetRowStatusLine(unsigned int row) const
double GetRowDouble(unsigned int row) const
std::vector< BlockInfo > BlockMap
std::vector< std::string > m_rowStrings
std::vector< UngappedAlignedBlock * > ModifiableUngappedAlignedBlockList
std::vector< ncbi::CConstRef< Block > > ConstBlockList
bool IsFrom(const BlockMultipleAlignment *alignment) const
virtual unsigned int GetIndexAt(unsigned int blockColumn, unsigned int row, BlockMultipleAlignment::eUnalignedJustification justification) const =0
const BlockMultipleAlignment * m_parentAlignment
std::vector< Range > RangeList
void SetRangeOfRow(unsigned int row, int from, int to)
const Range * GetRangeOfRow(int row) const
unsigned int NSequences(void) const
virtual void DeleteRow(unsigned int row)=0
void AddRows(unsigned int nNewRows)
virtual void DeleteRows(std::vector< bool > &removeRows, unsigned int nToRemove)=0
Block(const BlockMultipleAlignment *multiple)
virtual bool IsAligned(void) const =0
virtual ~Block(void)
virtual Block * Clone(const BlockMultipleAlignment *newMultiple) const =0
UnalignedBlock(const BlockMultipleAlignment *multiple)
UngappedAlignedBlock(const BlockMultipleAlignment *multiple)
unsigned int GetIndexAt(unsigned int blockColumn, unsigned int row, BlockMultipleAlignment::eUnalignedJustification justification=BlockMultipleAlignment::eCenter) const
string
Definition: cgiapp.hpp:690
#define NULL
Definition: ncbistd.hpp:225
#define kMax_UInt
Definition: ncbi_limits.h:185
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_STRUCTUTIL_EXPORT
Definition: ncbi_export.h:841
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
The NCBI C++/STL use hints.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
#define row(bind, expected)
Definition: string_bind.c:73
static DP_BlockInfo * blocks
ncbi::objects::CSeq_align * CreatePairwiseSeqAlignFromMultipleRow(const BlockMultipleAlignment *multiple, const BlockMultipleAlignment::UngappedAlignedBlockList &blocks, unsigned int slaveRow)
Modified on Fri Sep 20 14:58:05 2024 by modify_doxy.py rev. 669887