NCBI C++ ToolKit
cuAlign.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuAlign.hpp 87132 2019-07-29 19:14:23Z lanczyck $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Adapted from CDTree-1 code by Chris Lanczycki
27  *
28  * File Description:
29  *
30  * Utility routines for manipulating alignments.
31  *
32  * ===========================================================================
33  */
34 
35 #ifndef CU_ALGALIGN_HPP
36 #define CU_ALGALIGN_HPP
37 
38 
39 // include ncbistd.hpp, ncbiobj.hpp, ncbi_limits.h, various stl containers
40 #include <corelib/ncbiargs.hpp>
43 
46 BEGIN_SCOPE(cd_utils)
47 
48 typedef CSeq_align::C_Segs::TDendiag TDendiag;
49 typedef TDendiag::iterator TDendiag_it;
50 typedef TDendiag::const_iterator TDendiag_cit;
51 
52 // Assumes a typical CD-style seq_align with one master & one slave row.
54 bool GetSeqID(const CRef< CSeq_align >& seqAlign, CRef< CSeq_id >& SeqID, bool getSlave=true);
55 
56 // Replace the indicated seq-id in the CSeq_align with newSeqId.
58 bool ChangeSeqIdInSeqAlign(CRef< CSeq_align>& sa, const CRef< CSeq_id >& newSeqId, bool onMaster);
59 
60 // coordinate mapping functions; return INVALID_POSITION on failure
62 int MapPositionToMaster(int childPos, const CSeq_align& align);
64 int MapPositionToChild(int masterPos, const CSeq_align& align);
66 int MapPosition(const CSeq_align& seqAlign, int Position, CoordMapDir mapDir); // from CCd::GetSeqPosition
67 
69 bool IsPositionAligned(const CSeq_align& seqAlign, int Position, bool onMaster);
71 bool IsPositionAligned(const TDendiag*& dd, int Position, bool onMaster);
73 int GetAlignedPositions(const CRef< CSeq_align >& align1, const CRef< CSeq_align >& align2, vector<int>& alignedPositions, bool onMaster);
75 int GetNumAlignedResidues(const CRef< CSeq_align >& align); // see also ddLen; main part of CCd::GetAlignmentLength
77 int GetLowerBound(const CRef< CSeq_align >& seqAlign, bool onMaster);
79 int GetUpperBound(const CRef< CSeq_align >& seqAlign, bool onMaster);
80 
81 // yields a null pointer on failure (presumably via pAlignedRes, since the function itself returns void -- TODO confirm)
83 void SetAlignedResiduesOnSequence(const CRef< CSeq_align >& align, const string& sequenceString, char*& pAlignedRes, bool isMaster = false);
84 
86 bool CheckSeqIdInDD(const CRef< CSeq_align >& seqAlign);
87 // Query alignment block structure (assumes dense_diags in the seq_align)
89 int GetBlockNumberForResidue (int residue, const CRef< CSeq_align >& seqAlign, bool onMaster,
90  vector<int>* starts = NULL, vector<int>* lengths = NULL); // -1 if not aligned
92 int GetBlockCount (const CRef< CSeq_align >& seqAlign);
94 int GetBlockLengths(const CRef< CSeq_align >& seqAlign, vector<int>& lengths);
96 int GetBlockStarts (const CRef< CSeq_align >& seqAlign, vector<int>& starts, bool onMaster);
98 int GetBlockStartsForMaster(const CRef< CSeq_align >& seqAlign, vector<int>& starts);
99 
100 // Get DD from Seq_align
104 bool GetDDSetFromSeqAlign(CSeq_align& align, TDendiag*& dd);
105 
106 // Convert between DD and a SeqLoc (which will contain a SeqInterval); from cdt_manipcd
108 void MakeDDFromSeqLoc(CSeq_loc * pAl,TDendiag * pDD );
110 void MakeSeqLocFromDD(const TDendiag * pDD, CSeq_loc * pAl);
112 void AddIntervalToDD(TDendiag * pDD,CRef<CSeq_id> seqID1, CRef<CSeq_id> seqID2,TSeqPos st1,TSeqPos st2, TSeqPos lll);
113 
114 // GetFirstOrLastDenDiag was formerly CCd::GetDenDiag(int Row, bool First, CRef< CDense_diag >& DenDiag)
115 // firstOrLast==true --> first den diag, otherwise last den diag
117 bool GetFirstOrLastDenDiag(const CRef< CSeq_align >& seqAlign, bool firstOrLast, CRef<CDense_diag>& dd);
119 bool GetDenDiagSet(const CRef< CSeq_annot >& seqAnnot, int row, const TDendiag*& pDenDiagSet); // get dense-diag info for one row
121 bool SetDenDiagSet(CRef< CSeq_annot >& seqAnnot, int row, TDendiag*& pddSet);
122 
124 bool EraseRow(CRef< CSeq_annot >& seqAnnot, int row);
125 
126 // Returns 'seqAlign' unless the input is wrapping a CSeq_align_set
127 // (has segs of type 'disc'), in which case the first seq-align found
128 // will be returned. Returns an empty CRef on failure.
129 // Note: this is a recursive function.
130 // Was 'extractOneSeqAlign' from cuBlast2Seq and cuSimpleB2SWrapper.
133 
134 // Functions that manipulate or assume Dense_segs
135 
136 // Given a CSeq_align with a denseg alignment, return a new CSeq_align with an
137 // equivalent dense-diag list. If the input doesn't have a denseg, or if the
138 // conversion fails, the returned CRef will be a copy of the input CRef.
141 
142 // Get DD list from a Dense_seg.
143 // Function written by: Kamen Todorov, NCBI
144 // Part of the objtools/alnmgr project forked to avoid
145 // adding extra library dependencies.
148 
149 // Assumes that the Seq_align passed is a pairwise (dim = 2) Dense_seg alignment
150 // of a sequence to a pssm, where the pssm is the second Id. Such alignments
151 // are obtained via RPSBlast and provided, e.g., by the CDart API.
152 // Return 0 on failure.
154 int GetPssmIdFromSeqAlign(const CRef<CSeq_align >& seqAlign, string& err);
155 
156 // Return the GI of the master (i.e. first) sequence of the Seq_align. If not a GI,
157 // or for other error, return 0.
159 TGi GetMasterGIFromSeqAlign(const CRef< CSeq_align >& seqAlign, string& err);
160 
161 //class CCd;
163 bool GetPendingSeqId(CCdCore * pCD,int irow,CRef <CSeq_id> & seqID);
164 
165 END_SCOPE(cd_utils)
167 
168 
169 #endif // CU_ALGALIGN_HPP
CRef –.
Definition: ncbiobj.hpp:618
int MapPositionToMaster(int childPos, const CSeq_align &align)
Definition: cuAlign.cpp:116
void MakeDDFromSeqLoc(CSeq_loc *pAl, TDendiag *pDD)
Definition: cuAlign.cpp:520
USING_SCOPE(objects)
void MakeSeqLocFromDD(const TDendiag *pDD, CSeq_loc *pAl)
Definition: cuAlign.cpp:549
TDendiag::const_iterator TDendiag_cit
Definition: cuAlign.hpp:50
TGi GetMasterGIFromSeqAlign(const CRef< CSeq_align > &seqAlign, string &err)
Definition: cuAlign.cpp:806
int GetLowerBound(const CRef< CSeq_align > &seqAlign, bool onMaster)
Definition: cuAlign.cpp:258
CRef< CSeq_align > ExtractFirstSeqAlign(CRef< CSeq_align > seqAlign)
Definition: cuAlign.cpp:683
CSeq_align::C_Segs::TDendiag TDendiag
Definition: cuAlign.hpp:48
bool CheckSeqIdInDD(const CRef< CSeq_align > &seqAlign)
Definition: cuAlign.cpp:479
bool ChangeSeqIdInSeqAlign(CRef< CSeq_align > &sa, const CRef< CSeq_id > &newSeqId, bool onMaster)
Definition: cuAlign.cpp:83
int GetAlignedPositions(const CRef< CSeq_align > &align1, const CRef< CSeq_align > &align2, vector< int > &alignedPositions, bool onMaster)
Definition: cuAlign.cpp:204
int GetBlockStartsForMaster(const CRef< CSeq_align > &seqAlign, vector< int > &starts)
Definition: cuAlign.cpp:412
CRef< CSeq_align > Denseg2DenseDiagList(const CRef< CSeq_align > &denseSegSeqAlign)
Definition: cuAlign.cpp:703
int GetBlockCount(const CRef< CSeq_align > &seqAlign)
Definition: cuAlign.cpp:378
int MapPosition(const CSeq_align &seqAlign, int Position, CoordMapDir mapDir)
Definition: cuAlign.cpp:129
bool IsPositionAligned(const CSeq_align &seqAlign, int Position, bool onMaster)
Definition: cuAlign.cpp:164
bool GetDenDiagSet(const CRef< CSeq_annot > &seqAnnot, int row, const TDendiag *&pDenDiagSet)
Definition: cuAlign.cpp:600
void SetAlignedResiduesOnSequence(const CRef< CSeq_align > &align, const string &sequenceString, char *&pAlignedRes, bool isMaster=false)
Definition: cuAlign.cpp:290
TDendiag::iterator TDendiag_it
Definition: cuAlign.hpp:49
int GetBlockNumberForResidue(int residue, const CRef< CSeq_align > &seqAlign, bool onMaster, vector< int > *starts=NULL, vector< int > *lengths=NULL)
Definition: cuAlign.cpp:350
void AddIntervalToDD(TDendiag *pDD, CRef< CSeq_id > seqID1, CRef< CSeq_id > seqID2, TSeqPos st1, TSeqPos st2, TSeqPos lll)
Definition: cuAlign.cpp:579
bool GetFirstOrLastDenDiag(const CRef< CSeq_align > &seqAlign, bool firstOrLast, CRef< CDense_diag > &dd)
Definition: cuAlign.cpp:457
bool EraseRow(CRef< CSeq_annot > &seqAnnot, int row)
Definition: cuAlign.cpp:658
int GetBlockLengths(const CRef< CSeq_align > &seqAlign, vector< int > &lengths)
Definition: cuAlign.cpp:391
bool GetSeqID(const CRef< CSeq_align > &seqAlign, CRef< CSeq_id > &SeqID, bool getSlave=true)
Definition: cuAlign.cpp:55
bool GetPendingSeqId(CCdCore *pCD, int irow, CRef< CSeq_id > &seqID)
Definition: cuAlign.cpp:764
int MapPositionToChild(int masterPos, const CSeq_align &align)
Definition: cuAlign.cpp:122
int GetBlockStarts(const CRef< CSeq_align > &seqAlign, vector< int > &starts, bool onMaster)
Definition: cuAlign.cpp:418
int GetUpperBound(const CRef< CSeq_align > &seqAlign, bool onMaster)
Definition: cuAlign.cpp:273
bool SetDenDiagSet(CRef< CSeq_annot > &seqAnnot, int row, TDendiag *&pddSet)
Definition: cuAlign.cpp:631
int GetPssmIdFromSeqAlign(const CRef< CSeq_align > &seqAlign, string &err)
Definition: cuAlign.cpp:784
bool GetDDSetFromSeqAlign(const CSeq_align &align, const TDendiag *&dd)
Definition: cuAlign.cpp:439
int GetNumAlignedResidues(const CRef< CSeq_align > &align)
Definition: cuAlign.cpp:238
CoordMapDir
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define NULL
Definition: ncbistd.hpp:225
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_CDUTILS_EXPORT
Definition: ncbi_export.h:376
Defines command line argument related classes.
#define row(bind, expected)
Definition: string_bind.c:73
#define const
Definition: zconf.h:232
Modified on Mon Apr 22 04:01:05 2024 by modify_doxy.py rev. 669887