NCBI C++ ToolKit
cuBlast2Seq.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuBlast2Seq.hpp 51593 2011-10-17 14:21:18Z lanczyck $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Charlie Liu
27  *
28  * File Description:
29  *
30  * Functions to call C++ Blast2Seq API
31  *
32  * ===========================================================================
33  */
34 
35 #ifndef CU_BLAST2SEQ_HPP
36 #define CU_BLAST2SEQ_HPP
37 
38 #include <vector>
44 
47 BEGIN_SCOPE(cd_utils)
48 
49 typedef void (* NotifierFunction) (int Num, int Total); // NOTE(review): presumably a progress callback (item Num of Total) handed to CdBlaster::blast() -- confirm against the implementation
50 
52 {
53 public:
54  static const int CDD_DATABASE_SIZE ;
55  static const double BLAST_SCALING_FACTOR_DEFAULT ;
56  static const double E_VAL_WHEN_NO_SEQ_ALIGN ; // E-value used when Blast doesn't return a seq-align
57  static const double SCORE_WHEN_NO_SEQ_ALIGN ;
58  static const string SCORING_MATRIX_DEFAULT;
59  static const long DEFAULT_NR_SIZE;
60  static const int DEFAULT_NR_SEQNUM;
61 
62  // the source of the query sequences
63  CdBlaster(AlignmentCollection& source, string matrixName = SCORING_MATRIX_DEFAULT);
64  CdBlaster(vector< CRef<CBioseq> >& seqs, string matrixName = SCORING_MATRIX_DEFAULT);
65 
66  void setQueryRows(const vector<int>* rows) {m_queryRows = rows;};
67  void setSubjectRows(const vector<int>* rows){m_subjectRows = rows;};
68  void setScoreType(CSeq_align::EScoreType scoreType){m_scoreType = scoreType;};
69 
70  void useWholeSequence(bool whole);
71  void setFootprintExtension(int nExt, int CExt);
72 
73  // Returns false if there was a problem setting up the Blast query factory object
74  // for any of the sequences; results for other sequences are available even with
75  // a false return value.
76  bool blast(NotifierFunction notifier=0);
77 
78  // to do PSI-BLAST
79  void setPsiBlastTarget(CRef<CPssmWithParameters> pssm);
80  CRef<CPssmWithParameters> setPsiBlastTarget(CCdCore* targetCD);
81 
82  // returns the number of PSI-BLAST runs done
83  int psiBlast();
84 
85  double getPairwiseScore(int row1, int row2);
86  double getPairwiseEValue(int row1, int row2);
87 
88  double getPsiBlastScore(int row);
89  double getPsiBlastEValue(int row);
90 
91 
92  /* we don't need the actual alignments for now,
93  so these two methods only return the original seq-align with denseg */
94  CRef<CSeq_align> getPsiBlastAlignement(int row);
95  CRef<CSeq_align> getPairwiseBlastAlignement(int row1, int row2);
96 
97 private:
98  long m_dbSize;
100 
101  AlignmentCollection* m_ac; // normally, the source of alignments and sequences
102  const vector<int>* m_queryRows;
103  const vector<int>* m_subjectRows;
104  vector< CRef<CBioseq> >* m_seqs; // the source of sequences for the coverage test.
106  bool m_useWhole; // false: use aligned footprint
107  int m_nExt;
108  int m_cExt;
109  vector< CRef<CSeq_align> > m_alignments;
110 
111  // for blast tree, instead of keeping alignments, just keep scores and e-vals
113  vector< double > m_scores;
114  vector< double > m_evals;
115 
116  vector<int> m_offsets;
117  vector<int> m_batchSizes;
120  vector< CRef< CBioseq > > m_truncatedBioseqs;
121 
122  CRef< CBioseq > truncateBioseq(int row);
123  void processBlastHits(int queryRow, CSearchResultSet& hits);
124  //void processBlastHits(TSeqAlignVector& hits);
125  //void processBlastHits(BlastHSPResults* hits, int numSubjects);
126  bool IsFootprintValid(int from, int to, int len);
127  void ApplyEndShiftToRange(int& from, int nTermShift, int& to, int cTermShift, int len);
128  int getCompositeIndex(int query, int subject);
129  //not needed for now
130  //CRef< CSeq_align > remapSeqAlign(int query, int subject, CRef< CSeq_align > seqAlign);
131 
132  void RemoveAllDataLoaders();
133  bool FillOutSeqLoc(const CRef< CBioseq >& bs, CSeq_loc& seqLoc);
134 };
135 
136 END_SCOPE(cd_utils)
138 
139 #endif // CU_BLAST2SEQ_HPP
Search Results for All Queries.
EScoreType
enum controlling known named scores
Definition: Seq_align.hpp:128
void setQueryRows(const vector< int > *rows)
Definition: cuBlast2Seq.hpp:66
CRef< CPssmWithParameters > m_psiTargetPssm
const vector< int > * m_subjectRows
void setScoreType(CSeq_align::EScoreType scoreType)
Definition: cuBlast2Seq.hpp:68
vector< double > m_scores
vector< CRef< CSeq_align > > m_alignments
static const double E_VAL_WHEN_NO_SEQ_ALIGN
Definition: cuBlast2Seq.hpp:56
static const int DEFAULT_NR_SEQNUM
Definition: cuBlast2Seq.hpp:60
vector< CRef< CBioseq > > * m_seqs
static const double BLAST_SCALING_FACTOR_DEFAULT
Definition: cuBlast2Seq.hpp:55
long m_dbSize
Definition: cuBlast2Seq.hpp:98
bool m_useWhole
static const string SCORING_MATRIX_DEFAULT
Definition: cuBlast2Seq.hpp:58
vector< double > m_evals
CCdCore * m_psiTargetCd
const vector< int > * m_queryRows
vector< int > m_batchSizes
static const long DEFAULT_NR_SIZE
Definition: cuBlast2Seq.hpp:59
vector< CRef< CBioseq > > m_truncatedBioseqs
static const double SCORE_WHEN_NO_SEQ_ALIGN
Definition: cuBlast2Seq.hpp:57
string m_scoringMatrix
vector< int > m_offsets
void setSubjectRows(const vector< int > *rows)
Definition: cuBlast2Seq.hpp:67
static const int CDD_DATABASE_SIZE
Definition: cuBlast2Seq.hpp:54
int m_dbSeqNum
Definition: cuBlast2Seq.hpp:99
CSeq_align::EScoreType m_scoreType
AlignmentCollection * m_ac
static void RemoveAllDataLoaders()
Definition: cn3d_blast.cpp:236
USING_SCOPE(objects)
void(* NotifierFunction)(int Num, int Total)
Definition: cuBlast2Seq.hpp:49
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_CDUTILS_EXPORT
Definition: ncbi_export.h:376
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is whole
int len
const CharType(& source)[N]
Definition: pointer.h:1149
Declares CPsiBl2Seq, the C++ API for the PSI-BLAST 2 Sequences engine.
#define row(bind, expected)
Definition: string_bind.c:73
static string subject
static string query
Modified on Sat Jun 15 11:49:03 2024 by modify_doxy.py rev. 669887