NCBI C++ ToolKit
psiblast.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* ===========================================================================
2  *
3  * PUBLIC DOMAIN NOTICE
4  * National Center for Biotechnology Information
5  *
6  * This software/database is a "United States Government Work" under the
7  * terms of the United States Copyright Act. It was written as part of
8  * the author's official duties as a United States Government employee and
9  * thus cannot be copyrighted. This software/database is freely available
10  * to the public for use. The National Library of Medicine and the U.S.
11  * Government have not placed any restriction on its use or reproduction.
12  *
13  * Although all reasonable efforts have been taken to ensure the accuracy
14  * and reliability of the software and data, the NLM and the U.S.
15  * Government do not and cannot warrant the performance or results that
16  * may be obtained by using this software or data. The NLM and the U.S.
17  * Government disclaim all warranties, express or implied, including
18  * warranties of performance, merchantability or fitness for any particular
19  * purpose.
20  *
21  * Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  * Author: Christiam Camacho
26  *
27  */
28 
29 /** @file psiblast.cpp
30  * Implementation of CPsiBlast.
31  */
32 
33 #include <ncbi_pch.hpp>
35 #include "psiblast_impl.hpp"
36 #include "psiblast_aux_priv.hpp" // for PsiBlastAddAncillaryPssmData
37 
38 // PSSM Engine includes
42 #include "bioseq_extract_data_priv.hpp" // for CBlastQuerySourceBioseqSet
44 
47 
48 
49 /** @addtogroup AlgoBlast
50  *
51  * @{
52  */
53 
56 BEGIN_SCOPE(blast)
57 
59  CRef<CLocalDbAdapter> blastdb,
61 : m_Subject(blastdb), m_Impl(0)
62 {
63  m_Impl = new CPsiBlastImpl(query_factory, m_Subject,
64  CConstRef<CBlastProteinOptionsHandle>(options.GetPointer()));
65 }
66 
68  CRef<CLocalDbAdapter> blastdb,
70 : m_Subject(blastdb), m_Impl(0)
71 {
72  m_Impl = new CPsiBlastImpl(pssm, m_Subject, options);
73 }
74 
76 {
77  if (m_Impl) {
78  delete m_Impl;
79  }
80 }
81 
82 void
84 {
85  m_Impl->SetPssm(pssm);
86 }
87 
90 {
91  return m_Impl->GetPssm();
92 }
93 
96 {
98  return m_Impl->Run();
99 }
100 
/* Builds a PSSM for `query`: gathers PSSM-engine options from `opts_handle`,
 * extracts the query sequence data, hands these together with `alignment`
 * and `database_scope` to a CPsiBlastInputData object, runs CPssmEngine on
 * it and returns the engine's result.
 *
 * NOTE(review): this listing is missing several original lines (the
 * return-type line, one parameter line, one argument of GetBlastSequence and
 * the opening of the last call before `return`); check the repository copy
 * before relying on the text below.
 */
102 PsiBlastComputePssmFromAlignment(const objects::CBioseq& query,
104  CRef<objects::CScope> database_scope,
105  const CPSIBlastOptionsHandle& opts_handle,
106  CConstRef<CBlastAncillaryData> ancillary_data,
107  PSIDiagnosticsRequest* diagnostics_request)
108 {
109  // Extract PSSM engine options from options handle
     // (the pseudo-count and the inclusion threshold are the two fields
     // copied from the handle here)
110  CPSIBlastOptions opts;
111  PSIBlastOptionsNew(&opts);
112  opts->pseudo_count = opts_handle.GetPseudoCount();
113  opts->inclusion_ethresh = opts_handle.GetInclusionThreshold();
114 
     // Concatenate the text of every title descriptor found on the query;
     // the string stays empty when the query has no descriptors or no titles.
115  string query_descr = NcbiEmptyString;
116 
117  if (query.IsSetDescr()) {
118  const CBioseq::TDescr::Tdata& data = query.GetDescr().Get();
119  ITERATE(CBioseq::TDescr::Tdata, iter, data) {
120  if((*iter)->IsTitle()) {
121  query_descr += (*iter)->GetTitle();
122  }
123  }
124  }
125 
     // Fetch sequence 0 of the query in protein encoding with sentinel bytes
     // requested; `warnings` is passed to the call and is asserted to be
     // empty afterwards.
     // NOTE(review): the meaning of the `true` constructor argument is not
     // visible here - presumably it marks the query as protein; confirm.
126  CBlastQuerySourceBioseqSet query_source(query, true);
127  string warnings;
128  const SBlastSequence query_seq =
129  query_source.GetBlastSequence(0, eBlastEncodingProtein,
131  eSentinels, &warnings);
132  _ASSERT(warnings.empty());
133 
     // The data pointer is advanced by one and the length reduced by two so
     // that the sentinel bytes are excluded from what the engine input sees.
134  CPsiBlastInputData input(query_seq.data.get()+1, // skip sentinel
135  query_seq.length-2, // don't count sentinels
136  alignment, database_scope,
137  *opts.Get(),
138  opts_handle.GetMatrixName(),
139  opts_handle.GetGapOpeningCost(),
140  opts_handle.GetGapExtensionCost(),
141  diagnostics_request,
142  query_descr);
143 
     // Run the PSSM engine on the prepared input; the caller-supplied
     // ancillary data is handed to the engine before it runs.
144  CPssmEngine engine(&input);
145  engine.SetUngappedStatisticalParams(ancillary_data);
146  CRef<CPssmWithParameters> retval(engine.Run());
     // NOTE(review): the line that opens the next call is absent from this
     // listing; only its last two arguments (the gap opening and extension
     // costs from the options handle) remain. Presumably this is a call to
     // PsiBlastAddAncillaryPssmData on *retval - confirm against the
     // repository.
147 
149  opts_handle.GetGapOpeningCost(),
150  opts_handle.GetGapExtensionCost());
151  return retval;
152 }
153 
154 END_SCOPE(blast)
156 
157 /* @} */
User-defined methods of the data storage class.
Internal auxiliary setup classes/functions for extracting sequence data from Bioseqs.
Int2 PSIBlastOptionsNew(PSIBlastOptions **psi_options)
Initialize default options for PSI BLAST.
Implements the IBlastQuerySource interface using a CBioseq_set as data source.
CConstRef –.
Definition: ncbiobj.hpp:1266
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
Handle to the protein-protein options to the BLAST algorithm.
Wrapper class for PSIBlastOptions .
Definition: blast_aux.hpp:330
Implementation class for PSI-BLAST (database search and 2 sequences).
This class is a concrete strategy for IPssmInputData, and it implements the traditional PSI-BLAST alg...
Runs a single iteration of the PSI-BLAST algorithm on a BLAST database.
Definition: psiblast.hpp:79
Computes a PSSM as specified in PSI-BLAST.
CRef –.
Definition: ncbiobj.hpp:618
Source of query sequence data for BLAST. Provides an interface for search classes to retrieve sequence...
Definition: query_data.hpp:147
TSeqPos length
Length of the buffer above (not necessarily sequence length!)
Definition: blast_setup.hpp:65
CRef< CSearchResultSet > Run()
Run the PSI-BLAST engine for one iteration.
Definition: psiblast.cpp:95
class CPsiBlastImpl * m_Impl
Implementation class.
Definition: psiblast.hpp:130
CConstRef< objects::CPssmWithParameters > GetPssm() const
Accessor for the most recently used PSSM.
Definition: psiblast.cpp:89
size_t GetNumberOfThreads(void) const
Accessor for the number of threads to use.
CRef< objects::CPssmWithParameters > Run()
Runs the PSSM engine to compute the PSSM.
virtual void SetNumberOfThreads(size_t nthreads)
Mutator for the number of threads.
double GetInclusionThreshold() const
Returns InclusionThreshold.
~CPsiBlast()
Destructor.
Definition: psiblast.cpp:75
PSIBlastOptions * Get() const
Definition: blast_aux.hpp:330
virtual SBlastSequence GetBlastSequence(int index, EBlastEncoding encoding, objects::ENa_strand strand, ESentinelType sentinel, string *warnings=0) const
Return the sequence data for a sequence.
TAutoUint1Ptr data
Sequence data.
Definition: blast_setup.hpp:64
const char * GetMatrixName() const
Returns MatrixName.
int GetGapExtensionCost() const
Returns GapExtensionCost.
void SetPssm(CConstRef< objects::CPssmWithParameters > pssm)
This method allows the same object to be reused when performing multiple iterations.
Definition: psiblast.cpp:83
CConstRef< objects::CPssmWithParameters > GetPssm() const
Accessor for the most recently used PSSM.
CRef< objects::CPssmWithParameters > PsiBlastComputePssmFromAlignment(const objects::CBioseq &query, CConstRef< objects::CSeq_align_set > alignment, CRef< objects::CScope > database_scope, const CPSIBlastOptionsHandle &opts_handle, CConstRef< CBlastAncillaryData > ancillary_data, PSIDiagnosticsRequest *diagnostics_req=0)
Computes a PSSM from the result of a PSI-BLAST iteration.
Definition: psiblast.cpp:102
void PsiBlastAddAncillaryPssmData(objects::CPssmWithParameters &pssm, int gap_open, int gap_extend)
Even though the query sequence and the matrix gap costs are not a product of the PSSM engine,...
CPsiBlast(CRef< IQueryFactory > query_factory, CRef< CLocalDbAdapter > blastdb, CConstRef< CPSIBlastOptionsHandle > options)
Constructor to compare a single sequence against a database of protein sequences.
Definition: psiblast.cpp:58
void SetPssm(CConstRef< objects::CPssmWithParameters > pssm)
This method allows the same object to be reused when performing multiple iterations.
void SetUngappedStatisticalParams(CConstRef< CBlastAncillaryData > ancillary_data)
Sets the Karlin & Altschul parameters in the BlastScoreBlk to be used in PSSM generation.
int GetGapOpeningCost() const
Returns GapOpeningCost.
CRef< CSearchResultSet > Run()
Run the PSI-BLAST engine for one iteration.
CRef< CLocalDbAdapter > m_Subject
Reference to a BLAST subject/database object.
Definition: psiblast.hpp:127
int GetPseudoCount() const
Returns PseudoCount.
@ eBlastEncodingProtein
NCBIstdaa.
@ eSentinels
Use sentinel bytes.
Definition: blast_setup.hpp:94
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:469
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NcbiEmptyString
Definition: ncbistr.hpp:122
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
static int input()
Defines a concrete strategy to obtain PSSM input data for PSI-BLAST.
Declares CPsiBlast, the C++ API for the PSI-BLAST engine.
Declarations of auxiliary functions/classes for PSI-BLAST.
Defines implementation class for PSI-BLAST and PSI-BLAST 2 Sequences.
Declares the CPSIBlastOptionsHandle class.
C++ API for the PSI-BLAST PSSM engine.
double inclusion_ethresh
Minimum evalue for inclusion in PSSM calculation.
Int4 pseudo_count
Pseudocount constant.
Structure to allow requesting various diagnostics data to be collected by PSSM engine.
Definition: blast_psi.h:181
Structure to store sequence data and its length for use in the CORE of BLAST (it's a malloc'ed array ...
Definition: blast_setup.hpp:62
static string query
#define _ASSERT
Modified on Thu Feb 29 12:17:46 2024 by modify_doxy.py rev. 669887