77 seqloc->
SetInt().SetFrom(from);
78 seqloc->
SetInt().SetTo(to);
81 loc_list.push_back(seqloc);
83 seg_list.push_back(
SSegmentLoc(query_index, from, to));
97 vector<SSegmentLoc>& filler_segs)
100 vector<CRangeCollection<TOffset> > sorted_segs(num_queries);
109 CHit *subhit = *subitr;
120 ITERATE(vector<int>, it, blastp_indices) {
132 itr->GetFrom() - 1, filler_segs,
i);
134 seg_start = itr->GetToOpen();
144 seq_length - 1, filler_segs,
i);
149 printf(
"Filler Segments:\n");
150 for (
int i = 0;
i < (
int)filler_segs.size();
i++) {
151 printf(
"query %d %4d - %4d\n",
152 filler_segs[
i].seq_index,
153 filler_segs[
i].GetFrom(),
154 filler_segs[
i].GetTo());
170 const vector<int>& indices,
172 vector<SSegmentLoc>& filler_segs)
174 const int kBlastBatchSize = 10000;
175 size_t num_full_queries = indices.size();
177 if (filler_locs.empty())
185 blastp_opts->SetEvalueThreshold(
max(blastp_evalue, 10.0));
187 blastp_opts->SetSegFiltering(
false);
192 while (batch_start < (
int)filler_locs.size()) {
197 for (
int i = batch_start;
i < (
int)filler_locs.size();
i++) {
198 const CSeq_loc& curr_loc = *filler_locs[
i];
201 if (batch_size + fragment_size >= kBlastBatchSize && batch_size > 0)
205 batch_size += fragment_size;
208 CBl2Seq blaster(curr_batch, queries, *blastp_opts);
214 "Alignment interrupted");
221 for (
int i = 0;
i < (
int)curr_batch.size();
i++) {
223 int list1_oid = filler_segs[batch_start +
i].seq_index;
225 for (
size_t j = 0; j < num_full_queries; j++) {
229 if (list1_oid == indices[j])
235 v[
i * num_full_queries + j]->
Get()) {
249 const CScore& curr_score = **score_itr;
257 if (evalue > blastp_evalue)
261 align_score, denseg));
277 const CScore& curr_score = **score_itr;
289 if (evalue > blastp_evalue)
293 indices[j], align_score, dendiag));
301 batch_start += curr_batch.size();
307 const vector<int>& indices)
329 vector< CRef<objects::CSeq_loc> > filler_locs;
330 vector<SSegmentLoc> filler_segs;
336 printf(
"blastp hits:\n");
339 printf(
"query %d %4d - %4d query %d %4d - %4d score %d\n",
364 unique_ptr< vector<int> >
result(
new vector<int>());
383 int left = -1, right = -1;
385 for (
size_t i=0;
i < left_inds->size();
i++) {
386 for (
size_t j=0;j < right_inds->size();j++) {
387 if (dist > dmat((*left_inds)[
i], (*right_inds)[j]) || left < 0) {
388 left = (*left_inds)[
i];
389 right = (*right_inds)[j];
390 dist = dmat(left, right);
400 blastp_opts->SetEvalueThreshold(
max(blastp_evalue, 10.0));
401 blastp_opts->SetSegFiltering(
false);
406 CBl2Seq blaster(left_query, right_query, *blastp_opts);
424 const CScore& curr_score = **score_itr;
432 if (evalue > blastp_evalue)
450 const CScore& curr_score = **score_itr;
462 if (evalue > blastp_evalue)
466 align_score, dendiag));
473 ITERATE(vector<int>, it, *right_inds) {
474 left_inds->push_back(*it);
482 const vector<TPhyTreeNode*>& cluster_trees)
488 ITERATE(vector<TPhyTreeNode*>, it, cluster_trees) {
497 printf(
"in-cluster blastp hits:\n");
500 printf(
"query %d %4d - %4d query %d %4d - %4d score %d\n",
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
int TOffset
Basic data type for offsets into a sequence.
Declares the CBl2Seq (BLAST 2 Sequences) class.
Declares the CBlastProteinOptionsHandle class.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
Runs the BLAST algorithm between 2 sequences.
Handle to the protein-protein options to the BLAST algorithm.
const TDistMatrix & GetDistMatrix(void) const
Get distance matrix.
void Append(CHitList &hitlist)
Append one hitlist to another.
int Size() const
Retrieve number of hits in list.
void PurgeAllHits()
Delete all hits unconditionally.
bool Empty()
Determine whether a list contains no hits.
CHit * GetHit(int index)
Retrieve a hit from the hitlist.
void AddToHitList(CHit *hit)
Append a hit to the hitlist.
A generalized representation of a pairwise alignment.
TSubHit & GetSubHit()
Retrieve a list of subhits.
int m_Score
Score of alignment.
int m_SeqIndex1
Numerical identifier for first sequence in alignment.
int m_SeqIndex2
Numerical identifier for second sequence in alignment.
TRange m_SeqRange1
The range of offsets on the first sequence.
static const int kMinHitSize
Not always used, but useful to avoid extremely small hits.
TRange m_SeqRange2
The range of offsets on the second sequence.
bool HasSubHits()
Query if a CHit has a hierarchy of subhits available.
vector< CHit * > TSubHit
Hits can be grouped hierarchically.
double GetBlastpEvalue(void) const
Get e-value for accepting Blastp hits.
bool GetVerbose(void) const
Get verbose mode.
Simultaneously align multiple protein sequences.
SProgress m_ProgressMonitor
CRef< objects::CScope > m_Scope
void x_FindLocalInClusterHits(const vector< TPhyTreeNode * > &cluster_trees)
Run blast on sequences from each cluster subtree.
vector< CRef< objects::CSeq_loc > > m_tQueries
CHitList m_LocalInClusterHits
void x_AlignFillerBlocks(const blast::TSeqLocVector &queries, const vector< int > &indices, vector< CRef< objects::CSeq_loc > > &filler_locs, vector< SSegmentLoc > &filler_segs)
Run blastp, aligning the collection of filler fragments against the entire input dataset.
void x_FindLocalHits(const blast::TSeqLocVector &queries, const vector< int > &indices)
Run blast on selected input sequences and postprocess the results.
@ eInterrupt
Alignment interrupted through callback function.
vector< CSequence > m_QueryData
void x_AssignDefaultResFreqs()
CConstRef< CMultiAlignerOptions > m_Options
void x_AddNewSegment(vector< CRef< objects::CSeq_loc > > &loc_list, const CRef< objects::CSeq_loc > &query, TOffset from, TOffset to, vector< SSegmentLoc > &seg_list, int query_index)
Create a new query sequence that is a subset of a previous query sequence.
void x_MakeFillerBlocks(const vector< int > &indices, vector< CRef< objects::CSeq_loc > > &filler_locs, vector< SSegmentLoc > &filler_segs)
Turn all fragments of selected query sequence not already covered by a domain hit into a separate que...
unique_ptr< vector< int > > x_AlignClusterQueries(const TPhyTreeNode *node)
definition of a Culling tree
Interface for CMultiAligner.
CConstRef< objects::CSeq_align_set > GetSeqAlign() const
Accessor for the Seq-align results.
virtual TSeqAlignVector Run()
Perform BLAST search Assuming N queries and M subjects, the structure of the returned vector is as fo...
CRef< CSearchResultSet > RunEx()
Performs the same functionality as Run(), but it returns a different data type.
CSearchResults & GetResults(size_type qi, size_type si)
Retrieve results for a query-subject pair contained by this object.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
TNodeList_CI SubNodeBegin(void) const
Return first const iterator on subnode list.
TNodeList::const_iterator TNodeList_CI
bool IsLeaf() const
Report whether this is a leaf node.
TNodeList_CI SubNodeEnd(void) const
Return last const iterator on subnode list.
const TValue & GetValue(void) const
Return node's value.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
const TStr & GetStr(void) const
Get the variant data.
const TDenseg & GetDenseg(void) const
Get the variant data.
const TScores & GetScores(void) const
Get the Scores member data.
E_Choice Which(void) const
Which variant is currently selected.
vector< CRef< CScore > > TScore
TInt GetInt(void) const
Get the variant data.
const TDendiag & GetDendiag(void) const
Get the variant data.
const TValue & GetValue(void) const
Get the Value member data.
vector< CRef< CScore > > TScores
list< CRef< CSeq_align > > Tdata
const TScore & GetScore(void) const
Get the Score member data.
TReal GetReal(void) const
Get the variant data.
list< CRef< CDense_diag > > TDendiag
const TId & GetId(void) const
Get the Id member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
TFrom GetFrom(void) const
Get the From member data.
TTo GetTo(void) const
Get the To member data.
const TInt & GetInt(void) const
Get the variant data.
unsigned int
A callback function used to compare two keys in a database.
const TYPE & Get(const CNamedParameterList *param)
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Structure to represent a single sequence to be fed to BLAST.