81 size_t Dims = Denseg.
GetDim();
83 for(
size_t CurrSeg = 0; CurrSeg < SegCount; ++CurrSeg) {
84 int Index = (Dims*CurrSeg)+Row;
85 int CurrStart = Denseg.
GetStarts()[Index];
86 if( CurrStart != -1) {
88 CurrLoc->
SetInt().SetId().Assign( *Denseg.
GetIds()[Row] );
89 CurrLoc->
SetInt().SetFrom() = CurrStart;
90 CurrLoc->
SetInt().SetTo() = CurrStart + Denseg.
GetLens()[CurrSeg];
93 Accum->
SetMix().Set().push_back(CurrLoc);
114 TSeqPos PosCoveredBases = 0, NegCoveredBases = 0;
120 PosCoveredBases += (*LocIter)->GetInt().GetLength();
122 NegCoveredBases += (*LocIter)->GetInt().GetLength();
128 return max(PosCoveredBases, NegCoveredBases);
144 AccumResults->Get()) {
145 int BestRank = QueryIter->second->GetBestRank();
149 << QueryIter->second->GetQueryId()->AsFastaString()
150 <<
" needs Instanced MM Aligner.");
153 << QueryIter->second->GetQueryId()->AsFastaString()
154 <<
" fails the minimum percent coverage cutoff. Skipping.");
172 vector<CRef<CInstance> > Instances;
176 if(Instances.empty()) {
180 ERR_POST(
Info <<
" Instance Count: " << Instances.size());
192 << Inst.
Subject.GetId().AsFastaString());
194 << Inst.
Query.GetTo() <<
":"
196 <<
" and s: " << Inst.
Subject.GetFrom() <<
":"
205 Inst.
Query.GetStrand(),
216 .
Print(
"instance_query", Inst.
Query.GetId().AsFastaString())
217 .
Print(
"instance_subject", Inst.
Subject.GetId().AsFastaString())
218 .
Print(
"instance_align", (GlobalDs.
IsNull() ?
"false" :
"true"));
223 Result->
SetSegs().SetDenseg().Assign(*GlobalDs);
226 ResultSet->
Set().push_back(Result);
255 if(!ResultSet->
Get().empty()) {
291 double PercentRemaining = 1.0-PercentComplete;
293 double Factor = PercentRemaining/PercentComplete;
297 ERR_POST(
Error <<
" Instanced Aligner took over 5 minutes. Timed out.");
302 double TimeEstimated = Span.
GetAsDouble() * Factor;
305 ERR_POST(
Error <<
" Instanced Aligner expected to take " << TimeEstimated
307 <<
" minutes. Terminating Early.");
353 TSeqPos QueryStrandedStart, QueryStrandedStop;
355 QueryStrandedStart = QueryStart;
356 QueryStrandedStop = QueryStop;
358 QueryStrandedStart = ( (QueryVec.
size()-1) - QueryStop);
359 QueryStrandedStop = ( (QueryVec.
size()-1) - QueryStart);
362 string QuerySeq, SubjectSeq;
363 QueryVec.
GetSeqData(QueryStrandedStart, QueryStrandedStop+1, QuerySeq);
364 SubjectVec.
GetSeqData(SubjectStart, SubjectStop+1, SubjectSeq);
389 Score = Aligner.
Run();
393 ResultDenseg = Aligner.
GetDense_seg(ExtractQueryStart, Strand, QueryId,
405 ResultDenseg->
OffsetRow(1, SubjectStart);
425 objects::CScope& Scope)
430 list<CConstRef<CSeq_align> > In;
446 CDense_seg& Denseg = (*AlignIter)->SetSegs().SetDenseg();
479 Pluses->Set().push_back(*AlignIter);
481 Minuses->
Set().push_back(*AlignIter);
484 if(!Pluses->Set().empty()) {
489 Instances.push_back(Inst);
493 if(!Minuses->
Set().empty()) {
498 Instances.push_back(Inst);
529 Query.SetId().Assign(AlignSet.
Get().front()->GetSeq_id(0));
530 Subject.SetId().Assign(AlignSet.
Get().front()->GetSeq_id(1));
532 Query.SetStrand() = AlignSet.
Get().front()->GetSeqStrand(0);
533 Subject.SetStrand() = AlignSet.
Get().front()->GetSeqStrand(1);
542 Query.SetFrom(
min(
Query.GetFrom(), (*AlignIter)->GetSeqStart(0)));
545 Query.SetTo(
max(
Query.GetTo(), (*AlignIter)->GetSeqStop(0)));
583 return (
Subject.GetLength() /
double(
Query.GetLength()));
589 return Query.GetLength();
613 TSubjectCoverage BestCoverage;
614 double MaxCoverage = 0;
621 string IdStr = Set->
Get().front()->GetSeq_id(1).AsFastaString();
622 double SubjCoverage = 0;
625 (*AlignIter)->GetNamedScore(
"pct_coverage", PctCov);
626 SubjCoverage =
max(SubjCoverage, PctCov);
628 BestCoverage[IdStr] = SubjCoverage;
629 MaxCoverage =
max(SubjCoverage, MaxCoverage);
633 typedef vector<CRef<CInstance> > TInstVector;
638 TInstVector SubjInstances;
641 string SubjIdStr = Set->
Get().front()->GetSeq_id(1).AsFastaString();
642 if(BestCoverage[SubjIdStr] < (MaxCoverage*0.10)) {
654 bool Inserted =
false;
655 bool Contained =
false;
656 ITERATE(TInstVector, InstIter, SubjInstances) {
657 bool CurrContained = (*InstIter)->IsAlignmentContained(**AlignIter);
658 Contained |= CurrContained;
663 int GapDist = (*InstIter)->GapDistance(**AlignIter);
664 if(GapDist < 20000) {
665 (*InstIter)->MergeIn(*AlignIter);
672 SubjInstances.push_back(Inst);
678 TInstVector CleanedInstances;
680 ITERATE(TInstVector, InstIter, SubjInstances) {
683 if((*InstIter)->Alignments.Get().size() <= 1)
692 bool DupeFound =
false;
694 if( (*AlignIter)->GetSeqStart(0) == (*SourceIter)->GetSeqStart(0) &&
695 (*AlignIter)->GetSeqStart(1) == (*SourceIter)->GetSeqStart(1) &&
696 (*AlignIter)->GetSeqStop(0) == (*SourceIter)->GetSeqStop(0) &&
697 (*AlignIter)->GetSeqStop(1) == (*SourceIter)->GetSeqStop(1)) {
707 bool Contained =
false;
708 ITERATE(TInstVector, CleanIter, CleanedInstances) {
709 bool Curr = (*CleanIter)->IsAlignmentContained(**AlignIter);
719 Dupe |= ((*OldInstIter)->Query.Equals(Inst->Query)
720 && (*OldInstIter)->Subject.Equals(Inst->Subject));
723 CleanedInstances.push_back(Inst);
728 copy(CleanedInstances.begin(), CleanedInstances.end(),
729 insert_iterator<TInstVector>(Instances, Instances.end()));
739 vector<CRef<CInstance> >::iterator Curr;
740 Curr = Instances.begin();
741 for(Curr = Instances.begin(); Curr != Instances.end(); ) {
742 if( (*Curr)->SubjToQueryRatio() > MaxRatio ||
743 (*Curr)->SubjToQueryRatio() < 0.10 )
744 Curr = Instances.erase(Curr);
751 for(Curr = Instances.begin(); Curr != Instances.end(); ++Curr) {
752 TSeqPos CurrLength = (*Curr)->QueryLength();
753 LongestInstance =
max(CurrLength, LongestInstance);
757 for(Curr = Instances.begin(); Curr != Instances.end(); ) {
758 if( (*Curr)->QueryLength() <= (LongestInstance*0.05))
759 Curr = Instances.erase(Curr);
764 vector<CRef<CInstance> >::iterator Outer, Inner;
765 for(Outer = Instances.begin(); Outer != Instances.end(); ++Outer) {
766 for(Inner = Outer+1; Inner != Instances.end(); ) {
767 if( (*Outer)->Query.Equals((*Inner)->Query) &&
768 (*Outer)->Subject.Equals((*Inner)->Subject) ) {
769 Inner = Instances.erase(Inner);
781 double BestPctCoverage = -1.0;
790 double PctCoverage = -1.0;
791 (*AlignIter)->GetNamedScore(
"pct_coverage", PctCoverage);
792 BestPctCoverage =
max(BestPctCoverage, PctCoverage);
TSeqPos s_CalcCoverageCount(TAlignSetRef Alignments, int Row, CScope &Scope)
bool s_ProgressCallback(CNWAligner::SProgressInfo *ProgressInfo)
CRef< CSeq_loc > s_CoverageSeqLoc(TAlignSetRef Alignments, int Row, CScope &Scope)
Declares the CBl2Seq (BLAST 2 Sequences) class.
Declares the CBlastNucleotideOptionsHandle class.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
Definitions of special type used in BLAST.
Main argument class for BLASTN application.
class CAlignCleanup implements an alignment cleanup utility based on the C++ alignment manager.
void Cleanup(const TAligns &aligns_in, TAligns &aligns_out, EMode mode=eDefault)
void TrimEndGaps()
Trim leading/trailing gaps if possible.
ENa_strand GetSeqStrand(TDim row) const
void Reverse(void)
Reverse the segments' orientation.
void OffsetRow(TDim row, TSignedSeqPos offset)
Offset row's coords.
double SubjToQueryRatio() const
int GapDistance(const objects::CSeq_align &Align) const
objects::CSeq_interval Subject
void MergeIn(CRef< objects::CSeq_align > Align)
objects::CSeq_align_set Alignments
TSeqPos QueryLength() const
bool IsAlignmentContained(const objects::CSeq_align &Align) const
CInstance(const CRef< objects::CSeq_align > Align)
objects::CSeq_interval Query
TAlignResultsRef GenerateAlignments(objects::CScope &Scope, ISequenceSet *QuerySet, ISequenceSet *SubjectSet, TAlignResultsRef AccumResults)
void x_FilterInstances(vector< CRef< CInstance > > &Instances, double MaxRatio)
CRef< objects::CSeq_align_set > x_RunCleanup(const objects::CSeq_align_set &AlignSet, objects::CScope &Scope)
void x_RunAligner(objects::CScope &Scope, CQuerySet &QueryAligns, TAlignResultsRef Results)
void x_GetDistanceInstances(CQuerySet &QueryAligns, objects::CScope &Scope, vector< CRef< CInstance > > &Instances)
void x_GetCleanupInstances(CQuerySet &QueryAligns, objects::CScope &Scope, vector< CRef< CInstance > > &Instances)
CRef< objects::CDense_seg > x_RunMMGlobal(const objects::CSeq_id &QueryId, const objects::CSeq_id &SubjectId, objects::ENa_strand Strand, TSeqPos QueryStart, TSeqPos QueryStop, TSeqPos SubjectStart, TSeqPos SubjectStop, objects::CScope &Scope)
bool x_MinCoverageCheck(const CQuerySet &QueryAligns)
TAssemblyToSubjectSet & Get()
CConstRef< objects::CSeq_id > GetQueryId() const
TSeqPos GetSeqStop(TDim row) const
void SetNamedScore(const string &id, int score)
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Declares the CDiscNucleotideOptionsHandle class.
void SetProgressCallback(FProgressCallback prg_callback, void *data)
void SetScoreMatrix(const SNCBIPackedScoreMatrix *scoremat)
void SetWms(TScore value)
CRef< objects::CDense_seg > GetDense_seg(TSeqPos query_start, objects::ENa_strand query_strand, TSeqPos subj_start, objects::ENa_strand subj_strand, bool trim_end_gaps=false) const
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
CDiagContext_Extra & Print(const string &name, const string &value)
The method does not print the argument, but adds it to the string.
CDiagContext & GetDiagContext(void)
Get diag context instance.
CDiagContext_Extra Extra(void) const
Create a temporary CDiagContext_Extra object.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
void Info(CExceptionArgs_Base &args)
CRef< CSeq_loc > Seq_loc_Merge(const CSeq_loc &loc, CSeq_loc::TOpFlags flags, CScope *scope)
Merge ranges in the seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eStrand_Plus
Plus strand.
@ eStrand_Minus
Minus strand.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_SCOPE(ns)
Define a new scope.
double GetAsDouble(void) const
Return time span as number of seconds.
long GetCompleteSeconds(void) const
Get number of complete seconds.
@ eCurrent
Use current time. See also CCurrentTime.
static bool IsSignaled(TSignalMask signals=eSignal_Any)
Check that any of specified signals is received.
const TDenseg & GetDenseg(void) const
Get the variant data.
Tdata & Set(void)
Assign a value to data member.
const TStarts & GetStarts(void) const
Get the Starts member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
const TLens & GetLens(void) const
Get the Lens member data.
void SetType(TType value)
Assign a value to Type data member.
TDim GetDim(void) const
Get the Dim member data.
TStrands & SetStrands(void)
Assign a value to Strands data member.
const TIds & GetIds(void) const
Get the Ids member data.
bool CanGetStrands(void) const
Check if it is safe to call GetStrands method.
TNumseg GetNumseg(void) const
Get the Numseg member data.
list< CRef< CSeq_align > > Tdata
const TStrands & GetStrands(void) const
Get the Strands member data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
@ eType_partial
mapping pieces together
ENa_strand
strand of nucleic acid
list< CRef< CSeq_loc > > Tdata
const Tdata & Get(void) const
Get the member data.
const TMix & GetMix(void) const
Get the variant data.
Main class to perform a BLAST search on the local machine.
Magic spell ;-) needed for some weird compilers... very empiric.
Setup interrupt signal handling.
Defines NCBI C++ exception handling.
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
void Out(T t, int w, CNcbiOstream &to=cout)