52 THitComparator sorter (THitComparator::eQueryMin);
53 stable_sort(hitrefs.begin(), hitrefs.end(), sorter);
56 TSeqPos stmin = hitrefs.front()->GetQueryMin();
57 TSeqPos enmax = hitrefs.front()->GetQueryMax();
60 if((*it)->GetQueryMin() <= enmax) {
61 enmax =
max((*it)->GetQueryMax(), enmax);
63 len += enmax - stmin + 1;
64 stmin = (*it)->GetQueryMin();
65 enmax = (*it)->GetQueryMax();
68 len += enmax - stmin + 1;
77 bool is_protein_subject)
81 TSeqPos subj_start = (*h)->GetSubjStart();
82 TSeqPos subj_stop = (*h)->GetSubjStop();
83 TSeqPos qry_start = (*h)->GetQueryStart();
84 TSeqPos qry_stop = (*h)->GetQueryStop();
86 if (!is_protein_subject) {
90 (*h)->SetQueryStart(qry_start);
91 (*h)->SetQueryStop(qry_stop);
96 bool undef_score =
true;
98 if ((*oh)->GetSubjStart() == subj_start &&
99 subj_stop == (*oh)->GetSubjStop() &&
100 (*oh)->GetQueryStart() == qry_start &&
101 qry_stop == (*oh)->GetQueryStop() &&
102 (undef_score || score < (*oh)->GetScore())) {
103 score = (*oh)->GetScore();
118 copy(hits_new.begin(), hits_new.end(), back_inserter(hitrefs));
125 result += (*i)->GetScore();
134 result += (*i)->GetRawScore();
142 result->SetId().SetStr(
id);
150 result->SetId().SetStr(
id);
165 TSeqPos subj_leftmost = hitrefs.front()->GetSubjMin();
171 if (!(*h)->GetSubjStrand()) {
177 TSeqPos subj_min = (*h)->GetSubjMin();
178 TSeqPos subj_max = (*h)->GetSubjMax();
179 TSeqPos qry_min = (*h)->GetQueryMin();
180 TSeqPos qry_max = (*h)->GetQueryMax();
181 double pct_identity =(*h) ->GetIdentity();
182 double bit_score = (*h)->GetScore();
183 int score = (*h)->GetRawScore();
185 subj_leftmost =
min(subj_leftmost, subj_min);
186 subj_rightmost =
max(subj_rightmost, subj_max);
191 qry_id->
Assign(*(*h)->GetQueryId());
193 std_seg->SetLoc().push_back(qry_loc);
196 subj_id->
Assign(*(*h)->GetSubjId());
198 std_seg->SetLoc().push_back(subj_loc);
200 std_seg->SetScores().push_back(
RealScore(
"pct_identity",pct_identity*100));
201 std_seg->SetScores().push_back(
RealScore(
"bit_score",bit_score));
202 if(score > 0) std_seg->SetScores().push_back(
IntScore(
"score",score));
204 std_segs.push_back(std_seg);
208 result->SetData().SetAlign().push_back(seq_align);
211 uo->
SetType().SetStr(
"Compart Scores");
218 result->AddUserObject( *uo );
221 qry_id->
Assign(*hitrefs.front()->GetQueryId());
225 result->SetDesc().Set().push_back( align );
228 subj_id->
Assign(*hitrefs.front()->GetSubjId());
232 result->SetDesc().Set().push_back(region);
239 const vector<pair<THit::TCoord, THit::TCoord> > *gaps)
241 unique_ptr<CCompartmentAccessor<THit> > comps_ptr;
242 if (orig_hitrefs.empty())
247 bool is_protein_subject;
252 double max_subj_query_ratio = 0;
256 max_subj_query_ratio =
max(max_subj_query_ratio, subj_query_ratio);
258 is_protein_subject = max_subj_query_ratio < 2;
263 if (!hitref->GetQueryStrand())
265 if (!is_protein_subject) {
267 hitref->SetQueryMax(hitref->GetQueryMax()*3+2);
268 hitref->SetQueryMin(hitref->GetQueryMin()*3);
271 hitref->SetIdentity(0.9999f);
273 hitref->SetIdentity(hitref->GetScore()/hitref->GetLength());
275 hitrefs.push_back(hitref);
290 comps.
Run(hitrefs.begin(), hitrefs.end(),
NULL, gaps);
298 }
while (comps.
GetNext(comphits));
304 const vector<pair<THit::TCoord, THit::TCoord> > *gaps)
306 unique_ptr<CCompartmentAccessor<THit> > comps_ptr =
314 const vector<pair<THit::TCoord, THit::TCoord> > *gaps)
317 if (comps_ptr ==
NULL)
334 TSeqPos cur_begin_extended = cur_begin < max_extent ? 0 : cur_begin - max_extent;
335 TSeqPos cur_end_extended = cur_end + max_extent;
339 vector<pair<THit::TCoord, THit::TCoord> >::const_iterator it;
340 for(it = gaps->begin(); it != gaps->end(); ++it) {
343 if( gfrom < cur_begin && cur_begin_extended < gto ) {
344 if( gto < cur_begin ) {
345 cur_begin_extended = gto + 1;
347 cur_begin_extended = cur_begin;
350 if( cur_end < gto && gfrom < cur_end_extended ) {
351 if( cur_end < gfrom ) {
352 cur_end_extended = gfrom - 1;
354 cur_end_extended = cur_end;
361 cur_compartment_loc->
SetInt().SetFrom(cur_begin_extended);
362 cur_compartment_loc->
SetInt().SetTo(cur_end_extended);
364 if (prev_compartment_loc.
NotEmpty() &&
369 TSeqPos prev_end = prev_end_extended - max_extent;
370 if(prev_end < cur_begin) {
371 if (prev_end_extended >= cur_begin_extended) {
372 prev_end_extended = (prev_end + cur_begin)/2;
373 cur_begin_extended = prev_end_extended+1;
374 _ASSERT(cur_begin_extended <= cur_begin);
375 prev_compartment_loc->
SetInt().SetTo(prev_end_extended);
376 cur_compartment_loc->
SetInt().SetFrom(cur_begin_extended);
379 prev_compartment_loc->
SetInt().SetTo(prev_end);
380 cur_compartment_loc->
SetInt().SetFrom(cur_begin);
383 prev_compartment_loc=cur_compartment_loc;
384 results.push_back(compartment);
386 }
while (comps.
GetNext(comphits));
412 raw_score = (
int) score;
420 covered_aa, score, raw_score));
444 "Max genomic extent to look for exons beyond compartment ends.",
450 (
"compartment_penalty",
452 "Penalty to open a new compartment "
453 "(compartment identification parameter). "
454 "Multiple compartments will only be identified if "
455 "they have at least this level of coverage.",
460 (
"min_compartment_idty",
462 "Minimal compartment identity for multiple compartments",
467 (
"min_singleton_idty",
469 "Minimal compartment identity for single compartment",
477 "Maximal intron length",
489 "Ignore hit identity. Set all to 99.99%\nDeprecated: use -maximize arg",
495 "Maximal compartment overlap on subject in bp.",
502 "parameter to maximize",
516 "subject molecule type",
525 m_CompartmentPenalty(default_CompartmentPenalty),
526 m_MinCompartmentIdty(default_MinCompartmentIdty),
527 m_MinSingleCompartmentIdty(default_MinSingleCompartmentIdty),
528 m_MaxExtent(default_MaxExtent),
529 m_ByCoverage(default_ByCoverage),
530 m_MaxIntron(default_MaxIntron),
531 m_MaxOverlap(default_MaxOverlap),
532 m_Maximizing(default_Maximizing),
533 m_SubjectMol(default_SubjectMol)
538 m_CompartmentPenalty(args[
"compartment_penalty"].AsDouble()),
539 m_MinCompartmentIdty(args[
"min_compartment_idty"].AsDouble()),
542 m_MinSingleCompartmentIdty(args[
"min_singleton_idty"]
543 ? args[
"min_singleton_idty"].AsDouble()
544 : default_MinSingleCompartmentIdty),
545 m_MaxExtent(args[
"max_extent"].AsInteger()),
546 m_MaxIntron(args[
"max_intron"].AsInteger()),
547 m_MaxOverlap(args[
"max_overlap"].AsInteger())
549 if (args[
"maximize"]) {
559 if (args[
"by_coverage"]) {
568 if (args[
"subj-mol"].AsString() ==
"na") {
570 }
else if (args[
"subj-mol"].AsString() ==
"aa") {
@ eExtreme_Positional
numerical value
void remove_if(Container &c, Predicate *__pred)
TCoord GetQuerySpan(void) const
TCoord GetSubjSpan(void) const
static const double default_MinCompartmentIdty
static const int default_MaxIntron
static const EMaximizing default_Maximizing
static const double default_MinSingleCompartmentIdty
static const int default_MaxExtent
static void SetupArgDescriptions(CArgDescriptions *argdescr)
static const double default_CompartmentPenalty
static const int default_MaxOverlap
static const char * s_scoreNames[]
static const bool default_ByCoverage
CCompartOptions()
with default params
double m_CompartmentPenalty
double m_MinCompartmentIdty
static const ESubjectMol default_SubjectMol
double m_MinSingleCompartmentIdty
void Run(typename THitRefs::iterator start, typename THitRefs::iterator finish, CScope *scope=NULL, const vector< pair< TCoord, TCoord > > *gaps=NULL)
Execute: identify compartments.
void SetMaxIntron(TCoord mi)
Assign the maximum intron length, in base pairs.
bool GetFirst(THitRefs &compartment)
Initialize iteration over the results.
const TCoord * GetBox(size_t i) const
void SetMaxOverlap(TCoord max_overlap)
Assign the maximum length for compartments to overlap on the subject.
bool GetNext(THitRefs &compartment)
Proceed with iteration over the results.
static TCoord s_GetDefaultMaxOverlap(void)
Retrieve the default compartment overlap behaviour (no overlap).
static TCoord s_GetDefaultMaxIntron(void)
Retrieve the default maximum length of an intron.
vector< THitRef > THitRefs
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
TCompartments FormatAsAsn(CCompartmentAccessor< THit > *comps_ptr, CCompartOptions compart_options, const vector< pair< THit::TCoord, THit::TCoord > > *gaps)
unique_ptr< CCompartmentAccessor< THit > > CreateCompartmentAccessor(const THitRefs &orig_hitrefs, CCompartOptions compart_options, const vector< pair< THit::TCoord, THit::TCoord > > *gaps)
Selects compartments.
CRef< CScore > IntScore(const string &id, int value)
CRef< CSeq_annot > MakeCompartment(THitRefs &hitrefs)
USING_SCOPE(ncbi::objects)
void RemoveOverlaps(THitRefs &hitrefs)
TCompartments SelectCompartmentsHits(const THitRefs &orig_hitrefs, CCompartOptions compart_options, const vector< pair< THit::TCoord, THit::TCoord > > *gaps)
Composition of first two functions.
CRef< CScore > RealScore(const string &id, double value)
double TotalScore(THitRefs &hitrefs)
void RestoreOriginalHits(THitRefs &hitrefs, const THitRefs &orig_hitrefs, bool is_protein_subject)
int TotalRawScore(THitRefs &hitrefs)
int CountQueryCoverage(THitRefs &hitrefs)
TCompartmentStructs MakeCompartments(const TCompartments &compartments, CCompartOptions compart_options)
CSplign::THitRefs THitRefs
vector< SCompartment > TCompartmentStructs
list< CRef< CSeq_annot > > TCompartments
static void s_RunGreedy(typename THitRefs::iterator hri_beg, typename THitRefs::iterator hri_end, THitRefs *phits_new, TCoord min_hit_len=100, double min_hit_idty=.9, TCoord margin=1, TCoord retain_overlap=0, EUnique_type unique_type=e_Strict)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void SetConstraint(const string &name, const CArgAllow *constraint, EConstraintNegate negate=eConstraint)
Set additional user defined constraint on argument value.
void SetDependency(const string &arg1, EDependency dep, const string &arg2)
Define a dependency.
void AddDefaultKey(const string &name, const string &synopsis, const string &comment, EType type, const string &default_value, TFlags flags=0, const string &env_var=kEmptyStr, const char *display_value=nullptr)
Add description for optional key with default value.
@ eExcludes
One argument excludes another.
@ eBoolean
{'true', 't', 'false', 'f'}, case-insensitive
@ eDouble
Convertible into a floating point number (double)
@ eString
An arbitrary string.
@ eInteger
Convertible into an integer number (int or Int8)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized replacement for CSerialObject::Assign, which itself is not so efficient.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
ENa_strand GetStrand(void) const
Get the location's strand.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
TSeqPos GetStop(ESeqLocExtremes ext) const
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – points to an object and has a non-null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
bool IsStr(void) const
Check if variant Str is selected.
const TData & GetData(void) const
Get the Data member data.
const TStr & GetStr(void) const
Get the variant data.
TInt GetInt(void) const
Get the variant data.
TReal GetReal(void) const
Get the variant data.
void SetType(TType &value)
Assign a value to Type data member.
const TType & GetType(void) const
Get the Type member data.
list< CRef< CStd_seg > > TStd
void SetSegs(TSegs &value)
Assign a value to Segs data member.
void SetType(TType value)
Assign a value to Type data member.
@ eType_partial
mapping pieces together
const Tdata & Get(void) const
Get the member data.
TIds & SetIds(void)
Assign a value to Ids data member.
void SetDesc(TDesc &value)
Assign a value to Desc data member.
void SetAlign_type(TAlign_type value)
Assign a value to Align_type data member.
const TUser & GetUser(void) const
Get the variant data.
const TDesc & GetDesc(void) const
Get the Desc member data.
bool IsRegion(void) const
Check if variant Region is selected.
TRegion & SetRegion(void)
Select the variant.
TAlign & SetAlign(void)
Select the variant.
const TRegion & GetRegion(void) const
Get the variant data.
bool IsUser(void) const
Check if variant User is selected.
list< CRef< CAnnotdesc > > Tdata
@ eAlign_type_ref
set of alignments to the same sequence
unsigned int
A callback function used to compare two keys in a database.
constexpr auto sort(_Init &&init)
Magic spell ;-) needed for some weird compilers... very empiric.
const GenericPointer< typename T::ValueType > T2 value
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)