95 ostr <<
"Length of the aligned segments, including the length of all gap segments";
98 ostr <<
"Length of the aligned segments, excluding all gap segments; thus, this is the length of all actually aligned (i.e., match or mismatch) bases";
112 bool exon_specific =
false)
125 "'product_gap_length' and 'genomic_gap_length' scores "
126 "valid only for Spliced-seg alignments");
135 ostr <<
"Total number of gap bases missing";
138 ostr <<
"Number of gap openings";
142 ostr <<
" in product exons";
143 }
else if(
m_Row == 1) {
144 ostr <<
" in genomic exons";
149 }
else if(
m_Row == 1) {
150 ostr <<
" in subject";
192 "failed to retrieve sequence for " +
197 "Can't count frameshifts on a genomic alignment");
212 ostr <<
"Number of ";
216 ostr <<
"frameshifting insertions";
218 ostr <<
" in the query";
219 }
else if(
m_Row == 1) {
220 ostr <<
" in the subject";
222 ostr <<
" or deletions";
238 ostr <<
"Length of the longest gap observed in either query or subject";
259 ostr <<
"Length of unaligned sequence 3' of alignment end";
268 double score_value = 0;
297 ostr <<
"Length of polya tail";
318 double product_length = 0;
327 if (product_length == 0) {
342 ostr <<
"Length of unaligned sequence contained within the aligned "
343 "range. Note that this does not count gaps; rather, it computes "
344 "the length of all missing, unaligned sequence bounded by the "
354 double score_value = 0;
361 CSpliced_seg::TExons::const_reverse_iterator it =
363 CSpliced_seg::TExons::const_reverse_iterator
prev =
365 CSpliced_seg::TExons::const_reverse_iterator end =
369 for (++it; it != end; ++it, ++
prev) {
370 score_value += (*it)->GetProduct_start().GetNucpos() -
371 (*prev)->GetProduct_end().GetNucpos() - 1;
374 for (++it; it != end; ++it, ++
prev) {
375 TSeqPos curr_nuc = (*it)->GetProduct_start().AsSeqPos();
376 TSeqPos last_nuc = (*prev)->GetProduct_end().AsSeqPos();
377 score_value += curr_nuc - last_nuc - 1;
382 CSpliced_seg::TExons::const_iterator it =
384 CSpliced_seg::TExons::const_iterator
prev =
386 CSpliced_seg::TExons::const_iterator end =
390 for (++it; it != end; ++it, ++
prev) {
391 score_value += (*it)->GetProduct_start().GetNucpos() -
392 (*prev)->GetProduct_end().GetNucpos() - 1;
395 for (++it; it != end; ++it, ++
prev) {
396 TSeqPos curr_nuc = (*it)->GetProduct_start().AsSeqPos();
397 TSeqPos last_nuc = (*prev)->GetProduct_end().AsSeqPos();
398 score_value += curr_nuc - last_nuc - 1;
407 "internal_unaligned not implemented for this "
408 "type of alignment");
433 ostr <<
"Start of query sequence (0-based coordinates)";
435 else if (
m_Row == 1) {
436 ostr <<
"Start of subject sequence (0-based coordinates)";
441 ostr <<
"End of query sequence (0-based coordinates)";
443 else if (
m_Row == 1) {
444 ostr <<
"End of subject sequence (0-based coordinates)";
471 ostr <<
"Ratio of subject aligned range length to query aligned "
496 ostr <<
"Length of query sequence";
498 else if (
m_Row == 1) {
499 ostr <<
"Length of subject sequence";
519 "Can't get length for sequence " +
556 "Symmetric overlap, as a percent (0-100). This is similar to "
557 "coverage, except that it takes into account both query and "
558 "subject sequence lengths. Alignment length is divided by "
560 <<
" of the two sequence lengths";
568 double pct_overlap = length * 100;
573 "failed to retrieve sequence for " +
579 "failed to retrieve sequence for " +
610 "Length of the shortest exon. Note that this score has "
611 "meaning only for Spliced-seg alignments, as would be generated "
612 "by Splign or ProSplign.";
633 "Length of the longest intron. Note that this score has "
634 "meaning only for Spliced-seg alignments, as would be generated "
635 "by Splign or ProSplign.";
656 "Count of the number of exons. Note that this score has "
657 "meaning only for Spliced-seg alignments, as would be generated "
658 "by Splign or ProSplign.";
676 "'exon_count' score is valid only for "
677 "Spliced-seg alignments");
689 "Minimum distance between an indel and a splice site. Note that "
690 "this score has meaning only for Spliced-seg alignments, as would "
691 "be generated by Splign or ProSplign.";
704 unsigned result = INT_MAX;
710 unsigned distance_5prime = 0, distance_3prime = 0;
711 bool found_indel =
false;
714 switch (part.
Which()) {
722 distance_5prime += part.
GetDiag();
736 distance_5prime = INT_MAX;
741 switch (part.
Which()) {
749 distance_3prime += part.
GetDiag();
763 distance_3prime = INT_MAX;
773 "No indels found in exons with splice sites");
788 if ((*it)->GetId() == gcode) {
812 <<
" codon was found, 0 otherwise. Note that this score has "
813 "meaning only for Spliced-seg alignments, as would be generated "
814 "by Splign or ProSplign.";
823 bool is_protein =
false;
826 bool score_precalculated=
false;
835 ? (*it)->IsStart_codon_found()
836 : (*it)->IsStop_codon_found() ) {
837 score_precalculated=
true;
839 ? (*it)->GetStart_codon_found()
840 : (*it)->GetStop_codon_found())
846 if (score_precalculated) {
852 if (!product_length) {
857 "Can't get sequence " +
860 is_protein = product_bsh.
IsAa();
882 if ( !genomic_bsh ) {
884 "failed to retrieve sequence for " +
895 cds_loc = mapper.
Map(loc);
901 "failed to retrieve sequence for " +
909 for ( ; feat_it; ++feat_it) {
928 total_q_range.
SetTo(total_q_range.
GetTo() - 3);
943 total_q_range.
SetTo(total_q_range.
GetTo() + offs);
950 query_id, total_q_range.
GetFrom(),
951 total_q_range.
GetTo(), q_strand);
954 cds_loc = mapper.
Map(adjusted_loc);
971 int to = from + 2 * direction;
973 if (to >= 0 && to < genomic_len) {
1013 gcode = gc->
GetId();
1047 "Count of the number of internal stop codons encountered when "
1048 "translating the aligned coding region. Note that this has meaning "
1049 "only for Spliced-seg transcript alignments with a transcript that "
1050 "has an annotated cdregion, or for Spliced-seg protein alignments.";
1062 return stop_finder.
FindStops(align).size();
1090 -> GetSeq().GetAnnot().front()
1091 -> GetData().GetFtable().front();
1094 cds->
SetData().SetCdregion().ResetCode_break();
1100 trans.resize(trans.size() - 1);
1104 score += (*
i ==
'*');
1149 "Percent-identity score confined to the coding region "
1150 "associated with the align transcipt. Not supported "
1151 "for standard-seg alignments.";
1155 "Percent-coverage score confined to the coding region "
1156 "associated with the align transcipt.";
1159 ostr <<
"Start position of product's coding region.";
1162 ostr <<
"End position of product's coding region.";
1165 ostr <<
" Note that this has meaning only if product has a coding "
1166 "region annotation.";
1179 "failed to retrieve sequence for " +
1198 cds_ranges += it.GetRange();
1230 ?
"Percentage of query sequence aligned to subject (0.0-100.0)"
1231 :
"Percentage of subject sequence aligned to query (0.0-100.0)");
1244 return covered_bases ? 100.0f * double(covered_bases) / double(seq_len)
1270 ostr <<
"Taxid of query sequence";
1272 else if (
m_Row == 1) {
1273 ostr <<
"Taxid of subject sequence";
1305 "Position of last splice site. Note that this has meaning only "
1306 "for Spliced-seg transcript alignments, and only if the alignment "
1307 "has at least two exons.";
1335 "last_splice_site score inapplicable");
1354 string row_name =
m_Row == 0 ?
"query" :
"subject";
1355 string range_type =
m_IncludeGaps ?
"total aligned range" :
"aligned bases";
1357 "size of overlap of " + range_type +
" with any alignments "
1358 "over the same " + row_name +
" sequence that have previously "
1359 "passed this filter. Assumes that input alignments "
1360 "are collated by " + row_name +
", and then sorted by priority for "
1361 "inclusion in the output.";
1417 string row_name =
m_Row == 0 ?
"query" :
"subject";
1418 string range_type =
m_IncludeGaps ?
"total aligned range" :
"aligned bases";
1420 "size of overlap of " + range_type +
" with any alignments "
1421 "over the same " + row_name +
" sequence that have previously "
1422 "passed this filter. Assumes that input alignments "
1423 "are collated by " + row_name +
", and then sorted by priority for "
1424 "inclusion in the output.";
1447 overlap &= it->second;
1463 it->second += align.GetSeqRange(
m_Row);
1465 it->second += align.GetAlignedBases(
m_Row);
1490 "restrict to the first N subjects seen for each query";
1499 int index_row =
m_Row;
1500 int alt_row =
abs(index_row - 1);
1505 if (it == ranks.
end()) {
1539 "Recompute a raw BLAST score for an arbitrary protein-to-DNA "
1540 "alignment, using a Spliced-seg as input. Computation is "
1541 "constrained to accept only protein-to-nucleotide Spliced-seg "
1542 "alignments and is slightly different than the raw BLAST score, "
1543 "in that gap computations differ due to the lack of true "
1544 "composition based statistics. These differences are minimal.";
1557 "CScore_TblastnScore: "
1558 "valid only for spliced-seg alignments");
1564 "CScore_TblastnScore: "
1565 "valid only for protein spliced-seg alignments");
1591 "Adjusted protein score (ratio of actual score to perfect score)";
1613 double perfect_score =
max(q_perfect, s_perfect);
1614 return perfect_score ? score / perfect_score : 0;
1625 "failed to retrieve sequence for " +
1633 seg.
SetIds().push_back(
id);
1634 seg.
SetIds().push_back(
id);
1660 <<
" exon. Note that this score has "
1661 "meaning only for Spliced-seg alignments, as would be generated "
1662 "by Splign or ProSplign, and only if it has at least one intron.";
1675 "CScore_EdgeExonInfo: "
1676 "valid only for spliced-seg alignments with at least one intron");
1687 if ((*score_it)->CanGetId() && (*score_it)->GetId().IsStr()
1688 && (*score_it)->GetId().GetStr() ==
"idty")
1690 return (*score_it)->GetValue().GetReal() * 100;
1727 if (db->GetDb() ==
"LocusID" && db->GetTag().IsId()) {
1728 return db->GetTag().GetId();
1746 ostr <<
"Gene ID of " << (
m_Row == 0 ?
"query" :
"subject");
1758 "failed to retrieve sequence for " +
1776 ostr <<
"CRC of the strucural parts of the alignment";
1798 "1 if rna Seq-feat based on this alignment is partial; "
1799 "0 if it is complete";
1816 if (feat->GetData().IsRna()) {
1817 return feat->IsSetPartial() && feat->GetPartial();
1822 "Can't generate rna sequence from alignment");
1834 "1 if query is a mRNA and its coding region has ribosomal "
1835 "slippage; 0 otherwise";
1847 "failed to retrieve sequence for " +
1853 feat_it->
GetExcept_text().find(
"ribosomal slippage") != string::npos;
1870 "Computes the percent of residues in the aligned "
1871 << (
m_Row == 0 ?
"query" :
"subject")
1872 <<
" region that would be filtered by 'seg'";
1884 "failed to retrieve sequence for " +
1890 "alignment filter requires that the requested "
1891 "sequence be a protein");
1910 SeqBufferSeg((
unsigned char *)seq.data(), seq.size(), 0, sp, &seq_locs);
1914 vector<size_t> counts(seq.size(), 0);
1916 for (
int i = itr->ssr->left; i <= itr->ssr->right; ++
i) {
1925 for (
const auto&
i : counts) {
1928 double val = count_x * 100.0 / seq.size();
1958 "Computes the value of Shannon's entropy for the specified "
1960 << (
m_Row == 0 ?
"query" :
"subject") <<
" region";
1972 "failed to retrieve sequence for " +
2008 (
"align_length_ungap",
2020 (
"query_gap_length",
2024 (
"subject_gap_length",
2028 (
"product_gap_length",
2032 (
"genomic_gap_length",
2060 (
"symmetric_overlap",
2065 (
"symmetric_overlap_min",
2070 (
"3prime_unaligned",
2096 (
"query_start", score));
2099 (
"5prime_unaligned", score));
2108 (
"internal_unaligned",
2113 (
"cds_internal_stops",
2125 (
"cds_pct_identity",
2129 (
"cds_pct_coverage",
2139 (
"subject_coverage",
2144 (
"align_length_ratio",
2160 (
"query_length", score));
2163 (
"product_length", score));
2189 (
"last_splice_site",
2204 (
"query_overlap_nogaps",
2214 (
"subject_overlap_nogaps",
2219 (
"query_subject_overlap",
2224 (
"query_subject_overlap_nogaps",
2229 (
"subject_ordinal_pos",
2234 (
"query_ordinal_pos",
2239 (
"prosplign_tblastn_score",
2244 (
"blast_score_ratio",
2273 (
"5prime_exon_pct_identity",
2280 (
"3prime_exon_pct_identity",
2314 (
"min_indel_to_splice",
2324 (
"ribosomal_slippage",
2342 const string &score_name)
2344 ostr <<
" * " << score_name << endl;
2349 ostr <<
" " << *
i << endl;
2355 ostr <<
"Build-in score names: " << endl;
2361 ostr <<
"Computed tokens: " << endl;
2379 token_it->second->PrintHelp(os);
2383 return "assumed to be a score on the Seq-align";
2397 return token_it->second->GetComplexity();
2404 const string &score_name)
2414 return token_it->second->IsInteger();
2418 if ((*stored_score_it)->CanGetValue() &&
2419 (*stored_score_it)->CanGetId() &&
2420 (*stored_score_it)->GetId().IsStr() &&
2421 (*stored_score_it)->GetId().GetStr() == score_name)
2423 return (*stored_score_it)->GetValue().IsInt();
2430 const string &score_name)
2433 if (align.GetNamedScore(score_name, score)) {
2453 return token_it->second->Get(align, &*
m_Scope);
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
Int2 SeqBufferSeg(Uint1 *sequence, Int4 length, Int4 offset, SegParameters *sparamsp, BlastSeqLoc **seg_locs)
Runs seg on a protein sequence in ncbistdaa.
SegParameters * SegParametersNewAa(void)
Allocates a SegParameters struct for proteins and fills it with default values.
void SegParametersFree(SegParameters *sparamsp)
Free SegParameters structure.
CRef< objects::CSeq_feat > ConvertAlignToAnnot(const objects::CSeq_align &align, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, Int8 gene_id=0, const objects::CSeq_feat *cdregion_on_mrna=NULL)
Convert an alignment to an annotation.
void SetFlags(TFeatureGeneratorFlags)
void SetAllowedUnaligned(TSeqPos)
CConstRef< objects::CSeq_align > CleanAlignment(const objects::CSeq_align &align)
Clean an alignment according to our best guess of its biological representation.
static const CTrans_table & GetTransTable(int id)
static const CGenetic_code_table & GetCodeTable(void)
set< TSeqPos > FindStops(const CSeq_align &align)
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
position_type GetCoveredLength(void) const
Returns total length covered by ranges in this collection, i.e.
double GetPercentCoverage(CScope &scope, const CSeq_align &align, unsigned query=0)
Compute percent coverage of the query (sequence 0) (range 0-100)
double GetPercentIdentity(CScope &scope, const CSeq_align &align, EPercentIdentityType type=eGapped)
int ComputeTieBreaker(const CSeq_align &align)
int GetBlastScore(CScope &scope, const CSeq_align &align)
Compute the BLAST score of the alignment.
double ComputeScore(CScope &scope, const CSeq_align &align, const CRangeCollection< TSeqPos > &ranges, CSeq_align::EScoreType score)
TScoreDictionary m_Scores
IScore::EComplexity Complexity(const string &score_name)
double GetScore(const objects::CSeq_align &align, const string &score_name)
Get requested score for alignment.
void UpdateState(const objects::CSeq_align &align)
void PrintDictionary(CNcbiOstream &)
Print out the dictionary of recognized score names.
string HelpText(const string &score_name)
Help text for score.
void x_PrintDictionaryEntry(CNcbiOstream &ostr, const string &score_name)
static int GetGeneId(const objects::CBioseq_Handle &bsh)
set< string > m_ScoresUsed
CRef< objects::CScope > m_Scope
bool IsIntegerScore(const objects::CSeq_align &align, const string &score_name)
virtual EComplexity GetComplexity() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
virtual EComplexity GetComplexity() const
CScore_AlignLength(bool include_gaps)
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
CScore_AlignStartStop(int row, bool start)
virtual bool IsInteger() const
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
double x_GetPerfectScore(CScope &scope, const CSeq_id_Handle &idh) const
CScore_BlastRatio(CScoreLookup &lookup)
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *scope) const
CScoreLookup & m_ScoreLookup
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual EComplexity GetComplexity() const
CScore_CdsScore(EScoreType type)
const EScoreType m_ScoreType
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *scope) const
CScore_EdgeExonInfo(EEdge edge, EInfoType type)
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *) const
virtual EComplexity GetComplexity() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
CScore_FrameShifts(int row=-1, bool frameshifts=true)
virtual EComplexity GetComplexity() const
virtual double Get(const CSeq_align &align, CScope *) const
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
CScore_GapCount(bool count_bases, int row=-1, bool exon_specific=false)
virtual EComplexity GetComplexity() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *) const
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
virtual bool IsInteger() const
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
virtual EComplexity GetComplexity() const
map< CSeq_id_Handle, TOrdinalPos > TIds
virtual double Get(const CSeq_align &align, CScope *) const
map< CSeq_id_Handle, size_t > TOrdinalPos
virtual bool IsInteger() const
CScore_OrdinalPos(int row)
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
CScore_OverlapBoth(int row, bool include_gaps)
virtual EComplexity GetComplexity() const
virtual void UpdateState(const objects::CSeq_align &align)
For any IScore subclasses that have an internal state, this function will be called to update it for ...
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
map< pair< CSeq_id_Handle, CSeq_id_Handle >, CRangeCollection< TSeqPos > > TData
virtual EComplexity GetComplexity() const
virtual void UpdateState(const objects::CSeq_align &align)
For any IScore subclasses that have an internal state, this function will be called to update it for ...
virtual bool IsInteger() const
CRangeCollection< TSeqPos > m_CoveredRanges
virtual void PrintHelp(CNcbiOstream &ostr) const
CScore_Overlap(int row, bool include_gaps)
virtual double Get(const CSeq_align &align, CScope *) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual EComplexity GetComplexity() const
virtual EComplexity GetComplexity() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual bool IsInteger() const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *scope) const
CScore_SequenceLength(int row)
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
CScore_StartStopCodon(bool start_codon)
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *scope) const
CScore_SymmetricOverlap(EType type)
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual void PrintHelp(CNcbiOstream &ostr) const
CScore_Taxid(int row, const string &rank="")
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
CScore_TblastnScore(CScoreLookup &lookup)
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *scope) const
CScoreLookup & m_ScoreLookup
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
virtual EComplexity GetComplexity() const
TSeqPos GetTotalGapCount(TDim row=-1) const
Retrieves the total number of gaps in the given row of an alignment; all gaps by default.
TLengthRange ExonLengthRange() const
CRangeCollection< TSeqPos > GetAlignedBases(TDim row) const
Retrieves the locations of aligned bases in the given row, excluding gaps and discontinuities.
TSeqPos GetNumGapOpeningsWithinRange(const TSeqRange &range, TDim row=-1) const
TLengthRange IntronLengthRange() const
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
static string HelpText(EScoreType score)
TSeqPos GetSeqStop(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
double AlignLengthRatio() const
TSeqPos GetSeqStart(TDim row) const
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
TSeqPos GetNumFrameshiftsWithinRange(const TSeqRange &range, TDim row=-1) const
TSeqPos GetNumFrameshifts(TDim row=-1) const
Retrieves the number of times a given row shifts frames; i.e.
static const TScoreNameMap & ScoreNameMap()
TLengthRange GapLengthRange() const
static bool IsIntegerScore(EScoreType score)
TSeqPos GetNumGapOpenings(TDim row=-1) const
Retrieves the number of gap openings in a given row in an alignment (ignoring how many gaps are in th...
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
TTaxId GetAncestorByRank(TTaxId id_tax, const char *rank_name)
static int SetCodonState(unsigned char ch1, unsigned char ch2, unsigned char ch3)
bool IsAnyStart(int state) const
bool IsOrfStop(int state) const
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator end() const
iterator_bool insert(const value_type &val)
container_type::value_type value_type
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static int lookup(const char *name, const struct lookup_int *table)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define TAX_ID_TO(T, tax_id)
SStrictId_Tax::TId TTaxId
Taxon id type.
#define REVERSE_ITERATE(Type, Var, Cont)
ITERATE macro to reverse sequence through container elements.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string AsFastaString(void) const
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
CConstRef< CSeq_id > GetSeqId(void) const
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
void SetPacked_int(TPacked_int &v)
TRange GetTotalRange(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
const COrg_ref & GetOrg_ref(const CBioseq_Handle &handle)
Return the org-ref associated with a given sequence.
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
TTaxId GetTaxId(const CSeq_id &id, TGetFlags flags=0)
Get taxonomy id of bioseq Return -1 if sequence is not found Return 0 if sequence doesn't have taxono...
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
TSeqPos GetSequenceLength(const CSeq_id &id, TGetFlags flags=0)
Get sequence length Return kInvalidSeqPos if sequence is not found.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
const CSeqFeatData & GetData(void) const
CConstRef< CDbtag > GetNamedDbxref(const CTempString &db) const
Return a specified DB xref.
TSeqPos GetBioseqLength(void) const
bool IsSetExcept_text(void) const
TInst_Mol GetInst_Mol(void) const
bool IsProtein(void) const
TInst_Topology GetInst_Topology(void) const
const string & GetExcept_text(void) const
TMol GetBioseqMolType(void) const
Get some values from core:
bool CanGetInst_Mol(void) const
@ eCoding_Ncbi
Set coding to binary coding (Ncbi4na or Ncbistdaa)
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
const CSeq_loc & GetLocation(void) const
TRange GetRange(void) const
Get range for mapped seq-feat's location.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
void SetCoding(TCoding coding)
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
TThisType & Set(position_type from, position_type to)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static void Wrap(const string &str, SIZE_TYPE width, IWrapDest &dest, TWrapFlags flags, const string *prefix, const string *prefix1)
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
bool IsSetDb(void) const
IDs in other databases. Check if a value has been assigned to Db data member.
const TDb & GetDb(void) const
Get the Db member data.
const TTag & GetTag(void) const
Get the Tag member data.
TId GetId(void) const
Get the variant data.
@ eLim_circle
artificial break at origin of circle
const TDonor_after_exon & GetDonor_after_exon(void) const
Get the Donor_after_exon member data.
bool CanGetProduct_length(void) const
Check if it is safe to call GetProduct_length method.
TLens & SetLens(void)
Assign a value to Lens data member.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetParts(void) const
Basic segments are always in biological order. Check if a value has been assigned to Parts data member.
vector< CRef< CScore > > TScore
TMatch GetMatch(void) const
Get the variant data.
list< CRef< CScore > > Tdata
bool IsSetProduct_strand(void) const
should be 'plus' or 'minus' Check if a value has been assigned to Product_strand data member.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
bool CanGetExons(void) const
Check if it is safe to call GetExons method.
const TAcceptor_before_exon & GetAcceptor_before_exon(void) const
Get the Acceptor_before_exon member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
bool IsSetAcceptor_before_exon(void) const
splice sites Check if a value has been assigned to Acceptor_before_exon data member.
TProduct_length GetProduct_length(void) const
Get the Product_length member data.
bool CanGetProduct_type(void) const
Check if it is safe to call GetProduct_type method.
bool IsSetPoly_a(void) const
start of poly(A) tail on the transcript For sense transcripts: aligned product positions < poly-a <= ...
TDiag GetDiag(void) const
Get the variant data.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
bool CanGetProduct_strand(void) const
Check if it is safe to call GetProduct_strand method.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
list< CRef< CSpliced_seg_modifier > > TModifiers
const TScores & GetScores(void) const
Get the Scores member data.
TStarts & SetStarts(void)
Assign a value to Starts data member.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
bool IsStd(void) const
Check if variant Std is selected.
bool IsSetExons(void) const
set of segments involved each segment corresponds to one exon exons are always in biological order Ch...
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
const TBases & GetBases(void) const
Get the Bases member data.
list< CRef< CSpliced_exon_chunk > > TParts
bool IsSetProduct_length(void) const
length of the product, in bases/residues from this (or from poly-a if present), a 3' unaligned length...
TPoly_a GetPoly_a(void) const
Get the Poly_a member data.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
const Tdata & Get(void) const
Get the member data.
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
TIds & SetIds(void)
Assign a value to Ids data member.
const TModifiers & GetModifiers(void) const
Get the Modifiers member data.
TNucpos GetNucpos(void) const
Get the variant data.
const TSegs & GetSegs(void) const
Get the Segs member data.
bool IsSetDonor_after_exon(void) const
Check if a value has been assigned to Donor_after_exon data member.
bool CanGetProduct_end(void) const
Check if it is safe to call GetProduct_end method.
bool IsSetScores(void) const
scores for this exon Check if a value has been assigned to Scores data member.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
@ eProduct_type_transcript
list< CRef< CGenetic_code > > Tdata
const TLocation & GetLocation(void) const
Get the Location member data.
TFrame GetFrame(void) const
Get the Frame member data.
void SetData(TData &value)
Assign a value to Data data member.
const TCdregion & GetCdregion(void) const
Get the variant data.
const TGene & GetGene(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
ENa_strand
strand of nucleic acid
void SetFuzz_to(TFuzz_to &value)
Assign a value to Fuzz_to data member.
void SetFuzz_from(TFuzz_from &value)
Assign a value to Fuzz_from data member.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
const TFtable & GetFtable(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
unsigned int
A callback function used to compare two keys in a database.
double ComputeNormalizedProteinEntropy(const CTempString &sequence, size_t word_size)
Sequence Entropy Calculation.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static TSeqPos s_GetNaLength(CBioseq_Handle bsh)
Get sequence's length in nucleic acids.
static const CGenetic_code * s_GetGeneticCode(const CSeq_id &seq_id, CScope *scope)
#define row(bind, expected)
Used to hold a set of positions, mostly used for filtering.
struct BlastSeqLoc * next
next in linked list
Structure to hold parameters for seg search.