83 }
else if (
m_Row == 1) {
87 "only pairwise alignments are supported");
95 ostr <<
" Seq-id(s), separated by a ';'";
106 CScope::TIds::const_iterator
i = it;
108 if (
i != ids.end()) {
121 , m_GetIdType(id_type)
122 , m_TagOnly(tag_only)
132 ostr <<
" accession.version";
140 ostr <<
" id as it appears in alignment";
147 ostr <<
"; tag only for gnl seq-ids";
156 }
else if (
m_Row == 0) {
158 }
else if (
m_Row == 1) {
162 "only pairwise alignments are supported");
208 : m_Row(
row), m_NoMinus(nominus)
214 ostr <<
"Start of alignment in ";
217 }
else if (
m_Row == 1) {
221 "only pairwise alignments are supported");
229 }
else if (
m_Row == 1) {
233 "only pairwise alignments are supported");
249 ostr <<
min(
r.GetFrom(),
r.GetTo()) + 1;
272 : m_Row(
row), m_NoMinus(nominus)
278 ostr <<
"End of alignment in ";
281 }
else if (
m_Row == 1) {
285 "only pairwise alignments are supported");
293 }
else if (
m_Row == 1) {
297 "only pairwise alignments are supported");
311 ostr <<
max(
r.GetFrom(),
r.GetTo()) + 1;
339 ostr <<
"Strand of alignment in ";
342 }
else if (
m_Row == 1) {
346 "only pairwise alignments are supported");
354 }
else if (
m_Row == 1) {
358 "only pairwise alignments are supported");
393 ostr <<
"Length of ";
396 }
else if (
m_Row == 1) {
400 "only pairwise alignments are supported");
409 }
else if (
m_Row == 1) {
413 "only pairwise alignments are supported");
422 m_Row == 0 ?
"query_length" :
"subject_length");
423 if (score == numeric_limits<double>::quiet_NaN()) {
434 ostr <<
"Alignment length";
445 ostr << (
int)
m_Scores->GetScore(align,
"align_length");
452 ostr <<
"Alignment length not counting gaps";
457 ostr <<
"length_ungap";
463 ostr << (
int)
m_Scores->GetScore(align,
"align_length_ungap");
475 ostr <<
"Percentage of identical matches";
477 ostr <<
" excluding gaps on either row";
487 ostr <<
"(ungapped)";
494 double pct_id =
m_Scores->GetScore(align,
496 :
"pct_identity_ungap");
498 pct_id =
min(pct_id, 99.99);
507 ostr << (
m_Row == 0 ?
"Percent coverage of query in subject"
508 :
"Percent coverage of subject in query");
520 ?
"pct_coverage" :
"subject_coverage");
521 if (pct_cov != 100) {
522 pct_cov =
min(pct_cov, 99.99);
531 ostr <<
"Number of gap openings";
548 ostr <<
"Number of identical matches";
552 ostr <<
"identities";
557 ostr << (
int)
m_Scores->GetScore(align,
"num_ident");
564 ostr <<
"Number of mismatches";
573 ostr << (
int)
m_Scores->GetScore(align,
"num_mismatch");
586 bool is_gapped =
false;
597 range.SetFrom(start);
599 ranges.push_back(
range);
614 "smismatchpos and qmismatchpos currently do not handle "
615 "this type of alignment.");
626 ostr <<
"Positions of aligned mismatches, comma seperated";
631 ostr <<
"qmismatchpos";
632 }
else if(
m_Row == 1) {
633 ostr <<
"smismatchpos";
636 "only pairwise alignments are supported");
644 vector<TSeqPos> mm_pos;
653 "supported for protein alignments");
657 if (!exon->IsSetParts()) {
660 ENa_strand exon_qstrand = exon->IsSetProduct_strand()
661 ? exon->GetProduct_strand() : QStrand;
662 ENa_strand exon_sstrand = exon->IsSetGenomic_strand()
663 ? exon->GetGenomic_strand() : SStrand;
666 : exon->GetProduct_end().GetNucpos();
668 : exon->GetGenomic_end();
672 switch (part->Which()) {
674 pos += direction * part->GetMatch();
678 for (
unsigned i = 0;
i < part->GetMismatch(); ++
i) {
679 mm_pos.push_back(pos);
686 pos += direction * part->GetProduct_ins();
692 pos += direction * part->GetGenomic_ins();
698 "supported for alignments with diag");
708 string QueryStr, SubjtStr;
729 string QS = QueryStr.substr(0,50);
730 string SS = SubjtStr.substr(0,50);
733 list<TSeqRange> QSegRanges, SSegRanges;
739 list<TSeqRange>::const_iterator SSegIter = SSegRanges.begin();
740 ITERATE(list<TSeqRange>, QSegIter, QSegRanges) {
748 size_t QOffset, SOffset;
751 size_t QMOffset = QAlignRange.
GetTo()-QuerySeg.
GetTo();
755 size_t SMOffset = SAlignRange.
GetTo()-SubjtSeg.
GetTo();
760 for(
unsigned Loop = 0; Loop < QuerySeg.
GetLength(); Loop++) {
761 size_t QLoop = QOffset+Loop;
762 size_t SLoop = SOffset+Loop;
769 if(QueryStr[QLoop] == SubjtStr[SLoop]) {
785 sort(mm_pos.begin(), mm_pos.end());
786 ITERATE(vector<TSeqPos>, it, mm_pos) {
787 if (it != mm_pos.begin()) {
803 ostr <<
"Positions of gapped, unaligned, segments, comma seperated";
808 ostr <<
"qgapranges";
809 }
else if(
m_Row == 1) {
810 ostr <<
"sgapranges";
813 "only pairwise alignments are supported");
822 GappedRC -= AlignedRC;
827 if (it != GappedRC.
begin()) {
830 ostr << it->GetFrom()+1 <<
"-" << it->GetTo()+1;
838 ostr <<
"Total number of gaps";
854 ostr <<
"Expect value";
863 double score =
m_Scores->GetScore(align,
"e_value");
865 score == numeric_limits<double>::quiet_NaN()) {
876 ios_base::fmtflags cur_flags=ostr.flags();
879 ostr << scientific << score;
882 ostr.unsetf(ios_base::scientific);
885 ostr << setiosflags(cur_flags);
893 ostr <<
"Expect value in mantissa format";
897 ostr <<
"evalue_mantissa";
904 score =
m_Scores->GetScore(align,
"e_value");
907 score == numeric_limits<double>::quiet_NaN()) {
917 double mantissa = score;
920 while(mantissa >= 10.0) {
923 while(mantissa < 1.0) {
926 }
else if(score < 0.0) {
927 while(mantissa <= -10.0) {
930 while(mantissa > -1.0) {
943 ostr <<
"Expect value in exponent format";
947 ostr <<
"evalue_exponent";
954 score =
m_Scores->GetScore(align,
"e_value");
957 score == numeric_limits<double>::quiet_NaN()) {
967 double mantissa = score;
972 while(mantissa >= 10.0) {
976 while(mantissa < 1.0) {
980 }
else if(score < 0.0) {
981 while(mantissa <= -10.0) {
985 while(mantissa > -1.0) {
1002 ostr <<
"Bit score";
1011 double score =
m_Scores->GetScore(align,
"bit_score");
1019 ostr <<
"Raw score";
1028 double score =
m_Scores->GetScore(align,
"score");
1036 const string& col_name)
1037 : m_ScoreName(score_name)
1038 , m_ColName(col_name)
1080 ostr <<
"Entropy value for the "
1081 << (
m_Row == 0 ?
"query " :
"subject ")
1089 << (
m_Row == 0 ?
"query_" :
"subject_")
1095 const objects::CSeq_align& align)
1098 (
m_Row == 0 ?
"query_" :
"subject_") +
1112 ostr <<
"Entropy value for the "
1113 << (
m_Row == 0 ?
"query " :
"subject ")
1121 << (
m_Row == 0 ?
"query_" :
"subject_")
1127 const objects::CSeq_align& align)
1130 (
m_Row == 0 ?
"query_" :
"subject_") +
1150 ostr <<
"Defline of the ";
1153 }
else if (
m_Row == 1) {
1157 "only pairwise alignments are supported");
1159 ostr <<
" sequence";
1167 }
else if (
m_Row == 1) {
1171 "only pairwise alignments are supported");
1181 "indexing past the end of available "
1182 "sequences in an alignment");
1196 ostr <<
"Alignment ids";
1202 ostr <<
"align_ids";
1215 if ((*it)->IsId()) {
1216 ostr << (*it)->GetId();
1218 else if ((*it)->IsStr()) {
1219 ostr << (*it)->GetStr();
1229 ostr <<
"best_placement group id";
1234 ostr <<
"best_placement_group";
1251 ostr <<
f->GetData().GetStr();
1268 ostr <<
"Prot-ref of the ";
1271 }
else if (
m_Row == 1) {
1275 "only pairwise alignments are supported");
1277 ostr <<
" sequence";
1285 }
else if (
m_Row == 1) {
1289 "only pairwise alignments are supported");
1299 "indexing past the end of available "
1300 "sequences in an alignment");
1310 if (feat_iter.
GetSize() == 1) {
1324 ostr <<
"Dump the ";
1331 ostr << (
m_Sequence == 0 ?
"unaligned segment" :
"intron");
1337 ostr <<
" structure";
1346 ostr <<
" for the query sequence";
1349 ostr <<
" of a Spliced-seg alignment";
1373 ostr << (
m_Sequence == 0 ?
"unaligned" :
"introns");
1377 ostr << (
m_Sequence == 0 ?
"unaligned_len" :
"intron_len");
1397 +
" not supported for protein alignments");
1400 typedef pair<const CProt_pos*, const CProt_pos*> TProteinExon;
1401 vector<TProteinExon> protein_exons;
1402 vector<TSeqRange> nuc_exons;
1409 intron_ranges += align_range;
1417 protein_exons.push_back(
1430 nuc_exons.push_back(exon_range);
1434 intron_ranges -= exon_range;
1438 list<TSeqRange> range_list;
1439 if (!nuc_exons.empty()) {
1440 range_list.insert(range_list.end(), nuc_exons.begin(),
1442 }
else if (!intron_ranges.
Empty()) {
1443 range_list.insert(range_list.end(), intron_ranges.
begin(),
1444 intron_ranges.
end());
1449 range_list.reverse();
1454 ITERATE (vector<TProteinExon>, it, protein_exons) {
1455 if (it != protein_exons.begin()) {
1459 ostr <<
'(' << it->first->GetAmin()+1
1460 <<
'/' << it->first->GetFrame()
1461 <<
".." << it->second->GetAmin()+1
1462 <<
'/' << it->second->GetFrame() <<
')';
1465 ITERATE (list<TSeqRange>, it, range_list) {
1466 if (it != range_list.begin()) {
1480 ostr << it->GetLength();
1499 ostr <<
"Taxid of the ";
1502 }
else if (
m_Row == 1) {
1506 "only pairwise alignments are supported");
1508 ostr <<
" sequence";
1515 }
else if (
m_Row == 1) {
1519 "only pairwise alignments are supported");
1528 "indexing past the end of available "
1529 "sequences in an alignment");
1551 }
else if (
m_Row == 1) {
1555 "only pairwise alignments are supported");
1557 ostr <<
" sequence";
1564 }
else if (
m_Row == 1) {
1568 "only pairwise alignments are supported");
1578 "indexing past the end of available "
1579 "sequences in an alignment");
1589 desc_iter; ++desc_iter)
1594 ostr << desc_iter->GetComment().substr(
m_Prefix.size() + 2);
1620 ostr <<
"Full taxname of the ";
1624 ostr <<
"Species name of the ";
1628 ostr <<
"Genus name of the ";
1632 ostr <<
"Kingdom name of the ";
1637 case 0: ostr <<
"query";
break;
1638 case 1: ostr <<
"subject";
break;
1641 "only pairwise alignments are supported");
1643 ostr <<
" sequence";
1649 case 0: ostr <<
"q";
break;
1650 case 1: ostr <<
"s";
break;
1653 "only pairwise alignments are supported");
1658 case eSpecies: ostr <<
"species";
break;
1659 case eGenus: ostr <<
"genus";
break;
1660 case eKingdom: ostr <<
"kingdom";
break;
1669 "indexing past the end of available "
1670 "sequences in an alignment");
1680 m_Row == 0 ?
"query_taxid"
1681 :
"subject_taxid"));
1685 taxid =
m_Taxon1->GetSpecies(taxid);
1693 taxid =
m_Taxon1->GetSuperkingdom(taxid);
1700 bool is_species =
false;
1701 bool is_uncultured =
false;
1704 m_Taxon1->GetOrgRef(taxid, is_species, is_uncultured, blast_name);
1707 org->GetLabel(&
label);
1724 ostr <<
"size of biggest gap";
1729 ostr <<
"biggestgap";
1730 }
else if(
m_Row == 0) {
1731 ostr <<
"qbiggestgap";
1732 }
else if(
m_Row == 1) {
1733 ostr <<
"sbiggestgap";
1736 "only pairwise alignments are supported");
1756 for(
int Index = 0; Index < Denseg.
GetNumseg(); Index++) {
1757 bool QGap = (Denseg.
GetStarts()[2*Index] == -1);
1761 }
else if(
m_Row == 0 && QGap) {
1770 "biggestgap is only supported for Dense-sef and Disc alignments");
1785 }
else if (
m_Row == 1) {
1789 "only pairwise alignments are supported");
1791 ostr <<
" has a chromosome, its name";
1798 }
else if (
m_Row == 1) {
1802 "only pairwise alignments are supported");
1818 if( (*SubIter)->CanGetSubtype() &&
1820 (*SubIter)->CanGetName() ) {
1821 Chrom = (*SubIter)->GetName();
1842 }
else if (
m_Row == 1) {
1846 "only pairwise alignments are supported");
1848 ostr <<
" has a clone, its name";
1855 }
else if (
m_Row == 1) {
1859 "only pairwise alignments are supported");
1875 if( (*SubIter)->CanGetSubtype() &&
1877 (*SubIter)->CanGetName() ) {
1878 Clone = (*SubIter)->GetName();
1902 }
else if (
m_Row == 1) {
1906 "only pairwise alignments are supported");
1908 ostr <<
" sequence tech type";
1915 }
else if (
m_Row == 1) {
1919 "only pairwise alignments are supported");
1928 string TechStr =
"(none)";
1951 ostr <<
"Strand of alignment in ";
1954 }
else if (
m_Row == 1) {
1958 "only pairwise alignments are supported");
1960 ostr <<
", 'b' if both in a Disc-seg alignment";
1966 ostr <<
"qdiscstrand";
1967 }
else if (
m_Row == 1) {
1968 ostr <<
"sdiscstrand";
1971 "only pairwise alignments are supported");
1978 bool Plus=
false, Minus=
false;
1982 else if(Minus && !Plus)
1984 else if(Plus && Minus)
1989 bool& Plus,
bool& Minus)
2009 : m_ColName(col_name)
2016 ostr <<
"'" <<
m_Text <<
"' as fixed text";
2035 ostr <<
"length_ungap / size of aligned query sequence range";
2040 ostr <<
"align_len_ratio";
2050 ostr << double(align_length) / double(align_range);
2063 ostr <<
"Alignment CIGAR string";
2078 "cigar format only supports denseg alignments.");
2086 for(
int Loop = 0; Loop < NumSeg; Loop++) {
2087 int Length = Lens[Loop];
2090 if( Starts[ (Loop*2) ] == -1)
2092 else if( Starts[ (Loop*2)+1 ] == -1)
2097 ostr << Length <<
Code;
2124 ostr <<
"Accession of ";
2127 ostr <<
"Chain id of ";
2130 ostr <<
"Chromosome containing ";
2134 ostr << (
m_Type ==
eFull ?
"full assembly" :
"assembly unit") <<
" of ";
2138 }
else if (
m_Row == 1) {
2142 "only pairwise alignments are supported");
2144 ostr <<
" sequence";
2151 }
else if (
m_Row == 1) {
2155 "only pairwise alignments are supported");
2158 ostr << (
m_Type ==
eFull ?
"fullasm" :
"asmunit");
2170 ostr <<
"chromosome";
2192 ?
"Assembly Name" :
"Assembly Unit Name")
2197 if (obj.
HasField(
"Assembly Accession")) {
2199 ?
"Assembly Accession" :
"Assembly Unit Accession")
2208 if (obj.
HasField(
"GenColl Chain")) {
2242 Assm = Seq->GetFullAssembly();
2248 Assm.
Reset(unit_assm);
2268 size_t chain_start = accession.find_first_of(
"123456789");
2269 size_t chain_end = accession.find(
'.');
2270 ostr << accession.substr(chain_start, chain_end-chain_start);
2275 ostr << Seq->GetChrName();
2283 : m_Row(
row), m_Gencoll(gencoll)
2289 ostr <<
"Patch type, if any, of ";
2292 }
else if (
m_Row == 1) {
2296 "only pairwise alignments are supported");
2298 ostr <<
" sequence";
2304 ostr <<
"qpatchtype";
2305 }
else if (
m_Row == 1) {
2306 ostr <<
"spatchtype";
2309 "only pairwise alignments are supported");
2324 if(Seq->CanGetPatch_type()) {
2335 : m_Row(
row), m_Gencoll(gencoll)
2341 ostr <<
"Nearest Gap, if any, or edge, of ";
2344 }
else if (
m_Row == 1) {
2348 "only pairwise alignments are supported");
2350 ostr <<
" sequence";
2356 ostr <<
"qnearestgap";
2357 }
else if (
m_Row == 1) {
2358 ostr <<
"snearestgap";
2361 "only pairwise alignments are supported");
2368 list<TSeqRange>& Gaps)
2375 if(!Seq->CanGetStructure())
2380 if( (*DeltaIter)->IsLiteral()) {
2381 if (!(*DeltaIter)->GetLiteral().CanGetSeq_data() ||
2382 (*DeltaIter)->GetLiteral().GetSeq_data().IsGap()) {
2385 GapRange.
SetLength((*DeltaIter)->GetLiteral().GetLength());
2386 Gaps.push_back(GapRange);
2388 CurrStart += (*DeltaIter)->GetLiteral().
GetLength();
2389 }
else if( (*DeltaIter)->IsLoc()) {
2390 s_FindGaps(Assembly, *(*DeltaIter)->GetLoc().GetId(), CurrStart, Gaps);
2391 CurrStart += (*DeltaIter)->GetLoc().GetTotalRange().GetLength();
2406 list<TSeqRange> Gaps;
2409 if(SeqLength == 0) {
2420 ITERATE(list<TSeqRange>, GapIter, Gaps) {
2438 ostr <<
"Blast Traceback string";
2452 "btop format only supports denseg alignments.");
2463 : m_IndelType(indel_type)
2464 , m_CoordinateRow(coordinate_row)
2473 ostr <<
"List of frameshift indels";
2477 ostr <<
"List of non-frameshifting indels";
2481 ostr <<
"List of all indels wihin CDS";
2485 ostr <<
", coordinates on query sequence";
2493 ostr <<
"frameshifts";
2497 ostr <<
"non-frameshift indels";
2501 ostr <<
"indels in cds";
2505 ostr <<
" on query";
2516 "failed to retrieve sequence for " +
2531 vector<CSeq_align::SIndel> indels;
2567 ostr <<
"Gene symbol for " << (
m_Row == 0 ?
"query" :
"subject");
2572 ostr << (
m_Row == 0 ?
"query" :
"subject") <<
"_gene_symbol";
2619 unsigned snp_count = 0, snp_pos = 0;
2622 for (
unsigned deletion_pos = 0; deletion_pos <
subject.size();
2625 string subject_with_del =
subject;
2626 subject_with_del.insert(deletion_pos, 1,
query[deletion_pos]);
2627 if (
query == subject_with_del) {
2628 subject.insert(deletion_pos, 1,
'-');
2634 for (
unsigned index = 0; index <
query.size(); ++index) {
2641 if (snp_count == 1) {
2653 : m_CoordinateRow(
row)
2660 ostr <<
"Mismatches or indels within start codon";
2662 ostr <<
", coordinates on query sequence";
2668 ostr <<
"Start codon changes";
2670 ostr <<
" on query";
2681 "failed to retrieve sequence for " +
2704 : m_CoordinateRow(
row)
2711 ostr <<
"Mismatches or indels within stop codon";
2713 ostr <<
", coordinates on query sequence";
2719 ostr <<
"Stop codon changes";
2721 ostr <<
" on query";
2747 -> GetSeq().GetAnnot().front()
2748 -> GetData().GetFtable().front();
2749 cds->
SetData().SetCdregion().ResetCode_break();
2752 bool missing_stop =
false;
2755 trans.resize(trans.size() - 1);
2757 missing_stop =
true;
2761 for (
size_t changed_codons_count = 0, internal_stop_pos = trans.find(
'*');
2762 internal_stop_pos != string::npos || missing_stop;
2763 internal_stop_pos = trans.find(
'*', internal_stop_pos+1))
2765 if (internal_stop_pos == string::npos) {
2767 internal_stop_pos = trans.size() - 1;
2768 missing_stop =
false;
2776 if (changed_codons_count++) {
2788 const string &unavailable_string)
2789 : m_Scores(&scores), m_Ostr(ostr), m_UnavailableString(unavailable_string)
3070 formatter_it->second->SetGencoll(gencoll);
3080 const string &separators,
3081 vector<string> &toks)
3083 unsigned int paren_level = 0;
3086 if (!paren_level && separators.find(*char_it) != string::npos) {
3087 if (!next_tok.empty()) {
3088 toks.push_back(next_tok);
3093 if (*char_it ==
'(') {
3095 }
else if (*char_it ==
')') {
3098 "Unbalanced parentheses: " +
format);
3102 next_tok += *char_it;
3104 if (!next_tok.empty()) {
3105 toks.push_back(next_tok);
3109 "Unbalanced parentheses: " +
format);
3115 CRegexp re1(
"score\\(([^,]*),([^)]*)\\)");
3116 CRegexp re2(
"score\\(([^)]*)\\)");
3118 CRegexp text_re1(
"text\\(([^,]*),([^)]*)\\)");
3119 CRegexp text_re2(
"text\\(([^)]*)\\)");
3121 vector<string> toks;
3124 ITERATE (vector<string>, it, toks) {
3130 string score_name = re1.
GetSub(*it, 1);
3131 string col_name = re1.
GetSub(*it, 2);
3135 string score_name = re2.
GetSub(*it, 1);
3138 }
else if (text_re1.
IsMatch(s)) {
3139 string score_name = text_re1.
GetSub(*it, 1);
3140 string col_name = text_re1.
GetSub(*it, 2);
3143 }
else if (text_re2.
IsMatch(s)) {
3144 string score_name = text_re2.
GetSub(*it, 1);
3162 (*it)->PrintHeader(
m_Ostr);
3164 list< CIRef<IFormatter> >::const_iterator
i = it;
3179 (*it)->Print(
m_Ostr, align);
3189 list< CIRef<IFormatter> >::const_iterator
i = it;
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
size_t GetSize(void) const
CRef< objects::CSeq_feat > ConvertAlignToAnnot(const objects::CSeq_align &align, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, Int8 gene_id=0, const objects::CSeq_feat *cdregion_on_mrna=NULL)
Convert an alignment to an annotation.
void SetFlags(TFeatureGeneratorFlags)
void SetAllowedUnaligned(TSeqPos)
CConstRef< objects::CSeq_align > CleanAlignment(const objects::CSeq_align &align)
Clean an alignment according to our best guess of its biological representation.
string GetAccession() const
Retrieve the accession for this assembly.
string GetName() const
Retrieve the name of this assembly.
void Find(const CSeq_id_Handle &id, TSequenceList &sequences) const
Find all references to a given sequence within an assembly.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
ostream & AsString(ostream &s) const
void GetLabel(string *label) const
const_iterator end() const
const_iterator begin() const
TSeqPos GetTotalGapCount(TDim row=-1) const
Retrieves the total number of gaps in the given row of an alignment; all gaps by default.
CRangeCollection< TSeqPos > GetAlignedBases(TDim row) const
Retrieves the locations of aligned bases in the given row, excluding gaps and discontinuities.
vector< SIndel > GetNonFrameshiftsWithinRange(const TSeqRange &range, TDim row=-1) const
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
TSeqPos GetSeqStop(TDim row) const
TDim CheckNumRows(void) const
Validators.
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
bool GetNamedScore(const string &id, int &score) const
Get score.
vector< SIndel > GetFrameshiftsWithinRange(const TSeqRange &range, TDim row=-1) const
vector< SIndel > GetIndelsWithinRange(const TSeqRange &range, TDim row=-1) const
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
TSeqPos GetNumGapOpenings(TDim row=-1) const
Retrieves the number of gap openings in a given row in an alignment (ignoring how many gaps are in th...
CTabularFormatter_FixedText(const string &col_name, const string &text)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
SStrictId_Tax::TId TTaxId
Taxon id type.
#define TAX_ID_FROM(T, value)
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & FindName(TEnumValueType value, bool allowBadValue) const
Find name of the enum by its numeric value.
const string AsFastaString(void) const
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ eContent
Untagged human-readable accession or the like.
ENa_strand GetStrand(void) const
Get the location's strand.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
bool IsPartialStop(ESeqLocExtremes ext) const
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
@ eGetId_ForceGi
return only a gi-based seq-id
@ eGetId_HandleDefault
returns the ID associated with a bioseq-handle
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
vector< CSeq_id_Handle > TIds
const CSeqFeatData & GetData(void) const
TMol GetBioseqMolType(void) const
Get some values from core:
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & SetResolveTSE(void)
SetResolveTSE() is equivalent to SetResolveMethod(eResolve_TSE).
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
const CSeq_loc & GetProduct(void) const
TRange GetRange(void) const
Get range for mapped seq-feat's location.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
void Reset(void)
Reset reference object.
position_type GetLength(void) const
TThisType & SetLength(position_type length)
bool IsMatch(CTempString str, TMatch flags=fMatch_default)
Check existence substring which match a specified pattern.
CTempString GetSub(CTempString str, size_t idx=0) const
Get pattern/subpattern from previous GetMatch().
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string & ToLower(string &str)
Convert string to lower case – string& version.
@ eNocase
Case insensitive compare.
static const char label[]
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
bool CanGetSubtype(void) const
Check if it is safe to call GetSubtype method.
list< CRef< CSubSource > > TSubtype
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to Locus data member.
const TLocus & GetLocus(void) const
Get the Locus member data.
const TStr & GetStr(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
const TTag & GetTag(void) const
Get the Tag member data.
const TData & GetData(void) const
Get the Data member data.
const TStr & GetStr(void) const
Get the variant data.
TInt GetInt(void) const
Get the variant data.
const TType & GetType(void) const
Get the Type member data.
TUnit & SetUnit(void)
Select the variant.
const TProtpos & GetProtpos(void) const
Get the variant data.
const TId & GetId(void) const
Get the Id member data.
const TDenseg & GetDenseg(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetId(void) const
Alignment id. Check if a value has been assigned to Id data member.
bool IsSetExt(void) const
Extra info. Check if a value has been assigned to Ext data member.
const TStarts & GetStarts(void) const
Get the Starts member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
const TLens & GetLens(void) const
Get the Lens member data.
vector< TSignedSeqPos > TStarts
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
list< CRef< CObject_id > > TId
list< CRef< CUser_object > > TExt
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
bool CanGetSegs(void) const
Check if it is safe to call GetSegs method.
TDim GetDim(void) const
Get the Dim member data.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
bool IsDisc(void) const
Check if variant Disc is selected.
const TExt & GetExt(void) const
Get the Ext member data.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
TNumseg GetNumseg(void) const
Get the Numseg member data.
list< CRef< CSeq_align > > Tdata
const TDisc & GetDisc(void) const
Get the variant data.
TNucpos GetNucpos(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
bool IsDenseg(void) const
Check if variant Denseg is selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
const TLocation & GetLocation(void) const
Get the Location member data.
void SetData(TData &value)
Assign a value to Data data member.
const TGene & GetGene(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
ENa_strand
strand of nucleic acid
bool IsGeneral(void) const
Check if variant General is selected.
const TGeneral & GetGeneral(void) const
Get the variant data.
@ eNa_strand_both
in forward orientation
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
const TSource & GetSource(void) const
Get the variant data.
TTech GetTech(void) const
Get the Tech member data.
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
bool CanGetTech(void) const
Check if it is safe to call GetTech method.
list< CRef< CDelta_seq > > Tdata
const TMolinfo & GetMolinfo(void) const
Get the variant data.
@ e_Comment
a more extensive comment
@ e_Molinfo
info on the molecule and techniques
@ e_Source
source of materials, includes Org-ref
unsigned int
A callback function used to compare two keys in a database.
static void text(MDB_val *v)
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
Magic spell ;-) needed for some weird compilers... very empiric.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
#define row(bind, expected)
static void s_Split(const string &format, const string &separators, vector< string > &toks)
Split a string, but ignore separators within parentheses.
TSeqPos s_FindGaps(const CGC_Assembly &Assembly, const CSeq_id &Id, const TSeqPos Offset, list< TSeqRange > &Gaps)
void s_AlignToSeqRanges(const CSeq_align &align, int row, list< TSeqRange > &ranges)
static string s_CodonVariation(const CSeq_align &align, TSeqPos pos, CScope &scope, int row)
C++ wrappers for the Perl-compatible regular expression (PCRE) library.
const value_slice::CValueConvert< value_slice::SRunTimeCP, FROM > Convert(const FROM &value)