83 }
else if (
m_Row == 1) {
87 "only pairwise alignments are supported");
95 ostr <<
" Seq-id(s), separated by a ';'";
106 CScope::TIds::const_iterator
i = it;
108 if (
i != ids.end()) {
121 , m_GetIdType(id_type)
122 , m_TagOnly(tag_only)
132 ostr <<
" accession.version";
140 ostr <<
" id as it appears in alignment";
147 ostr <<
"; tag only for gnl seq-ids";
156 }
else if (
m_Row == 0) {
158 }
else if (
m_Row == 1) {
162 "only pairwise alignments are supported");
208 : m_Row(row), m_NoMinus(nominus)
214 ostr <<
"Start of alignment in ";
217 }
else if (
m_Row == 1) {
221 "only pairwise alignments are supported");
229 }
else if (
m_Row == 1) {
233 "only pairwise alignments are supported");
249 ostr <<
min(
r.GetFrom(),
r.GetTo()) + 1;
272 : m_Row(row), m_NoMinus(nominus)
278 ostr <<
"End of alignment in ";
281 }
else if (
m_Row == 1) {
285 "only pairwise alignments are supported");
293 }
else if (
m_Row == 1) {
297 "only pairwise alignments are supported");
311 ostr <<
max(
r.GetFrom(),
r.GetTo()) + 1;
339 ostr <<
"Strand of alignment in ";
342 }
else if (
m_Row == 1) {
346 "only pairwise alignments are supported");
354 }
else if (
m_Row == 1) {
358 "only pairwise alignments are supported");
393 ostr <<
"Length of ";
396 }
else if (
m_Row == 1) {
400 "only pairwise alignments are supported");
409 }
else if (
m_Row == 1) {
413 "only pairwise alignments are supported");
422 m_Row == 0 ?
"query_length" :
"subject_length");
423 if (score == numeric_limits<double>::quiet_NaN()) {
434 ostr <<
"Alignment length";
445 ostr << (
int)
m_Scores->GetScore(align,
"align_length");
452 ostr <<
"Alignment length not counting gaps";
457 ostr <<
"length_ungap";
463 ostr << (
int)
m_Scores->GetScore(align,
"align_length_ungap");
475 ostr <<
"Percentage of identical matches";
477 ostr <<
" excluding gaps on either row";
487 ostr <<
"(ungapped)";
494 double pct_id =
m_Scores->GetScore(align,
496 :
"pct_identity_ungap");
498 pct_id =
min(pct_id, 99.99);
507 ostr << (
m_Row == 0 ?
"Percent coverage of query in subject"
508 :
"Percent coverage of subject in query");
520 ?
"pct_coverage" :
"subject_coverage");
521 if (pct_cov != 100) {
522 pct_cov =
min(pct_cov, 99.99);
531 ostr <<
"Number of gap openings";
548 ostr <<
"Number of identical matches";
552 ostr <<
"identities";
557 ostr << (
int)
m_Scores->GetScore(align,
"num_ident");
564 ostr <<
"Number of mismatches";
573 ostr << (
int)
m_Scores->GetScore(align,
"num_mismatch");
586 bool is_gapped =
false;
597 range.SetFrom(start);
599 ranges.push_back(
range);
614 "smismatchpos and qmismatchpos currently do not handle "
615 "this type of alignment.");
626 ostr <<
"Positions of aligned mismatches, comma seperated";
631 ostr <<
"qmismatchpos";
632 }
else if(
m_Row == 1) {
633 ostr <<
"smismatchpos";
636 "only pairwise alignments are supported");
644 vector<TSeqPos> mm_pos;
653 "supported for protein alignments");
657 if (!exon->IsSetParts()) {
660 ENa_strand exon_qstrand = exon->IsSetProduct_strand()
661 ? exon->GetProduct_strand() : QStrand;
662 ENa_strand exon_sstrand = exon->IsSetGenomic_strand()
663 ? exon->GetGenomic_strand() : SStrand;
666 : exon->GetProduct_end().GetNucpos();
668 : exon->GetGenomic_end();
672 switch (part->Which()) {
674 pos += direction * part->GetMatch();
678 for (
unsigned i = 0;
i < part->GetMismatch(); ++
i) {
679 mm_pos.push_back(pos);
686 pos += direction * part->GetProduct_ins();
692 pos += direction * part->GetGenomic_ins();
698 "supported for alignments with diag");
708 string QueryStr, SubjtStr;
729 string QS = QueryStr.substr(0,50);
730 string SS = SubjtStr.substr(0,50);
733 list<TSeqRange> QSegRanges, SSegRanges;
739 list<TSeqRange>::const_iterator SSegIter = SSegRanges.begin();
740 ITERATE(list<TSeqRange>, QSegIter, QSegRanges) {
748 size_t QOffset, SOffset;
751 size_t QMOffset = QAlignRange.
GetTo()-QuerySeg.
GetTo();
755 size_t SMOffset = SAlignRange.
GetTo()-SubjtSeg.
GetTo();
760 for(
unsigned Loop = 0; Loop < QuerySeg.
GetLength(); Loop++) {
761 size_t QLoop = QOffset+Loop;
762 size_t SLoop = SOffset+Loop;
769 if(QueryStr[QLoop] == SubjtStr[SLoop]) {
785 sort(mm_pos.begin(), mm_pos.end());
786 ITERATE(vector<TSeqPos>, it, mm_pos) {
787 if (it != mm_pos.begin()) {
803 ostr <<
"Positions of gapped, unaligned, segments, comma seperated";
808 ostr <<
"qgapranges";
809 }
else if(
m_Row == 1) {
810 ostr <<
"sgapranges";
813 "only pairwise alignments are supported");
822 GappedRC -= AlignedRC;
827 if (it != GappedRC.
begin()) {
830 ostr << it->GetFrom()+1 <<
"-" << it->GetTo()+1;
838 ostr <<
"Total number of gaps";
854 ostr <<
"Expect value";
863 double score =
m_Scores->GetScore(align,
"e_value");
865 score == numeric_limits<double>::quiet_NaN()) {
876 ios_base::fmtflags cur_flags=ostr.flags();
879 ostr << scientific << score;
882 ostr.unsetf(ios_base::scientific);
885 ostr << setiosflags(cur_flags);
893 ostr <<
"Expect value in mantissa format";
897 ostr <<
"evalue_mantissa";
904 score =
m_Scores->GetScore(align,
"e_value");
907 score == numeric_limits<double>::quiet_NaN()) {
917 double mantissa = score;
921 while(mantissa >= 10.0) {
925 while(mantissa < 1.0) {
929 }
else if(score < 0.0) {
930 while(mantissa <= -10.0) {
934 while(mantissa > -1.0) {
948 ostr <<
"Expect value in exponent format";
952 ostr <<
"evalue_exponent";
959 score =
m_Scores->GetScore(align,
"e_value");
962 score == numeric_limits<double>::quiet_NaN()) {
972 double mantissa = score;
977 while(mantissa >= 10.0) {
981 while(mantissa < 1.0) {
985 }
else if(score < 0.0) {
986 while(mantissa <= -10.0) {
990 while(mantissa > -1.0) {
1007 ostr <<
"Bit score";
1016 double score =
m_Scores->GetScore(align,
"bit_score");
1024 ostr <<
"Raw score";
1033 double score =
m_Scores->GetScore(align,
"score");
1041 const string& col_name)
1042 : m_ScoreName(score_name)
1043 , m_ColName(col_name)
1085 ostr <<
"Entropy value for the "
1086 << (
m_Row == 0 ?
"query " :
"subject ")
1094 << (
m_Row == 0 ?
"query_" :
"subject_")
1100 const objects::CSeq_align& align)
1103 (
m_Row == 0 ?
"query_" :
"subject_") +
1117 ostr <<
"Entropy value for the "
1118 << (
m_Row == 0 ?
"query " :
"subject ")
1126 << (
m_Row == 0 ?
"query_" :
"subject_")
1132 const objects::CSeq_align& align)
1135 (
m_Row == 0 ?
"query_" :
"subject_") +
1155 ostr <<
"Defline of the ";
1158 }
else if (
m_Row == 1) {
1162 "only pairwise alignments are supported");
1164 ostr <<
" sequence";
1172 }
else if (
m_Row == 1) {
1176 "only pairwise alignments are supported");
1186 "indexing past the end of available "
1187 "sequences in an alignment");
1201 ostr <<
"Alignment ids";
1207 ostr <<
"align_ids";
1220 if ((*it)->IsId()) {
1221 ostr << (*it)->GetId();
1223 else if ((*it)->IsStr()) {
1224 ostr << (*it)->GetStr();
1234 ostr <<
"best_placement group id";
1239 ostr <<
"best_placement_group";
1256 ostr <<
f->GetData().GetStr();
1273 ostr <<
"Prot-ref of the ";
1276 }
else if (
m_Row == 1) {
1280 "only pairwise alignments are supported");
1282 ostr <<
" sequence";
1290 }
else if (
m_Row == 1) {
1294 "only pairwise alignments are supported");
1304 "indexing past the end of available "
1305 "sequences in an alignment");
1315 if (feat_iter.
GetSize() == 1) {
1329 ostr <<
"Dump the ";
1336 ostr << (
m_Sequence == 0 ?
"unaligned segment" :
"intron");
1342 ostr <<
" structure";
1351 ostr <<
" for the query sequence";
1354 ostr <<
" of a Spliced-seg alignment";
1378 ostr << (
m_Sequence == 0 ?
"unaligned" :
"introns");
1382 ostr << (
m_Sequence == 0 ?
"unaligned_len" :
"intron_len");
1402 +
" not supported for protein alignments");
1405 typedef pair<const CProt_pos*, const CProt_pos*> TProteinExon;
1406 vector<TProteinExon> protein_exons;
1407 vector<TSeqRange> nuc_exons;
1414 intron_ranges += align_range;
1422 protein_exons.push_back(
1435 nuc_exons.push_back(exon_range);
1439 intron_ranges -= exon_range;
1443 list<TSeqRange> range_list;
1444 if (!nuc_exons.empty()) {
1445 range_list.insert(range_list.end(), nuc_exons.begin(),
1447 }
else if (!intron_ranges.
Empty()) {
1448 range_list.insert(range_list.end(), intron_ranges.
begin(),
1449 intron_ranges.
end());
1454 range_list.reverse();
1459 ITERATE (vector<TProteinExon>, it, protein_exons) {
1460 if (it != protein_exons.begin()) {
1464 ostr <<
'(' << it->first->GetAmin()+1
1465 <<
'/' << it->first->GetFrame()
1466 <<
".." << it->second->GetAmin()+1
1467 <<
'/' << it->second->GetFrame() <<
')';
1470 ITERATE (list<TSeqRange>, it, range_list) {
1471 if (it != range_list.begin()) {
1485 ostr << it->GetLength();
1504 ostr <<
"Taxid of the ";
1507 }
else if (
m_Row == 1) {
1511 "only pairwise alignments are supported");
1513 ostr <<
" sequence";
1520 }
else if (
m_Row == 1) {
1524 "only pairwise alignments are supported");
1533 "indexing past the end of available "
1534 "sequences in an alignment");
1556 }
else if (
m_Row == 1) {
1560 "only pairwise alignments are supported");
1562 ostr <<
" sequence";
1569 }
else if (
m_Row == 1) {
1573 "only pairwise alignments are supported");
1583 "indexing past the end of available "
1584 "sequences in an alignment");
1594 desc_iter; ++desc_iter)
1599 ostr << desc_iter->GetComment().substr(
m_Prefix.size() + 2);
1625 ostr <<
"Full taxname of the ";
1629 ostr <<
"Species name of the ";
1633 ostr <<
"Genus name of the ";
1637 ostr <<
"Kingdom name of the ";
1642 case 0: ostr <<
"query";
break;
1643 case 1: ostr <<
"subject";
break;
1646 "only pairwise alignments are supported");
1648 ostr <<
" sequence";
1654 case 0: ostr <<
"q";
break;
1655 case 1: ostr <<
"s";
break;
1658 "only pairwise alignments are supported");
1663 case eSpecies: ostr <<
"species";
break;
1664 case eGenus: ostr <<
"genus";
break;
1665 case eKingdom: ostr <<
"kingdom";
break;
1674 "indexing past the end of available "
1675 "sequences in an alignment");
1685 m_Row == 0 ?
"query_taxid"
1686 :
"subject_taxid"));
1690 taxid =
m_Taxon1->GetSpecies(taxid);
1698 taxid =
m_Taxon1->GetSuperkingdom(taxid);
1705 bool is_species =
false;
1706 bool is_uncultured =
false;
1709 m_Taxon1->GetOrgRef(taxid, is_species, is_uncultured, blast_name);
1712 org->GetLabel(&
label);
1729 ostr <<
"size of biggest gap";
1734 ostr <<
"biggestgap";
1735 }
else if(
m_Row == 0) {
1736 ostr <<
"qbiggestgap";
1737 }
else if(
m_Row == 1) {
1738 ostr <<
"sbiggestgap";
1741 "only pairwise alignments are supported");
1761 for(
int Index = 0; Index < Denseg.
GetNumseg(); Index++) {
1762 bool QGap = (Denseg.
GetStarts()[2*Index] == -1);
1766 }
else if(
m_Row == 0 && QGap) {
1775 "biggestgap is only supported for Dense-sef and Disc alignments");
1790 }
else if (
m_Row == 1) {
1794 "only pairwise alignments are supported");
1796 ostr <<
" has a chromosome, its name";
1803 }
else if (
m_Row == 1) {
1807 "only pairwise alignments are supported");
1823 if( (*SubIter)->CanGetSubtype() &&
1825 (*SubIter)->CanGetName() ) {
1826 Chrom = (*SubIter)->GetName();
1847 }
else if (
m_Row == 1) {
1851 "only pairwise alignments are supported");
1853 ostr <<
" has a clone, its name";
1860 }
else if (
m_Row == 1) {
1864 "only pairwise alignments are supported");
1880 if( (*SubIter)->CanGetSubtype() &&
1882 (*SubIter)->CanGetName() ) {
1883 Clone = (*SubIter)->GetName();
1907 }
else if (
m_Row == 1) {
1911 "only pairwise alignments are supported");
1913 ostr <<
" sequence tech type";
1920 }
else if (
m_Row == 1) {
1924 "only pairwise alignments are supported");
1933 string TechStr =
"(none)";
1956 ostr <<
"Strand of alignment in ";
1959 }
else if (
m_Row == 1) {
1963 "only pairwise alignments are supported");
1965 ostr <<
", 'b' if both in a Disc-seg alignment";
1971 ostr <<
"qdiscstrand";
1972 }
else if (
m_Row == 1) {
1973 ostr <<
"sdiscstrand";
1976 "only pairwise alignments are supported");
1983 bool Plus=
false, Minus=
false;
1987 else if(Minus && !Plus)
1989 else if(Plus && Minus)
1994 bool& Plus,
bool& Minus)
2014 : m_ColName(col_name)
2021 ostr <<
"'" <<
m_Text <<
"' as fixed text";
2040 ostr <<
"length_ungap / size of aligned query sequence range";
2045 ostr <<
"align_len_ratio";
2055 ostr << double(align_length) / double(align_range);
2068 ostr <<
"Alignment CIGAR string";
2083 "cigar format only supports denseg alignments.");
2091 for(
int Loop = 0; Loop < NumSeg; Loop++) {
2092 int Length = Lens[Loop];
2095 if( Starts[ (Loop*2) ] == -1)
2097 else if( Starts[ (Loop*2)+1 ] == -1)
2102 ostr << Length <<
Code;
2112 : m_Row(row), m_Type(
type), m_Info(
info)
2129 ostr <<
"Accession of ";
2132 ostr <<
"Chain id of ";
2135 ostr <<
"Chromosome containing ";
2139 ostr << (
m_Type ==
eFull ?
"full assembly" :
"assembly unit") <<
" of ";
2143 }
else if (
m_Row == 1) {
2147 "only pairwise alignments are supported");
2149 ostr <<
" sequence";
2156 }
else if (
m_Row == 1) {
2160 "only pairwise alignments are supported");
2163 ostr << (
m_Type ==
eFull ?
"fullasm" :
"asmunit");
2175 ostr <<
"chromosome";
2197 ?
"Assembly Name" :
"Assembly Unit Name")
2202 if (obj.
HasField(
"Assembly Accession")) {
2204 ?
"Assembly Accession" :
"Assembly Unit Accession")
2213 if (obj.
HasField(
"GenColl Chain")) {
2247 Assm = Seq->GetFullAssembly();
2253 Assm.
Reset(unit_assm);
2273 size_t chain_start = accession.find_first_of(
"123456789");
2274 size_t chain_end = accession.find(
'.');
2275 ostr << accession.substr(chain_start, chain_end-chain_start);
2280 ostr << Seq->GetChrName();
2288 : m_Row(row), m_Gencoll(gencoll)
2294 ostr <<
"Patch type, if any, of ";
2297 }
else if (
m_Row == 1) {
2301 "only pairwise alignments are supported");
2303 ostr <<
" sequence";
2309 ostr <<
"qpatchtype";
2310 }
else if (
m_Row == 1) {
2311 ostr <<
"spatchtype";
2314 "only pairwise alignments are supported");
2329 if(Seq->CanGetPatch_type()) {
2340 : m_Row(row), m_Gencoll(gencoll)
2346 ostr <<
"Nearest Gap, if any, or edge, of ";
2349 }
else if (
m_Row == 1) {
2353 "only pairwise alignments are supported");
2355 ostr <<
" sequence";
2361 ostr <<
"qnearestgap";
2362 }
else if (
m_Row == 1) {
2363 ostr <<
"snearestgap";
2366 "only pairwise alignments are supported");
2373 list<TSeqRange>& Gaps)
2380 if(!Seq->CanGetStructure())
2385 if( (*DeltaIter)->IsLiteral()) {
2386 if (!(*DeltaIter)->GetLiteral().CanGetSeq_data() ||
2387 (*DeltaIter)->GetLiteral().GetSeq_data().IsGap()) {
2390 GapRange.
SetLength((*DeltaIter)->GetLiteral().GetLength());
2391 Gaps.push_back(GapRange);
2393 CurrStart += (*DeltaIter)->GetLiteral().
GetLength();
2394 }
else if( (*DeltaIter)->IsLoc()) {
2395 s_FindGaps(Assembly, *(*DeltaIter)->GetLoc().GetId(), CurrStart, Gaps);
2396 CurrStart += (*DeltaIter)->GetLoc().GetTotalRange().GetLength();
2411 list<TSeqRange> Gaps;
2414 if(SeqLength == 0) {
2425 ITERATE(list<TSeqRange>, GapIter, Gaps) {
2443 ostr <<
"Blast Traceback string";
2457 "btop format only supports denseg alignments.");
2468 : m_IndelType(indel_type)
2469 , m_CoordinateRow(coordinate_row)
2478 ostr <<
"List of frameshift indels";
2482 ostr <<
"List of non-frameshifting indels";
2486 ostr <<
"List of all indels wihin CDS";
2490 ostr <<
", coordinates on query sequence";
2498 ostr <<
"frameshifts";
2502 ostr <<
"non-frameshift indels";
2506 ostr <<
"indels in cds";
2510 ostr <<
" on query";
2521 "failed to retrieve sequence for " +
2536 vector<CSeq_align::SIndel> indels;
2572 ostr <<
"Gene symbol for " << (
m_Row == 0 ?
"query" :
"subject");
2577 ostr << (
m_Row == 0 ?
"query" :
"subject") <<
"_gene_symbol";
2624 unsigned snp_count = 0, snp_pos = 0;
2627 for (
unsigned deletion_pos = 0; deletion_pos <
subject.size();
2630 string subject_with_del =
subject;
2631 subject_with_del.insert(deletion_pos, 1,
query[deletion_pos]);
2632 if (
query == subject_with_del) {
2633 subject.insert(deletion_pos, 1,
'-');
2639 for (
unsigned index = 0; index <
query.size(); ++index) {
2646 if (snp_count == 1) {
2658 : m_CoordinateRow(row)
2665 ostr <<
"Mismatches or indels within start codon";
2667 ostr <<
", coordinates on query sequence";
2673 ostr <<
"Start codon changes";
2675 ostr <<
" on query";
2686 "failed to retrieve sequence for " +
2709 : m_CoordinateRow(row)
2716 ostr <<
"Mismatches or indels within stop codon";
2718 ostr <<
", coordinates on query sequence";
2724 ostr <<
"Stop codon changes";
2726 ostr <<
" on query";
2752 -> GetSeq().GetAnnot().front()
2753 -> GetData().GetFtable().front();
2754 cds->
SetData().SetCdregion().ResetCode_break();
2757 bool missing_stop =
false;
2760 trans.resize(trans.size() - 1);
2762 missing_stop =
true;
2766 for (
size_t changed_codons_count = 0, internal_stop_pos = trans.find(
'*');
2767 internal_stop_pos != string::npos || missing_stop;
2768 internal_stop_pos = trans.find(
'*', internal_stop_pos+1))
2770 if (internal_stop_pos == string::npos) {
2772 internal_stop_pos = trans.size() - 1;
2773 missing_stop =
false;
2781 if (changed_codons_count++) {
2793 const string &unavailable_string)
2794 : m_Scores(&scores), m_Ostr(ostr), m_UnavailableString(unavailable_string)
3075 formatter_it->second->SetGencoll(gencoll);
3085 const string &separators,
3086 vector<string> &toks)
3088 unsigned int paren_level = 0;
3091 if (!paren_level && separators.find(*char_it) != string::npos) {
3092 if (!next_tok.empty()) {
3093 toks.push_back(next_tok);
3098 if (*char_it ==
'(') {
3100 }
else if (*char_it ==
')') {
3103 "Unbalanced parentheses: " +
format);
3107 next_tok += *char_it;
3109 if (!next_tok.empty()) {
3110 toks.push_back(next_tok);
3114 "Unbalanced parentheses: " +
format);
3120 CRegexp re1(
"score\\(([^,]*),([^)]*)\\)");
3121 CRegexp re2(
"score\\(([^)]*)\\)");
3123 CRegexp text_re1(
"text\\(([^,]*),([^)]*)\\)");
3124 CRegexp text_re2(
"text\\(([^)]*)\\)");
3126 vector<string> toks;
3129 ITERATE (vector<string>, it, toks) {
3135 string score_name = re1.
GetSub(*it, 1);
3136 string col_name = re1.
GetSub(*it, 2);
3140 string score_name = re2.
GetSub(*it, 1);
3143 }
else if (text_re1.
IsMatch(s)) {
3144 string score_name = text_re1.
GetSub(*it, 1);
3145 string col_name = text_re1.
GetSub(*it, 2);
3148 }
else if (text_re2.
IsMatch(s)) {
3149 string score_name = text_re2.
GetSub(*it, 1);
3167 (*it)->PrintHeader(
m_Ostr);
3169 list< CIRef<IFormatter> >::const_iterator
i = it;
3184 (*it)->Print(
m_Ostr, align);
3194 list< CIRef<IFormatter> >::const_iterator
i = it;
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
size_t GetSize(void) const
CRef< objects::CSeq_feat > ConvertAlignToAnnot(const objects::CSeq_align &align, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, Int8 gene_id=0, const objects::CSeq_feat *cdregion_on_mrna=NULL)
Convert an alignment to an annotation.
void SetFlags(TFeatureGeneratorFlags)
void SetAllowedUnaligned(TSeqPos)
CConstRef< objects::CSeq_align > CleanAlignment(const objects::CSeq_align &align)
Clean an alignment according to our best guess of its biological representation.
string GetAccession() const
Retrieve the accession for this assembly.
string GetName() const
Retrieve the name of this assembly.
void Find(const CSeq_id_Handle &id, TSequenceList &sequences) const
Find all references to a given sequence within an assembly.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
ostream & AsString(ostream &s) const
void GetLabel(string *label) const
const_iterator end() const
const_iterator begin() const
TSeqPos GetTotalGapCount(TDim row=-1) const
Retrieves the total number of gaps in the given row an alignment; all gaps by default.
CRangeCollection< TSeqPos > GetAlignedBases(TDim row) const
Retrieves the locations of aligned bases in the given row, excluding gaps and incontinuities.
vector< SIndel > GetNonFrameshiftsWithinRange(const TSeqRange &range, TDim row=-1) const
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
TSeqPos GetSeqStop(TDim row) const
TDim CheckNumRows(void) const
Validatiors.
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
bool GetNamedScore(const string &id, int &score) const
Get score.
vector< SIndel > GetFrameshiftsWithinRange(const TSeqRange &range, TDim row=-1) const
vector< SIndel > GetIndelsWithinRange(const TSeqRange &range, TDim row=-1) const
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
TSeqPos GetNumGapOpenings(TDim row=-1) const
Retrieves the number of gap openings in a given row in an alignment (ignoring how many gaps are in th...
CTabularFormatter_FixedText(const string &col_name, const string &text)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
SStrictId_Tax::TId TTaxId
Taxon id type.
#define TAX_ID_FROM(T, value)
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & FindName(TEnumValueType value, bool allowBadValue) const
Find name of the enum by its numeric value.
const string AsFastaString(void) const
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ eContent
Untagged human-readable accession or the like.
ENa_strand GetStrand(void) const
Get the location's strand.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
bool IsPartialStop(ESeqLocExtremes ext) const
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
@ eGetId_ForceGi
return only a gi-based seq-id
@ eGetId_HandleDefault
returns the ID associated with a bioseq-handle
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
vector< CSeq_id_Handle > TIds
const CSeqFeatData & GetData(void) const
TMol GetBioseqMolType(void) const
Get some values from core:
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & SetResolveTSE(void)
SetResolveTSE() is equivalent to SetResolveMethod(eResolve_TSE).
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
const CSeq_loc & GetProduct(void) const
TRange GetRange(void) const
Get range for mapped seq-feat's location.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
void Reset(void)
Reset reference object.
position_type GetLength(void) const
TThisType & SetLength(position_type length)
bool IsMatch(CTempString str, TMatch flags=fMatch_default)
Check existence substring which match a specified pattern.
CTempString GetSub(CTempString str, size_t idx=0) const
Get pattern/subpattern from previous GetMatch().
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string & ToLower(string &str)
Convert string to lower case – string& version.
@ eNocase
Case insensitive compare.
static const char label[]
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
bool CanGetSubtype(void) const
Check if it is safe to call GetSubtype method.
list< CRef< CSubSource > > TSubtype
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
const TLocus & GetLocus(void) const
Get the Locus member data.
const TStr & GetStr(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
const TTag & GetTag(void) const
Get the Tag member data.
const TData & GetData(void) const
Get the Data member data.
const TStr & GetStr(void) const
Get the variant data.
TInt GetInt(void) const
Get the variant data.
const TType & GetType(void) const
Get the Type member data.
TUnit & SetUnit(void)
Select the variant.
const TProtpos & GetProtpos(void) const
Get the variant data.
const TId & GetId(void) const
Get the Id member data.
const TDenseg & GetDenseg(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetId(void) const
alignment id Check if a value has been assigned to Id data member.
bool IsSetExt(void) const
extra info Check if a value has been assigned to Ext data member.
const TStarts & GetStarts(void) const
Get the Starts member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
const TLens & GetLens(void) const
Get the Lens member data.
vector< TSignedSeqPos > TStarts
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
list< CRef< CObject_id > > TId
list< CRef< CUser_object > > TExt
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
bool CanGetSegs(void) const
Check if it is safe to call GetSegs method.
TDim GetDim(void) const
Get the Dim member data.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
bool IsDisc(void) const
Check if variant Disc is selected.
const TExt & GetExt(void) const
Get the Ext member data.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
TNumseg GetNumseg(void) const
Get the Numseg member data.
list< CRef< CSeq_align > > Tdata
const TDisc & GetDisc(void) const
Get the variant data.
TNucpos GetNucpos(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
bool IsDenseg(void) const
Check if variant Denseg is selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
const TLocation & GetLocation(void) const
Get the Location member data.
void SetData(TData &value)
Assign a value to Data data member.
const TGene & GetGene(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
ENa_strand
strand of nucleic acid
bool IsGeneral(void) const
Check if variant General is selected.
const TGeneral & GetGeneral(void) const
Get the variant data.
@ eNa_strand_both
in forward orientation
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
const TSource & GetSource(void) const
Get the variant data.
TTech GetTech(void) const
Get the Tech member data.
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
bool CanGetTech(void) const
Check if it is safe to call GetTech method.
list< CRef< CDelta_seq > > Tdata
const TMolinfo & GetMolinfo(void) const
Get the variant data.
@ e_Comment
a more extensive comment
@ e_Molinfo
info on the molecule and techniques
@ e_Source
source of materials, includes Org-ref
unsigned int
A callback function used to compare two keys in a database.
static void text(MDB_val *v)
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
Magic spell ;-) needed for some weird compilers... very empiric.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double f(double x_, const double &y_)
static const char * prefix[]
static void s_Split(const string &format, const string &separators, vector< string > &toks)
Split a string, but ignore separators within parentheses.
TSeqPos s_FindGaps(const CGC_Assembly &Assembly, const CSeq_id &Id, const TSeqPos Offset, list< TSeqRange > &Gaps)
void s_AlignToSeqRanges(const CSeq_align &align, int row, list< TSeqRange > &ranges)
static string s_CodonVariation(const CSeq_align &align, TSeqPos pos, CScope &scope, int row)
C++ wrappers for the Perl-compatible regular expression (PCRE) library.
const value_slice::CValueConvert< value_slice::SRunTimeCP, FROM > Convert(const FROM &value)