72 using namespace sequence;
101 bool found_ambig =
false;
102 string::iterator it = seq_string.begin();
103 while (it != seq_string.end() && !found_ambig) {
104 if (*it !=
'A' && *it !=
'T' && *it !=
'C' && *it !=
'G' && *it !=
'U') {
111 "Feature comment indicates ambiguity in stop codon "
112 "but no ambiguities are present in stop codon.",
m_Feat);
120 bool suppress =
false;
129 "Apparent EC number in CDS comment");
142 "CDS has both RNA editing /exception and /transl_except qualifiers");
147 #define FOR_EACH_SEQID_ON_BIOSEQ_HANDLE(Itr, Var) \
148 ITERATE (CBioseq_Handle::TId, Itr, Var.GetId())
153 is_nt = is_ng = is_nw = is_nc =
false;
157 switch (sid.
Which()) {
179 bool& is_nt,
bool& is_ng,
bool& is_nw,
bool& is_nc)
181 is_nt = is_ng = is_nw = is_nc =
false;
199 bool is_nt, is_ng, is_nw, is_nc;
213 (is_nt || is_ng || is_nw),
236 return (*it)->GetId();
250 }
else if ((*it)->IsName()) {
265 string error_message;
267 bool got_dash = transl_start ==
'-';
268 string codon_desc = got_dash ?
"illegal" :
"ambiguous";
270 " internal stops (and " + codon_desc +
" start codon). Genetic code [" + gccode +
"]";
273 " internal stops. Genetic code [" + gccode +
"]";
275 return error_message;
286 string error_message;
288 bool got_dash = transl_prot[0] ==
'-';
289 string codon_desc = got_dash ?
"illegal" :
"ambiguous";
291 " internal stops (and " + codon_desc +
" start codon). Genetic code [" + gccode +
"]";
294 " internal stops. Genetic code [" + gccode +
"]";
296 return error_message;
302 bool got_dash = first_char ==
'-';
303 string codon_desc = got_dash ?
"Illegal" :
"Ambiguous";
304 string p_word = got_dash ?
"Probably" :
"Possibly";
309 string error_message;
311 if (internal_stop_count > 0) {
312 error_message = codon_desc +
" start codon (and " +
314 " internal stops). " + p_word +
" wrong genetic code [" +
317 error_message = codon_desc +
" start codon used. Wrong genetic code [" +
318 gccode +
"] or protein should be partial";
320 return error_message;
346 "Unable to fetch CDS product '" +
label +
"'");
350 bool is_nt, is_ng, is_nw, is_nc;
360 "No protein Bioseq given");
363 bool unclassified_except =
false;
365 unclassified_except =
true;
373 "Unparsed transl_except qual (but protein is okay). Skipped");
376 "Unparsed transl_except qual. Skipped");
396 "Unable to translate");
406 "Unnecessary alternative start codon exception");
435 "Suspicious CDS location - reading frame > 1 but not 5' partial");
445 "Suspicious CDS location - reading frame > 1 and not at consensus splice site");
450 "Missing stop codon");
454 "Got stop codon, but 3'end is labeled partial");
458 "Start of location should probably be partial");
463 " base(s) past stop codon");
470 "] is more than 120% of the ";
480 bool rna_editing =
false;
493 msg +=
"translation length [" +
497 msg +=
" (RNA editing present)";
503 bool mismatch_except =
false;
505 mismatch_except =
true;
516 +
") are not equal");
521 "End of location should probably be partial");
525 "This SeqFeat should not be partial");
530 "CDS has exception but passes translation test");
535 "CDS has unclassified exception but only difference is "
542 "CDS has unnecessary translated product replaced exception");
574 size_t num_mismatches = mismatches.size();
576 if (num_mismatches > 10) {
583 ", residue in protein [";
584 msg += mismatches.front().prot_res;
585 msg +=
"] != translation [";
586 msg += mismatches.front().transl_res;
588 if (!nuclocstr.empty()) {
589 msg +=
" at " + nuclocstr;
594 ", residue in protein [";
595 msg += mismatches.back().prot_res;
596 msg +=
"] != translation [";
597 msg += mismatches.back().transl_res;
599 if (!nuclocstr.empty()) {
600 msg +=
" at " + nuclocstr;
609 msg +=
". Genetic code [" + gccode +
"]";
613 for (
size_t i = 0;
i < mismatches.size(); ++
i) {
615 if (mismatches[
i].pos == 0 && mismatches[
i].transl_res ==
'-') {
620 if (mismatches[
i].prot_res ==
'X' &&
621 (mismatches[
i].transl_res ==
'B' || mismatches[
i].transl_res ==
'Z' || mismatches[
i].transl_res ==
'J')) {
630 msg += mismatches[
i].prot_res;
631 msg +=
"] != translation [";
632 msg += mismatches[
i].transl_res;
634 if (!nuclocstr.empty()) {
635 msg +=
" at " + nuclocstr;
646 for (
auto it = problems.begin(); it != problems.end(); it++) {
648 switch (it->problem) {
650 if (!has_exception) {
652 "transl_except qual out of frame.");
656 msg =
"Suspicious transl_except ";
658 msg +=
" at first codon of complete CDS";
662 msg =
"Unnecessary transl_except ";
664 msg +=
" at position ";
670 msg =
"Unexpected transl_except ";
673 +
" just past end of protein";
691 const CSeq_loc& cbr_loc = cbr.
GetLoc();
693 if ( ((comp !=
eContained) && (comp !=
eSame)) || cbr_loc.IsNull() || cbr_loc.IsEmpty()) {
695 "Code-break location not in coding region");
703 "Code-break: SeqLoc [" + lbl +
"] out of range");
707 if (!p_loc || p_loc->
IsNull() || frame != 1) {
709 "Code-break location not in coding region - may be frame problem");
717 "Translation exception locations should not be partial");
721 string msg =
"Multiple code-breaks at same location ";
723 if ( !
str.empty() ) {
749 "An ORF coding region should not have a product");
754 if (feat_is_pseudo) {
756 "A pseudo coding region should not have a product");
759 "A coding region overlapped by a pseudogene should not have a product");
762 "A pseudo coding region should not have a product");
779 "Protein product not packaged in nuc-prot set with nucleotide in small genome set");
782 "Protein product not packaged in nuc-prot set with nucleotide");
787 if ( !pseudo && !conflict ) {
813 "Exception flag should be set in coding region");
817 "Use the proper genetic code, if available, "
818 "or set transl_excepts on specific codons");
821 "protein_id should not be a gbqual on a CDS feature");
824 "gene_synonym should not be a gbqual on a CDS feature");
827 "transcript_id should not be a gbqual on a CDS feature");
832 "conflicting codon_start values");
835 "codon_start value should be 1, 2, or 3");
953 }
catch (
const std::exception& ) {
973 "A coding region contains invalid genetic code [" +
NStr::IntToString(cdsgencode) +
"]");
982 if (biopgencode != cdsgencode
994 "Genetic code conflict between CDS (code " +
996 ") and BioSource.genome biological context (" +
1000 "Genetic code conflict between CDS (code " +
1002 ") and BioSource (code " +
1015 int num_short_exons = 0;
1021 size_t prev_len = 16;
1022 size_t prev_start = 0;
1023 size_t prev_stop = 0;
1025 if (prev_len <= 15) {
1027 if (!message.empty()) {
1033 prev_len = it.GetRange().GetLength();
1034 prev_start = it.GetRange().GetFrom();
1035 prev_stop = it.GetRange().GetTo();
1039 if (num_short_exons > 1) {
1041 "Coding region has multiple internal exons that are too short at positions " + message);
1042 }
else if (num_short_exons == 1) {
1044 "Internal coding region exon is too short at position " + message);
1103 bool supress =
false;
1114 "mRNA contains CDS but internal intron-exon boundaries "
1119 "mRNA overlaps or contains CDS but does not completely "
1120 "contain intervals");
1136 static const list<CSeqFeatData::ESubtype> parent_types = {
1145 feat_tree =
Ref(
new feature::CFeatTree());
1147 for (
auto parent_type : parent_types) {
1148 feat_tree->AddFeaturesFor(mappedFeat, parent_type);
1156 for (
auto parent_type : parent_types) {
1157 CMappedFeat parent = feat_tree->GetParent(fh, parent_type);
1226 if (!far_mrna_nps) {
1309 if (! cds_ref || ! pep_ref) {
1329 "Peptide under CDS matches small Gene");
1360 if (partial5 || partial3) {
1362 "CDS is partial but protein is complete");
1372 "CDS is 5' complete but protein is NH2 partial");
1381 "CDS is 3' partial but protein is NH2 partial");
1388 "CDS is 3' complete but protein is CO2 partial");
1397 "CDS is 5' partial but protein is CO2 partial");
1402 if (partial5 && partial3) {
1403 }
else if (partial5) {
1410 "CDS is 5' partial but protein has neither end");
1411 }
else if (partial3) {
1418 "CDS is 3' partial but protein has neither end");
1421 "CDS is complete but protein has neither end");
1443 "annotated by transcript or proteomic data",
1444 "artificial frameshift",
1445 "mismatches in translation",
1446 "rearrangement required for product",
1447 "reasons given in citation",
1448 "translated product replaced",
1449 "unclassified translation discrepancy"
1477 if (
last.GetRange().GetFrom() == 0) {
1506 if (
first.GetRange().GetFrom() == 0) {
1536 bool found_match =
false;
1545 const CSeq_id& sid = loc_i.GetSeq_id();
1564 if (prod_nps == nuc_nps) {
1570 return !found_match;
1576 if (
abs ((
int)this_start - (
int)last_stop) < 11) {
1577 shortlist.push_back(
TShortIntron(last_stop, this_start));
1578 }
else if (
abs ((
int)this_stop - (
int)last_start) < 11) {
1579 shortlist.push_back(
TShortIntron(last_start, this_stop));
1586 vector<CCdregionValidator::TShortIntron> shortlist;
1588 CSeq_loc_CI li(loc);
1590 TSeqPos last_start = li.GetRange().GetFrom();
1591 TSeqPos last_stop = li.GetRange().GetTo();
1593 last_id->
Assign(li.GetSeq_id());
1597 TSeqPos this_start = li.GetRange().GetFrom();
1598 TSeqPos this_stop = li.GetRange().GetTo();
1599 if (
abs ((
int)this_start - (
int)last_stop) < 11 ||
abs ((
int)this_stop - (
int)last_start) < 11) {
1600 if (li.GetSeq_id().Equals(*last_id)) {
1607 for (
auto id_it : last_bsh.
GetId()) {
1608 if (id_it.GetSeqId()->Equals(li.GetSeq_id())) {
1616 last_start = this_start;
1617 last_stop = this_stop;
1618 last_id->
Assign(li.GetSeq_id());
1641 if (shortlist.size() == 0) {
1647 if (nonsense_introns.size() > 0) {
1651 if (shortlist.size() == 1) {
1653 }
else if (shortlist.size() == 2) {
1658 for (
size_t i = 0;
i < shortlist.size() - 2;
i++) {
1664 "Introns at positions " + message +
" should be at least 10 nt long");
1688 "rearrangement required for product") !=
NPOS ) {
1695 "Expected CDS product absent");
1711 }
catch (
const runtime_error& ) {
1720 if ( transl_prot.empty() || prot_seq.empty() ||
NStr::Equal(transl_prot, prot_seq) ) {
1722 "Coding region conflict flag should not be set");
1725 "Coding region conflict flag is set");
1748 "Unable to find product Bioseq from CDS feature");
1769 "Same product Bioseq from multiple CDS features");
1789 "Coding region and protein feature partials conflict");
1796 if (vec.
IsInGap(pos) || vec[pos] ==
'N') {
1810 bool check_gaps =
false;
1816 bool has_abutting_gap =
false;
1824 pos = is_minus_strand ? start + 1 : start - 1;
1831 if (!has_abutting_gap) {
1849 pos = is_minus_strand ? stop - 1 : stop + 1;
1856 if (!has_abutting_gap) {
1883 if (contained_mrna.size() == 1) {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
@ eErr_SEQ_FEAT_PartialProblemMismatch5Prime
@ eErr_SEQ_FEAT_CDSmRNAmismatch
@ eErr_SEQ_FEAT_CDShasTooManyXs
@ eErr_SEQ_FEAT_TranslExceptPhase
@ eErr_SEQ_FEAT_InvalidCodonStart
@ eErr_SEQ_FEAT_SuspiciousFrame
@ eErr_SEQ_FEAT_UnnecessaryTranslExcept
@ eErr_SEQ_FEAT_PartialsInconsistentCDSProtein
@ eErr_SEQ_FEAT_MissingCDSproduct
@ eErr_SEQ_FEAT_TranslExceptAndRnaEditing
@ eErr_SEQ_FEAT_UnnecessaryException
@ eErr_SEQ_FEAT_PartialProblemMismatch3Prime
@ eErr_SEQ_FEAT_InternalStop
@ eErr_SEQ_FEAT_MisMatchAA
@ eErr_SEQ_FEAT_WrongQualOnFeature
@ eErr_SEQ_FEAT_ProductFetchFailure
@ eErr_SEQ_FEAT_GenCodeMismatch
@ eErr_SEQ_FEAT_PseudoCdsHasProduct
@ eErr_SEQ_FEAT_ProductLength
@ eErr_SEQ_FEAT_CdTransFail
@ eErr_SEQ_FEAT_BadCDScomment
@ eErr_SEQ_FEAT_NoProtein
@ eErr_SEQ_FEAT_MultipleCDSproducts
@ eErr_SEQ_FEAT_EcNumberInCDSComment
@ eErr_SEQ_FEAT_ShortExon
@ eErr_SEQ_FEAT_StartCodon
@ eErr_SEQ_FEAT_OrfCdsHasProduct
@ eErr_SEQ_FEAT_ErroneousException
@ eErr_SEQ_FEAT_GeneOnNucPositionOfPeptide
@ eErr_SEQ_FEAT_DuplicateTranslExcept
@ eErr_SEQ_FEAT_PartialProblemHasStop
@ eErr_SEQ_FEAT_PseudoCdsViaGeneHasProduct
@ eErr_SEQ_FEAT_CodonQualifierUsed
@ eErr_SEQ_FEAT_ShortIntron
@ eErr_SEQ_FEAT_AltStartCodonException
@ eErr_SEQ_FEAT_WrongQualOnCDS
@ eErr_SEQ_FEAT_TranslExceptIsPartial
@ eErr_SEQ_FEAT_ConflictFlagSet
@ eErr_SEQ_FEAT_CDSproductPackagingProblem
@ eErr_SEQ_FEAT_BadConflictFlag
@ eErr_SEQ_FEAT_CDSmRNArange
@ eErr_SEQ_FEAT_TranslExcept
@ eErr_SEQ_FEAT_PartialProblem
@ eErr_SEQ_FEAT_TerminalXDiscrepancy
@ eErr_SEQ_FEAT_GenCodeInvalid
@ eErr_SEQ_FEAT_PseudoCDSmRNArange
@ eErr_SEQ_FEAT_MissingExceptionFlag
@ eErr_SEQ_FEAT_IntronIsStopCodon
static const char *const sc_BypassCdsPartialCheckText[]
const string s_PlastidTxt[20]
static void s_LocIdType(CBioseq_Handle bsh, bool &is_nt, bool &is_ng, bool &is_nw, bool &is_nc)
#define FOR_EACH_SEQID_ON_BIOSEQ_HANDLE(Itr, Var)
int GetGcodeForInternalStopErrors(const CCdregion &cdr)
string GetStartCodonErrorMessage(const CSeq_feat &feat, const char first_char, size_t internal_stop_count)
string GetInternalStopErrorMessage(const CSeq_feat &feat, size_t internal_stop_count, bool bad_start, char transl_start)
CStaticArraySet< const char *, PCase_CStr > TBypassCdsPartialCheckSet
static bool IsGeneticCodeValid(int gcode)
DEFINE_STATIC_ARRAY_MAP(TBypassCdsPartialCheckSet, sc_BypassCdsPartialCheck, sc_BypassCdsPartialCheckText)
int GetGcodeForName(const string &code_name)
static int s_GetStrictGenCode(const CBioSource &src)
CRef< CSeq_loc > GetLastCodonLoc(const CSeq_feat &cds, CScope &scope)
bool DoesCodingRegionHaveTerminalCodeBreak(const objects::CCdregion &cdr)
size_t GetProtLen() const
size_t GetProdTerminalX() const
vector< STranslExceptProblem > TTranslExceptProblems
size_t GetNumNonsenseIntrons() const
void CalculateTranslationProblems(const CSeq_feat &feat, CBioseq_Handle loc_handle, CBioseq_Handle prot_handle, bool ignore_exceptions, bool far_fetch_cds, bool standalone_annot, bool single_seq, bool is_gpipe, bool is_genomic, bool is_refseq, bool is_nt_or_ng_or_nw, bool is_nc, bool has_accession, CScope *scope)
@ eCDSTranslationProblem_NoStop
@ eCDSTranslationProblem_NoProtein
@ eCDSTranslationProblem_FrameNotConsensus
@ eCDSTranslationProblem_ShouldStartPartial
@ eCDSTranslationProblem_BadStart
@ eCDSTranslationProblem_FrameNotPartial
@ eCDSTranslationProblem_UnnecessaryException
@ eCDSTranslationProblem_UnableToFetch
@ eCDSTranslationProblem_ShouldNotBePartialButIs
@ eCDSTranslationProblem_ErroneousException
@ eCDSTranslationProblem_TooManyX
@ eCDSTranslationProblem_UnqualifiedException
@ eCDSTranslationProblem_StopPartial
@ eCDSTranslationProblem_ShouldBePartialButIsnt
static vector< CRef< CSeq_loc > > GetNonsenseIntrons(const CSeq_feat &feat, CScope &scope)
vector< STranslationMismatch > TTranslationMismatches
const TTranslationMismatches & GetTranslationMismatches() const
size_t GetTranslationProblemFlags() const
size_t GetInternalStopCodons() const
@ eTranslExceptUnnecessary
@ eTranslExceptSuspicious
@ eTranslExceptUnexpected
size_t GetTransLen() const
bool UnableToTranslate() const
int GetRaggedLength() const
char GetTranslStartCharacter() const
size_t GetTranslTerminalX() const
const TTranslExceptProblems & GetTranslExceptProblems() const
bool HasException() const
bool HasUnparsedTranslExcept() const
void x_ReportTranslationMismatches(const CCDSTranslationProblems::TTranslationMismatches &mismatches)
void x_ValidateParentPartialness()
void ReportShortIntrons()
bool x_CDS5primePartialTest() const
void x_ValidateSeqFeatLoc() override
void x_ValidateBadMRNAOverlap()
bool x_CDS3primePartialTest() const
static string x_FormatIntronInterval(const TShortIntron &interval)
void x_ValidateFarProducts()
static bool IsPlastid(int genome)
CConstRef< CSeq_feat > m_Gene
void x_ValidateGeneticCode()
bool x_IsProductMisplaced() const
static vector< TShortIntron > x_GetShortIntrons(const CSeq_loc &loc, CScope *scope)
pair< TSeqPos, TSeqPos > TShortIntron
void x_ValidateCodebreak()
void x_ValidateCDSPeptides()
CCdregionValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
bool x_BypassCDSPartialTest() const
void x_ValidateConflict()
void x_ReportTranslationProblems(const CCDSTranslationProblems &problems)
void x_ValidateProductPartials()
void x_ValidateCDSPartial()
void x_ValidateProductId()
bool x_CheckPosNOrGap(TSeqPos pos, const CSeqVector &vec)
string MapToNTCoords(TSeqPos pos)
static void x_AddToIntronList(vector< TShortIntron > &shortlist, TSeqPos last_start, TSeqPos last_stop, TSeqPos this_start, TSeqPos this_stop)
void x_ValidateFeatComment() override
void x_ValidateCommonProduct()
void x_ValidateExceptText(const string &text) override
bool x_ReportOrigProteinId() override
void x_ReportTranslExceptProblems(const CCDSTranslationProblems::TTranslExceptProblems &problems, bool has_exception)
@Gb_qual.hpp User-defined methods of the data storage class.
static const CTrans_table & GetTransTable(int id)
static const CGenetic_code_table & GetCodeTable(void)
CRef< feature::CFeatTree > GetFeatTreeFromCache(const CSeq_loc &loc, CScope &scope)
CConstRef< CSeq_feat > GetGeneFromCache(const CSeq_feat *feat, CScope &scope)
Exceptions for objmgr/util library.
@ eSubtype_transit_peptide_aa
@ eSubtype_sig_peptide_aa
@ eSubtype_mat_peptide_aa
namespace ncbi::objects::
void PostErr(EDiagSev sv, EErrType et, const string &msg)
CBioseq_Handle m_ProductBioseq
static bool s_IsPseudo(const CSeq_feat &feat)
CBioseq_Handle m_LocationBioseq
virtual void x_ValidateFeatComment()
void x_ValidateLocusTagGeneralMatch(CConstRef< CSeq_feat > gene)
void x_ReportPseudogeneConflict(CConstRef< CSeq_feat > gene)
static bool x_BioseqHasNmAccession(CBioseq_Handle bsh)
void ValidateSplice(bool gene_pseudo, bool check_all)
virtual void x_ValidateSeqFeatLoc()
virtual void x_ValidateExceptText(const string &text)
static bool s_BioseqHasRefSeqThatStartsWithPrefix(CBioseq_Handle bsh, string prefix)
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
bool IsRemoteFetch() const
bool IsStandaloneAnnot() const
bool IsFarSequence(const CSeq_id &id)
bool IsHugeFileMode() const
bool x_IsFarFetchFailure(const CSeq_loc &loc)
bool RequireLocalProduct(const CSeq_id *sid) const
bool IsFarFetchCDSproducts() const
bool IgnoreExceptions() const
void SetFarFetchFailure()
CGeneCache & GetGeneCache()
bool IsSmallGenomeSet() const
const CSeq_entry & GetTSE() const
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
Public API for finding the gene(s) on a given feature using the same criteria as the flatfile generat...
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
void SetDiagFilter(EDiagFilter what, const char *filter_str)
Set diagnostic filter.
string GetDiagFilter(EDiagFilter what)
Get current diagnostic filter.
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Critical
Critical error message.
@ eDiagFilter_All
for all non-FATAL
@ eDiagFilter_Post
for all non-TRACE, non-FATAL
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
EAccessionInfo
For IdentifyAccession (below)
CSeq_id::EAccessionInfo IdentifyAccession(void) const
CSeq_id::E_Choice Which(void) const
@ eAcc_refseq_wgs_intermed
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
ENa_strand GetStrand(void) const
Get the location's strand.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
bool IsOneBioseq(const CSeq_loc &loc, CScope *scope)
Returns true if all embedded CSeq_ids represent the same CBioseq, else false.
ESeqLocCheck SeqLocCheck(const CSeq_loc &loc, CScope *scope)
Checks that a CSeq_loc is all on one strand on one CBioseq.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eOverlap_SubsetRev
1st is a subset of 2nd ranges
@ eOverlap_Contains
2nd contains 1st extremes
@ eOverlap_CheckIntRev
1st is a subset of 2nd with matching boundaries
@ eOverlap_Simple
any overlap of extremes
@ eOverlap_Interval
at least one pair of intervals must overlap
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
CRef< CSeq_loc > ProductToSource(const CSeq_feat &feat, const CSeq_loc &prod_loc, TP2SFlags flags=0, CScope *scope=0)
const CSeq_feat * GetCDSForProduct(const CBioseq &product, CScope *scope)
Get the encoding CDS feature of a given protein sequence.
CRef< CSeq_loc > SourceToProduct(const CSeq_feat &feat, const CSeq_loc &source_loc, TS2PFlags flags=0, CScope *scope=0, int *frame=0)
CConstRef< CSeq_feat > GetmRNAforCDS(const CSeq_feat &cds, CScope &scope)
GetmRNAforCDS A function to find a CSeq_feat representing the appropriate mRNA for a given CDS.
vector< TFeatScore > TFeatScores
void GetOverlappingFeatures(const CSeq_loc &loc, CSeqFeatData::E_Choice feat_type, CSeqFeatData::ESubtype feat_subtype, EOverlapType overlap_type, TFeatScores &feats, CScope &scope, const TBestFeatOpts opts=0, CGetOverlappingFeaturesPlugin *plugin=NULL)
Find all features overlapping the location.
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
@ fS2P_AllowTer
map the termination codon as a legal location
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence withing one TSE.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
@ eProductToLocation
Map from the feature's product to location.
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
bool IsSetProduct(void) const
virtual const CSeq_loc & GetLocation(void) const
TInst_Length GetInst_Length(void) const
bool IsSetInst(void) const
CSeq_entry_Handle GetExactComplexityLevel(CBioseq_set::EClass cls) const
Return level with exact complexity, or empty handle if not found.
CSeqFeatData::ESubtype GetFeatSubtype(void) const
const CGene_ref * GetGeneXref(void) const
get gene (if present) from Seq-feat.xref list
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
const TId & GetId(void) const
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
const TInst & GetInst(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
const CSeq_loc & GetLocation(void) const
const CSeq_loc & GetProduct(void) const
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
bool IsInGap(TSeqPos pos) const
true if sequence at 0-based position 'pos' has gap Note: this method is not MT-safe,...
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
void SetCoding(TCoding coding)
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static const char label[]
TGenome GetGenome(void) const
Get the Genome member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
bool CanGetGenome(void) const
Check if it is safe to call GetGenome method.
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
const TLocus & GetLocus(void) const
Get the Locus member data.
bool IsSetDb(void) const
name of database or system Check if a value has been assigned to Db data member.
const TDb & GetDb(void) const
Get the Db member data.
bool IsSetPgcode(void) const
plastid genetic code Check if a value has been assigned to Pgcode data member.
TMgcode GetMgcode(void) const
Get the Mgcode member data.
TGcode GetGcode(void) const
Get the Gcode member data.
bool IsSetMgcode(void) const
mitochondrial genetic code Check if a value has been assigned to Mgcode data member.
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
bool IsSetGcode(void) const
genetic code (see CdRegion) Check if a value has been assigned to Gcode data member.
TPgcode GetPgcode(void) const
Get the Pgcode member data.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
bool IsSetEc(void) const
E.C.
bool IsSetOrf(void) const
just an ORF ? Check if a value has been assigned to Orf data member.
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
bool IsSetCode(void) const
genetic code used Check if a value has been assigned to Code data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
TOrf GetOrf(void) const
Get the Orf member data.
const TLoc & GetLoc(void) const
Get the Loc member data.
list< CRef< CGenetic_code > > Tdata
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsSetConflict(void) const
conflict Check if a value has been assigned to Conflict data member.
bool IsGene(void) const
Check if variant Gene is selected.
TFrame GetFrame(void) const
Get the Frame member data.
const TData & GetData(void) const
Get the Data member data.
bool CanGetData(void) const
Check if it is safe to call GetData method.
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
const TCode & GetCode(void) const
Get the Code member data.
list< CRef< C_E > > Tdata
const TCdregion & GetCdregion(void) const
Get the variant data.
bool CanGetExcept_text(void) const
Check if it is safe to call GetExcept_text method.
const TProduct & GetProduct(void) const
Get the Product member data.
const Tdata & Get(void) const
Get the member data.
bool CanGetExcept(void) const
Check if it is safe to call GetExcept method.
const TComment & GetComment(void) const
Get the Comment member data.
bool CanGetOrf(void) const
Check if it is safe to call GetOrf method.
const TGene & GetGene(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TProt & GetProt(void) const
Get the variant data.
bool CanGetCode(void) const
Check if it is safe to call GetCode method.
TExcept GetExcept(void) const
Get the Except member data.
bool CanGetProduct(void) const
Check if it is safe to call GetProduct method.
const TQual & GetQual(void) const
Get the Qual member data.
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
bool CanGetQual(void) const
Check if it is safe to call GetQual method.
TConflict GetConflict(void) const
Get the Conflict member data.
bool IsSetCode_break(void) const
individual exceptions Check if a value has been assigned to Code_break data member.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
bool IsSetLocation(void) const
feature made from Check if a value has been assigned to Location data member.
@ eFrame_not_set
not set, code uses one
void SetPoint(TPoint value)
Assign a value to Point data member.
void SetId(TId &value)
Assign a value to Id data member.
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
bool IsOther(void) const
Check if variant Other is selected.
void SetStrand(TStrand value)
Assign a value to Strand data member.
bool IsGeneral(void) const
Check if variant General is selected.
const TOther & GetOther(void) const
Get the variant data.
const TGeneral & GetGeneral(void) const
Get the variant data.
bool IsNull(void) const
Check if variant Null is selected.
const TAccession & GetAccession(void) const
Get the Accession member data.
bool IsSeq(void) const
Check if variant Seq is selected.
@ eClass_nuc_prot
nuc acid and coded proteins
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
TRepr GetRepr(void) const
Get the Repr member data.
bool IsSetRepr(void) const
Check if a value has been assigned to Repr data member.
const TSource & GetSource(void) const
Get the variant data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
bool CanGetCompleteness(void) const
Check if it is safe to call GetCompleteness method.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
@ eRepr_delta
sequence made by changes (delta) to others
@ eCompleteness_has_left
5' or NH3 end present
@ eCompleteness_complete
complete biological entity
@ eCompleteness_has_right
3' or COOH end present
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_partial
partial but no details given
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ e_Ncbieaa
extended ASCII 1 letter aa codes
@ e_Molinfo
info on the molecule and techniques
@ e_Source
source of materials, includes Org-ref
static void text(MDB_val *v)
const struct ncbi::grid::netcache::search::fields::KEY key
#define FOR_EACH_CODEBREAK_ON_CDREGION(Itr, Var)
FOR_EACH_CODEBREAK_ON_CDREGION EDIT_EACH_CODEBREAK_ON_CDREGION.
#define FOR_EACH_GBQUAL_ON_FEATURE
#define NCBI_SEQID(Type)
@NAME Convenience macros for NCBI objects
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)