55 using namespace sequence;
58 : m_Feat(feat),
m_Scope(scope), m_Imp(imp), m_ProductIsFar(
false)
68 "The feature is missing a location");
73 bool lowerSev =
false;
77 const CDbtag& dbtag = **it;
78 if ( dbtag.
GetDb() ==
"dbSNP" ) {
86 "Location",
m_Feat, lowerSev);
125 "Inference or experiment qualifier missing but obsolete experimental evidence qualifier set");
151 if (loc.IsInt() || loc.IsWhole()) {
156 for (CSeq_loc_CI citer(loc); citer; ++citer) {
157 const CSeq_id& this_id = citer.GetSeq_id();
158 if (!
prev || !
prev->Equals(this_id)) {
163 prev.Reset(&this_id);
177 switch (sid.
Which()) {
190 "Feature product should not put an accession in the Textseq-id 'name' slot");
193 "Feature product should not use "
194 "Textseq-id 'name' slot");
209 if (id->Which() == sid.
Which()) {
211 string from_seq =
id->AsFastaString();
216 "Capitalization change from product location on feature to product sequence");
219 switch (id->Which()) {
232 "Protein bioseq has Textseq-id 'name' that "
233 "looks like it is derived from a nucleotide "
237 "Protein bioseq has Textseq-id 'name' and no accession");
254 bool is_seqloc_bond =
false;
258 for (CSeq_loc_CI it(feat.
GetLocation()); it; ++it) {
259 if (it.GetEmbeddingSeq_loc().IsBond()
260 && (!it.GetEmbeddingSeq_loc().GetBond().IsSetA()
261 || it.GetEmbeddingSeq_loc().GetBond().IsSetB())) {
262 is_seqloc_bond =
true;
267 for (CSeq_loc_CI it(feat.
GetLocation()); it; ++it) {
268 if (it.GetEmbeddingSeq_loc().IsBond()) {
269 is_seqloc_bond =
true;
275 for (CSeq_loc_CI it(feat.
GetLocation()); it; ++it) {
276 if (it.GetEmbeddingSeq_loc().IsBond()) {
277 is_seqloc_bond =
true;
282 return is_seqloc_bond;
293 if (both || both_rev) {
295 if (both && both_rev) {
296 suffix =
"(forward and reverse)";
299 }
else if (both_rev) {
306 label +
" may not be on both " +
suffix +
" strands");
315 for (CSeq_loc_CI it(loc); it; ++it) {
316 if (it.IsSetStrand()) {
324 if (both && both_rev) {
333 has_parent_gene_id =
false;
339 has_parent_gene_id =
true;
340 if ((*it)->IsSetTag() && (*it)->GetTag().Equals(
tag)) {
372 bool has_parent_gene_id =
false;
373 if (!
HasGeneIdXref(parent, (*it)->GetTag(), has_parent_gene_id)) {
374 if (has_parent_gene_id ||
380 parent = feat_tree->GetParent(parent);
397 if ((*pi)->IsEquiv()) {
399 "Citation on feature has unexpected internal Pub-equiv");
409 "empty inference string",
410 "bad inference prefix",
411 "bad inference body",
412 "single inference field",
413 "spaces in inference",
414 "possible comment in inference",
415 "same species misused",
416 "the value in the accession field is not legal. The only allowed value is accession.version, eg AF123456.1. Problem =",
417 "bad inference accession version",
418 "accession.version not public",
419 "bad accession type",
420 "unrecognized database",
435 "Qualifier other than replace has just quotation marks");
442 "Inference qualifier problem - empty inference string ()");
449 qual.
GetVal() +
" is not in proper EC_number format");
451 string ec_number = qual.
GetVal();
457 "EC_number " + ec_number +
" was deleted");
462 "EC_number " + ec_number +
" was replaced");
467 if (pos == string::npos || !
isdigit(ec_number.c_str()[pos + 1])) {
469 ec_number +
" is not a legal value for qualifier EC_number");
472 ec_number +
" is not a legal preliminary value for qualifier EC_number");
499 "/pseudogene value should not be '" + qual.
GetVal() +
"'",
m_Feat);
502 bool has_space =
false;
503 bool has_char_after_space =
false;
505 if (
isspace((
unsigned char)(*it))) {
507 }
else if (has_space) {
509 has_char_after_space =
true;
513 if (has_char_after_space) {
515 "Number qualifiers should not contain spaces");
520 "feature qualifier " + qual.
GetVal() +
" has SGML");
533 "Unable to find EC number file 'ecnum_ambiguous.txt' in data directory");
537 "Unable to find EC number file 'ecnum_deleted.txt' in data directory");
541 "Unable to find EC number file 'ecnum_replaced.txt' in data directory");
545 "Unable to find EC number file 'ecnum_specific.txt' in data directory");
554 for (
auto it : errors) {
556 it.first, it.second);
583 "Feature comment may refer to reference by serial number - "
584 "attach reference specific comments to the reference "
585 "REMARK instead.",
m_Feat);
589 "feature comment " + comment +
" has SGML",
617 "On partial Bioseq, SeqFeat.partial should be TRUE");
620 else if (is_partial &&
629 "When SeqFeat.product is a partial Bioseq, SeqFeat.location "
630 "should also be partial");
638 "Gene of 'order' with otherwise complete location should "
639 "have partial flag set");
645 bool is_far_fail =
false;
653 string str(
"Inconsistent: Product= complete, Location= ");
655 str +=
"Feature.partial= ";
656 str += is_partial ?
"TRUE" :
"FALSE";
659 }
else if (is_far_fail) {
667 string str(
"Inconsistent: ");
674 str +=
"Feature.partial= ";
675 str += is_partial ?
"TRUE" :
"FALSE";
688 "5' or 3' partial location should not have unclassified"
689 " partial in product molinfo descriptor");
704 "Bond location should only be on bond features");
709 string prefix =
"Feature";
728 if ((*it)->IsGi() || (*it)->IsGibbsq() || (*it)->IsGibbmt()) {
732 (*it)->WriteAsFasta(os2);
736 "Sequence identifier in feature location differs in capitalization with identifier on Bioseq");
744 "Feature on protein indicates negative strand");
751 vector<TSeqPos> gap_starts;
757 "Feature contains more than 50% Ns");
759 for (
auto gap_start : gap_starts) {
766 "Feature inside sequence gap");
771 "Internal interval begins or ends in gap");
775 "Feature crosses gap of unknown length");
780 string(
"Exception while checking for intervals in gaps. EXCEPTION: ") +
782 }
catch (
const std::exception&) {
827 while (map_iter && pos <= stop) {
830 for (; pos < map_end && pos <= stop; pos++) {
889 if ( (*it)->IsLoc() ) {
910 int num_unknown_gap = 0;
911 bool first_in_gap =
false, last_in_gap =
false;
912 bool local_first_gap =
false, local_last_gap =
false;
913 bool startsOrEndsInGap =
false;
916 for (CSeq_loc_CI loc_it(loc); loc_it; ++loc_it) {
923 if (id_it->Equals(loc_it.GetSeq_id())) {
938 local_first_gap =
false;
939 local_last_gap =
false;
944 string::iterator it = vec_data.begin();
945 while (it != vec_data.end() && pos <
len) {
947 bool unknown_length =
false;
953 unknown_length =
true;
960 unknown_length =
true;
966 local_first_gap =
true;
967 }
else if (pos ==
len - 1) {
968 local_last_gap =
true;
970 if (unknown_length) {
975 }
else if (*it ==
'N') {
992 first_in_gap = local_first_gap;
995 last_in_gap = local_last_gap;
996 if (local_first_gap || local_last_gap) {
997 startsOrEndsInGap =
true;
1001 if (num_real == 0 && num_n == 0) {
1011 if (num_gap == 0 && num_unknown_gap == 0 && num_n == 0) {
1013 }
else if (first_in_gap || last_in_gap) {
1018 gap_starts.push_back(gap_start);
1022 }
else if (num_real == 0 && num_gap == 0 && num_unknown_gap == 0 && num_n >= 50) {
1024 }
else if (startsOrEndsInGap) {
1026 }
else if (num_unknown_gap > 0) {
1050 if ((*it)->IsLiteral()) {
1051 len = (*it)->GetLiteral().GetLength();
1052 }
else if ((*it)->IsLoc()) {
1073 for (CSeq_loc_CI loc_it(loc); loc_it; ++loc_it) {
1080 if (id_it->Equals(loc_it.GetSeq_id())) {
1096 string::iterator it = vec_data.begin();
1097 while (it != vec_data.end()) {
1105 if ((
unsigned)(*it + 1) <= 256 &&
isalpha(*it)) {
1113 }
catch (
const std::exception& ) {
1118 return (num_n > real_bases);
1129 const CSeq_id* protid =
nullptr;
1154 if (!prot_handle && look_far) {
1167 bool look_far =
false;
1188 "Exception text is present, but exception flag is not set");
1192 "Exception flag is set, but exception text is empty");
1202 if (
text.empty())
return;
1209 bool reasons_in_cit =
false;
1210 bool annotated_by_transcript_or_proteomic =
false;
1211 bool redundant_with_comment =
false;
1212 bool refseq_except =
false;
1213 vector<string> exceptions;
1215 ITERATE(vector<string>, it, exceptions) {
1225 reasons_in_cit =
true;
1227 annotated_by_transcript_or_proteomic =
true;
1232 bool check_refseq =
false;
1234 check_refseq =
true;
1236 check_refseq =
true;
1239 if ((*id_it)->IsOther()) {
1240 check_refseq =
true;
1249 refseq_except =
true;
1262 str +
" is not a legal exception explanation");
1269 redundant_with_comment =
true;
1271 redundant_with_comment =
true;
1275 if (redundant_with_comment) {
1277 "Exception explanation text is also found in feature comment");
1279 if (refseq_except) {
1282 if (!found_just_the_exception) {
1284 "Genome processing exception should not be combined with other explanations");
1290 "Reasons given in citation exception does not have the required citation");
1292 if (annotated_by_transcript_or_proteomic) {
1293 bool has_inference =
false;
1296 has_inference =
true;
1300 if (!has_inference) {
1302 "Annotated by transcript or proteomic data exception does not have the required inference qualifier");
1326 bool is_imp =
false;
1350 const string& qual_str = gbq->GetQual();
1356 auto gbqual = gbqual_and_value.first;
1361 qual_str +
" is improperly capitalized");
1372 "Unknown qualifier " + qual_str);
1387 if (
NStr::Equal(qual_str,
"orig_transcript_id")) {
1391 if (
NStr::Equal(qual_str,
"orig_transcript_id")) {
1401 "Wrong qualifier " + qual_str +
" for feature " +
1407 "feat_class qualifier is only legal for RefSeq");
1412 const string&
val = gbq->GetVal();
1418 "Compound '" +
val +
"' must be split into separate instances of qualifier " + qual_str);
1422 val +
" is not a legal value for qualifier " + qual_str);
1452 val +
" is not a legal value for qualifier " + qual_str);
1464 "Vector Contamination region should be trimmed from sequence");
1473 "A product qualifier is not used on a gene feature");
1481 "locus-tag values should be on genes");
1494 bool multiple_rpt_unit =
false;
1498 }
else if ( *it ==
'(' || *it ==
')' ||
1499 *it ==
',' || *it ==
'.' ||
1500 isdigit((
unsigned char)(*it)) ) {
1501 multiple_rpt_unit =
true;
1511 !multiple_rpt_unit ) {
1513 bool just_nuc_letters =
true;
1514 static const string nuc_letters =
"ACGTNacgtn";
1516 if ( nuc_letters.find(*it) ==
NPOS ) {
1517 just_nuc_letters =
false;
1522 if ( just_nuc_letters ) {
1524 if ( !vec.
empty() ) {
1529 "repeat_region /rpt_unit and underlying "
1530 "sequence do not match");
1536 "Length of rpt_unit_seq is greater than feature length");
1548 const char *cp =
val.c_str();
1549 bool badchars =
false;
1550 while (*cp != 0 && !badchars) {
1553 }
else if (*cp !=
'(' && *cp !=
')'
1555 && *cp !=
',' && *cp !=
';') {
1562 "/rpt_unit_seq has illegal characters");
1570 if (
str.length() > 25) {
1574 if (pos == string::npos) {
1578 int tmp_from, tmp_to;
1586 }
catch (
const std::exception& ) {
1589 if (tmp_from < 0 || tmp_to < 0) {
1601 "/rpt_unit_range is not a base range");
1604 if (from - 1 <
range.GetFrom() || from - 1>
range.GetTo() || to - 1 <
range.GetFrom() || to - 1 >
range.GetTo()) {
1606 "/rpt_unit_range is not within sequence length");
1608 bool nulls_between =
false;
1611 nulls_between =
true;
1614 if (nulls_between) {
1615 bool in_range =
false;
1617 range = it.GetEmbeddingSeq_loc().GetTotalRange();
1618 if (from - 1 <
range.GetFrom() || from - 1>
range.GetTo() || to - 1 <
range.GetFrom() || to - 1 >
range.GetTo()) {
1625 "/rpt_unit_range is not within ordered intervals");
1635 bool only_digits =
true,
1639 if (
isspace((
unsigned char)(*it)) ) {
1642 if ( !
isdigit((
unsigned char)(*it)) ) {
1643 only_digits =
false;
1646 if (only_digits || has_spaces) {
1658 val +
" accession missing version for qualifier compare");
1661 val +
" accession has bad version for qualifier compare");
1664 val +
" is not a legal accession for qualifier compare");
1667 "RefSeq accession " +
val +
" cannot be used for qualifier compare");
1675 const char *src =
str.c_str();
1676 const char *find = consist.c_str();
1679 while (*src != 0 && rval) {
1680 if (strchr (find, *src) ==
NULL) {
1696 val +
" is not a legal value for qualifier " + qual_str
1697 +
" - should only be composed of acgt unambiguous nucleotide bases");
1701 val +
" is not a legal value for qualifier " + qual_str
1702 +
" - should only be composed of acgtmrwsykvhdbn nucleotide bases");
1707 val +
" is not a legal value for qualifier " + qual_str
1708 +
" - should only be composed of acdefghiklmnpqrstuvwy* amino acids");
1713 bool has_fuzz =
false;
1715 if (it.IsPoint() && (it.GetFuzzFrom() || it.GetFuzzTo())) {
1726 "/replace already matches underlying sequence (" +
val +
")");
1729 }
catch (
const std::exception& ) {
1740 field_name +
" contains undesired character");
1744 field_name +
" ends with undesired character");
1749 field_name +
" ends with hyphen");
1770 "feature has exception but passes splice site test");
1791 "Bad sequence at splice donor after exon ending at position "
1795 "Splice donor consensus (GT) not found after exon ending at position "
1806 "Bad sequence at splice acceptor before exon starting at position "
1810 "Splice acceptor consensus (AG) not found before exon starting at position "
1821 for (
auto it = donor_problems.begin(); it != donor_problems.end(); it++) {
1825 for (
auto it = acceptor_problems.begin(); it != acceptor_problems.end(); it++) {
1835 if ((*it)->IsOther() && (*it)->GetOther().IsSetAccession()
1876 if ((*it)->IsOther()) {
1908 " for feature " +
key);
1919 if (strand1 == strand2) {
1947 "Gene cross-reference is not on expected strand");
1955 bool equivalent =
false;
1995 bool has_gene_id_xref =
false;
1998 if ((*xref)->IsSetId() && (*xref)->GetId().IsLocal()) {
2001 if (gene_feats.size() > 0) {
2002 has_gene_id_xref =
true;
2010 if (has_gene_id_xref) {
2022 size_t num_genes = 0;
2024 size_t num_trans_spliced = 0;
2025 bool equivalent =
false;
2037 size_t num_match_by_locus = 0;
2038 size_t num_match_by_locus_tag = 0;
2040 for ( ; gene_it; ++gene_it) {
2044 num_match_by_locus++;
2050 num_match_by_locus_tag++;
2056 "Feature has Gene Xref with locus_tag but no locus, gene with locus_tag and locus exists");
2063 if (
len <
max || num_genes == 0) {
2066 num_trans_spliced = 0;
2069 num_trans_spliced++;
2072 prev_gene = gene_it;
2078 num_trans_spliced++;
2087 if (num_genes > 1 &&
2092 }
else if (equivalent) {
2094 "Feature overlapped by "
2096 +
" identical-length equivalent genes but has no cross-reference");
2099 "Feature overlapped by "
2101 +
" identical-length genes but has no cross-reference");
2103 }
else if (num_genes == 1
2109 const CGb_qual& qual = **qual_iter;
2115 "Redundant allele qualifier (" + allele +
2116 ") on gene and feature");
2119 "Mismatched allele qualifier on gene (" + allele +
2120 ") and feature (" + qual.
GetVal() +
")");
2131 const string& allele = gene_xref->
GetAllele();
2134 const CGb_qual& qual = **qual_iter;
2140 "Redundant allele qualifier (" + allele +
2141 ") on gene and feature");
2144 "Mismatched allele qualifier on gene (" + allele +
2145 ") and feature (" + qual.
GetVal() +
")");
2151 if (num_match_by_locus == 0 && num_match_by_locus_tag == 0) {
2163 const CSeq_id*
id = loc.GetId();
2181 "Feature has gene locus_tag cross-reference but no equivalent gene feature exists");
2186 "Feature has gene locus cross-reference but no equivalent gene feature exists");
2205 if (it->IsSetQual() &&
NStr::Equal(it->GetQual(),
"old_locus_tag")
2228 for (
auto it : feat.
GetQual()) {
2268 string gene_old_locus_tag;
2271 if ((*it)->IsSetQual() &&
NStr::Equal ((*it)->GetQual(),
"old_locus_tag")
2273 gene_old_locus_tag = (*it)->GetVal();
2280 "Old locus tag on feature (" + old_locus_tag
2281 +
") does not match that on gene (" + gene_old_locus_tag +
")");
2292 "old_locus_tag without inherited locus_tag");
2307 if ( imp_loc.find(
"one-of") != string::npos ) {
2309 "ImpFeat loc " + imp_loc +
2310 " has obsolete 'one-of' text for feature " +
key);
2315 if ( imp_loc != temp_loc ) {
2317 "ImpFeat loc " + imp_loc +
" does not equal feature location " +
2318 temp_loc +
" for feature " +
key);
2351 if ((*it)->IsOther()) {
2374 " for feature " +
key);
2418 if ((*it)->IsOther() && (*it)->GetTextseq_Id()->IsSetAccession()
2437 bool has_sfp_pseudo =
false;
2438 bool has_gene_pseudo =
false;
2441 if (it->IsSetQual() &&
2444 sfp_pseudo = it->GetVal();
2445 has_sfp_pseudo =
true;
2450 for (
auto it : gene->
GetQual()) {
2451 if (it->IsSetQual() &&
2454 gene_pseudo = it->GetVal();
2455 has_gene_pseudo =
true;
2460 if (!has_sfp_pseudo && !has_gene_pseudo) {
2462 }
else if (!has_sfp_pseudo) {
2464 }
else if (has_sfp_pseudo && !has_gene_pseudo) {
2466 msg +=
" has pseudogene qualifier, gene does not";
2470 string msg =
"Different pseudogene values on ";
2472 msg +=
" (" + sfp_pseudo +
") and gene (" + gene_pseudo +
")";
2523 "Gene locus_tag does not match general ID of product");
2534 for (
char ch : src) {
2535 unsigned char chu = ch;
2536 if (chu > 31 && chu < 128) {
2552 const string&
str = *it;
2554 const char& ch = *c_it;
2555 unsigned char chu = ch;
2556 if (ch > 127 || (ch < 32 && ch !=
'\t' && ch !=
'\r' && ch !=
'\n')) {
2573 for (
auto it :
prot.GetName()) {
2574 if (
prot.IsSetEc() && !
prot.IsSetProcessed()
2580 "Unknown or hypothetical protein should not have EC number");
2587 "protein description " +
prot.GetDesc() +
" has SGML");
2593 "Comment has same value as protein description");
2598 "Apparent EC number in protein comment");
2605 if (
prot.IsSetName() &&
prot.GetName().size() > 0) {
2608 "Apparent EC number in protein title");
2613 if (
prot.CanGetDb () ) {
2616 if ( (!
prot.IsSetName() ||
prot.GetName().empty()) &&
2617 (!
prot.IsSetProcessed()
2622 "Protein feature has description but no name");
2623 }
else if (
prot.IsSetActivity() && !
prot.GetActivity().empty()) {
2625 "Protein feature has function but no name");
2626 }
else if (
prot.IsSetEc() && !
prot.GetEc().empty()) {
2628 "Protein feature has EC number but no name");
2631 "Protein feature has no name");
2646 if (
prot.IsSetProcessed() ) {
2647 processed =
prot.GetProcessed();
2653 if (
prot.IsSetName() &&
2654 !
prot.GetName().empty() &&
2655 !
prot.GetName().front().empty() ) {
2658 if (
prot.CanGetDesc() && !
prot.GetDesc().empty() ) {
2661 if (
prot.CanGetEc() && !
prot.GetEc().empty() ) {
2664 if (
prot.CanGetActivity() && !
prot.GetActivity().empty() ) {
2667 if (
prot.CanGetDb() && !
prot.GetDb().empty() ) {
2673 "There is a protein feature where all fields are empty");
2682 "'hypothetical protein",
2685 "alternatively spliced",
2686 "bacteriophage hypothetical protein",
2689 "cnserved hypothetical protein",
2690 "conesrved hypothetical protein",
2691 "conserevd hypothetical protein",
2692 "conserved archaeal protein",
2693 "conserved domain protein",
2694 "conserved hypohetical protein",
2695 "conserved hypotehtical protein",
2696 "conserved hypotheical protein",
2697 "conserved hypothertical protein",
2698 "conserved hypothetcial protein",
2699 "conserved hypothetical",
2700 "conserved hypothetical exported protein",
2701 "conserved hypothetical integral membrane protein",
2702 "conserved hypothetical membrane protein",
2703 "conserved hypothetical phage protein",
2704 "conserved hypothetical prophage protein",
2705 "conserved hypothetical protein",
2706 "conserved hypothetical protein - phage associated",
2707 "conserved hypothetical protein fragment 3",
2708 "conserved hypothetical protein, fragment",
2709 "conserved hypothetical protein, putative",
2710 "conserved hypothetical protein, truncated",
2711 "conserved hypothetical protein, truncation",
2712 "conserved hypothetical protein.",
2713 "conserved hypothetical protein; possible membrane protein",
2714 "conserved hypothetical protein; putative membrane protein",
2715 "conserved hypothetical proteins",
2716 "conserved hypothetical protien",
2717 "conserved hypothetical transmembrane protein",
2718 "conserved hypotheticcal protein",
2719 "conserved hypthetical protein",
2720 "conserved in bacteria",
2721 "conserved membrane protein",
2722 "conserved protein",
2723 "conserved protein of unknown function",
2724 "conserved protein of unknown function ; putative membrane protein",
2725 "conserved unknown protein",
2726 "conservedhypothetical protein",
2727 "conserverd hypothetical protein",
2728 "conservered hypothetical protein",
2729 "consrved hypothetical protein",
2730 "converved hypothetical protein",
2734 "duplicated hypothetical protein",
2739 "homeodomain protein",
2741 "hyopthetical protein",
2743 "hypotheical protein",
2744 "hypothertical protein",
2745 "hypothetcical protein",
2747 "hypothetical protein",
2748 "hypothetical conserved protein",
2749 "hypothetical exported protein",
2750 "hypothetical novel protein",
2752 "hypothetical phage protein",
2753 "hypothetical prophage protein",
2754 "hypothetical protein (fragment)",
2755 "hypothetical protein (multi-domain)",
2756 "hypothetical protein (phage associated)",
2757 "hypothetical protein - phage associated",
2758 "hypothetical protein fragment",
2759 "hypothetical protein fragment 1",
2760 "hypothetical protein predicted by genemark",
2761 "hypothetical protein predicted by glimmer",
2762 "hypothetical protein predicted by glimmer/critica",
2763 "hypothetical protein, conserved",
2764 "hypothetical protein, phage associated",
2765 "hypothetical protein, truncated",
2766 "hypothetical protein-putative conserved hypothetical protein",
2767 "hypothetical protein.",
2768 "hypothetical proteins",
2769 "hypothetical protien",
2770 "hypothetical transmembrane protein",
2771 "hypothetoical protein",
2772 "hypothteical protein",
2773 "identified by sequence similarity; putative; orf located~using blastx/framed",
2774 "identified by sequence similarity; putative; orf located~using blastx/glimmer/genemark",
2776 "membrane protein, putative",
2778 "narrowly conserved hypothetical protein",
2781 "orf, conserved hypothetical protein",
2782 "orf, hypothetical",
2783 "orf, hypothetical protein",
2784 "orf, hypothetical, fragment",
2785 "orf, partial conserved hypothetical protein",
2786 "orf; hypothetical protein",
2787 "orf; unknown function",
2789 "partial cds, hypothetical",
2790 "partially conserved hypothetical protein",
2791 "phage hypothetical protein",
2792 "phage-related conserved hypothetical protein",
2793 "phage-related protein",
2795 "possible hypothetical protein",
2797 "predicted coding region",
2798 "predicted protein",
2799 "predicted protein (pseudogene)",
2800 "predicted protein family",
2801 "product uncharacterised protein family",
2803 "protein of unknown function",
2806 "putative conserved protein",
2807 "putative exported protein",
2808 "putative hypothetical protein",
2809 "putative membrane protein",
2810 "putative orf; unknown function",
2811 "putative phage protein",
2814 "repeats containing protein",
2816 "ribosomal protein",
2819 "small hypothetical protein",
2820 "transmembrane protein",
2823 "trp-repeat protein",
2824 "truncated conserved hypothetical protein",
2825 "truncated hypothetical protein",
2826 "uncharacterized conserved membrane protein",
2827 "uncharacterized conserved protein",
2828 "uncharacterized conserved secreted protein",
2829 "uncharacterized protein",
2830 "uncharacterized protein conserved in archaea",
2831 "uncharacterized protein conserved in bacteria",
2832 "unique hypothetical",
2833 "unique hypothetical protein",
2839 "unknown, conserved protein",
2840 "unknown, hypothetical",
2841 "unknown-related protein",
2842 "unknown; predicted coding region",
2844 "unnamed protein product",
2845 "very hypothetical protein"
2857 if (!
prot.IsSetName()) {
2858 if (!
prot.IsSetProcessed() ||
2862 "Protein name is not set");
2869 if (search.empty()) {
2871 "Protein name is empty");
2872 }
else if (sc_BadProtName.find (search.c_str()) != sc_BadProtName.end()
2880 "Uninformative protein name '" + it +
"'");
2896 (it) +
" is not in proper EC_number format");
2898 const string& ec_number = it;
2904 "EC_number " + ec_number +
" was deleted");
2909 "EC_number " + ec_number +
" was transferred and is no longer valid");
2914 if (pos == string::npos || !
isdigit (ec_number.c_str()[pos + 1])) {
2916 ec_number +
" is not a legal value for qualifier EC_number");
2919 ec_number +
" is not a legal preliminary value for qualifier EC_number");
2935 bool report_name =
true;
2937 if (pos == string::npos) {
2939 }
else if (prot_name.length() - pos < 5) {
2942 report_name =
false;
2947 "Protein name ends with bracket and may contain organism name");
2953 if (id_it->IsOther()
2954 && id_it->GetOther().IsSetAccession()
2956 prot_name.substr(21))) {
2958 "Hypothetical protein reference does not match accession");
2967 "Comment has same value as protein name");
2972 "Protein name has internal PMID");
2978 &&
NStr::FindCase(prot_name,
"methyltransferase") == string::npos
2980 if (
NStr::EqualNocase(prot_name,
"ribulose-1,5-bisphosphate carboxylase/oxygenase")) {
2982 }
else if (!
NStr::EqualNocase(prot_name,
"ribulose-1,5-bisphosphate carboxylase/oxygenase large subunit")
2983 && !
NStr::EqualNocase(prot_name,
"ribulose-1,5-bisphosphate carboxylase/oxygenase small subunit")) {
2985 "Nonstandard ribulose bisphosphate protein name");
2995 "protein name " + prot_name +
" has SGML");
3020 const CSeq_loc& prot_loc =
prot->GetLocation();
3024 bool conflict =
false;
3039 "Molinfo completeness and protein feature partials conflict");
3050 if (
rna.IsSetType()) {
3051 rna_type =
rna.GetType();
3055 if (
rna.CanGetExt() &&
rna.GetExt().IsName()) {
3056 const string& rna_name =
rna.GetExt().GetName();
3060 "rRNA name " + rna_name +
" has SGML");
3069 bool pseudo = feat_pseudo;
3091 rna_typename +
" has no name");
3099 "RNA type 0 (unknown) not supported");
3119 "A pseudo RNA should not have a product");
3120 }
else if (pseudo) {
3122 "An RNA overlapped by a pseudogene should not have a product");
3169 "Type of RNA does not match MolInfo of product Bioseq");
3203 "tRNA data structure on non-tRNA feature");
3210 if ( anticodon_len != 3 ) {
3212 "Anticodon is not 3 bases in length");
3220 "Anticodon location not in tRNA");
3241 "Unparsed anticodon qualifier in tRNA");
3247 "Unparsed product qualifier in tRNA");
3254 if (
rna.IsSetExt() &&
3257 "Unparsed product qualifier in tRNA");
3260 "Missing encoded amino acid qualifier in tRNA");
3265 bool isLessThan100 =
false;
3267 CSeq_loc_CI li(loc);
3269 TSeqPos last_start = li.GetRange().GetFrom();
3270 TSeqPos last_stop = li.GetRange().GetTo();
3272 last_id->
Assign(li.GetSeq_id());
3276 TSeqPos this_start = li.GetRange().GetFrom();
3277 TSeqPos this_stop = li.GetRange().GetTo();
3278 if (
abs ((
int)this_start - (
int)last_stop) < 100 ||
abs ((
int)this_stop - (
int)last_start) < 100) {
3279 if (li.GetSeq_id().Equals(*last_id)) {
3281 isLessThan100 =
true;
3287 for (
auto id_it : last_bsh.
GetId()) {
3288 if (id_it.GetSeqId()->Equals(li.GetSeq_id())) {
3289 isLessThan100 =
true;
3296 last_start = this_start;
3297 last_stop = this_stop;
3298 last_id->
Assign(li.GetSeq_id());
3304 if ( grp ==
NULL ) {
3313 if ( !pseudo && grp !=
NULL ) {
3317 if (isLessThan100 && ! pseudo) {
3323 if (
source.IsSetLineage()) {
3324 string lineage =
source.GetLineage();
3327 "tRNA intron in bacteria is less than 100 bp");
3338 bool ordered =
true;
3339 bool adjacent =
false;
3340 bool unmarked_strand =
false;
3341 bool mixed_strand =
false;
3344 for (CSeq_loc_CI curr(anticodon); curr; ++curr) {
3346 if (curr.GetEmbeddingSeq_loc().IsInt()) {
3348 }
else if (curr.GetEmbeddingSeq_loc().IsPnt()) {
3356 curr.GetEmbeddingSeq_loc().GetLabel(&lbl);
3358 "Anticodon location [" + lbl +
"] out of range");
3361 if (
prev && curr &&
3367 if (prev_range.
GetTo() < curr_range.
GetTo()) {
3374 if (prev_range.
GetTo() > curr_range.
GetTo()) {
3384 if ( curr_range == prev_range && curr_strand == prev_strand ) {
3386 "Duplicate anticodon exons in location");
3388 if ( curr_strand != prev_strand ) {
3390 unmarked_strand =
true;
3392 unmarked_strand =
true;
3394 mixed_strand =
true;
3402 "Adjacent intervals in Anticodon");
3406 ENa_strand ac_strand = anticodon.GetStrand();
3409 "Anticodon strand and tRNA strand do not match.");
3412 "Anticodon strand and tRNA strand do not match.");
3416 bool trans_splice =
false;
3419 trans_splice =
true;
3422 if (!trans_splice) {
3424 anticodon.GetLabel(&loc_lbl);
3427 "Mixed strands in Anticodon [" + loc_lbl +
"]");
3429 if (unmarked_strand) {
3431 "Mixed plus and unknown strands in Anticodon [" + loc_lbl +
"]");
3435 "Intervals out of order in Anticodon [" + loc_lbl +
"]");
3441 int s_LegalNcbieaaValues[] = { 42, 65, 66, 67, 68, 69, 70, 71, 72, 73,
3442 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
3443 84, 85, 86, 87, 88, 89, 90 };
3446 "---",
"Ala",
"Asx",
"Cys",
"Asp",
"Glu",
"Phe",
"Gly",
"His",
"Ile",
3447 "Lys",
"Leu",
"Met",
"Asn",
"Pro",
"Gln",
"Arg",
"Ser",
"Thr",
3448 "Val",
"Trp",
"OTHER",
"Tyr",
"Glx",
"Sec",
"TERM",
"Pyl",
"Xle"
3462 }
catch (
const std::exception& ) {
3471 const list<CRef<CGenetic_code> >& codes = code_table.
Get();
3473 for ( list<
CRef<CGenetic_code> >::const_iterator code_it = codes.begin(), code_it_end = codes.end(); code_it != code_it_end; ++code_it ) {
3474 if ((*code_it)->GetId() == gcode) {
3475 return (*code_it)->GetName();
3496 unsigned char aa = 0, orig_aa;
3497 vector<char> seqData;
3540 bool mustbemethionine =
false;
3545 mustbemethionine =
true;
3549 if (mustbemethionine) {
3553 "Initiation tRNA claims to be tRNA-" + aanm +
3554 ", but should be tRNA-Met");
3570 if ( ncbieaa.length() != 64 ) {
3578 string aaname =
buf;
3584 bool modified_codon_recognition =
false;
3585 bool rna_editing =
false;
3589 modified_codon_recognition =
true;
3596 vector<string> recognized_codon_values;
3597 vector<unsigned char> recognized_taa_values;
3600 if (*iter == 255)
continue;
3605 " is greater than maximum 63");
3607 }
else if (*iter < 0) {
3614 if ( !modified_codon_recognition && !rna_editing ) {
3615 unsigned char taa = ncbieaa[*iter];
3617 recognized_codon_values.push_back (codon);
3618 recognized_taa_values.push_back (
taa);
3621 if ( (aa ==
'U') && (
taa ==
'*') && (*iter == 14) ) {
3629 "Codon recognized by tRNA (" + codon +
") does not match amino acid ("
3630 + aaname +
") specified by genetic code ("
3638 string anticodon =
"?";
3639 vector<string> codon_values;
3640 vector<unsigned char> taa_values;
3648 if (codon.length() > 3) {
3649 codon = codon.substr (0, 3);
3655 char ch = anticodon.c_str()[0];
3673 string::iterator str_it = wobble.begin();
3674 while (str_it != wobble.end()) {
3677 if (index < 64 && index > -1) {
3678 unsigned char taa = ncbieaa[index];
3679 taa_values.push_back(
taa);
3680 codon_values.push_back(codon);
3686 if (anticodon.length() > 3) {
3687 anticodon = anticodon.substr(0, 3);
3690 }
catch (
const std::exception& ) {
3693 if (codon_values.size() > 0) {
3696 for (
size_t i = 0;
i < codon_values.size();
i++) {
3702 if (aa ==
'U' &&
NStr::Equal (anticodon,
"UCA")) {
3704 }
else if (aa ==
'O' &&
NStr::Equal (anticodon,
"CUA")) {
3706 }
else if (aa ==
'I' &&
NStr::Equal (anticodon,
"CAU")) {
3712 "Codons predicted from anticodon (" + anticodon
3713 +
") cannot produce amino acid (" + aaname +
")");
3718 if (recognized_codon_values.size() > 0) {
3720 for (
size_t i = 0;
i < codon_values.size() && !
ok;
i++) {
3721 for (
size_t j = 0; j < recognized_codon_values.size() && !
ok; j++) {
3722 if (
NStr::Equal (codon_values[
i], recognized_codon_values[j])) {
3724 }
else if (
NStr::Equal (codon_values[
i],
"ATG") && aa ==
'I') {
3734 "Codon recognized cannot be produced from anticodon ("
3742 if (orig_aa == 0 || orig_aa == 255) {
3752 if (idx == 0 || idx >= 28) {
3772 bool found_bad =
false;
3773 for (
auto it : scores) {
3787 "tRNA-rRNA overlap");
3794 "tRNA overlaps CDS");
3801 size_t mismatches = 0;
3815 "Unable to transcribe mRNA");
3821 "Unable to fetch mRNA transcript '" +
label +
"'");
3827 if ((*it)->IsOther()) {
3855 "] less than " + farstr +
"product length [" +
3862 +
"] less than " + farstr +
"product length ["
3868 "] less than " + farstr +
"product length [" +
3874 "greater than " + farstr +
"product length [" +
3881 " bases between the transcript and " + farstr +
"product sequence");
3885 "mRNA has exception but passes transcription test");
3890 "mRNA has unclassified exception but only difference is " +
NStr::SizetToString(mismatches)
3895 "mRNA has transcribed product replaced exception");
3946 "protein_id should not be a gbqual on an mRNA feature");
3950 "transcript_id should not be a gbqual on an mRNA feature");
3956 if (
rna.IsSetExt() &&
rna.GetExt().IsName()) {
3957 const string& rna_name =
rna.GetExt().GetName();
3962 "mRNA feature product indicates it should be a tRNA feature");
3967 "mRNA name " + rna_name +
" has SGML");
3985 "Product Bioseq of mRNA feature is not "
3986 "packaged in the record");
3995 "Identical transcript IDs found on multiple mRNAs");
4016 return locus && allele && desc && locus_tag;
4041 "Gene on mRNA bioseq does not match gene on genomic bioseq",
4062 "Focus must be on BioSource descriptor, not BioSource feature.");
4083 "BioSource descriptor must have focus or transgenic "
4084 "when BioSource feature with different taxname is "
4102 "PolyA_site should be a single point");
4134 "sig/mat/transit_peptide feature cannot be associated with a "
4135 "protein product of a coding region feature");
4138 "Peptide processing feature should be converted to the "
4139 "appropriate protein feature subtype");
4163 "Start and stop of " +
key +
" are out of frame with CDS codons");
4168 "Start and stop of " +
key +
" are out of frame with CDS codons");
4172 "Start of " +
key +
" is out of frame with CDS codons");
4176 "Stop of " +
key +
" is out of frame with CDS codons");
4188 bool pseudo = feat_pseudo;
4205 bool pseudo = feat_pseudo;
4215 "Introns should be at least 10 nt long");
4227 if (partial5 && partial3) {
4238 if (scores.size() > 0) {
4249 if (scores.size() > 0) {
4274 bool donor_in_gap =
false;
4275 bool acceptor_in_gap =
false;
4279 donor_in_gap =
true;
4284 acceptor_in_gap =
true;
4287 if (!partial5 && !partial3) {
4288 if (donor_in_gap && acceptor_in_gap) {
4295 bool donor_good =
false;
4296 bool acceptor_good =
false;
4299 if (!partial5 && !donor_in_gap) {
4302 donor[0] = vec[end5 - 1];
4303 donor[1] = vec[end5];
4309 donor[0] = vec[end5];
4310 donor[1] = vec[end5 + 1];
4317 if (!partial3 && !acceptor_in_gap) {
4320 acceptor[0] = vec[end3];
4321 acceptor[1] = vec[end3 + 1];
4322 acceptor_good =
true;
4327 acceptor[0] = vec[end3 - 1];
4328 acceptor[1] = vec[end3];
4329 acceptor_good =
true;
4335 if (!partial5 && !partial3) {
4336 if (donor_good && acceptor_good) {
4345 if (!donor_in_gap) {
4359 "Splice donor consensus (GT) not found at start of terminal intron, position "
4364 "Splice donor consensus (GT) not found at start of intron, position "
4373 if (!acceptor_in_gap) {
4376 if (acceptor_good) {
4386 "Splice acceptor consensus (AG) not found at end of terminal intron, position "
4391 "Splice acceptor consensus (AG) not found at end of intron, position "
4411 bool is_short =
false;
4424 }
else if (partial_right &&
4445 "A note or other qualifier is required for a misc_feature");
4451 string content_label;
4453 if (
NStr::Equal(content_label,
"cold-shock protein")) {
4455 "cspA misc_feature overlapped by cold-shock protein CDS");
4468 bool is_far_delta =
false;
4472 is_far_delta =
true;
4476 if ( !(*sg) )
continue;
4478 is_far_delta =
false;
4482 if (! is_far_delta) {
4484 "An assembly_gap feature should only be on a contig record");
4508 "Assembly_gap flanked by Ns on 5' and 3' sides");
4511 "Assembly_gap flanked by Ns on 5' side");
4514 "Assembly_gap flanked by Ns on 3' side");
4517 for (
size_t i = 0;
i < sequence.size();
i++) {
4518 if (sequence[
i] !=
'N') {
4535 if ((*it)->IsSetQual() &&
NStr::EqualNocase ((*it)->GetQual(),
"estimated_length")
4539 if (estimated_length != loc_len) {
4546 }
catch (
const std::exception& ) {
4553 if ( !vec.
empty() ) {
4558 unsigned int num_gap = 0;
4560 string::iterator it = vec_data.begin();
4561 while (it != vec_data.end()) {
4569 }
else if (*it !=
'-') {
4575 if (num_real > 0 && num_n > 0) {
4580 }
else if (num_real > 0) {
4584 }
else if (num_n > 0) {
4592 +
" gap characters");
4597 }
catch (
const std::exception& ) {
4610 "NULL feature key");
4654 "Feature key Import is no longer legal");
4659 switch ( subtype ) {
4665 "Unknown feature key " +
key);
4672 "Feature key " +
key +
" is no longer legal");
4681 "Pre/pro protein feature cannot be associated with a "
4682 "protein product of a coding region feature");
4685 "Peptide processing feature should be converted to the "
4686 "appropriate protein feature subtype");
4700 "RNA feature should be converted to the appropriate RNA feature "
4701 "subtype, location should be converted manually");
4710 "ImpFeat CDS should be pseudo");
4716 "ImpFeat CDS with /translation found");
4723 "Unknown feature key " +
key);
4730 "repeat_region has no qualifiers");
4738 const string&
val = (*gbqual)->GetVal();
4746 "repeat_region has no qualifiers except rpt_type other");
4756 const string&
val = (*gbqual)->GetVal();
4757 bool missing =
true;
4767 "The regulatory_class 'other' is missing the required /note");
4778 const string&
val = (*gbqual)->GetVal();
4779 if ( recomb_values.
find(
val.c_str()) == recomb_values.
end() ) {
4783 "The recombination_class 'other' is missing the required /note");
static CRef< CScope > m_Scope
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
@ eErr_SEQ_FEAT_WrongQualOnImpFeat
@ eErr_SEQ_FEAT_NotSpliceConsensusAcceptor
@ eErr_SEQ_FEAT_rRNADoesNotHaveProduct
@ eErr_SEQ_FEAT_WholeLocation
@ eErr_SEQ_FEAT_MobileElementInvalidQualifier
@ eErr_SEQ_FEAT_DuplicateAnticodonInterval
@ eErr_SEQ_FEAT_ShortTRNAIntron
@ eErr_SEQ_FEAT_MinusStrandProtein
@ eErr_SEQ_FEAT_NotSpliceConsensusDonor
@ eErr_SEQ_FEAT_GeneXrefWithoutLocus
@ eErr_SEQ_FEAT_GenesInconsistent
@ eErr_SEQ_FEAT_PseudoRnaHasProduct
@ eErr_SEQ_FEAT_EcNumberDataMissing
@ eErr_SEQ_FEAT_InvalidProductOnGene
@ eErr_SEQ_FEAT_BadRRNAcomponentOverlapTRNA
@ eErr_SEQ_FEAT_mRNAUnnecessaryException
@ eErr_SEQ_FEAT_UnknownImpFeatQual
@ eErr_SEQ_FEAT_InvalidCompareBadAccession
@ eErr_SEQ_FEAT_InvalidCompareMissingVersion
@ eErr_SEQ_FEAT_InvalidRptUnitRange
@ eErr_GENERIC_SgmlPresentInText
@ eErr_SEQ_FEAT_BadAnticodonAA
@ eErr_SEQ_FEAT_RnaProductMismatch
@ eErr_SEQ_FEAT_FeatureBeginsOrEndsInGap
@ eErr_SEQ_FEAT_EcNumberInProteinName
@ eErr_SEQ_FEAT_InvalidTRNAdata
@ eErr_SEQ_FEAT_UnnecessaryException
@ eErr_SEQ_FEAT_AssemblyGapFeatureProblem
@ eErr_SEQ_FEAT_OldLocusTagWithoutLocusTag
@ eErr_SEQ_FEAT_NotSpliceConsensusAcceptorTerminalIntron
@ eErr_SEQ_FEAT_AnticodonMixedStrand
@ eErr_SEQ_FEAT_UnparsedtRNAProduct
@ eErr_SEQ_FEAT_InconsistentPseudogeneValue
@ eErr_SEQ_FEAT_GeneXrefWithoutGene
@ eErr_SEQ_FEAT_ReplacedEcNumber
@ eErr_SEQ_FEAT_PartialsInconsistent
@ eErr_SEQ_FEAT_InvalidQualifierValue
@ eErr_SEQ_FEAT_DuplicateGeneOntologyTerm
@ eErr_SEQ_FEAT_ProtRefHasNoData
@ eErr_SEQ_FEAT_NotSpliceConsensusDonorTerminalIntron
@ eErr_SEQ_FEAT_BadTrnaAA
@ eErr_SEQ_FEAT_WrongQualOnFeature
@ eErr_SEQ_FEAT_ProductFetchFailure
@ eErr_SEQ_FEAT_MismatchedAllele
@ eErr_SEQ_FEAT_RepeatSeqDoNotMatch
@ eErr_SEQ_FEAT_MissingQualOnImpFeat
@ eErr_SEQ_FEAT_InvalidRptUnitSeqCharacters
@ eErr_SEQ_FEAT_TranscriptLen
@ eErr_SEQ_FEAT_RubiscoProblem
@ eErr_SEQ_FEAT_InvalidAlleleDuplicates
@ eErr_SEQ_FEAT_ImpCDSnotPseudo
@ eErr_SEQ_FEAT_BadCDScomponentOverlapTRNA
@ eErr_SEQ_FEAT_BadEcNumberValue
@ eErr_SEQ_FEAT_EcNumberEmpty
@ eErr_SEQ_FEAT_ImpCDShasTranslation
@ eErr_SEQ_FEAT_PeptideFeatOutOfFrame
@ eErr_SEQ_FEAT_ProteinNameHasPMID
@ eErr_SEQ_FEAT_ImpFeatBadLoc
@ eErr_SEQ_FEAT_MissingQualOnFeature
@ eErr_SEQ_FEAT_PolyAsiteNotPoint
@ eErr_SEQ_FEAT_RepeatRegionNeedsNote
@ eErr_SEQ_FEAT_GeneXrefStrandProblem
@ eErr_SEQ_FEAT_PolyATail
@ eErr_SEQ_FEAT_MissingTrnaAA
@ eErr_GENERIC_NonAsciiAsn
@ eErr_SEQ_FEAT_UnparsedtRNAAnticodon
@ eErr_SEQ_FEAT_RefSeqInText
@ eErr_SEQ_FEAT_ErroneousException
@ eErr_SEQ_FEAT_ImproperBondLocation
@ eErr_SEQ_FEAT_InvalidPseudoQualifier
@ eErr_SEQ_FEAT_FeatureSeqIDCaseDifference
@ eErr_SEQ_FEAT_BadProductSeqId
@ eErr_SEQ_FEAT_PeptideFeatureLacksCDS
@ eErr_SEQ_FEAT_InvalidCompareRefSeqAccession
@ eErr_SEQ_FEAT_InvalidReplace
@ eErr_SEQ_FEAT_UnknownImpFeatKey
@ eErr_SEQ_FEAT_IdenticalMRNAtranscriptIDs
@ eErr_SEQ_FEAT_AssemblyGapCoversSequence
@ eErr_SEQ_FEAT_ShortIntron
@ eErr_SEQ_FEAT_SplitEcNumber
@ eErr_SEQ_FEAT_AssemblyGapAdjacentToNs
@ eErr_SEQ_FEAT_InvalidPunctuation
@ eErr_SEQ_FEAT_LocusTagProductMismatch
@ eErr_SEQ_FEAT_UnknownFeatureQual
@ eErr_SEQ_FEAT_TranscriptMismatches
@ eErr_SEQ_FEAT_IncorrectQualifierCapitalization
@ eErr_SEQ_FEAT_InvalidNumberQualifier
@ eErr_SEQ_FEAT_FeatureInsideGap
@ eErr_SEQ_FEAT_InvalidRNAFeature
@ eErr_SEQ_FEAT_tRNArange
@ eErr_SEQ_FEAT_GeneIdMismatch
@ eErr_SEQ_FEAT_MissingMRNAproduct
@ eErr_SEQ_FEAT_tRNAmRNAmixup
@ eErr_SEQ_FEAT_UndesiredProteinName
@ eErr_SEQ_FEAT_MrnaTransFail
@ eErr_SEQ_FEAT_InvalidInferenceValue
@ eErr_SEQ_FEAT_GeneXrefNeeded
@ eErr_SEQ_FEAT_InvalidType
@ eErr_SEQ_FEAT_SerialInComment
@ eErr_SEQ_FEAT_BadTrailingCharacter
@ eErr_SEQ_FEAT_IntervalBeginsOrEndsInGap
@ eErr_SEQ_FEAT_ProteinNameEndsInBracket
@ eErr_SEQ_FEAT_BadInternalCharacter
@ eErr_SEQ_FEAT_BadProteinName
@ eErr_SEQ_FEAT_MissingLocation
@ eErr_SEQ_FEAT_ExceptionMissingText
@ eErr_SEQ_FEAT_BadAnticodonCodon
@ eErr_SEQ_FEAT_BadTrailingHyphen
@ eErr_SEQ_FEAT_OldLocusTagMismtach
@ eErr_SEQ_FEAT_PseudoRnaViaGeneHasProduct
@ eErr_SEQ_FEAT_DeletedEcNumber
@ eErr_SEQ_FEAT_FeatureIsMostlyNs
@ eErr_SEQ_FEAT_InvalidMatchingReplace
@ eErr_INTERNAL_Exception
@ eErr_SEQ_FEAT_BadEcNumberFormat
@ eErr_SEQ_FEAT_BothStrands
@ eErr_SEQ_FEAT_ExceptionProblem
@ eErr_SEQ_FEAT_RedundantFields
@ eErr_SEQ_FEAT_ColdShockProteinProblem
@ eErr_SEQ_FEAT_TrnaCodonWrong
@ eErr_SEQ_FEAT_NoNameForProtein
@ eErr_SEQ_FEAT_RptUnitRangeProblem
@ eErr_SEQ_FEAT_InvalidVariationReplace
@ eErr_SEQ_FEAT_SeqLocOrder
@ eErr_SEQ_FEAT_AnticodonStrandConflict
@ eErr_SEQ_FEAT_InvalidRepeatUnitLength
@ eErr_SEQ_FEAT_VectorContamination
@ eErr_SEQ_FEAT_AbuttingIntervals
@ eErr_SEQ_FEAT_EcNumberInProteinComment
@ eErr_SEQ_FEAT_UnnecessaryCitPubEquiv
@ eErr_SEQ_FEAT_PartialProblem
@ eErr_SEQ_FEAT_RegulatoryClassOtherNeedsNote
@ eErr_SEQ_FEAT_MiscFeatureNeedsNote
@ eErr_SEQ_FEAT_FocusOnBioSourceFeature
@ eErr_SEQ_FEAT_PolyAsignalNotRange
@ eErr_SEQ_DESCR_BioSourceNeedsFocus
@ eErr_SEQ_FEAT_BadTrnaCodon
@ eErr_SEQ_FEAT_FeatureCrossesGap
@ eErr_SEQ_FEAT_SelfReferentialProduct
@ eErr_SEQ_FEAT_GapFeatureProblem
@ eErr_SEQ_FEAT_HypotheticalProteinMismatch
@ eErr_SEQ_FEAT_MissingGeneXref
@ eErr_SEQ_FEAT_RecombinationClassOtherNeedsNote
@ eErr_SEQ_FEAT_MissingExceptionFlag
bool IsOrganismEukaryote() const
int GetGenCode(int def=1) const
bool IsSkippable(void) const
bool IsKnownGap(size_t offset)
bool IsGap(size_t offset)
bool IsUnknownGap(size_t offset)
map< size_t, EGapType > TGapTypeMap
CGapCache(const CSeq_loc &loc, CBioseq_Handle bsh)
@Gb_qual.hpp User-defined methods of the data storage class.
static bool IsLegalMobileElementValue(const string &val)
static bool IsValidPseudogeneValue(const string &val)
static bool IsValidRptTypeValue(const string &val)
static const TLegalRecombinationClassSet & GetSetOfLegalRecombinationClassValues(void)
static const string & GetNcbieaa(int id)
static string IndexToCodon(int index)
static int CodonToIndex(char base1, char base2, char base3)
static const CGenetic_code_table & GetCodeTable(void)
CRef< feature::CFeatTree > GetFeatTreeFromCache(const CSeq_loc &loc, CScope &scope)
CConstRef< CSeq_feat > GetGeneFromCache(const CSeq_feat *feat, CScope &scope)
bool IsSuppressed(void) const
bool x_IsIntronShort(bool pseudo)
CMRNAValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
void x_ValidateMrnaGene()
CConstRef< CSeq_feat > m_Gene
void x_ValidateCommonMRNAProduct()
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
CPeptideValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
void x_ValidatePeptideOnCodonBoundary()
CConstRef< CSeq_feat > m_CDS
void x_ValidateSeqFeatLoc() override
void x_ValidateSeqFeatLoc() override
void x_ValidateECNumbers()
void x_ValidateProteinName(const string &prot_name)
void x_ReportUninformativeNames()
void x_ValidateMolinfoPartials()
static EECNumberFileStatus GetECNumAmbiguousStatus()
static EECNumberFileStatus GetECNumSpecificStatus()
@ eECFile_not_found
File was not found in expected directory.
static bool IsECNumberSplit(const string &old_ecno)
static bool IsValidECNumberFormat(const string &ecno)
Verify correct form of EC number.
static EECNumberFileStatus GetECNumDeletedStatus()
EECNumberStatus
Enzyme Commission number status.
@ eEC_replaced
Obsolete synonym for some other EC number.
@ eEC_unknown
Unrecognized; possibly malformed.
@ eEC_deleted
Withdrawn, with no (single?) replacement.
static EECNumberFileStatus GetECNumReplacedStatus()
static EECNumberStatus GetECNumberStatus(const string &ecno)
Determine an EC number's validity and specificity.
void x_ValidateTrnaOverlap()
void x_ValidateRnaTrans()
void x_ValidateRnaProductType()
void x_ValidateTrnaCodons()
void x_ReportRNATranslationProblems(size_t problems, size_t mismatches)
void x_ValidateAnticodon(const CSeq_loc &anticodon)
void x_ValidateTrnaData()
void x_ValidateTrnaType()
void x_ValidateRnaProduct(bool feat_pseudo, bool pseudo)
@RNA_ref.hpp User-defined methods of the data storage class.
static string GetRnaTypeName(const CRNA_ref::EType rna_type)
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
bool IsLegalQualifier(EQualifier qual) const
Test whether a certain qualifier is legal for the feature.
@ eQual_mobile_element_type
static bool AllowStrandBoth(ESubtype subtype)
ESubtype GetSubtype(void) const
string GetKey(EVocabulary vocab=eVocabulary_full) const
const TQualifiers & GetMandatoryQualifiers(void) const
Get the list of all mandatory qualifiers for the feature.
static std::pair< EQualifier, CTempString > GetQualifierTypeAndValue(CTempString qual)
@ eSubtype_transit_peptide
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
@ eSubtype_mobile_element
static EQualifier GetQualifierType(CTempString qual)
convert qual string to enumerated value
static CTempString GetQualifierAsString(EQualifier qual)
Convert a qualifier from an enumerated value to a string representation or empty if not found.
static CTempString SubtypeValueToName(ESubtype eSubtype)
Turns a ESubtype into its string value which is NOT necessarily related to the identifier of the enum...
static const vector< string > & GetRegulatoryClassList()
namespace ncbi::objects::
static bool IsExceptionTextInLegalList(const string &exception_text, bool allow_refseq)
Indicates whether this specific text occurs in the list of legal exceptions.
const CGene_ref * GetGeneXref(void) const
See related function in util/feature.hpp.
static bool IsExceptionTextRefSeqOnly(const string &exception_text)
Indicates whether this specific text is a RefSeq-only exception.
static TIndex GetMapToIndex(CSeq_data::E_Choice from_type, CSeq_data::E_Choice to_type, TIndex from_idx)
static bool x_IsMostlyNs(const CSeq_loc &loc, CBioseq_Handle bsh)
CBioseq_Handle x_GetFeatureProduct(bool look_far, bool &is_far)
EDiagSev x_SeverityForConsensusSplice()
void x_ValidateBothStrands()
void ValidateCharactersInField(string value, string field_name)
void PostErr(EDiagSev sv, EErrType et, const string &msg)
void x_ValidateLabelVal(const string &val)
CSingleFeatValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
void x_ReportAcceptorSpliceSiteReadErrors(const CSpliceProblems::TSpliceProblem &problem, const string &label)
void x_ValidateRptUnitVal(const string &val, const string &key)
void x_ValidateGeneXRef()
void x_ValidateReplaceQual(const string &key, const string &qual_str, const string &val)
void x_CheckForNonAsciiCharacters()
CBioseq_Handle x_GetBioseqByLocation(const CSeq_loc &loc)
void x_ValidateSeqFeatDataType()
CBioseq_Handle m_ProductBioseq
static bool s_IsPseudo(const CSeq_feat &feat)
virtual bool x_ReportOrigProteinId()
CBioseq_Handle m_LocationBioseq
void x_ValidateCompareVal(const string &val)
void x_ValidateRptUnitSeqVal(const string &val, const string &key)
void x_ValidateExtUserObject()
void x_ValidateSeqFeatProduct()
void x_ReportDonorSpliceSiteReadErrors(const CSpliceProblems::TSpliceProblem &problem, const string &label)
void x_ValidateImpFeatQuals()
static bool s_GeneRefsAreEquivalent(const CGene_ref &g1, const CGene_ref &g2, string &label)
void x_ReportECNumFileStatus()
void x_ValidateGbQual(const CGb_qual &qual)
void x_ValidateGeneFeaturePair(const CSeq_feat &gene)
void x_ValidateOldLocusTag(const string &old_locus_tag)
virtual void x_ValidateFeatComment()
void x_ValidateNonImpFeat()
bool x_HasNamedQual(const string &qual_name)
void x_ValidateLocusTagGeneralMatch(CConstRef< CSeq_feat > gene)
static bool x_HasSeqLocBond(const CSeq_feat &feat)
void x_ReportPseudogeneConflict(CConstRef< CSeq_feat > gene)
void x_ValidateRptUnitRangeVal(const string &val)
void x_ValidateFeatPartialness()
static bool x_BioseqHasNmAccession(CBioseq_Handle bsh)
void ValidateSplice(bool gene_pseudo, bool check_all)
void x_ReportSpliceProblems(const CSpliceProblems &problems, const string &label)
bool x_AllowFeatureToMatchGapExactly()
static TSeqPos x_FindStartOfGap(CBioseq_Handle bsh, TSeqPos pos, CScope *scope)
static size_t x_CalculateLocationGaps(CBioseq_Handle bsh, const CSeq_loc &loc, vector< TSeqPos > &gap_starts)
virtual void x_ValidateSeqFeatLoc()
virtual void x_ValidateExceptText(const string &text)
static bool s_BioseqHasRefSeqThatStartsWithPrefix(CBioseq_Handle bsh, string prefix)
@ eLocationGapInternalIntervalEndpointInGap
@ eLocationGapCrossesUnknownGap
@ eLocationGapContainedInGapOfNs
@ eLocationGapContainedInGap
@ eLocationGapFeatureMatchesGap
static void x_LocHasStrandBoth(const CSeq_loc &feat, bool &both, bool &both_rev)
void x_ValidateImpFeatLoc()
const TSpliceProblemList & GetDonorProblems() const
void CalculateSpliceProblems(const CSeq_feat &feat, bool check_all, bool pseudo, CBioseq_Handle loc_handle)
vector< TSpliceProblem > TSpliceProblemList
@ eSpliceSiteRead_WrongNT
bool IsExceptionUnnecessary() const
bool AreErrorsUnexpected() const
pair< size_t, TSeqPos > TSpliceProblem
const TSpliceProblemList & GetAcceptorProblems() const
const_iterator find(const key_type &key) const
Return a const_iterator pointing to the specified element, or to the end if the element is not found.
const_iterator end() const
Return the end of the controlled sequence.
vector< CSeq_feat_Handle > TSeq_feat_Handles
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Template class for iteration on objects of class C (non-modifiable version)
@ eInferenceValidCode_valid
static EInferenceValidCode ValidateInference(string inference, bool fetch_accession, CScope *scope=nullptr)
bool DoRubiscoTest() const
bool ReportSpliceAsError() const
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
const CBioSourceKind & BioSourceKind() const
bool IsRemoteFetch() const
CConstRef< CSeq_feat > GetmRNAGivenProduct(const CBioseq &seq)
bool DoesAnyFeatLocHaveGI() const
bool IsLocusTagGeneralMatch() const
void ValidateDbxref(const CDbtag &xref, const CSerialObject &obj, bool biosource=false, const CSeq_entry *ctx=nullptr)
bool IsSerialNumberInComment(const string &comment)
bool IsFarSequence(const CSeq_id &id)
const CTSE_Handle & GetTSE_Handle()
bool ValidateInferenceAccessions() const
bool IsHugeFileMode() const
void IncrementPseudogeneCount()
CConstRef< CSeq_feat > GetCDSGivenProduct(const CBioseq &seq)
CBioseq_Handle GetLocalBioseqHandle(const CSeq_id &id)
bool x_IsFarFetchFailure(const CSeq_loc &loc)
bool IsGenomeSubmission() const
void ValidateBioSource(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void IncrementGeneXrefCount()
bool IsFarFetchCDSproducts() const
bool IsValidateExons() const
bool IgnoreExceptions() const
void SetFarFetchFailure()
bool IsRefSeqConventions() const
SValidatorContext & SetContext()
bool IsIndexerVersion() const
CGeneCache & GetGeneCache()
bool IsSmallGenomeSet() const
void ValidateSeqLoc(const CSeq_loc &loc, const CBioseq_Handle &seq, bool report_abutting, const string &prefix, const CSerialObject &obj, bool lowerSev=false)
bool IsFarFetchMRNAproducts() const
bool IsTransgenic(const CBioSource &bsrc)
void ValidatePubdesc(const CPubdesc &pub, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
container_type::iterator iterator
const_iterator end() const
const_iterator find(const key_type &key) const
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static void chk(int check, const char *fmt,...)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Public API for finding the gene(s) on a given feature using the same criteria as the flatfile generat...
vector< TGoTermError > GetGoTermErrors(const CSeq_feat &feat)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Fatal
Fatal error – guarantees exit (or abort).
@ eDiag_Critical
Critical error message.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
const string AsFastaString(void) const
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
virtual void WriteAsFasta(ostream &out) const
Implement serializable interface.
string GetLabel(const CSeq_id &id)
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
ENa_strand GetStrand(void) const
Get the location's strand.
TRange GetTotalRange(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
CRef< CSeq_loc > Intersect(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper) const
Find the intersection with the seq-loc, merge/sort resulting ranges depending on flags.
TSeqPos GetStop(ESeqLocExtremes ext) const
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
ELocationInFrame IsLocationInFrame(const CSeq_feat_Handle &cds, const CSeq_loc &loc)
Determines whether location loc is in frame with coding region cds.
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
@ eLocationInFrame_InFrame
@ eLocationInFrame_BadStart
@ eLocationInFrame_BadStop
@ eLocationInFrame_BadStartAndStop
@ fFGL_Content
Include its content if there is any.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
bool IsValid(const CSeq_point &pt, CScope *scope)
Checks that point >= 0 and point < length of Bioseq.
Int8 TestForOverlapEx(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, CScope *scope=0, TOverlapFlags flags=fOverlap_Default)
Updated version of TestForOverlap64().
int SeqLocPartialCheck(const CSeq_loc &loc, CScope *scope)
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
bool IsOneBioseq(const CSeq_loc &loc, CScope *scope)
Returns true if all embedded CSeq_ids represent the same CBioseq, else false.
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
CSeq_loc * SeqLocRevCmpl(const CSeq_loc &loc, CScope *scope)
Get reverse complement of the seq-loc (?)
@ eSeqlocPartial_Internal
@ eSeqlocPartial_Complete
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eOverlap_Simple
any overlap of extremes
@ eOverlap_Interval
at least one pair of intervals must overlap
@ eOverlap_Contained
2nd contained within 1st extremes
@ eOverlap_Subset
2nd is a subset of 1st ranges
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
const CSeq_feat * GetCDSForProduct(const CBioseq &product, CScope *scope)
Get the encoding CDS feature of a given protein sequence.
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether given feature is pseudo, using gene associated with feature if necessary Checks to...
CConstRef< CSeq_feat > GetOverlappingGene(const CSeq_loc &loc, CScope &scope, ETransSplicing eTransSplicing=eTransSplicing_Auto)
CConstRef< CSeq_feat > GetOverlappingCDS(const CSeq_loc &loc, CScope &scope)
vector< TFeatScore > TFeatScores
void GetOverlappingFeatures(const CSeq_loc &loc, CSeqFeatData::E_Choice feat_type, CSeqFeatData::ESubtype feat_subtype, EOverlapType overlap_type, TFeatScores &feats, CScope &scope, const TBestFeatOpts opts=0, CGetOverlappingFeaturesPlugin *plugin=NULL)
Find all features overlapping the location.
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence within one TSE.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
bool IsSetExcept(void) const
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
TClass GetClass(void) const
const TInst_Ext & GetInst_Ext(void) const
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
bool IsSetInst_Ext(void) const
bool IsSetDbxref(void) const
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
bool IsSetExcept_text(void) const
TInst_Length GetInst_Length(void) const
const string & GetExcept_text(void) const
bool IsSetInst_Repr(void) const
bool IsSetClass(void) const
TInst_Repr GetInst_Repr(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
const CSeq_feat::TDbxref & GetDbxref(void) const
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
const TId & GetId(void) const
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
const TInst & GetInst(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
const CSeq_loc & GetLocation(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
CSeqMap::ESegmentType GetType(void) const
bool IsUnknownLength(void) const
return true if current segment is a gap of unknown length
TSeqPos GetPosition(void) const
return position of current segment in sequence
TSeqPos GetLength(void) const
return length of current segment
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
ENa_strand GetStrand(void) const
bool IsInGap(TSeqPos pos) const
True if sequence at 0-based position 'pos' has a gap. Note: this method is not MT-safe,...
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
NCBI_NS_STD::string::size_type SIZE_TYPE
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static SIZE_TYPE FindWord(const CTempString str, const CTempString word, ECase use_case=eCase, EDirection direction=eForwardSearch)
Find given word in the string.
static SIZE_TYPE FindCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case sensitive search.
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
static string & ToLower(string &str)
Convert string to lower case – string& version.
@ eReverseSearch
Search in a backward direction.
@ eNocase
Case insensitive compare.
static const char label[]
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsSetIs_focus(void) const
To distinguish biological focus. Check if a value has been assigned to Is_focus data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
bool IsSetSyn(void) const
Synonyms for locus. Check if a value has been assigned to Syn data member.
const TSyn & GetSyn(void) const
Get the Syn member data.
const TDesc & GetDesc(void) const
Get the Desc member data.
bool IsSetPseudo(void) const
Pseudogene. Check if a value has been assigned to Pseudo data member.
bool CanGetLocus(void) const
Check if it is safe to call GetLocus method.
bool IsSetLocus_tag(void) const
Systematic gene name (e.g., MI0001, ORF0069). Check if a value has been assigned to Locus_tag data member.
bool CanGetLocus_tag(void) const
Check if it is safe to call GetLocus_tag method.
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to Locus data member.
bool IsSetAllele(void) const
Official allele designation. Check if a value has been assigned to Allele data member.
bool CanGetAllele(void) const
Check if it is safe to call GetAllele method.
bool CanGetDesc(void) const
Check if it is safe to call GetDesc method.
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
const TLocus & GetLocus(void) const
Get the Locus member data.
TPseudo GetPseudo(void) const
Get the Pseudo member data.
const TAllele & GetAllele(void) const
Get the Allele member data.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetDb(void) const
Name of database or system. Check if a value has been assigned to Db data member.
const TTag & GetTag(void) const
Get the Tag member data.
bool IsSetTag(void) const
Appropriate tag. Check if a value has been assigned to Tag data member.
const TDb & GetDb(void) const
Get the Db member data.
const TStr & GetStr(void) const
Get the variant data.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
bool CanGetTaxname(void) const
Check if it is safe to call GetTaxname method.
EProcessed
processing status
const TName & GetName(void) const
Get the Name member data.
bool IsSetEc(void) const
E.C. number(s). Check if a value has been assigned to Ec data member.
const TEc & GetEc(void) const
Get the Ec member data.
@ eProcessed_signal_peptide
@ eProcessed_transit_peptide
const TPub & GetPub(void) const
Get the variant data.
list< CRef< CPub > > TPub
bool IsPub(void) const
Check if variant Pub is selected.
const TAnticodon & GetAnticodon(void) const
Get the Anticodon member data.
TType GetType(void) const
Get the Type member data.
TNcbi8aa GetNcbi8aa(void) const
Get the variant data.
const TAa & GetAa(void) const
Get the Aa member data.
const TCodon & GetCodon(void) const
Get the Codon member data.
bool IsSetAa(void) const
Check if a value has been assigned to Aa data member.
bool IsTRNA(void) const
Check if variant TRNA is selected.
bool IsSetAnticodon(void) const
Location of anticodon. Check if a value has been assigned to Anticodon data member.
bool IsSetExt(void) const
Generic fields for ncRNA, tmRNA, miscRNA. Check if a value has been assigned to Ext data member.
TNcbieaa GetNcbieaa(void) const
Get the variant data.
bool IsGen(void) const
Check if variant Gen is selected.
TIupacaa GetIupacaa(void) const
Get the variant data.
bool CanGetAnticodon(void) const
Check if it is safe to call GetAnticodon method.
const TGen & GetGen(void) const
Get the variant data.
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
bool IsSetClass(void) const
For ncRNAs, the class of non-coding RNA (examples: antisense_RNA, guide_RNA, snRNA). Check if a value has been assigned to Class data member.
E_Choice Which(void) const
Which variant is currently selected.
const TExt & GetExt(void) const
Get the Ext member data.
const TTRNA & GetTRNA(void) const
Get the variant data.
const TClass & GetClass(void) const
Get the Class member data.
@ e_not_set
No variant selected.
@ e_Name
for naming "other" type
@ eType_scRNA
will become ncRNA, with RNA-gen.class = scRNA
@ eType_snoRNA
will become ncRNA, with RNA-gen.class = snoRNA
@ eType_snRNA
will become ncRNA, with RNA-gen.class = snRNA
bool CanGetDbxref(void) const
Check if it is safe to call GetDbxref method.
const TVal & GetVal(void) const
Get the Val member data.
const TKey & GetKey(void) const
Get the Key member data.
bool IsSetLoc(void) const
Original location string. Check if a value has been assigned to Loc data member.
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
vector< CRef< CDbtag > > TDbxref
const TPub & GetPub(void) const
Get the variant data.
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
bool IsSetQual(void) const
Qualifiers. Check if a value has been assigned to Qual data member.
E_Choice Which(void) const
Which variant is currently selected.
bool IsBond(void) const
Check if variant Bond is selected.
bool IsProt(void) const
Check if variant Prot is selected.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
bool IsImp(void) const
Check if variant Imp is selected.
const TCit & GetCit(void) const
Get the Cit member data.
const TQual & GetQual(void) const
Get the Qual member data.
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
bool IsSetKey(void) const
Check if a value has been assigned to Key data member.
bool IsSetXref(void) const
Cite other relevant features. Check if a value has been assigned to Xref data member.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
bool IsPub(void) const
Check if variant Pub is selected.
bool IsSetExcept_text(void) const
Explain if except=TRUE. Check if a value has been assigned to Except_text data member.
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
bool CanGetVal(void) const
Check if it is safe to call GetVal method.
bool IsHet(void) const
Check if variant Het is selected.
bool IsSetExp_ev(void) const
Check if a value has been assigned to Exp_ev data member.
const TBiosrc & GetBiosrc(void) const
Get the variant data.
bool CanGetExcept_text(void) const
Check if it is safe to call GetExcept_text method.
TPseudo GetPseudo(void) const
Get the Pseudo member data.
const TProduct & GetProduct(void) const
Get the Product member data.
bool IsSetQual(void) const
Check if a value has been assigned to Qual data member.
bool CanGetExcept(void) const
Check if it is safe to call GetExcept method.
bool IsSetPseudo(void) const
annotated on pseudogene? Check if a value has been assigned to Pseudo data member.
const TComment & GetComment(void) const
Get the Comment member data.
bool IsSetCit(void) const
Citations for this feature. Check if a value has been assigned to Cit data member.
bool IsBiosrc(void) const
Check if variant Biosrc is selected.
const TGene & GetGene(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
TPartial GetPartial(void) const
Get the Partial member data.
const TProt & GetProt(void) const
Get the variant data.
TExcept GetExcept(void) const
Get the Except member data.
const TXref & GetXref(void) const
Get the Xref member data.
vector< CRef< CSeqFeatXref > > TXref
vector< CRef< CGb_qual > > TQual
const TQual & GetQual(void) const
Get the Qual member data.
const TRna & GetRna(void) const
Get the variant data.
bool IsSetDbxref(void) const
Support for xref to other databases. Check if a value has been assigned to Dbxref data member.
bool IsSetVal(void) const
Check if a value has been assigned to Val data member.
bool IsSetProduct(void) const
Product of process. Check if a value has been assigned to Product data member.
bool CanGetQual(void) const
Check if it is safe to call GetQual method.
const TLoc & GetLoc(void) const
Get the Loc member data.
bool IsRna(void) const
Check if variant Rna is selected.
TExp_ev GetExp_ev(void) const
Get the Exp_ev member data.
const TImp & GetImp(void) const
Get the variant data.
bool IsSetLocation(void) const
Feature made from. Check if a value has been assigned to Location data member.
@ e_Het
cofactor, prosthetic grp, etc, bound to seq
@ e_Region
named region (globin locus)
@ e_Seq
to annotate origin from another seq
@ e_Txinit
transcription initiation
@ e_Num
a numbering system
@ e_Pub
publication applies to this seq
@ e_User
user defined structure
@ e_Rsite
restriction site (for maps really)
@ e_Comment
just a comment
@ e_Non_std_residue
non-standard residue here in seq
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
const TName & GetName(void) const
Get the Name member data.
ENa_strand
strand of nucleic acid
TFrom GetFrom(void) const
Get the From member data.
bool IsGeneral(void) const
Check if variant General is selected.
bool CanGetName(void) const
Check if it is safe to call GetName method.
E_Choice Which(void) const
Which variant is currently selected.
bool CanGetAccession(void) const
Check if it is safe to call GetAccession method.
const TGeneral & GetGeneral(void) const
Get the variant data.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
@ eNa_strand_both_rev
in reverse orientation
@ eNa_strand_both
in forward orientation
@ e_Tpe
Third Party Annot/Seq EMBL.
@ e_Tpd
Third Party Annot/Seq DDBJ.
@ e_Tpg
Third Party Annot/Seq Genbank.
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
TRepr GetRepr(void) const
Get the Repr member data.
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
bool CanGetBiomol(void) const
Check if it is safe to call GetBiomol method.
ERepr
representation class
const TInst & GetInst(void) const
Get the Inst member data.
const TSource & GetSource(void) const
Get the variant data.
const TId & GetId(void) const
Get the Id member data.
bool IsSetExt(void) const
Extensions for special types. Check if a value has been assigned to Ext data member.
bool IsDelta(void) const
Check if variant Delta is selected.
const TExt & GetExt(void) const
Get the Ext member data.
TBiomol GetBiomol(void) const
Get the Biomol member data.
const TDelta & GetDelta(void) const
Get the variant data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
const Tdata & Get(void) const
Get the member data.
list< CRef< CDelta_seq > > Tdata
const TMolinfo & GetMolinfo(void) const
Get the variant data.
@ eRepr_delta
sequence made by changes (delta) to others
@ eRepr_raw
continuous sequence
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_partial
partial but no details given
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ e_Ncbieaa
extended ASCII 1 letter aa codes
@ e_Ncbistdaa
consecutive codes for std aas
@ e_Molinfo
info on the molecule and techniques
@ e_Source
source of materials, includes Org-ref
@ e_Loc
point to a sequence
static void text(MDB_val *v)
range(_Ty, _Ty) -> range< _Ty >
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::KEY key
const CharType(& source)[N]
static const char * expected[]
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
static const char * suffix[]
static const char * prefix[]
#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.
#define FOR_EACH_GBQUAL_ON_FEATURE
#define FOR_EACH_GBQUAL_ON_SEQFEAT(Itr, Var)
FOR_EACH_GBQUAL_ON_SEQFEAT EDIT_EACH_GBQUAL_ON_SEQFEAT.
#define FOR_EACH_STRING_IN_VECTOR(Itr, Var)
FOR_EACH_STRING_IN_VECTOR EDIT_EACH_STRING_IN_VECTOR.
#define FOR_EACH_CHAR_IN_STRING(Itr, Var)
FOR_EACH_CHAR_IN_STRING EDIT_EACH_CHAR_IN_STRING.
bool ContainsSgml(const string &str)
static string s_AsciiString(const string &src)
static bool s_LocationStrandsIncompatible(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
CSingleFeatValidator * FeatValidatorFactory(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
bool s_HasNamedQual(const CSeq_feat &feat, const string &qual)
const string kInferenceMessage[]
static bool s_StringConsistsOf(string str, string consist)
static string GetGeneticCodeName(int gcode)
const string kOrigProteinId
int s_LegalNcbieaaValues[]
static const char *const sc_BadProtNameText[]
CStaticArraySet< const char *, PCase_CStr > TBadProtNameSet
static bool s_EqualGene_ref(const CGene_ref &genomic, const CGene_ref &mrna)
static bool s_IsBioseqPartial(CBioseq_Handle bsh)
const char * GetAAName(unsigned char aa, bool is_ascii)
DEFINE_STATIC_ARRAY_MAP(TBadProtNameSet, sc_BadProtName, sc_BadProtNameText)
static const char * kAANames[]
bool HasGeneIdXref(const CMappedFeat &sf, const CObject_id &tag, bool &has_parent_gene_id)
static bool s_RptUnitIsBaseRange(string str, TSeqPos &from, TSeqPos &to)
static bool xf_IsDeltaLitOnly(CBioseq_Handle bsh)
bool CheckIntronAcceptor(ENa_strand strand, TConstSpliceSite acceptor)
bool CheckIntronSpliceSites(ENa_strand strand, TConstSpliceSite donor, TConstSpliceSite acceptor)
bool CheckIntronDonor(ENa_strand strand, TConstSpliceSite donor)
static const char * str(char *buf, int n)
Selector used in CSeqMap methods returning iterators.
atomic_bool CheckECNumFileStatus
size_t GetMRNATranslationProblems(const CSeq_feat &feat, size_t &mismatches, bool ignore_exceptions, CBioseq_Handle nuc, CBioseq_Handle rna, bool far_fetch, bool is_gpipe, bool is_genomic, CScope *scope)
@ eMRNAProblem_UnnecessaryException
@ eMRNAProblem_UnableToFetch
@ eMRNAProblem_TranscriptLenLess
@ eMRNAProblem_PolyATail95
@ eMRNAProblem_TranscriptLenMore
@ eMRNAProblem_ProductReplaced
@ eMRNAProblem_ErroneousException
@ eMRNAProblem_PolyATail100