55 #define NCBI_USE_ERRCODE_X Objtools_Validator
70 bool Overlaps(
const CSeq_feat& cds)
const;
72 bool HasMatch()
const;
74 bool OkWithoutCds(
bool isGenbank =
false)
const;
151 for (
auto id : bsh.
GetId()) {
168 if ((*it)->IsSetSubtype() && (*it)->IsSetName() && !
NStr::IsBlank((*it)->GetName())) {
176 if (
source.IsSetLineage()) {
177 string lineage =
source.GetLineage();
188 if (
source.IsSetDivision()) {
189 string div =
source.GetDivision();
195 if (
source.IsSetGenome()) {
230 if (appropriate_parent) {
249 }
catch (
const exception&) {
273 if (appropriate_parent) {
278 }
catch (
const exception& e) {
280 string(
"Exception while validating bioseq. EXCEPTION: ") +
298 const string& db = dbt.
GetDb();
311 const char& ch = *itr;
312 if (ch ==
'|' || ch ==
',')
return ch;
320 for (
size_t i = 0;
i <
id.length();
i++) {
322 return id.c_str()[
i];
332 const char& ch = *itr;
333 if (ch ==
'|' || ch ==
',')
return ch;
347 "BioseqFind (" +
id.AsFastaString() +
348 ") unable to find itself - possible internal error",
ctx);
360 "BioseqFind (" +
id.AsFastaString() +
361 ") unable to find itself - possible internal error",
ctx);
365 "SeqID " +
id.AsFastaString() +
366 " is present on multiple Bioseqs in record",
ctx);
370 "BioseqFind (" +
id.AsFastaString() +
371 ") unable to find itself - possible internal error",
ctx);
377 switch (
id.Which()) {
383 "TPA record " +
ctx.GetId().front()->AsFastaString() +
384 " should have Seq-hist.assembly for PRIMARY block",
397 "Bad character '" +
string(1, badch) +
"' in accession '" + acc +
"'",
ctx);
404 "Bad accession " + acc,
ctx);
415 const string& name = tsid->
GetName();
417 if (
isspace((
unsigned char)(*s))) {
420 "Seq-id.name '" + name +
"' should be a single "
421 "word without any spaces",
ctx);
432 "Bad character '" +
string(1, badch) +
"' in accession '" + acc +
"'",
ctx);
434 size_t num_letters = 0;
435 size_t num_digits = 0;
436 size_t num_underscores = 0;
437 bool bad_id_chars =
false;
440 bool letter_after_digit =
false;
446 for (;
i < acc.length(); ++
i) {
447 if (
isupper((
unsigned char)acc[
i])) {
449 }
else if (
isdigit((
unsigned char)acc[
i])) {
451 }
else if (acc[
i] ==
'_') {
453 if (num_digits > 0 || num_underscores > 1) {
454 letter_after_digit =
true;
461 if (letter_after_digit || bad_id_chars) {
463 "Bad accession " + acc,
ctx);
464 }
else if (is_NZ && (num_letters == 4 || num_letters == 6) &&
465 (num_digits >= 8 && num_digits <= 11) && num_underscores == 0) {
469 }
else if (num_letters == 2 &&
470 (num_digits == 6 || num_digits == 8 || num_digits == 9) &&
471 num_underscores == 1) {
473 }
else if (num_letters == 4 && num_digits == 10 &&
ctx.IsNa()) {
476 "Bad accession " + acc,
ctx);
492 string msg =
"Missing accession for " +
id.AsFastaString();
501 "Seq-id type not handled",
ctx);
507 "Invalid GI number",
ctx);
511 if (!
id.GetGeneral().IsSetDb() ||
NStr::IsBlank(
id.GetGeneral().GetDb())) {
514 if (
id.GetGeneral().IsSetDb()) {
515 const CDbtag& dbt =
id.GetGeneral();
516 size_t dblen = dbt.
GetDb().length();
528 if (dblen > max_dblen) {
535 if (longer_general) {
556 if (badch ==
'\0' && dbt.
IsSetDb()) {
562 "Bad character '" +
string(1, badch) +
"' in sequence ID '" +
id.AsFastaString() +
"'",
ctx);
577 if (
id.IsLocal() &&
id.GetLocal().IsStr()) {
578 const string& acc =
id.GetLocal().GetStr();
582 "Bad character '" +
string(1, badch) +
"' in local ID '" + acc +
"'",
ctx);
592 if (chain_id.size() == 1 && chain_id[0] == chain) {
594 }
else if (
islower(chain) && chain_id.size() == 2
595 && chain_id[0] == chain_id[1]
596 && chain_id[0] ==
toupper(chain)) {
598 }
else if (chain ==
'|' && chain_id ==
"VB") {
602 "PDB Seq-id contains mismatched \'chain\' and"
603 " \'chain-id\' slots",
ctx);
614 if (! IsNCBIFILESeqId(**
i)) {
616 (*i)->GetLabel(&
label);
617 if (
label.length() > 40) {
619 "Sequence ID is unusually long (" +
631 const list< string > *extra_acc =
nullptr;
633 switch (desc.
Which()) {
664 bool found_good =
false;
672 "The only ids on this Bioseq will be stripped during ID load", seq);
682 "No ids on a Bioseq", seq);
694 bool wgs_tech_needs_wgs_accession =
false;
695 bool is_segset_accession =
false;
696 bool has_wgs_general =
false;
697 bool is_eb_db =
false;
698 bool longer_general =
false;
701 if ((*i)->IsOther() || (*i)->IsEmbl() || (*i)->IsTpe()) {
702 longer_general =
true;
710 if ((*i)->IsGeneral() && (*i)->GetGeneral().IsSetDb()) {
715 has_wgs_general =
true;
717 }
else if ((*i)->IsOther() && (*i)->GetOther().IsSetAccession()) {
718 const string& acc = (*i)->GetOther().GetAccession();
721 wgs_tech_needs_wgs_accession =
true;
725 wgs_tech_needs_wgs_accession =
true;
727 }
else if ((*i)->IsEmbl() && (*i)->GetEmbl().IsSetAccession()) {
729 }
else if ((*i)->IsDdbj() && (*i)->GetDdbj().IsSetAccession()) {
734 CBioseq::TId::const_iterator j;
735 for (j =
i, ++j; j != seq.
GetId().end(); ++j) {
738 os <<
"Conflicting ids on a Bioseq: (";
739 (**i).WriteAsFasta(os);
741 (**j).WriteAsFasta(os);
748 if ((*i)->IsGenbank() || (*i)->IsEmbl() || (*i)->IsDdbj()) {
749 wgs_tech_needs_wgs_accession =
true;
757 is_segset_accession =
true;
761 if (is_lrg && ! has_ng) {
763 "LRG sequence needs NG_ accession", seq);
769 unsigned int gi_count = 0;
770 unsigned int accn_count = 0;
771 unsigned int lcl_count = 0;
774 switch ((**k).Which()) {
785 if ((*k)->IsGenbank() || (*k)->IsEmbl() || (*k)->IsDdbj()) {
793 "Accession " + acc +
" has 0 version", seq);
806 "Missing accession for " + tsid->
GetName(), seq);
820 string label = (*k)->AsFastaString();
822 "Missing identifier for " +
label, seq);
838 if (! mi || ! mi->IsSetTech() ||
843 "WGS accession should have Mol-info.tech of wgs", seq);
845 }
else if (mi && mi->IsSetTech() &&
847 wgs_tech_needs_wgs_accession &&
848 ! is_segset_accession &&
857 "Mol-info.tech of wgs should have WGS accession", seq);
862 && (! mi->IsSetBiomol()
866 "genomic RefSeq accession should use genomic or cRNA moltype",
871 if (mi && mi->IsSetBiomol()) {
872 switch (mi->GetBiomol()) {
885 "Molecule type (DNA) does not match biomol (RNA)", seq);
894 if (gi_count > 0 && accn_count == 0 && !
m_Imp.
IsPDB() &&
897 "No accession on sequence with gi number", seq);
899 if (gi_count > 0 && accn_count > 1) {
901 "Multiple accessions on sequence with gi number", seq);
944 CEMBL_block::TKeywords::const_iterator keyword = embl_i->
GetEmbl().
GetKeywords().begin();
959 const string& primary_acc,
965 const list<string>* extra_acc =
nullptr;
980 primary_acc +
" used for both primary and"
981 " secondary accession", seq);
1006 bool has_barcode_tech =
false;
1010 has_barcode_tech =
true;
1013 bool has_barcode_keyword =
false;
1017 has_barcode_keyword =
true;
1021 if (has_barcode_keyword && ! has_barcode_tech) {
1023 "BARCODE keyword without Molinfo.tech barcode",
1027 if (has_barcode_tech && ! has_barcode_keyword && di) {
1029 "Molinfo.tech barcode without BARCODE keyword",
1034 "Sequence has both BARCODE and UNVERIFIED keywords",
1059 "Bioseq.mol is type nucleic acid", seq);
1067 "Non-linear topology set on protein", seq);
1073 "Protein not single stranded", seq);
1088 "Circular Bacteria or Archaea should be chromosome, or plasmid, or extrachromosomal", seq);
1101 "Bioseq.mol is type other", seq);
1171 bool is_wgs =
false;
1172 bool is_grc =
false;
1195 sequence::CDeflineGenerator defline_generator;
1196 string title = defline_generator.GenerateDefline(seq, *
m_Scope, sequence::CDeflineGenerator::fIgnoreExisting);
1204 is_wgs =
IsWGS(bsh);
1206 bool is_gb =
false, is_refseq =
false, is_ng =
false;
1209 const CSeq_id& sid = **sid_itr;
1210 switch (sid.
Which()) {
1236 if (! is_wgs && ! is_grc)
1278 "No CdRegion in nuc-prot set points to this protein",
1284 bool is_complete =
false;
1390 "BioProject entries not present on CON record", seq);
1393 }
catch (
const exception& e) {
1394 if (
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
1396 string(
"Exception while validating BioseqContext. EXCEPTION: ") +
1417 "Orphaned stand-alone protein", seq);
1427 if (prot_feats.size() > 1) {
1430 "Protein sequence has multiple unprocessed protein features",
1431 feat->GetOriginalFeature());
1438 "Expected submission citation is missing for this Bioseq", seq);
1448 for (
CFeat_CI feat_ci(bsh, sel); feat_ci; ++feat_ci) {
1450 const CSeq_feat& matpeptide = feat_ci->GetOriginalFeature();
1457 if (matlen != prdlen) {
1459 "Mat_peptide does not match length of instantiated product",
1475 if (m_res != p_res) {
1477 "Mismatch in mat_peptide (" +
string(1, (
char)m_res) +
") and instantiated product (" + \
1515 bool has_cit_sub =
false;
1517 while (p && !has_cit_sub) {
1528 template <
class Iterator,
class Predicate>
1531 while (iter1 != iter1_stop && iter2 != iter2_stop) {
1532 if (! pred(*iter1, *iter2)) {
1538 if (iter1 != iter1_stop || iter2 != iter2_stop) {
1561 if (chs1 == chs2)
return true;
1571 return dbt1->
Compare(*dbt2) == 0;
1674 printf (
"Orgname not set!\n");
1676 printf (
"Lineage not set!\n");
1692 if (start1 == stop2 + 1 || start2 == stop1 + 1) {
1710 if (start1 == stop2 + 1 || start2 == stop1 + 1) {
1728 if (start1 == stop2 + 1 || start2 == stop1 + 1) {
1749 CCacheImpl::TFeatValue::const_iterator feat = rnas.begin();
1750 if (feat != rnas.end()) {
1752 CCacheImpl::TFeatValue::const_iterator feat_prev = feat;
1754 for (; feat != rnas.end(); ++feat_prev, ++feat) {
1757 feat->GetLocation(),
m_Scope)) {
1761 const CRNA_ref& tm = feat_prev->GetData().GetRna();
1762 const CRNA_ref& tr = feat->GetData().GetRna();
1766 feat->GetLocation(),
m_Scope)) {
1768 "tRNA contained within tmRNA",
1769 feat->GetOriginalFeature());
1775 }
catch (
const exception& e) {
1776 if (
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
1778 string(
"Exception while validating RNA features. EXCEPTION: ") +
1795 CCacheImpl::TFeatValue::const_iterator feat = biosrcs.begin();
1796 if (feat != biosrcs.end()) {
1802 "Source feature is full length, should be descriptor",
1803 feat->GetOriginalFeature());
1808 CCacheImpl::TFeatValue::const_iterator feat_prev = feat;
1810 for (; feat != biosrcs.end(); ++feat_prev, ++feat) {
1813 "Multiple full-length source features, should only be one if descriptor is transgenic",
1814 feat->GetOriginalFeature());
1818 feat->GetLocation(),
m_Scope)) {
1824 bool are_identical =
true;
1825 if (feat_prev->IsSetComment() && feat->IsSetComment()
1827 are_identical =
false;
1829 const CBioSource& src_prev = feat_prev->GetData().GetBiosrc();
1830 const CBioSource& src = feat->GetData().GetBiosrc();
1833 are_identical =
false;
1840 are_identical =
false;
1845 are_identical =
false;
1850 "Multiple equivalent source features should be combined into one multi-interval feature",
1851 feat->GetOriginalFeature());
1855 }
catch (
const exception& e) {
1856 if (
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
1858 string(
"Exception while validating source features. EXCEPTION: ") +
1871 if ((*it)->IsGen() && (*it)->GetGen().IsSetCit()
1872 && ! (*it)->GetGen().IsSetCit()
1873 && ! (*it)->GetGen().IsSetJournal()
1874 && ! (*it)->GetGen().IsSetDate()
1875 && (*it)->GetGen().IsSetSerial_number()) {
1896 CCacheImpl::TFeatValue::const_iterator feat = pubs.begin();
1897 if (feat != pubs.end()) {
1900 "Publication feature is full length, should be descriptor",
1901 feat->GetOriginalFeature());
1904 CCacheImpl::TFeatValue::const_iterator feat_prev = feat;
1906 if (feat_prev != pubs.end()) {
1910 for (; feat != pubs.end(); ++feat, ++feat_prev) {
1913 "Publication feature is full length, should be descriptor",
1914 feat->GetOriginalFeature());
1917 bool are_identical =
true;
1918 if (feat_prev->IsSetComment() && feat->IsSetComment()
1920 are_identical =
false;
1926 are_identical =
false;
1930 prev_label.swap(
label);
1935 if (are_identical) {
1937 "Multiple equivalent publication features should be combined into one multi-interval feature",
1938 feat->GetOriginalFeature());
1942 }
catch (
const exception& e) {
1943 if (
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
1945 string(
"Exception while validating pub features. EXCEPTION: ") +
1977 const CBioseq& seq,
const vector<CTempString>& labels)
1979 if (labels.size() <= 1) {
1987 static const string kWarningPrefix =
1988 "Multiple equivalent publications annotated on this sequence [";
1989 static const string::size_type kMaxSummaryLen = 100;
1994 TLabelCount label_count;
1996 ITERATE(vector<CTempString>, label_it, labels) {
1997 ++label_count[*label_it];
2001 vector<CTempString> sorted_dup_labels;
2002 ITERATE(TLabelCount, label_count_it, label_count) {
2003 int num_appearances = label_count_it->second;
2005 if (num_appearances > 1) {
2006 const CTempString& dup_label = label_count_it->first;
2007 sorted_dup_labels.push_back(dup_label);
2013 string err_msg = kWarningPrefix;
2014 ITERATE(vector<CTempString>, dup_label_it, sorted_dup_labels) {
2017 err_msg.resize(kWarningPrefix.length());
2018 if (summary.
length() > kMaxSummaryLen) {
2019 err_msg += summary.
substr(0, kMaxSummaryLen);
2038 vector<int> serials;
2039 vector<CTempString> published_labels;
2040 vector<CTempString> unpublished_labels;
2055 back_inserter(published_labels));
2057 back_inserter(unpublished_labels));
2061 bool otherpub =
false;
2063 switch ((*pub_it)->Which()) {
2065 muid = (*pub_it)->GetMuid();
2068 pmid = (*pub_it)->GetPmid();
2077 bool collision =
false;
2079 if (muids_seen.
find(muid) != muids_seen.
end()) {
2086 if (pmids_seen.
find(pmid) != pmids_seen.
end()) {
2094 "Multiple publications with identical PubMed ID", *
ctx, *it);
2113 if ((*id)->IsGi()) {
2114 gi = (*id)->GetGi();
2126 if ((*id)->IsGi()) {
2127 if (gi == (*id)->GetGi()) {
2129 "Replaced by gi (" +
2141 if ((*id)->IsGi()) {
2142 if (gi == (*id)->GetGi()) {
2166 if (
id.Match(**it)) {
2181 switch (seqdata.
Which()) {
2225 if (
prot[
prot.size() - 1] ==
'*') {
2235 if (mi && mi->IsSetCompleteness()) {
2242 }
catch (
const std::exception&) {
2263 mix.
Set().push_back(*it);
2335 if (
prev.IsSetExcept() &&
prev.GetExcept() &&
prev.IsSetExcept_text()) {
2346 for (
auto it : currP.
GetName()) {
2351 for (
auto it : prevP.
GetName()) {
2398 #define FOR_EACH_SEQID_ON_BIOSEQ_HANDLE(Itr, Var) \
2399 ITERATE (CBioseq_Handle::TId, Itr, Var.GetId())
2414 if (entry.
IsSeq()) {
2436 if ((*it)->IsMolinfo() && (*it)->GetMolinfo().IsSetTech() && (*it)->GetMolinfo().GetTech() ==
CMolInfo::eTech_wgs) {
2457 if (entry.
IsSeq()) {
2549 if ((*id)->IsPdb()) {
2600 && (*it)->GetSet().IsSetClass()
2609 if ((*loc)->IsNull()) {
2613 if (locs.size() - nulls < parts.size()) {
2615 "Parts set contains too many Bioseqs", seq);
2617 }
else if (locs.size() - nulls > parts.size()) {
2619 "Parts set does not contain enough Bioseqs", seq);
2625 size_t size = locs.size();
2626 CSeg_ext::Tdata::const_iterator loc_it = locs.begin();
2627 CBioseq_set::TSeq_set::const_iterator part_it = parts.begin();
2628 for (
size_t i = 0;
i <
size; ++
i) {
2630 if ((*loc_it)->IsNull()) {
2634 if (! (*part_it)->IsSeq()) {
2636 "Parts set component is not Bioseq", seq);
2640 if (!
IsIdIn(loc_id, (*part_it)->GetSeq())) {
2642 "Segmented bioseq seq_ext does not correspond to parts "
2643 "packaging order", seq);
2651 ERR_POST_X(4,
"Seq-loc not for unique sequence");
2654 string err_msg =
"Unknown error:";
2655 err_msg += x1.
what();
2658 }
catch (std::exception& x2) {
2659 string err_msg =
"Unknown error:";
2660 err_msg += x2.what();
2673 if (! inst.
IsSetExt())
return false;
2677 if (! (*iter)->IsLiteral())
continue;
2689 bool has_gap =
false;
2692 if ((*iter)->IsLiteral() &&
2693 (! (*iter)->GetLiteral().IsSetSeq_data() || (*iter)->GetLiteral().GetSeq_data().IsGap())) {
2709 string title = sequence::CDeflineGenerator().GenerateDefline(bsh);
2725 "Complete genome in title without complete flag set",
2734 "Circular topology without complete flag set",
ctx, *desc);
2742 "Title contains 'complete genome' but sequence has gaps", seq);
2756 if (! (*sg))
continue;
2773 if (! (*sg) )
continue;
2804 if (! (*sg))
continue;
2836 "WGS submission includes wrong gap type. Gaps for WGS genomes should be Assembly Gaps with linkage evidence.", seq);
2845 "TSA submission includes wrong gap type. Gaps for TSA should be Assembly Gaps with linkage evidence.", seq);
2854 "Genome submission includes wrong gap type. Gaps for genomes should be Assembly Gaps with linkage evidence.", seq);
2894 bool has_biosample =
false;
2895 bool has_bioproject =
false;
2903 has_biosample =
true;
2907 has_bioproject =
true;
2919 for (
auto it : uo.
GetData()) {
2920 if (! it->GetLabel().IsStr()) {
2923 const string&
label = it->GetLabel().GetStr();
2926 const string&
str = it->GetData().GetStr();
2927 auto fst =
str.find_first_of(
"0123456789");
2931 const string&
str = it->GetData().GetStr();
2932 auto lst =
str.find_first_of(
"0123456789");
2936 if ((fr != 0) && (to != 0)) {
2937 int df = to - fr + 1;
2950 if (! has_biosample && ! has_bioproject) {
2952 "WGS master lacks both BioSample and BioProject",
2954 }
else if (! has_biosample) {
2956 "WGS master lacks BioSample",
2958 }
else if (! has_bioproject) {
2960 "WGS master lacks BioProject",
2963 if (! has_biosample || ! has_bioproject) {
2971 bool only_local =
true;
2972 bool is_NCACNTNW =
false;
2973 bool is_patent =
false;
2975 if (! (*id_it)->IsLocal()) {
2977 if ((*id_it)->IsPatent()) {
2985 if (is_NCACNTNW || is_patent) {
2987 }
else if (is_circular) {
2989 }
else if (only_local) {
3011 if (vec[
i] ==
'N') {
3014 if (max_stretch < this_stretch) {
3015 max_stretch = this_stretch;
3020 if (this_stretch >= 10) {
3024 if (vec.
size() > 20 &&
i > vec.
size() - 10) {
3030 if (max_stretch < this_stretch) {
3031 max_stretch = this_stretch;
3036 if (max_stretch < this_stretch) {
3037 max_stretch = this_stretch;
3062 if (max_stretch >= 15) {
3069 "Sequence has a stretch of at least 10 Ns within the first 20 bases", seq);
3074 "Sequence has a stretch of at least 10 Ns within the last 20 bases", seq);
3086 bool at_least_one =
false;
3088 for (
CSeqVector_CI sv_iter(vec); (sv_iter) && rval; ++sv_iter) {
3089 if (*sv_iter !=
'N') {
3092 at_least_one =
true;
3096 return (rval && at_least_one);
3103 switch (seq_data.
Which()) {
3105 vector<char>::const_iterator it = seq_data.
GetNcbi4na().
Get().begin();
3106 unsigned char mask = 0xf0;
3107 unsigned char shift = 4;
3108 for (
size_t n = 0;
n <
len;
n++) {
3109 unsigned char c = ((*it) &
mask) >> shift;
3125 for (
size_t n = 0;
n <
len &&
n < s.length();
n++) {
3139 for (
size_t n = 0;
n <
len;
n++) {
3157 for (
CSeqMap_CI seq_iter(bsh, sel); seq_iter; ++seq_iter) {
3158 switch (seq_iter.GetType()) {
3160 count +=
CountNs(seq_iter.GetData(), seq_iter.GetLength());
3193 bool is_first =
true;
3197 if ((*iter)->IsLoc()) {
3200 if ((*iter)->IsLiteral()) {
3257 int max_stretch = 0;
3258 auto IsN = [](
char c) {
return c ==
'N'; };
3260 for (
auto begin_it = find_if_not(begin(vec), end(vec), IsN);
3261 begin_it != end(vec);) {
3262 auto distanceToEnd = distance(begin_it, end(vec));
3264 auto interval = (distanceToEnd > threshold) ? threshold : distanceToEnd;
3265 auto end_it = find_if(begin_it,
next(begin_it, interval), IsN);
3266 const auto current_stretch = distance(begin_it, end_it);
3267 if (current_stretch >= threshold) {
3271 if (current_stretch > max_stretch) {
3272 max_stretch = current_stretch;
3274 begin_it = find_if_not(end_it, end(vec), IsN);
3326 bool begin_ambig =
false, end_ambig =
false;
3334 bool is_circular =
false;
3357 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases",
3362 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases",
3377 TSeqPos num_ns = 0, max_stretch = 0;
3380 int pct_n = (num_ns * 100) / seq.
GetLength();
3386 if (max_stretch >= 15) {
3392 "Sequence has a stretch of at least 10 Ns within the first 20 bases", seq);
3396 "Sequence has a stretch of at least 10 Ns within the last 20 bases", seq);
3402 int pct_n =
PctNs(bsh);
3418 }
catch (exception&) {
3455 vector<TSeqPos> gapPositions;
3464 for (; gap_it; ++gap_it) {
3469 gapPositions.push_back(gp_start);
3470 gapPositions.push_back(gp_end);
3477 vector<TSeqPos> featPositions;
3483 for (; feat_it; ++feat_it) {
3494 featPositions.push_back(ft_start);
3495 featPositions.push_back(ft_end);
3502 int remaininig_gaps = (
int) gapPositions.size() / 2;
3503 int remaining_feats = (
int) featPositions.size() / 2;
3505 if (remaininig_gaps < 1 || remaining_feats < 1) {
3512 TSeqPos gap_start = gapPositions[gap_idx];
3514 TSeqPos gap_end = gapPositions[gap_idx];
3518 TSeqPos feat_start = featPositions[feat_idx];
3520 TSeqPos feat_end = featPositions[feat_idx];
3526 while (remaininig_gaps >= 0 && remaining_feats >= 0) {
3527 if (gap_end < feat_start) {
3528 if (remaininig_gaps <= 0) {
3531 gap_start = gapPositions[gap_idx];
3533 gap_end = gapPositions[gap_idx];
3536 }
else if (feat_end < gap_start) {
3537 if (remaining_feats <= 0) {
3540 feat_start = featPositions[feat_idx];
3542 feat_end = featPositions[feat_idx];
3547 if (feat_start != gap_start || feat_end != gap_end) {
3550 if (remaininig_gaps <= 0) {
3553 gap_start = gapPositions[gap_idx];
3555 gap_end = gapPositions[gap_idx];
3558 if (remaining_feats <= 0) {
3561 feat_start = featPositions[feat_idx];
3563 feat_end = featPositions[feat_idx];
3571 }
catch (
const exception&) {
3586 "Fuzzy length on " + rpr +
" Bioseq", seq);
3593 "Invalid Bioseq length [" +
len +
"]", seq);
3607 "HTGS 2 raw seq has no gaps and no graphs", seq);
3624 "Using a nucleic acid alphabet on a protein sequence",
3636 "Using a protein alphabet on a nucleic acid",
3645 "Sequence alphabet not set",
3650 bool check_alphabet =
false;
3651 unsigned int factor = 1;
3657 check_alphabet =
true;
3677 "Sequence alphabet not set",
3682 if (calc_len % factor) {
3691 if (calc_len > data_len) {
3693 "Bioseq.seq_data too short [" + data_len_str +
3694 "] for given length [" + s_len +
"]", seq);
3696 }
else if (calc_len < data_len) {
3698 "Bioseq.seq_data is larger [" + data_len_str +
3699 "] than given length [" + s_len +
"]", seq);
3702 if (check_alphabet) {
3703 unsigned int trailingX = 0;
3705 bool leading_x =
false, found_lower =
false, cds_5_prime =
false;
3712 for (
CSeqVector_CI sv_iter(*sv), sv_res_iter(sv_res); (sv_iter) && (sv_res_iter); ++sv_iter, ++sv_res_iter) {
3718 }
else if (res ==
'*' && bsh.
IsAa()) {
3720 }
else if (res ==
'-' && bsh.
IsAa()) {
3728 if (++bad_cnt > 10) {
3730 "More than 10 invalid residues. Checking stopped",
3742 string msg =
"Invalid";
3743 if (seq.
IsNa() && strchr (
"EFIJLOPQXZ", res) !=
NULL) {
3744 msg +=
" nucleotide";
3745 }
else if (seq.
IsNa() && res ==
'U') {
3746 msg +=
" nucleotide";
3762 }
else if (res ==
'-' || sv->IsInGap(pos - 1)) {
3764 }
else if (res ==
'*') {
3766 }
else if (res ==
'X') {
3772 string msg =
"Invalid residue [";
3787 if (seq.
IsAa() && (leading_x || trailingX > 0)) {
3797 cds_seq = cds_seq.substr(1);
3799 cds_seq = cds_seq.substr(2);
3806 if (cds_seq.length() >= 3) {
3807 string lastcodon = cds_seq.substr(cds_seq.length() - 3);
3828 "Sequence starts with leading X", seq);
3833 string msg =
"Sequence ends in " +
3835 if (trailingX > 1) {
3843 "Sequence contains lower-case characters", seq);
3846 if (terminations > 0 || dashes > 0) {
3863 string protein_label;
3869 if (! prots.empty()) {
3871 prots[0].GetData().GetProt();
3873 protein_label = first_prot.
GetName().front();
3877 }
catch (
const std::exception&) {
3881 gene_label =
"gene?";
3884 protein_label =
"prot?";
3888 if (gap_at_start && dashes == 1) {
3890 "gap symbol at start of protein sequence (" + gene_label +
" - " + protein_label +
")",
3892 }
else if (gap_at_start) {
3894 "gap symbol at start of protein sequence (" + gene_label +
" - " + protein_label +
")",
3897 "[" +
NStr::SizetToString (dashes - 1) +
"] internal gap symbols in protein sequence (" + gene_label +
" - " + protein_label +
")",
3901 "[" +
NStr::SizetToString (dashes) +
"] internal gap symbols in protein sequence (" + gene_label +
" - " + protein_label +
")",
3906 if (terminations > 0) {
3907 string msg =
"[" +
NStr::SizetToString(terminations) +
"] termination symbols in protein sequence";
3908 msg +=
" (" + gene_label +
" - " + protein_label +
")";
3919 bool is_wgs =
IsWGS(bsh);
3923 bool has_gap_char =
false;
3928 const size_t run_len_cutoff = ( is_wgs ? 20 : 100 );
3929 for (
CSeqVector_CI sv_iter(sv); (sv_iter); ++sv_iter, ++pos) {
3939 has_gap_char =
true;
3945 if (run_len >= run_len_cutoff && start_pos > 1) {
3957 "Raw nucleotide should not contain gap characters", seq);
3969 string id_test_label;
3986 if (seqlen > loclen) {
3991 }
else if (seqlen < loclen) {
4010 list<CRef<CSeq_loc>>::const_iterator i2 = i1;
4011 for (++i2; i2 != locs.end(); ++i2) {
4019 if ((**i1).IsWhole() && (**i2).IsWhole()) {
4022 "Segmented sequence has multiple references to " +
4027 "Segmented sequence has multiple references to " +
4028 sid +
" that are not SEQLOC_WHOLE", seq);
4039 bool got_partial =
false;
4041 if (! (*sd)->IsMolinfo() || ! (*sd)->GetMolinfo().IsSetCompleteness()) {
4045 switch ((*sd)->GetMolinfo().GetCompleteness()) {
4050 "Complete segmented sequence with MolInfo partial", seq);
4056 "No-left inconsistent with segmented SeqLoc",
4064 "No-right inconsistent with segmented SeqLoc",
4072 "No-ends inconsistent with segmented SeqLoc",
4081 if (! got_partial) {
4083 "Partial segmented sequence without MolInfo partial", seq);
4114 if ((*it)->IsSwissprot()) {
4126 }
else if (
cmp > 0) {
4132 if (start1 < start2) {
4134 }
else if (start2 < start1) {
4141 if (stop1 < stop2) {
4161 }
else if ((*sg)->IsLoc()) {
4162 const CSeq_id*
id = (*sg)->GetLoc().GetId();
4180 if (! loc.
IsInt()) {
4189 far_loc->
SetInt().SetFrom(start - 2);
4190 far_loc->
SetInt().SetTo(start - 1);
4199 far_loc->
SetInt().SetFrom(stop + 1);
4200 far_loc->
SetInt().SetTo(stop + 2);
4218 "Delta seq component should not be of type whole", seq);
4225 "Delta component is gi|0", seq);
4240 if (seq_len <= stop) {
4241 string id_label =
id->AsFastaString();
4244 +
") greater than length of " + id_label
4249 string id_label =
id->AsFastaString();
4251 "Scaffold points to some but not all of " +
4252 id_label +
", excluded portion contains features", seq);
4256 "Unable to find far delta sequence component", seq);
4259 }
catch (
const std::exception&) {
4270 "-1 length on seq-loc of delta seq_ext", seq);
4273 if (loc_str.empty()) {
4278 "Short length (-1) on seq-loc (" + loc_str +
") of delta seq_ext", seq);
4283 if (loc_len <= 10) {
4286 if (loc_str.empty()) {
4292 ") on seq-loc (" + loc_str +
") of delta seq_ext", seq);
4300 if (loc_str.empty()) {
4304 "No length for Seq-loc (" + loc_str +
") of delta seq-ext",
4314 }
else if (seg.
IsLoc()) {
4333 "proximity ligation",
4351 bool is_unspec =
false;
4356 int linktype = evidence.
GetType();
4357 if (linktype == 8) {
4407 "No CDelta_ext data for delta Bioseq", seq);
4410 bool any_tech_ok =
false;
4411 bool has_gi =
false;
4416 }
else if ((*id_it)->IsGi()) {
4421 if (! any_tech_ok && seq.
IsNa()
4435 bool last_is_gap =
false;
4436 int prev_gap_linkage = -1;
4438 int gap_linkage = -1;
4440 size_t num_gaps = 0;
4441 size_t num_adjacent_gaps = 0;
4442 bool non_interspersed_gaps =
false;
4444 int num_gap_known_or_spec = 0;
4445 int num_gap_unknown_unspec = 0;
4447 vector<CConstRef<CSeq_loc> > delta_locs;
4453 "NULL pointer in delta seq_ext valnode (segment " +
4457 switch ((**sg).Which()) {
4459 const CSeq_loc& loc = (**sg).GetLoc();
4461 delta_locs.push_back(
tmp);
4465 if (! last_is_gap && !
first) {
4466 non_interspersed_gaps =
true;
4468 last_is_gap =
false;
4469 prev_gap_linkage = -1;
4484 "Seq-lit of length 0 in delta chain", seq);
4489 if (! last_is_gap && !
first) {
4490 non_interspersed_gaps =
true;
4492 last_is_gap =
false;
4493 prev_gap_linkage = -1;
4496 vector<TSeqPos> badIdx;
4498 const string* ss =
nullptr;
4499 switch (data.
Which()) {
4511 ITERATE (vector<TSeqPos>, ci, badIdx) {
4513 "Invalid residue [" +
4523 ITERATE (vector<TSeqPos>, it, badIdx) {
4525 "Invalid residue [" +
4526 ss->substr(*it, 1) +
"] at position [" +
4535 if (max_ns >= 0 && adjacent_ns >
unsigned(max_ns)) {
4554 num_gap_unknown_unspec++;
4556 num_gap_known_or_spec++;
4570 "First delta seq component is a gap", seq);
4574 (prev_gap_type == gap_type ||
4575 prev_gap_linkage != gap_linkage ||
4578 ++num_adjacent_gaps;
4587 "Gap of length 0 in delta chain", seq);
4590 "Gap of length 0 with unknown fuzz in delta chain", seq);
4595 "Gap of unknown length should have length 100", seq);
4599 prev_gap_type = gap_type;
4600 prev_gap_linkage = gap_linkage;
4607 "CDelta_seq::Which() is e_not_set", seq);
4611 if (num_gap_unknown_unspec > 0 && num_gap_known_or_spec == 0) {
4612 if (num_gap_unknown_unspec > 1) {
4615 " Seq-gaps have unknown type and unspecified linkage", seq);
4618 "Single Seq-gap has unknown type and unspecified linkage", seq);
4633 if (non_interspersed_gaps && ! has_gi && mi &&
4647 "HTGS delta seq should have gaps between all sequence runs", seq);
4649 if (num_adjacent_gaps >= 1) {
4650 string msg = (num_adjacent_gaps == 1) ?
4651 "There is 1 adjacent gap in delta seq" :
4653 " adjacent gaps in delta seq";
4663 "Last delta seq component is a gap", seq);
4667 if (num_gaps == 0 && mi) {
4672 "HTGS 2 delta seq has no gaps and no graphs", seq);
4677 if (delta_locs.size() > 1) {
4679 vector<CConstRef<CSeq_loc>>::iterator it1 = delta_locs.begin();
4680 vector<CConstRef<CSeq_loc>>::iterator it2 = it1;
4682 while (it2 != delta_locs.end()) {
4685 string seq_label = (*it1)->GetId()->AsFastaString();
4691 +
" on a Bioseq " + seq_label,
4701 "Self-referential delta sequence", seq);
4710 if (delta_i->Empty()) {
4718 if (res ==
'N' && ! sv.
IsInGap(pos - 1)) {
4720 "Ambiguous residue N is adjacent to a gap around position " +
NStr::SizetToString (pos + 1),
4725 if (delta_len > 0 && pos + delta_len <
len) {
4726 if (sv.
IsInGap(pos + delta_len - 1)) {
4728 if (res ==
'N' && ! sv.
IsInGap(pos + delta_len)) {
4730 "Ambiguous residue N is adjacent to a gap around position " +
NStr::SizetToString(pos + delta_len + 1),
4738 }
catch (
const std::exception&) {
4747 bool has_gi =
false;
4749 if ((*id_it)->IsGi()) {
4762 int linkevarray[13];
4763 for (
int i = 0;
i < 13;
i++) {
4766 bool is_unspec =
false;
4771 int linktype = evidence.
GetType();
4772 if (linktype == 8) {
4776 if (linktype == 255) {
4777 (linkevarray[11])++;
4778 }
else if (linktype < 0 || linktype > 10) {
4779 (linkevarray[12])++;
4781 (linkevarray[linktype])++;
4784 if (linkevarray[8] > 0 && linkcount > linkevarray[8]) {
4786 "Seq-gap type has unspecified and additional linkage evidence", seq);
4788 for (
int i = 0;
i < 13;
i++) {
4789 if (linkevarray[
i] > 1) {
4797 "Seq-gap with linkage evidence must have linkage field set to linked", seq);
4808 if (linkevarray[8] > 0 && linkcount == linkevarray[8]) {
4812 "Contamination gaps must have linkage evidence 'unspecified'", seq);
4817 " should not have linkage evidence", seq);
4826 "Seq-gap type == scaffold is missing required linkage evidence", seq);
4829 bool suppress_SEQ_INST_SeqGapProblem =
false;
4833 if ((**it).IsCreate_date())
4837 suppress_SEQ_INST_SeqGapProblem =
true;
4842 if (! suppress_SEQ_INST_SeqGapProblem)
4844 "Seq-gap type == repeat and linkage == linked is missing required linkage evidence", seq);
4849 "Contamination gap-types must be linked and have linkage-evidence of type 'unspecified'", seq);
4866 rpr =
"constructed";
4868 const string err0 =
"Bioseq-ext not allowed on " + rpr +
" Bioseq";
4869 const string err1 =
"Missing or incorrect Bioseq-ext on " + rpr +
" Bioseq";
4870 const string err2 =
"Missing Seq-data on " + rpr +
" Bioseq";
4871 const string err3 =
"Seq-data not allowed on " + rpr +
" Bioseq";
4940 "Invalid Bioseq->repr = " +
4963 if (! it->IsLoc())
continue;
4965 if (! hdl)
continue;
4973 if (parent_location == cgenome)
break;
5023 "Transgenic source descriptor requires presence of source feature",
5030 "Genome difference between parent and component",
5037 "Mitochondrial Metazoan sequences should be less than 65000 bp",
5049 "No Mol-info applies to this Bioseq",
5067 if (all_feat_it->IsSetCit() || all_feat_it->GetData().IsPub()) {
5080 if (closest_molinfo) {
5083 "Suspicious use of complete",
ctx, *closest_molinfo);
5086 "Suspicious use of complete", seq);
5113 sequence::CDeflineGenerator defline_generator;
5114 title = defline_generator.GenerateDefline(seq, *
m_Scope, sequence::CDeflineGenerator::fIgnoreExisting);
5123 bool reported =
false;
5129 if ((*it)->IsGenbank()) {
5140 "Circular topology has complete flag set, but title should say complete sequence or complete genome",
5215 if (it->IsSeq() && it->GetSeq().IsSetInst_Repr() &&
5217 parent = it->GetSeq();
5250 vector<CConstRef<CSeq_feat>> containing_genes;
5251 vector<int> num_contained;
5254 vector<CConstRef<CSeq_feat>>::iterator cit = containing_genes.begin();
5255 vector<int>::iterator nit = num_contained.begin();
5256 while (cit != containing_genes.end() && nit != num_contained.end()) {
5262 if (n_right < left) {
5270 cit = containing_genes.erase(cit);
5271 nit = num_contained.erase(nit);
5281 containing_genes.push_back(ref);
5282 num_contained.push_back(0);
5285 vector<CConstRef<CSeq_feat>>::iterator cit = containing_genes.begin();
5286 vector<int>::iterator nit = num_contained.begin();
5287 while (cit != containing_genes.end() && nit != num_contained.end()) {
5296 }
catch (
const exception& e) {
5298 string(
"Exception while validating bioseq MultipleGeneOverlap. EXCEPTION: ") +
5306 string msg(
"gene [");
5311 msg +=
"] overlaps CDS but does not completely contain it";
5320 msg +=
"] overlaps mRNA but does not completely contain it";
5343 if (!
label.empty()) {
5351 if (!
label.empty()) {
5367 if (connected_gene) {
5410 if ((*db)->CanGetDb() &&
5422 if (vec.
IsInGap(pos) || vec[pos] ==
'N') {
5442 if (pos <
len - after && vec.
IsInGap(pos + after)) {
5462 if (pos >= before && vec.
IsInGap(pos - before)) {
5485 for (
CSeq_loc_CI sl_iter(loc); sl_iter; ++sl_iter) {
5492 if (
first.GetStrand() !=
last.GetStrand()) {
5515 if (start >=
len || stop >=
len) {
5520 swap(acceptor, donor);
5521 stop =
len - donor - 1;
5522 start =
len - acceptor - 1;
5545 }
catch (exception&) {
5555 if ((res1 ==
'G' && res2 ==
'T') ||
5556 (res1 ==
'G' && res2 ==
'C')) {
5562 }
catch (exception&) {
5571 if ((res1 ==
'A') && (res2 ==
'G')) {
5577 }
catch (exception&) {
5590 switch (partial_type) {
5650 vector<CMappedFeat> children = tr->GetChildren(feat);
5651 ITERATE(vector<CMappedFeat>, it, children) {
5661 bool look_for_gene =
true;
5666 if (cds_children.size() > 0) {
5667 look_for_gene =
false;
5668 for (
auto it = cds_children.begin(); it != cds_children.end(); it++) {
5685 if (! rval && look_for_gene) {
5702 bool bad_seq =
false;
5703 bool is_gap =
false;
5704 bool abuts_n =
false;
5712 #ifdef USE_FEAT_TREE_FOR_EXON
5732 const CSeq_loc& mrna_loc = s->second->GetLocation();
5761 "PartialLocation: Improper use of partial (greater than or less than)", feat);
5780 "PartialLocation: Internal partial intervals do not include first/last residue of sequence", feat);
5807 if (! partial_start && ! partial_stop) {
5829 if (intron_start == stop + 1 && partial_stop) {
5832 if (intron_start > stop + 1) {
5835 if (start > 0 && partial_start) {
5837 if (intron_stop == start - 1) {
5857 string msg = (partial_type == 0 ?
"Start" :
"Stop");
5858 msg +=
" does not include first/last residue of ";
5861 bool organelle =
false;
5862 bool not_expected =
false;
5863 if (at_splice_or_gap) {
5869 msg +=
"organelle ";
5873 not_expected =
true;
5883 msg +=
" (organelle does not use standard splice site convention)";
5886 msg +=
" (but is at consensus splice site)";
5895 if (partial_type == 0) {
5898 }
else if (organelle) {
5906 }
else if (organelle) {
5913 PostErr(sev, err_type, msg, feat);
5928 "Feature products should be entire sequences.", *(feat.
GetSeq_feat()));
5949 bool no_nonconsensus_except =
true;
5953 if (
NStr::Find (except_text,
"nonconsensus splice site") != string::npos ||
5954 NStr::Find (except_text,
"heterogeneous population sequenced") != string::npos ||
5955 NStr::Find (except_text,
"low-quality sequence region") != string::npos ||
5956 NStr::Find (except_text,
"artificial location") != string::npos) {
5957 no_nonconsensus_except =
false;
5962 string comment_text;
5969 "Partial CDS on complete sequence",
5975 for (
int j = 0; j < 2; ++j) {
5976 if (partial_loc & errtype) {
5977 bool bad_seq =
false;
5978 bool is_gap =
false;
5979 bool abuts_n =
false;
5995 }
else if (bad_seq) {
5998 "PartialLocation: Start does not include first/last residue of sequence (and is at bad sequence)" :
5999 "PartialLocation: Stop does not include first/last residue of sequence (and is at bad sequence)"),
6003 &&
NStr::Find(except_text,
"rearrangement required for product") != string::npos) {
6006 NStr::Find(comment_text,
"coding region disrupted by sequencing gap") != string::npos) {
6010 }
else if (! no_nonconsensus_except) {
6021 "5' partial is not at beginning of sequence, gap, or consensus splice site",
6025 "3' partial is not at end of sequence, gap, or consensus splice site",
6034 "Start does not include first/last residue of sequence", *(feat.
GetSeq_feat()));
6035 }
else if (j == 1) {
6037 "Stop does not include first/last residue of sequence", *(feat.
GetSeq_feat()));
6072 const CBioseq& seq,
bool is_complete)
6077 if ((*it)->IsGenbank()) {
6078 if ((*it)->GetGenbank().IsSetAccession()) {
6079 accession = (*it)->GetGenbank().GetAccession();
6082 }
else if ((*it)->IsDdbj()) {
6083 if ((*it)->GetDdbj().IsSetAccession()) {
6084 accession = (*it)->GetDdbj().GetAccession();
6087 }
else if ((*it)->IsGi()) {
6093 unsigned int nummrna = 0, numcds = 0, numcrgn = 0, numvseg = 0, numdseg = 0, numjseg = 0;
6094 int numgene = 0, num_pseudomrna = 0, num_pseudocds = 0, num_rearrangedcds = 0;
6095 vector< CConstRef < CSeq_id > > cds_products, mrna_products;
6097 int num_full_length_prot_ref = 0;
6104 bool is_emb =
false, non_pseudo_16S_rRNA =
false;
6107 if ((*seq_it)->IsEmbl()) {
6109 }
else if ((*seq_it)->IsOther()) {
6114 int firstcdsgencode = 0;
6115 bool mixedcdsgencodes =
false;
6128 string locus = gene_ref.
GetLocus();
6131 const CSeq_feat& gene_feat = gene_it->GetOriginalFeature();
6138 "locus collides with locus_tag in another gene", feat);
6148 cds_products.push_back(ref);
6157 num_rearrangedcds++;
6166 if ((*it)->IsId()) {
6167 cdsgencode = (*it)->GetId();
6170 if (cdsgencode != 0) {
6171 if (firstcdsgencode == 0) {
6172 firstcdsgencode = cdsgencode;
6173 }
else if (firstcdsgencode != cdsgencode) {
6174 mixedcdsgencodes =
true;
6184 mrna_products.push_back(ref);
6200 non_pseudo_16S_rRNA =
true;
6225 if ((
range.IsWhole() ||
6230 num_full_length_prot_ref++;
6239 "Genes on protein sequences with PGAP annotation should not have locus tags.", feat);
6245 "Invalid feature for a protein Bioseq.", feat);
6260 bool slippage_except =
false;
6261 bool circular_rna =
false;
6269 if ((! excpet || ! slippage_except) && ! circular_rna) {
6272 "Multi-interval CDS feature is invalid on an mRNA "
6282 "mRNA feature is invalid on an mRNA (cDNA) Bioseq.",
6288 if (imp.
GetKey() ==
"intron") {
6290 "Invalid feature for an mRNA Bioseq.", feat);
6301 "Feature has 'far' location - accession not packaged in record",
6313 bool isEukaryote =
false;
6314 bool isMicrosporidia =
false;
6323 isMicrosporidia =
true;
6327 if (isEukaryote && (! isMicrosporidia) &&
6338 "Improper 16S ribosomal RNA",
6345 if (mixedcdsgencodes) {
6351 "Multiple CDS genetic codes on sequence", seq);
6356 if (is_aa && num_full_length_prot_ref == 0) {
6365 const CSeq_feat& prot_feat = it->GetOriginalFeature();
6368 if ((
range.IsWhole() ||
6369 (
range.GetFrom() == 0 &&
range.GetTo() == parent_len - 1)) &&
6373 num_full_length_prot_ref++;
6375 }
catch (
const exception&) {
6377 if ((
range.IsWhole() ||
6378 (
range.GetFrom() == 0 &&
range.GetTo() == parent_len - 1)) &&
6379 (! it->GetData().GetProt().IsSetProcessed() ||
6382 num_full_length_prot_ref++;
6389 if (is_aa && num_full_length_prot_ref == 0 && ! is_virtual && !
m_Imp.
IsPDB()) {
6393 if (is_aa && num_full_length_prot_ref > 1 && !
SeqIsPatent(seq)) {
6396 +
" full-length protein features present on protein", seq);
6411 bool cds_products_unique =
true;
6412 if (cds_products.size() > 1) {
6413 stable_sort(cds_products.begin(), cds_products.end(),
s_SeqIdCompare);
6417 bool mrna_products_unique =
true;
6418 if (mrna_products.size() > 1) {
6419 stable_sort(mrna_products.begin(), mrna_products.end(),
s_SeqIdCompare);
6423 if (numcds > 0 && nummrna > 1) {
6424 if (cds_products.size() > 0 && cds_products.size() + num_pseudocds + num_rearrangedcds != numcds) {
6430 if (cds_products.size() > 0 && (! cds_products_unique)) {
6432 "CDS products are not unique", seq);
6434 if (mrna_products.size() > 0 && mrna_products.size() + num_pseudomrna != nummrna) {
6440 if (mrna_products.size() > 0 && (! mrna_products_unique)) {
6442 "mRNA products are not unique", seq);
6465 }
catch (
const exception& e) {
6466 if (
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
6468 string(
"Exception while validating Seqfeat Context. EXCEPTION: ") +
6486 if ((*id_it)->IsGi()) {
6487 return (*id_it)->GetGi();
6541 bool match1 =
false, match2 =
false;
6542 bool has1 =
false, has2 =
false;
6544 if ((*itx)->IsSetId()) {
6553 if ((*itx)->IsSetId()) {
6561 if ((has1 || has2) && (! match1 || ! match2)) {
6580 if (
id.GetGi() == gi) {
6588 }
catch (
const std::exception&) {
6681 const CSeq_loc& utr5_loc = s->second->GetLocation();
6684 if (utr5_start == mrna_start) {
6685 if (mrna_stop >= utr5_stop && mrna_stop - utr5_stop < 6) {
6687 }
else if (utr5_stop >= mrna_stop && utr5_stop - mrna_stop < 6) {
6696 const CSeq_loc& utr3_loc = s->second->GetLocation();
6699 if (utr3_stop == mrna_stop) {
6700 if (mrna_start >= utr3_start && mrna_start - utr3_start < 6) {
6702 }
else if (utr3_start >= mrna_start && utr3_start - mrna_start < 6) {
6720 m_ProductsUnique(
true)
6776 if (unmatched_mrnas.
empty()) {
6781 if (! (*xref_it)->IsSetId() ||
6782 ! (*xref_it)->GetId().IsLocal()) {
6786 ITERATE(vector<CSeq_feat_Handle>, h, handles) {
6791 if (mrna_it != unmatched_mrnas.
end()) {
6794 unmatched_mrnas.
erase(mrna_it);
6806 if (unmatched_mrnas.
empty()) {
6814 if (feats.size() == 0) {
6816 while (mrna_it != unmatched_mrnas.
end()) {
6817 if (
Overlaps(mrna_it->second->GetSeqfeat())) {
6820 unmatched_mrnas.
erase(mrna_it);
6828 if (mrna_it != unmatched_mrnas.
end()) {
6834 unmatched_mrnas.
erase(mrna_it);
6847 string product_string;
6850 return product_string;
6855 return product_string;
6861 list<CConstRef<CSeq_feat>>::iterator it =
m_OtherMrnas.begin();
6862 list<string> product_strings;
6865 if (mrna_it == unmatched_mrnas.
end()) {
6869 product_strings.push_back(product_string);
6879 const auto num_products = product_strings.size();
6880 if (product_strings.size() > 1) {
6882 product_strings.sort();
6883 product_strings.unique();
6884 const auto num_unique_products = product_strings.size();
6885 if (num_unique_products == num_products) {
6928 if (num_mrnas < 2) {
6934 +
" mRNAs, but product locations are unique",
6953 const auto& cds_feat = cds_match.
GetSeqfeat();
6958 if (xrefs_match == 2) {
6960 "MrnaProteinLink inconsistent with feature ID cross-references",
6968 size_t start_pos =
NStr::Find(protein_id,
"gnl|");
6969 if (start_pos == string::npos) {
6972 start_pos =
NStr::Find(protein_id,
"|", start_pos + 5);
6973 if (start_pos == string::npos) {
6976 size_t end_pos =
NStr::Find(protein_id,
"|", start_pos + 1);
6978 if (end_pos == string::npos) {
6979 prot_tag = protein_id.substr(start_pos + 1);
6981 prot_tag = protein_id.substr(start_pos + 1, end_pos - start_pos - 1);
7004 "CDS-mRNA pair has one missing protein_id (" + protein_id +
")", cds);
7012 "CDS-mRNA pair has mismatching protein_ids (" +
7020 if ((*id_it)->IsGeneral()) {
7023 "CDS-mRNA pair has mismatching protein_ids (" +
7024 (*id_it)->AsFastaString() +
", " + protein_id +
")", cds);
7038 if (
NStr::Equal(protein_id, (*id_it)->AsFastaString())) {
7047 "CDS-mRNA pair has one missing protein_id (" + protein_id +
")", cds);
7057 const auto& cds_feat = cds_match.
GetSeqfeat();
7058 string cds_transcript_id;
7059 string mrna_transcript_id;
7060 string mrna_protein_id;
7061 bool must_reconcile =
false;
7062 if (mrna_feat.IsSetQual()) {
7064 if ((*q)->IsSetQual() && (*q)->IsSetVal()) {
7066 mrna_transcript_id = (*q)->GetVal();
7067 must_reconcile =
true;
7069 mrna_protein_id = (*q)->GetVal();
7070 must_reconcile =
true;
7075 if (cds_feat.IsSetQual()) {
7077 if ((*q)->IsSetQual() && (*q)->IsSetVal()) {
7079 cds_transcript_id = (*q)->GetVal();
7080 must_reconcile =
true;
7086 if (must_reconcile) {
7087 if (!
NStr::Equal(mrna_transcript_id, cds_transcript_id)) {
7089 "CDS-mRNA pair has mismatching transcript_ids ("
7090 + cds_transcript_id +
"," + mrna_transcript_id +
")",
7150 for (
auto it : feat.
GetQual()) {
7236 if (strand1 == strand2) {
7243 "No parent for (pseudo) CdRegion", ft1);
7246 "No parent for CdRegion", ft1);
7254 "No parent for (pseudo) CdRegion", ft2);
7257 "No parent for CdRegion", ft2);
7271 unsigned int lclcds = 0, lclcrgn = 0, lclvseg = 0, lcldseg = 0, lcljseg = 0, lclnone = 0, lclothr = 0;
7278 if (sbt == CSeqFeatData::ESubtype::eSubtype_cdregion) {
7285 if (ptyp == CSeqFeatData::ESubtype::eSubtype_C_region) {
7287 }
else if (ptyp == CSeqFeatData::ESubtype::eSubtype_V_segment) {
7289 }
else if (ptyp == CSeqFeatData::ESubtype::eSubtype_D_segment) {
7291 }
else if (ptyp == CSeqFeatData::ESubtype::eSubtype_J_segment) {
7309 locus = gene.GetLocus();
7311 locus = gene.GetLocus_tag();
7313 CConstRef<CSeq_loc> gloc = gne->GetMappedLocation();
7316 gloc->GetLabel(&locus);
7321 if (locus.length() > 0) {
7322 PostErr(eDiag_Warning, eErr_SEQ_FEAT_CDSdoesNotMatchVDJC,
7323 "No parent for CdRegion (gene is " + locus +
")", *sf);
7325 PostErr(eDiag_Warning, eErr_SEQ_FEAT_CDSdoesNotMatchVDJC,
7326 "No parent for CdRegion", *sf);
7349 list<CRef<CCdsMatchInfo>> cds_list;
7355 if (! mapped_feat.IsSetData()) {
7359 if (mapped_feat.GetData().IsCdregion()) {
7360 const auto& cds_feat = *mapped_feat.GetSeq_feat();
7365 if (cds_feat.IsSetPseudo() && cds_feat.GetPseudo()) {
7366 cds_match->SetPseudo();
7374 cds_list.push_back(cds_match);
7376 const auto& feat = *mapped_feat.GetSeq_feat();
7381 if (! mrna_map.
empty()) {
7385 const size_t num_mrna = mrna_map.
size();
7388 for (
auto&& cds : cds_list) {
7389 cds->AssignXrefMatch(mrna_map, tse);
7392 if (! mrna_map.
empty()) {
7393 for (
auto&& cds : cds_list) {
7394 if (! cds->HasMatch()) {
7395 cds->AssignOverlapMatch(mrna_map, *
m_Scope);
7400 int num_matched_cds = 0;
7401 int num_unmatched_cds = 0;
7402 for (
auto&& cds : cds_list) {
7410 if (cds->IsPseudo() ||
7411 (cds->GetSeqfeat().IsSetExcept() &&
7412 cds->GetSeqfeat().IsSetExcept_text() &&
7413 NStr::Find(cds->GetSeqfeat().GetExcept_text(),
"rearrangement required for product") != string::npos)) {
7414 cds->NeedsMatch(
false);
7418 if (cds->HasMatch()) {
7421 ++num_unmatched_cds;
7427 if (num_unmatched_cds > 0 &&
7429 if (num_unmatched_cds >= 10) {
7430 const auto nmcds = num_matched_cds + num_unmatched_cds;
7434 +
" CDSs unmatched",
7437 for (
const auto& cds : cds_list) {
7438 if (! cds->HasMatch() && cds->NeedsMatch()) {
7440 "Unmatched CDS", cds->GetSeqfeat());
7447 size_t num_unmatched_mrna = 0;
7456 num_unmatched_mrna++;
7461 if (num_unmatched_mrna > 10) {
7469 "No CDS location match for 1 mRNA", it->second->GetSeqfeat());
7483 TFeatCount cds_count, mrna_count;
7487 TGeneList gene_labels, gene_locus_tags;
7496 const CSeq_feat& feat = it->GetOriginalFeature();
7501 if (cds_count.find(gene) == cds_count.end()) {
7502 cds_count[gene] = mrna_count[gene] = 0;
7518 ITERATE (TFeatCount, it, cds_count) {
7520 mrna_num = mrna_count[it->first];
7521 if (cds_num > 0 && mrna_num > 1 && cds_num != mrna_num) {
7525 ") count for gene", *it->first);
7552 const size_t num_cds = cd_region_feats.size();
7562 strand = cd_region_feats.back().GetLocation().GetStrand();
7565 bool is_mrna =
false;
7581 "CDS should not be on minus strand of mRNA molecule", cdregion_it->GetOriginalFeature());
7586 if (is_mrna || (num_cds == 1 && num_gene < 2)) {
7596 bool first_cds =
true;
7599 vector<CCacheImpl::SFeatKey> featKeys;
7606 featKeys.push_back(multi_feat_key_template);
7608 featKeys.push_back(multi_feat_key_template);
7610 featKeys.push_back(multi_feat_key_template);
7612 featKeys.push_back(multi_feat_key_template);
7626 "3'UTR is not on minus strand", cug_it->GetOriginalFeature());
7627 }
else if (utr5_right > 0 && utr5_right + 1 != this_left) {
7629 "Previous 5'UTR does not abut next 3'UTR", cug_it->GetOriginalFeature());
7631 utr3_right = this_right;
7633 if (utr3_right > 0 && utr3_right + 1 != this_left) {
7635 "CDS does not abut 3'UTR", cug_it->GetOriginalFeature());
7638 cds_right = this_right;
7642 "5'UTR is not on minus strand", cug_it->GetOriginalFeature());
7643 }
else if (cds_right > 0 && cds_right + 1 != this_left) {
7645 "5'UTR does not abut CDS", cug_it->GetOriginalFeature());
7647 utr5_right = this_right;
7659 "5'UTR is not on plus strand", cug_it->GetOriginalFeature());
7660 }
else if (utr3_right > 0 && utr3_right + 1 != this_left) {
7662 "Previous 3'UTR does not abut next 5'UTR", cug_it->GetOriginalFeature());
7664 utr5_right = this_right;
7666 if (utr5_right > 0 && utr5_right + 1 != this_left && first_cds ) {
7669 "5'UTR does not abut CDS", cug_it->GetOriginalFeature());
7672 cds_right = this_right;
7676 "3'UTR is not on plus strand", cug_it->GetOriginalFeature());
7677 }
else if (cds_right > 0 && cds_right + 1 != this_left && num_3utr == 1) {
7679 "CDS does not abut 3'UTR", cug_it->GetOriginalFeature());
7681 if (is_mrna && num_cds == 1 && num_3utr == 1 && this_right != (
int) seq.
GetBioseqLength() - 1) {
7683 "3'UTR does not extend to end of mRNA", cug_it->GetOriginalFeature());
7709 if (!
rna.IsSetType()) {
7711 }
else if (!
rna.IsSetExt()) {
7714 const string& product =
rna.GetExt().GetName();
7738 if (
rna.GetExt().IsName()) {
7739 product =
rna.GetExt().GetName();
7744 product = (*it)->GetVal();
7749 }
else if (
rna.GetExt().IsGen()) {
7750 if (
rna.GetExt().GetGen().IsSetProduct()) {
7751 product =
rna.GetExt().GetGen().GetProduct();
7781 if (start < 0 || (
unsigned int) stop >= seq.
GetInst_Length() || start > stop) {
7788 if ((*it)->IsLiteral()) {
7789 this_len = (*it)->GetLiteral().GetLength();
7790 }
else if ((*it)->IsLoc()) {
7793 if ((*it)->IsLiteral() &&
7794 (! (*it)->GetLiteral().IsSetSeq_data() || (*it)->GetLiteral().GetSeq_data().IsGap())) {
7875 "Inconsistent strands for rRNA components",
7881 }
else if (right1 + 1 < left2) {
7888 "ITS does not abut adjacent rRNA component",
7894 "ITS does not abut adjacent rRNA component",
7898 }
else if (right1 + 1 > left2) {
7904 "ITS overlaps adjacent rRNA component",
7914 "ITS overlaps adjacent rRNA component",
7922 }
else if (! is_organelle) {
7933 "Problem with order of abutting rRNA components",
7945 "Problem with order of abutting rRNA components",
7970 if (! same_annot && ! same_label) {
7991 if (g1 && g2 && g1 != g2) {
7999 &&
prev.IsSetPartial() &&
prev.GetPartial()) {
8008 &&
prev.IsSetPseudo() &&
prev.GetPseudo()) {
8059 "Duplicate feature", feat2);
8069 "Features have identical intervals, but labels differ",
8077 "Duplicate feature (packaged in different feature table)",
8084 "Features have identical intervals, but labels "
8085 "differ (packaged in different feature table)",
8120 string msg =
"Signal, Transit, or Mature peptide features overlap";
8132 cds_loc = cds_loc.substr(8);
8139 cds_loc =
" (parent CDS is on " + cds_loc +
")";
8143 }
catch (
const exception&) {
8146 if (! reported_last_peptide) {
8156 reported_last_peptide =
true;
8158 reported_last_peptide =
false;
8172 bool fruit_fly =
false;
8199 CCacheImpl::TFeatValue::const_iterator curr_it = prev_it;
8207 if (curr_start > prev_end) {
8216 CCacheImpl::TFeatValue::const_iterator prev_prot =
m_AllFeatIt->begin();
8218 CCacheImpl::TFeatValue::const_iterator curr_prot = prev_prot;
8220 bool reported_last_peptide =
false;
8221 for (; curr_prot !=
m_AllFeatIt->end(); ++prev_prot, ++curr_prot) {
8225 }
catch (
const exception& e) {
8226 if (
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
8228 string(
"Exception while validating duplicate/overlapping features. EXCEPTION: ") +
8236 vector<int> intervalpoints;
8243 intervalpoints.push_back(ivl.
GetFrom());
8244 intervalpoints.push_back(ivl.
GetTo());
8248 intervalpoints.push_back(ivl.
GetFrom());
8249 intervalpoints.push_back(ivl.
GetTo());
8253 }
else if (part.
IsPnt()) {
8255 intervalpoints.push_back(pnt.
GetPoint());
8256 intervalpoints.push_back(pnt.
GetPoint());
8261 return intervalpoints;
8269 while (feat_ci_dup) {
8276 const CSeq_loc& part = curr.GetEmbeddingSeq_loc();
8281 }
else if (part.
IsPnt()) {
8288 if (start + 1 == fr && stop - 1 == to) {
8291 if (start + 1 == fr && to ==
max) {
8312 for (
CFeat_CI feat_ci(bsh, sel); feat_ci; ++feat_ci) {
8314 const CSeq_feat& const_feat = feat_ci->GetOriginalFeature();
8319 unsigned len = (unsigned)intervalpoints.size();
8325 bool twintron =
true;
8327 for (
unsigned pos = 1; pos <
max; pos += 2) {
8328 Int4 intL = intervalpoints[pos];
8329 Int4 intR = intervalpoints[pos + 1];
8344 "Multi-interval intron contains possible twintron",
8348 "An intron should not have multiple intervals",
8353 if (
NStr::Find(e.
what(),
"Error: Cannot resolve") == string::npos) {
8365 if ((*db)->CanGetDb()) {
8378 bool has_local =
false, has_genbank =
false;
8379 bool has_gi =
false, has_tpa =
false, has_bankit =
false, has_smart =
false;
8382 switch ((*it)->Which()) {
8403 if ((*it)->GetGeneral().IsSetDb()) {
8416 if (has_genbank)
return false;
8417 if (has_tpa)
return true;
8418 if (has_refseq)
return false;
8419 if (has_bankit)
return true;
8420 if (has_smart)
return true;
8421 if (has_gi)
return false;
8422 if (has_local)
return true;
8431 out_date_str = date.
GetStr();
8432 }
else if (date.
IsStd()) {
8434 date.
GetDate(&out_date_str,
"%{%3N %{%D, %}%}%Y");
8454 vector<string> sc_prefixes;
8462 sc_prefixes.push_back(
prefix);
8468 sort(sc_prefixes.begin(), sc_prefixes.end());
8471 ITERATE(vector<string>, it, sc_prefixes) {
8477 "Multiple structured comments with prefix " + previous,
8486 "Multiple structured comments with prefix " + previous,
8577 bool is_master =
false;
8592 switch (sid.
Which()) {
8619 bool embl_or_ddbj =
false;
8621 if ((*id)->IsEmbl() || (*id)->IsDdbj()) {
8622 embl_or_ddbj =
true;
8627 return embl_or_ddbj;
8634 if ((*id)->IsGenbank()) {
8645 if ((*id)->IsOther()) {
8662 "Undesired multiple comment descriptors, identical text",
8687 if ((*id_it)->IsOther() && (*id_it)->GetOther().IsSetAccession()) {
8688 string accession = (*id_it)->GetOther().GetAccession();
8698 if (! is_nc && ! is_ac) {
8733 int tech = -1, completeness = -1;
8739 bool is_genome_assembly =
false;
8740 bool is_assembly =
false;
8741 bool is_finished_status =
false;
8750 switch (desc.
Which()) {
8754 if (pos != string::npos) {
8757 if (pos != string::npos) {
8760 if (pos != string::npos) {
8761 bool report_fasta_brackets =
true;
8763 if ((*id_it)->IsGeneral()) {
8764 const CDbtag& dbtag = (*id_it)->GetGeneral();
8768 report_fasta_brackets =
false;
8774 if (report_fasta_brackets) {
8776 const CBioSource& bsrc = (*bs_ref).GetSource();
8782 if (pos2 != string::npos) {
8784 if (pos2 != string::npos) {
8785 report_fasta_brackets =
false;
8792 if (report_fasta_brackets) {
8794 "Title may have unparsed [...=...] construct",
8805 vector<string> keywords;
8808 keywords.push_back(*
key);
8815 switch (desc.
Which()) {
8852 if (! use_ctx || ! use_ctx->
IsSet()
8858 "Inconsistent create_dates [" + current_str +
8859 "] and [" + create_str +
"]", *use_ctx, desc);
8862 create_desc = &desc;
8876 update_desc = &desc;
8918 "Non-TPA record " + id_str +
" should not have TpaAssembly object", seq);
8924 "RefGeneTracking object should only be in RefSeq record",
8943 "Structured Comment is non-compliant, keyword should be removed",
ctx, desc);
8948 if ((*field)->IsSetLabel() && (*field)->GetLabel().IsStr()) {
8949 if (
NStr::EqualNocase((*field)->GetLabel().GetStr(),
"StructuredCommentPrefix")) {
8950 const string&
prefix = (*field)->GetData().GetStr();
8952 is_genome_assembly =
true;
8956 }
else if (
NStr::EqualNocase((*field)->GetLabel().GetStr(),
"Current Finishing Status")) {
8957 const string&
prefix = (*field)->GetData().GetStr();
8959 is_finished_status =
true;
8998 if ((*id_it)->IsOther()) {
9025 title = title.substr (11);
9030 "RefSeq nucleotide title does not start with organism name",
9033 }
else if (seq.
IsAa()) {
9034 taxname =
"[" + taxname +
"]";
9038 "RefSeq protein title does not end with organism name",
9056 "Undesired multiple name descriptors, identical text",
9060 "Undesired multiple name descriptors, different text",
9072 "Nucleic acid with protein sequence method",
9085 const string&
buf = seq.
GetId().front()->AsFastaString();
9100 "Multiple GenBank blocks",
ctx, *last_gb);
9105 "Multiple EMBL blocks",
ctx, *last_embl);
9110 "Multiple PIR blocks",
ctx, *last_pir);
9115 "Multiple PDB blocks",
ctx, *last_pdb);
9120 "Multiple PRF blocks",
ctx, *last_prf);
9125 "Multiple SWISS-PROT blocks",
ctx, *last_sp);
9143 bool has_tpa_inf =
false, has_tpa_exp =
false;
9151 if (has_tpa_inf && has_tpa_exp) {
9153 "TPA:experimental and TPA:inferential should not both be in the same set of keywords",
9164 "TSA sequence should not be DNA", seq);
9175 int& last_completeness,
9181 bool is_synthetic_construct =
false;
9182 bool is_artificial =
false;
9187 if (! is_synthetic_construct) {
9190 if (! is_artificial) {
9200 if (seq_biomol < 0) {
9201 seq_biomol = biomol;
9208 "Nucleic acid with Molinfo = peptide",
ctx, desc);
9213 if (! is_artificial) {
9215 "Molinfo-biomol = other genetic",
ctx, desc);
9224 "Molinfo-biomol other used",
ctx, desc);
9234 "] used on protein",
ctx, desc);
9236 if (biomol != seq_biomol) {
9238 "Inconsistent Molinfo-biomol [" +
9251 "mRNA should be single stranded not double stranded",
ctx, desc);
9254 if (is_synthetic_construct && ! seq.
IsAa()) {
9272 "Nucleic acid with protein sequence method",
ctx, desc);
9293 "Protein with nucleic acid sequence method",
ctx, desc);
9319 "HTGS/STS/GSS/WGS sequence should be genomic", seq);
9324 "HTGS/STS/GSS/WGS sequence should not be RNA", seq);
9332 "EST sequence should be mRNA", seq);
9344 bool has_draft =
false;
9345 bool has_prefin =
false;
9346 bool has_activefin =
false;
9347 bool has_fulltop =
false;
9357 has_activefin =
true;
9368 "HTGS 3 sequence should not have HTGS_DRAFT keyword", seq);
9372 "HTGS 3 sequence should not have HTGS_PREFIN keyword", seq);
9374 if (has_activefin) {
9376 "HTGS 3 sequence should not have HTGS_ACTIVEFIN keyword", seq);
9380 "HTGS 3 sequence should not have HTGS_FULLTOP keyword", seq);
9384 if (last_tech > 0) {
9385 if (last_tech != tech) {
9394 if (last_tech > -1) {
9395 if (last_tech != 0) {
9398 +
"] and [0]",
ctx, desc);
9406 if (last_completeness > 0) {
9416 if (last_completeness > -1) {
9417 if (last_completeness != 0) {
9420 +
"] and [0]",
ctx, desc);
9423 last_completeness = 0;
9428 if (closest_molinfo) {
9482 const string& lineage,
9483 const string& stranded_mol,
9503 "dsRNA virus should be genomic RNA",
9522 mssg =
"single-stranded RNA";
9524 mssg =
"double-stranded RNA";
9526 mssg =
"single-stranded DNA";
9528 mssg =
"double-stranded DNA";
9530 mssg =
"unknown-stranded RNA";
9532 mssg =
"unknown-stranded DNA";
9536 "Taxonomy indicates " + mssg +
9538 ") is conflicting.",
9545 const string& lineage,
9546 const string& stranded_mol,
9566 "Ambisense virus should be genomic RNA or cRNA",
9575 "Retrovirus should be genomic RNA or genomic DNA",
9581 bool negative_strand_virus =
false;
9582 bool plus_strand_virus =
false;
9584 negative_strand_virus =
true;
9587 plus_strand_virus =
true;
9589 if (! negative_strand_virus && ! plus_strand_virus) {
9593 bool is_synthetic =
false;
9595 is_synthetic =
true;
9596 }
else if (
source.IsSetOrigin()) {
9601 is_synthetic =
true;
9605 bool has_cds =
false;
9606 bool has_plus_cds =
false;
9607 bool has_minus_cds =
false;
9613 has_minus_cds =
true;
9615 has_plus_cds =
true;
9617 if (has_minus_cds && has_plus_cds) {
9624 bool has_minus_misc_feat =
false;
9625 bool has_plus_misc_feat =
false;
9633 has_minus_misc_feat =
true;
9635 has_plus_misc_feat =
true;
9638 if (has_minus_misc_feat && has_plus_misc_feat) {
9645 if (negative_strand_virus) {
9647 if (has_minus_cds) {
9650 "Negative-sense single-stranded RNA virus with minus strand CDS should be genomic RNA",
9655 if (has_plus_cds && ! is_synthetic && ! is_ambisense) {
9658 "Negative-sense single-stranded RNA virus with plus strand CDS should be cRNA",
9663 if (has_minus_misc_feat) {
9666 "Negative-sense single-stranded RNA virus with nonfunctional minus strand misc_feature should be genomic RNA",
9671 if (has_plus_misc_feat && ! is_synthetic && ! is_ambisense) {
9674 "Negative-sense single-stranded RNA virus with nonfunctional plus strand misc_feature should be cRNA",
9680 if (plus_strand_virus) {
9682 if (has_minus_cds) {
9684 "CDS should not be on minus strand of a positive-sense single-stranded RNA virus",
9688 if (! is_synthetic && ! is_ambisense) {
9691 "Positive-sense single-stranded RNA virus should be genomic RNA",
9702 {
"Alphasatellitidae",
"ssDNA"},
9703 {
"Anelloviridae",
"ssDNA(-)"},
9704 {
"Bacilladnaviridae",
"ssDNA"},
9705 {
"Bidnaviridae",
"ssDNA"},
9706 {
"Circoviridae",
"ssDNA(+/-)"},
9707 {
"Geminiviridae",
"ssDNA(+/-)"},
9708 {
"Genomoviridae",
"ssDNA"},
9709 {
"Hepadnaviridae",
"dsDNA-RT"},
9710 {
"Inoviridae",
"ssDNA(+)"},
9711 {
"Microviridae",
"ssDNA(+)"},
9712 {
"Nanoviridae",
"ssDNA(+)"},
9713 {
"Ortervirales",
"ssRNA-RT"},
9714 {
"Caulimoviridae",
"dsDNA-RT"},
9715 {
"Parvoviridae",
"ssDNA(+/-)"},
9716 {
"Alphapleolipovirus",
"dsDNA; ssDNA"},
9717 {
"Riboviria",
"RNA"},
9718 {
"Albetovirus",
"ssRNA(+)"},
9719 {
"Alphatetraviridae",
"ssRNA(+)"},
9720 {
"Alvernaviridae",
"ssRNA(+)"},
9721 {
"Amalgaviridae",
"dsRNA"},
9722 {
"Astroviridae",
"ssRNA(+)"},
9723 {
"Aumaivirus",
"ssRNA(+)"},
9724 {
"Avsunviroidae",
"ssRNA"},
9725 {
"Barnaviridae",
"ssRNA(+)"},
9726 {
"Benyviridae",
"ssRNA(+)"},
9727 {
"Birnaviridae",
"dsRNA"},
9728 {
"Botourmiaviridae",
"ssRNA(+)"},
9729 {
"Botybirnavirus",
"dsRNA"},
9730 {
"Bromoviridae",
"ssRNA(+)"},
9731 {
"Caliciviridae",
"ssRNA(+)"},
9732 {
"Carmotetraviridae",
"ssRNA(+)"},
9733 {
"Chrysoviridae",
"dsRNA"},
9734 {
"Closteroviridae",
"ssRNA(+)"},
9735 {
"Cystoviridae",
"dsRNA"},
9736 {
"Deltavirus",
"ssRNA(-)"},
9737 {
"dsRNA viruses",
"dsRNA"},
9738 {
"Endornaviridae",
"dsRNA"},
9739 {
"Flaviviridae",
"ssRNA(+)"},
9740 {
"Hepeviridae",
"ssRNA(+)"},
9741 {
"Hypoviridae",
"ssRNA(+)"},
9742 {
"Idaeovirus",
"ssRNA(+)"},
9743 {
"Kitaviridae",
"ssRNA(+)"},
9744 {
"Leviviridae",
"ssRNA(+)"},
9745 {
"Luteoviridae",
"ssRNA(+)"},
9746 {
"Matonaviridae",
"ssRNA(+)"},
9747 {
"Megabirnaviridae",
"dsRNA"},
9748 {
"Narnaviridae",
"ssRNA(+)"},
9749 {
"Haploviricotina",
"ssRNA(-)"},
9750 {
"Arenaviridae",
"ssRNA(+/-)"},
9751 {
"Coguvirus",
"ssRNA(-)"},
9752 {
"Cruliviridae",
"ssRNA(-)"},
9753 {
"Fimoviridae",
"ssRNA(-)"},
9754 {
"Hantaviridae",
"ssRNA(-)"},
9755 {
"Leishbuviridae",
"ssRNA(-)"},
9756 {
"Mypoviridae",
"ssRNA(-)"},
9757 {
"Nairoviridae",
"ssRNA(-)"},
9758 {
"Peribunyaviridae",
"ssRNA(-)"},
9759 {
"Phasmaviridae",
"ssRNA(-)"},
9760 {
"Banyangvirus",
"ssRNA(+/-)"},
9761 {
"Beidivirus",
"ssRNA(-)"},
9762 {
"Goukovirus",
"ssRNA(-)"},
9763 {
"Horwuvirus",
"ssRNA(-)"},
9764 {
"Hudivirus",
"ssRNA(-)"},
9765 {
"Hudovirus",
"ssRNA(-)"},
9766 {
"Kabutovirus",
"ssRNA(-)"},
9767 {
"Laulavirus",
"ssRNA(-)"},
9768 {
"Mobuvirus",
"ssRNA(-)"},
9769 {
"Phasivirus",
"ssRNA(-)"},
9770 {
"Phlebovirus",
"ssRNA(+/-)"},
9771 {
"Pidchovirus",
"ssRNA(-)"},
9772 {
"Tenuivirus",
"ssRNA(-)"},
9773 {
"Wenrivirus",
"ssRNA(-)"},
9774 {
"Wubeivirus",
"ssRNA(-)"},
9775 {
"Tospoviridae",
"ssRNA(+/-)"},
9776 {
"Wupedeviridae",
"ssRNA(-)"},
9777 {
"Insthoviricetes",
"ssRNA(-)"},
9778 {
"Nidovirales",
"ssRNA(+)"},
9779 {
"Nodaviridae",
"ssRNA(+)"},
9780 {
"Papanivirus",
"ssRNA(+)"},
9781 {
"Partitiviridae",
"dsRNA"},
9782 {
"Permutotetraviridae",
"ssRNA(+)"},
9783 {
"Picobirnaviridae",
"dsRNA"},
9784 {
"Picornavirales",
"ssRNA(+)"},
9785 {
"Pospiviroidae",
"ssRNA"},
9786 {
"Potyviridae",
"ssRNA(+)"},
9787 {
"Quadriviridae",
"dsRNA"},
9788 {
"Reoviridae",
"dsRNA"},
9789 {
"Sarthroviridae",
"ssRNA(+)"},
9790 {
"Sinaivirus",
"ssRNA(+)"},
9791 {
"Solemoviridae",
"ssRNA(+)"},
9792 {
"Solinviviridae",
"ssRNA(+)"},
9793 {
"Togaviridae",
"ssRNA(+)"},
9794 {
"Tombusviridae",
"ssRNA(+)"},
9795 {
"Totiviridae",
"dsRNA"},
9796 {
"Tymovirales",
"ssRNA(+)"},
9797 {
"Virgaviridae",
"ssRNA(+)"},
9798 {
"Virtovirus",
"ssRNA(+)"},
9799 {
"ssRNA viruses",
"ssRNA"},
9800 {
"unclassified ssRNA viruses",
"ssRNA"},
9801 {
"unclassified ssRNA negative-strand viruses",
"ssRNA(-)"},
9802 {
"unclassified ssRNA positive-strand viruses",
"ssRNA(+)"},
9803 {
"unclassified viroids",
"ssRNA"},
9804 {
"DNA satellites",
"DNA"},
9805 {
"RNA satellites",
"RNA"},
9806 {
"Smacoviridae",
"ssDNA"},
9807 {
"Spiraviridae",
"ssDNA(+)"},
9808 {
"Tolecusatellitidae",
"ssDNA"},
9809 {
"unclassified viruses",
"unknown"},
9810 {
"unclassified DNA viruses",
"DNA"},
9811 {
"unclassified archaeal dsDNA viruses",
"dsDNA"},
9812 {
"unclassified dsDNA phages",
"dsDNA"},
9813 {
"unclassified dsDNA viruses",
"dsDNA"},
9814 {
"unclassified ssDNA bacterial viruses",
"ssDNA"},
9815 {
"unclassified ssDNA viruses",
"ssDNA"},
9816 {
"environmental samples",
"unknown"},
9830 for (
auto it : moltypes) {
9832 if (it->GetIval2() == 1) {
9833 viral_map [sName] = it->GetSval();
9857 return "ssRNA(+/-)";
9862 return "ssRNA(+/-)";
9867 return "ssRNA(+/-)";
9872 return "ssRNA(+/-)";
9885 if (s_ViralMap.empty()) {
9886 for (
const auto& x : kViralStrandMap) {
9892 for (
const auto& x : s_ViralMap) {
9907 if (new_mod != old_mod) {
9922 int last_na_mod = -1;
9923 int last_organelle = -1;
9924 int last_partialness = -1;
9925 int last_left_right = -1;
9930 CSeqdesc::TModif::const_iterator it = modif.begin();
9931 while (it != modif.end()) {
9969 last_left_right = modval;
9985 int last_na_mol = 0;
9993 "Nucleic acid with GIBB-mol = peptide",
10000 "GIBB-mol unknown or other used",
10010 if (last_na_mol != modval) {
10017 last_na_mol = modval;
10033 if (
source.CanGetOrigin() &&
10037 if (
source.CanGetOrg() &&
source.GetOrg().CanGetOrgname()) {
10058 const CRNA_ref& rna_ref =
fi->GetData().GetRna();
10070 const CDate& update,
10071 const CDate& create,
10082 string err_msg =
"Inconsistent create_date [";
10083 err_msg += create_str;
10084 err_msg +=
"] and update_date [";
10085 err_msg += update_str;
10090 err_msg, *
ctx, desc);
10103 bool is_wp =
false;
10105 const CSeq_id& sid = **sid_itr;
10110 if (acc ==
"WP_") {
10118 "Inconsistent organism names [" + this_org.
GetTaxname() +
10149 const string&
type)
10155 bool reported_first =
false;
10156 bool lastIsSplit =
false;
10157 const string* strp =
nullptr;
10162 strp = &(it->first);
10172 message =
"Colliding " +
type +
" in gene features";
10174 message =
"Colliding " +
type +
" (with different capitalization) in gene features";
10180 bool suppress_message =
false;
10184 it->second->IsSetExcept() && it->second->IsSetExcept_text()
10185 &&
NStr::FindNoCase(it->second->GetExcept_text(),
"trans-splicing") != string::npos) {
10187 suppress_message =
true;
10191 if (suppress_message) {
10194 (*it->second).GetLocation(),
10198 message +
", but feature locations are identical", *it->second);
10199 }
else if (! is_gene_locus) {
10205 if (! suppress_message && ((! isSplit) || (! lastIsSplit))) {
10206 if (! reported_first) {
10209 reported_first =
true;
10217 strp = &(it->first);
10238 const CSeq_feat& feat =
fi->GetOriginalFeature();
10264 if (gene_it != locus_map.
end()) {
10265 bool found =
false;
10274 "gene synonym has same value (" + syngene_it->first +
") as locus of another gene feature",
10275 *syngene_it->second);
10282 }
catch (
const exception& e) {
10283 if (
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
10285 string(
"Exception while validating colliding genes. EXCEPTION: ") +
10294 if (! seq.
IsNa()) {
10299 bool embl_ddbj =
false;
10301 if ((*id)->IsDdbj() || (*id)->IsEmbl()) {
10312 bool complete_genome =
false;
10318 sequence::CDeflineGenerator defline_generator;
10319 title = defline_generator.GenerateDefline(seq, *
m_Scope, sequence::CDeflineGenerator::fIgnoreExisting);
10323 if (! complete_genome) {
10330 complete_genome =
true;
10337 if (! complete_genome) {
10343 if (!
si || !
si->GetSource().IsSetDivision() ||
si->GetSource().GetDivision() !=
"BCT") {
10348 bool bioproject_accession_set =
false;
10351 if (ui->GetUser().IsSetData() && ui->GetUser().IsSetType() && ui->GetUser().GetType().IsStr() &&
NStr::EqualCase(ui->GetUser().GetType().GetStr(),
"DBLink")) {
10352 bioproject_accession_set = ! ui->GetUser().GetData().empty();
10357 if (bioproject_accession_set)
10361 bool no_gaps =
true;
10365 if (
delta.IsSet()) {
10369 if ((*part)->IsLiteral()) {
10396 "No BioProject Accession exists for what appears to be a complete genome",
10404 const CSeq_id* gb_id =
nullptr;
10409 const CDbtag* general_id =
nullptr;
10412 switch ((*id)->Which()) {
10414 gb_id =
id->GetPointer();
10418 gi = (*id)->GetGi();
10422 general_id = &((*id)->GetGeneral());
10430 if (gi ==
ZERO_GI && gb_id) {
10439 if (! id_set.empty()) {
10441 switch ((*id).Which()) {
10444 db_gb_id->
Assign(*(id->GetSeqId()));
10447 db_gi = (*id).GetGi();
10451 db_general_id->
Assign(*((*id).GetSeqId()));
10462 "New gi number (" + gi_str +
")" +
10466 if (gb_id && db_gb_id) {
10467 if (! gb_id->
Match(*db_gb_id)) {
10470 ") does not match one in NCBI sequence repository (" + db_gb_id->
AsFastaString() +
10471 ") on gi (" + gi_str +
")", seq);
10473 }
else if (gb_id) {
10475 "Gain of accession (" + gb_id->
AsFastaString() +
") on gi (" +
10476 gi_str +
") compared to the NCBI sequence repository", seq);
10477 }
else if (db_gb_id) {
10480 ") on gi (" + gi_str +
") compared to the NCBI sequence repository", seq);
10483 string new_gen_label, old_gen_label;
10484 if (general_id && db_general_id) {
10487 general_id->
GetLabel(&new_gen_label);
10489 "New general ID (" + new_gen_label +
10490 ") does not match one in NCBI sequence repository (" + old_gen_label +
10491 ") on gi (" + gi_str +
")", seq);
10493 }
else if (general_id) {
10494 general_id->
GetLabel(&new_gen_label);
10496 "Gain of general ID (" + new_gen_label +
") on gi (" +
10497 gi_str +
") compared to the NCBI sequence repository", seq);
10498 }
else if (db_general_id) {
10501 "Loss of general ID (" + old_gen_label +
") on gi (" +
10502 gi_str +
") compared to the NCBI sequence repository", seq);
10592 if ((*iter)->IsLoc()) {
10658 ENa_strand strand =
f->GetLocation().GetStrand();
10659 if (
f->GetData().IsCdregion()) {
10661 cds_minus =
f->GetSeq_feat();
10663 cds_plus =
f->GetSeq_feat();
10667 utr3_minus =
f->GetSeq_feat();
10669 utr3_plus =
f->GetSeq_feat();
10670 if (! cds_plus && utr5_plus &&
x_ReportUTRPair(*utr5_plus, *utr3_plus)) {
10672 "CDS not between 5'UTR and 3'UTR on plus strand", *utr3_plus);
10680 utr5_minus =
f->GetSeq_feat();
10681 if (! cds_minus && utr3_minus &&
x_ReportUTRPair(*utr5_minus, *utr3_minus)) {
10683 "CDS not between 5'UTR and 3'UTR on minus strand", *utr5_minus);
10685 utr5_minus.
Reset();
10687 utr3_minus.
Reset();
10689 utr5_plus =
f->GetSeq_feat();
10697 CValidError_bioseq::CmRNACDSIndex::CmRNACDSIndex()
10702 CValidError_bioseq::CmRNACDSIndex::~CmRNACDSIndex()
10714 bool match =
false;
10784 if (
id.GetGi() == gi) {
10791 }
catch (
const std::exception&) {
static CRef< CScope > m_Scope
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
@ eErr_SEQ_INST_HTGS_STS_GSS_WGSshouldNotBeRNA
@ eErr_SEQ_INST_BadDeltaSeq
@ eErr_SEQ_DESCR_InconsistentBioSources_ConLocation
@ eErr_SEQ_FEAT_mRNAgeneRange
@ eErr_SEQ_DESCR_FinishedStatusForWGS
@ eErr_SEQ_DESCR_InconsistentTaxName
@ eErr_GENERIC_MissingPubRequirement
@ eErr_SEQ_FEAT_TRNAinsideTMRNA
@ eErr_SEQ_INST_CompleteGenomeHasGaps
@ eErr_SEQ_INST_BadSeqIdCharacter
@ eErr_SEQ_INST_CompleteTitleProblem
@ eErr_SEQ_INST_HistoryGiCollision
@ eErr_SEQ_DESCR_UnwantedCompleteFlag
@ eErr_SEQ_INST_mRNAshouldBeSingleStranded
@ eErr_SEQ_FEAT_MultipleGenCodes
@ eErr_SEQ_DESCR_DBLinkBadAssembly
@ eErr_SEQ_DESCR_WGSmasterLacksBioProject
@ eErr_SEQ_INST_HighNContentStretch
@ eErr_SEQ_INST_HighNcontent3Prime
@ eErr_SEQ_INST_TerminalGap
@ eErr_SEQ_INST_MultipleAccessions
@ eErr_SEQ_DESCR_MultipleDBLinkObjects
@ eErr_SEQ_INST_BadProteinStart
@ eErr_SEQ_FEAT_PartialProblem3Prime
@ eErr_SEQ_FEAT_ProductShouldBeWhole
@ eErr_SEQ_INST_ProteinShouldNotHaveGaps
@ eErr_SEQ_INST_ESTshouldBemRNA
@ eErr_SEQ_DESCR_BadKeywordUnverified
@ eErr_SEQ_FEAT_ITSdoesNotAbutRRNA
@ eErr_SEQ_DESCR_InvalidMolInfo
@ eErr_SEQ_DESCR_InconsistentMolInfoTechnique
@ eErr_SEQ_DESCR_NoOrganismInTitle
@ eErr_SEQ_DESCR_InconsistentMolInfo
@ eErr_SEQ_INST_TSAMasterLacksStrucComm
@ eErr_SEQ_INST_WholeComponent
@ eErr_SEQ_FEAT_BadRRNAcomponentOrder
@ eErr_SEQ_INST_ReprInvalid
@ eErr_SEQ_INST_TSAseqGapProblem
@ eErr_SEQ_INST_HTGS_STS_GSS_WGSshouldBeGenomic
@ eErr_SEQ_INST_SeqLitDataLength0
@ eErr_SEQ_INST_CircBactGenomeProblem
@ eErr_SEQ_INST_WGSMasterLacksStrucComm
@ eErr_SEQ_INST_ContigsTooShort
@ eErr_SEQ_DESCR_NoMolInfoFound
@ eErr_SEQ_PKG_OrphanedProtein
@ eErr_SEQ_INST_SeqGapBadLinkage
@ eErr_SEQ_INST_SelfReferentialSequence
@ eErr_SEQ_DESCR_TransgenicProblem
@ eErr_SEQ_INST_DeltaComponentIsGi0
@ eErr_SEQ_FEAT_CDSmRNANotMatched
@ eErr_SEQ_FEAT_FeatContentDup
@ eErr_SEQ_INST_MolNotSet
@ eErr_SEQ_DESCR_WGSMasterLacksBothBioSampleBioProject
@ eErr_SEQ_INST_GiWithoutAccession
@ eErr_SEQ_INST_MissingGaps
@ eErr_SEQ_DESCR_InvalidForType
@ eErr_SEQ_FEAT_BadRRNAcomponentOverlapRRNA
@ eErr_SEQ_DESCR_FastaBracketTitle
@ eErr_SEQ_FEAT_MisMatchAA
@ eErr_SEQ_INST_StopInProtein
@ eErr_SEQ_INST_UnknownLengthGapNot100
@ eErr_SEQ_FEAT_MultipleProtRefs
@ eErr_SEQ_FEAT_MultipleEquivPublications
@ eErr_SEQ_DESCR_DBLinkProblem
@ eErr_SEQ_INST_InvalidLen
@ eErr_SEQ_DESCR_TPAassemblyWithoutTPAKeyword
@ eErr_SEQ_DESCR_InvalidForTypeGIBB
@ eErr_SEQ_FEAT_InvalidFeatureForProtein
@ eErr_SEQ_INST_HighNContentPercent
@ eErr_SEQ_DESCR_RefGeneTrackingOnNonRefSeq
@ eErr_SEQ_FEAT_IdenticalGeneSymbolAndSynonym
@ eErr_SEQ_FEAT_MultipleEquivBioSources
@ eErr_SEQ_INST_HighNcontent5Prime
@ eErr_SEQ_INST_TSAshouldBNotBeDNA
@ eErr_SEQ_DESCR_MissingChromosome
@ eErr_SEQ_INST_BadProteinMoltype
@ eErr_SEQ_DESCR_NucleotideTechniqueOnProtein
@ eErr_SEQ_INST_CompleteCircleProblem
@ eErr_SEQ_FEAT_CDSwithMultipleMRNAs
@ eErr_SEQ_FEAT_CDSmRNAMismatchProteinIDs
@ eErr_SEQ_FEAT_CDSmRNAMismatchTranscriptIDs
@ eErr_SEQ_FEAT_PartialProblemOrganelle3Prime
@ eErr_SEQ_INST_OverlappingDeltaRange
@ eErr_SEQ_FEAT_OverlappingPeptideFeat
@ eErr_SEQ_DESCR_BadKeywordNoTechnique
@ eErr_SEQ_FEAT_ExtraProteinFeature
@ eErr_SEQ_INST_SeqLocLength
@ eErr_SEQ_INST_FarLocationExcludesFeatures
@ eErr_SEQ_DESCR_InconsistentVirusMoltype
@ eErr_SEQ_INST_IdOnMultipleBioseqs
@ eErr_SEQ_DESCR_MoltypeOtherGenetic
@ eErr_SEQ_INST_HighNpercent3Prime
@ eErr_SEQ_INST_BadSecondaryAccn
@ eErr_SEQ_INST_InvalidAlphabet
@ eErr_SEQ_FEAT_CDSonMinusStrandMRNA
@ eErr_SEQ_INST_MolNuclAcid
@ eErr_SEQ_DESCR_MoltypeOther
@ eErr_SEQ_DESCR_Inconsistent
@ eErr_SEQ_INST_ExtNotAllowed
@ eErr_SEQ_DESCR_InconsistentRefSeqMoltype
@ eErr_SEQ_FEAT_PartialProblem5Prime
@ eErr_SEQ_FEAT_CDSmRNAMismatchLocation
@ eErr_SEQ_INST_TrailingX
@ eErr_SEQ_DESCR_InconsistentDates
@ eErr_SEQ_INST_CircularProtein
@ eErr_SEQ_INST_NoIdOnBioseq
@ eErr_SEQ_INST_PartsOutOfOrder
@ eErr_SEQ_FEAT_BadFullLengthFeature
@ eErr_SEQ_DESCR_InconsistentGenBankblocks
@ eErr_SEQ_FEAT_FarLocation
@ eErr_SEQ_INST_MolinfoOther
@ eErr_SEQ_INST_BadSeqIdLength
@ eErr_SEQ_INST_SeqDataNotAllowed
@ eErr_SEQ_INST_BadHTGSeq
@ eErr_SEQ_FEAT_PartialProblemOrganelle5Prime
@ eErr_SEQ_DESCR_NoKeywordHasTechnique
@ eErr_SEQ_INST_UnexpectedIdentifierChange
@ eErr_SEQ_INST_WGSseqGapProblem
@ eErr_SEQ_DESCR_MultipleStrucComms
@ eErr_SEQ_FEAT_InconsistentRRNAstrands
@ eErr_SEQ_FEAT_PartialProblemNotSpliceConsensus5Prime
@ eErr_SEQ_FEAT_BadRRNAcomponentOverlapAndOrder
@ eErr_SEQ_DESCR_DBLinkBadFormat
@ eErr_SEQ_FEAT_InvalidForType
@ eErr_SEQ_FEAT_GeneLocusCollidesWithLocusTag
@ eErr_SEQ_FEAT_CDSgeneRange
@ eErr_SEQ_INST_MitoMetazoanTooLong
@ eErr_SEQ_DESCR_CompleteGenomeLacksBioProject
@ eErr_SEQ_DESCR_CollidingPubMedID
@ eErr_SEQ_FEAT_DuplicateFeat
@ eErr_SEQ_INST_ExtBadOrMissing
@ eErr_SEQ_FEAT_FeatureProductInconsistency
@ eErr_SEQ_DESCR_SyntheticConstructWrongMolType
@ eErr_SEQ_FEAT_DuplicateGeneConflictingLocusTag
@ eErr_SEQ_DESCR_MolInfoConflictsWithBioSource
@ eErr_SEQ_INST_InstantiatedGapMismatch
@ eErr_SEQ_FEAT_UTRdoesNotAbutCDS
@ eErr_SEQ_INST_PartialInconsistent
@ eErr_SEQ_FEAT_CollidingLocusTags
@ eErr_SEQ_DESCR_MultipleNames
@ eErr_SEQ_FEAT_PartialProblemNotSpliceConsensus3Prime
@ eErr_SEQ_INST_BadSeqIdFormat
@ eErr_SEQ_FEAT_NoCDSbetweenUTRs
@ eErr_SEQ_INST_ZeroGiNumber
@ eErr_INTERNAL_Exception
@ eErr_SEQ_INST_ConflictingIdsOnBioseq
@ eErr_SEQ_DESCR_WrongOrganismFor16SrRNA
@ eErr_SEQ_INST_HistAssemblyMissing
@ eErr_SEQ_PKG_NoCdRegionPtr
@ eErr_SEQ_INST_InternalNsInSeqRaw
@ eErr_SEQ_INST_TerminalNs
@ eErr_SEQ_FEAT_SeqFeatXrefProblem
@ eErr_SEQ_DESCR_BadKeywordForStrucComm
@ eErr_SEQ_FEAT_CDSdoesNotMatchVDJC
@ eErr_SEQ_DESCR_InconsistentMolType
@ eErr_SEQ_FEAT_CDSmRNAMissingProteinIDs
@ eErr_SEQ_DESCR_WGSmasterLacksBioSample
@ eErr_SEQ_FEAT_MultiIntervalIntron
@ eErr_SEQ_DESCR_InconsistentTPA
@ eErr_SEQ_FEAT_LocusTagProblem
@ eErr_SEQ_INST_HighNpercent5Prime
@ eErr_SEQ_DESCR_ScaffoldLacksBioProject
@ eErr_SEQ_INST_InternalNsAdjacentToGap
@ eErr_SEQ_FEAT_PartialProblem
@ eErr_SEQ_DESCR_MultipleComments
@ eErr_SEQ_INST_SeqDataNotFound
@ eErr_SEQ_INST_InternalGapsInSeqRaw
@ eErr_SEQ_FEAT_MultipleGeneOverlap
@ eErr_SEQ_INST_DuplicateSegmentReferences
@ eErr_SEQ_DESCR_InconsistentWGSFlags
@ eErr_SEQ_FEAT_CDSmRNAmismatchCount
@ eErr_SEQ_FEAT_UTRdoesNotExtendToEnd
@ eErr_SEQ_INST_SeqLitGapLength0
@ eErr_SEQ_INST_SeqIdNameHasSpace
@ eErr_SEQ_DESCR_ProteinTechniqueOnNucleotide
@ eErr_SEQ_DESCR_CollidingPublications
@ eErr_SEQ_FEAT_PartialProblemmRNASequence3Prime
@ eErr_SEQ_INST_InternalNsInSeqLit
@ eErr_SEQ_INST_SeqDataLenWrong
@ eErr_SEQ_INST_GapInProtein
@ eErr_SEQ_INST_SeqGapProblem
@ eErr_SEQ_INST_InvalidResidue
@ eErr_SEQ_FEAT_PartialProblemmRNASequence5Prime
@ eErr_SEQ_FEAT_InvalidFeatureForMRNA
@ eErr_SEQ_FEAT_CDSwithNoMRNA
@ eErr_GENERIC_DeltaSeqError
const string & GetLineage(void) const
const string & GetTaxname(void) const
bool IsSetLineage(void) const
bool IsSetTaxname(void) const
size_t IterateFeatures(Fnc m)
CSeq_entry * GetParentEntry(void) const
CConstRef< CSeqdesc > GetClosestDescriptor(CSeqdesc::E_Choice choice, int *level=NULL) const
TSeqPos GetLength(void) const
void GetLabel(string *label, ELabelType type, bool worst=false) const
bool AssignMatch(TmRNAList &mrna_map, CFeatTree &feat_tree, CScope &scope)
bool Overlaps(const CSeq_feat &mrna) const
sequence::EOverlapType m_OverlapType
const CSeq_feat & GetSeqfeat() const
bool AssignXrefMatch(TmRNAList &unmatched_mrnas, const CTSE_Handle &tse)
CConstRef< CSeq_feat > m_Cds
bool AssignOverlapMatch(TmRNAList &unmatched_mrnas, CScope &scope)
CCdsMatchInfo(const CSeq_feat &cds, CScope *scope)
bool AreMrnaProductsUnique()
CRef< CMrnaMatchInfo > m_BestMatch
const CMrnaMatchInfo & GetMatch() const
void SetMatch(CRef< CMrnaMatchInfo > match)
list< CConstRef< CSeq_feat > > m_OtherMrnas
void UpdateOtherMrnas(const TmRNAList &unmatched_mrnas)
ECompare Compare(const CDate &date) const
void GetDate(string *label, bool year_only=false) const
Append a standardized string representation of the date to the label.
@ eCompare_before
*this comes first.
@ eCompare_same
They're equivalent.
void GetLabel(string *label) const
bool Match(const CDbtag &dbt2) const
int Compare(const CDbtag &dbt2) const
CSeqFeatData::ESubtype GetSubtype(void) const
CRef< CFeatureIndex > GetBestParent(void)
CRef< CFeatureIndex > GetBestGene(void)
CSeq_feat_Handle GetSeqFeatHandle(void) const
const CMappedFeat GetMappedFeat(void) const
CConstRef< CSeq_loc > GetMappedLocation(void) const
CRef< feature::CFeatTree > GetFeatTreeFromCache(const CSeq_loc &loc, CScope &scope)
static bool IsPseudo(const CSeq_feat &feat)
CConstRef< CSeq_feat > GetGeneFromCache(const CSeq_feat *feat, CScope &scope)
void GetLabel(string *label) const
bool IsSuppressed(void) const
@Imp_feat.hpp User-defined methods of the data storage class.
CConstRef< CSeq_feat > m_Mrna
bool Overlaps(const CSeq_feat &cds) const
CMrnaMatchInfo(const CSeq_feat &mrna, CScope *scope)
void SetPseudo(bool val=true)
const CSeq_feat & GetSeqfeat() const
bool OkWithoutCds(bool isGenbank=false) const
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
Exceptions for objmgr/util library.
@OrgMod.hpp User-defined methods of the data storage class.
const string & GetLineage(void) const
bool IsSetLineage(void) const
@Pubdesc.hpp User-defined methods of the data storage class.
@RNA_ref.hpp User-defined methods of the data storage class.
CRef< CBioseqIndex > GetBioseqIndex(void)
ESubtype GetSubtype(void) const
@ eSubtype_transit_peptide_aa
@ eSubtype_sig_peptide_aa
@ eSubtype_mat_peptide_aa
CSeq_entry * GetParentEntry(void) const
namespace ncbi::objects::
const CGene_ref * GetGeneXref(void) const
See related function in util/feature.hpp.
static bool IsAa(EMol mol)
static string GetMoleculeClass(EMol mol)
static bool IsNa(EMol mol)
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
static void Validate(const CSeq_data &in_seq, vector< TSeqPos > *badIdx, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
Base class for all serializable objects.
static bool NeedsNoText(const TSubtype &subtype)
CBioseq_Handle GetBioseqHandle(const CSeq_id &id) const
Get Bioseq handle from this TSE.
TSeq_feat_Handles GetFeaturesWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
bool GetInheritedPropertyDefines(const string &prop_name, TInfoList &results_out, TTaxId subtree_root=TAX_ID_CONST(1))
bool GetScientificName(TTaxId tax_id, string &name_out)
list< CRef< CTaxon1_info > > TInfoList
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Template class for iteration on objects of class C (non-modifiable version)
bool IsRefGeneTracking() const
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
bool IsStructuredComment() const
EObjectType GetObjectType() const
void ValidateSeqAnnot(const CSeq_annot_Handle &annot)
void ValidateSeqAnnotContext(const CSeq_annot &annot, const CBioseq &seq)
static CSeq_entry_Handle GetAppropriateXrefParent(CSeq_entry_Handle seh)
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
static bool IsPdb(const CBioseq &seq)
void ValidateUpdateDateContext(const CDate &update, const CDate &create, const CBioseq &seq, const CSeqdesc &desc)
void ValidateOrgContext(const COrg_ref &this_org, const COrg_ref &org, const CBioseq &seq, const CSeqdesc &desc)
CBioseq_Handle m_CurrentHandle
void ValidateInst(const CBioseq &seq)
static bool IsRefSeq(const CBioseq &seq)
void x_ValidateMultiplePubs(const CBioseq_Handle &bsh)
bool IsHistAssemblyMissing(const CBioseq &seq)
void ReportBadAssemblyGap(const CBioseq &seq)
static bool IsSelfReferential(const CBioseq &seq)
EDiagSev x_DupFeatSeverity(const CSeq_feat &curr, const CSeq_feat &prev, bool viral, bool htgs, bool same_annot, bool same_label)
CRef< CSeq_loc > GetLocFromSeq(const CBioseq &seq)
void x_ValidateBarcode(const CBioseq &seq)
void x_CompareStrings(const TStrFeatMap &str_feat_map, const string &type)
void x_CheckGeneralIDs(const CBioseq &seq)
void x_TranscriptIDsMatch(const string &protein_id, const CSeq_feat &cds)
static bool IsTSAAccession(const CSeq_id &id)
static bool IsEmblOrDdbj(const CBioseq &seq)
void x_CheckMrnaProteinLink(const CCdsMatchInfo &cds_match)
bool x_IsRangeGap(const CBioseq_Handle &seq, int start, int stop)
void ValidateBioseq(const CBioseq &seq)
void ValidateWGSMaster(CBioseq_Handle bsh)
CValidError_descr m_DescrValidator
void ValidateDeltaLoc(const CSeq_loc &loc, const CBioseq &seq, TSeqPos &len)
bool x_IsSameAsCDS(const CMappedFeat &feat)
void x_ValidateMolInfoForBioSource(const CBioSource &src, const CMolInfo &minfo, const CSeqdesc &desc)
void x_CheckForMultiplemRNAs(CCdsMatchInfo &cds_match, const TmRNAList &unmatched_mrnas)
void ValidateHistory(const CBioseq &seq)
void x_ValidateCompletness(const CBioseq &seq, const CMolInfo &mi)
bool SuppressTrailingXMsg(const CBioseq &seq)
void ValidateMolInfoContext(const CMolInfo &minfo, int &seq_biomol, int &tech, int &completeness, const CBioseq &seq, const CSeqdesc &desc)
bool x_HasCitSub(CBioseq_Handle bsh) const
static bool x_HasGap(const CBioseq &seq)
void ValidateSeqParts(const CBioseq &seq)
void x_ReportOverlappingPeptidePair(CSeq_feat_Handle f1, CSeq_feat_Handle f2, const CBioseq &bioseq, bool &reported_last_peptide)
void ValidateSegRef(const CBioseq &seq)
void x_CheckSingleStrandedRNAViruses(const CBioSource &source, const string &lineage, const string &stranded_mol, const CMolInfo::TBiomol biomol, const CBioseq_Handle &bsh, const CSerialObject &obj, const CSeq_entry *ctx)
void ValidateSecondaryAccConflict(const string &primary_acc, const CBioseq &seq, int choice)
static bool IsWGSMaster(const CBioseq &seq, CScope &scope)
void x_ValidateTitle(const CBioseq &seq)
void ValidateMultipleGeneOverlap(const CBioseq_Handle &bsh)
void ValidateSeqFeatContext(const CBioseq &seq, bool is_complete)
void ValidateDelta(const CBioseq &seq)
static bool HasBadWGSGap(const CBioseq &seq)
static bool x_HasPGAPStructuredComment(CBioseq_Handle bsh)
bool m_report_missing_chromosome
CValidError_annot m_AnnotValidator
void ValidateTwintrons(const CBioseq &seq)
unsigned int x_IdXrefsNotReciprocal(const CSeq_feat &cds, const CSeq_feat &mrna)
void x_ValidateGeneCDSmRNACounts()
void x_ReportStartStopPartialProblem(int partial_type, bool at_splice_or_gap, bool abuts_n, const CSeq_feat &feat)
static size_t x_BadMetazoanMitochondrialLength(const CBioSource &src, const CSeq_inst &inst)
void ReportBadTSAGap(const CBioseq &seq)
void ValidateSeqGap(const CSeq_gap &gap, const CBioseq &seq)
void ValidateBadGeneOverlap(const CSeq_feat &feat)
bool x_IsPartialAtSpliceSiteOrGap(const CSeq_loc &loc, unsigned int tag, bool &bad_seq, bool &is_gap, bool &abuts_n)
void x_SetupCommonFlags(CBioseq_Handle bsh)
bool m_splicing_not_expected
bool x_IsDeltaLitOnly(const CSeq_inst &inst) const
void ValidateNsAndGaps(const CBioseq &seq)
void ValidateCompleteGenome(const CBioseq &seq)
bool x_IsMicroRNA() const
CValidError_bioseq(CValidError_imp &imp)
void ValidateRawConst(const CBioseq &seq)
void ValidateBioseqContext(const CBioseq &seq)
bool CdError(const CBioseq_Handle &bsh)
void x_ReportLineageConflictWithMol(const string &lineage, const string &stranded_mol, const CMolInfo::TBiomol biomol, CSeq_inst::EMol mol, const CSerialObject &obj, const CSeq_entry *ctx)
bool ValidateRepr(const CSeq_inst &inst, const CBioseq &seq)
void ValidateFeatPartialInContext(const CMappedFeat &feat, bool is_complete)
void ValidateGBBlock(const CGB_block &gbblock, const CBioseq &seq, const CSeqdesc &desc)
bool IsMrna(const CBioseq_Handle &bsh)
void ReportBadWGSGap(const CBioseq &seq)
bool x_SuppressDicistronic(const CSeq_feat_Handle &f1, const CSeq_feat_Handle &f2, bool fruit_fly)
static bool IsWGSAccession(const CSeq_id &id)
void ValidateSeqLen(const CBioseq &seq)
bool x_PartialAdjacentToIntron(const CSeq_loc &loc)
void x_CheckOrigProteinAndTranscriptIds(const CCdsMatchInfo &cds_match)
size_t GetDataLen(const CSeq_inst &inst)
void CheckForPubOnBioseq(const CBioseq &seq)
void x_CalculateNsStretchAndTotal(const CSeqVector &seqvec, TSeqPos &num_ns, TSeqPos &max_stretch, bool &n5, bool &n3)
void CheckForMolinfoOnBioseq(const CBioseq &seq)
static bool IsAllNs(const CSeqVector &vec)
static string s_GetStrandedMolStringFromLineage(const string &lineage)
bool GetTSAConflictingBiomolTechErrors(const CBioseq &seq)
bool GraphsOnBioseq() const
void CheckTpaHistory(const CBioseq &seq)
static bool IsPartial(const CBioseq &seq, CScope &scope)
const CCacheImpl::TFeatValue * m_AllFeatIt
void x_ValidateCDSmRNAmatch(const CBioseq_Handle &seq)
void ReportModifInconsistentError(int new_mod, int &old_mod, const CSeqdesc &desc, const CSeq_entry &ctx)
static bool x_IgnoreEndGap(CBioseq_Handle bsh, CSeq_gap::TType gap_type)
static bool x_ParentAndComponentLocationsDiffer(CBioseq_Handle bsh, CBioSource::TGenome parent_location)
void x_ValidateCDSVDJCmatch(const CBioseq_Handle &seq)
bool x_ShowBioProjectWarning(const CBioseq &seq)
void CheckForMultipleStructuredComments(const CBioseq &seq)
void ValidateCollidingGenes(const CBioseq &seq)
bool x_IdXrefsAreReciprocal(const CSeq_feat &cds, const CSeq_feat &mrna)
static bool IsGenbank(const CBioseq &seq)
void x_ReportDuplicatePubLabels(const CBioseq &seq, const vector< CTempString > &labels)
void ValidateSeqIds(const CBioseq &seq)
void x_ReportInternalPartial(const CSeq_feat &feat)
void ValidateModifDescriptors(const CBioseq &seq)
void x_ReportSuspiciousUseOfComplete(const CBioseq &seq, EDiagSev sev)
CValidError_feat m_FeatValidator
static int PctNs(CBioseq_Handle bsh)
void ReportBadGenomeGap(const CBioseq &seq)
static bool IsWp(CBioseq_Handle bsh)
void ValidateDupOrOverlapFeats(const CBioseq &seq)
bool x_MatchesOverlappingFeaturePartial(const CMappedFeat &feat, unsigned int partial_type)
void CheckForMissingChromosome(CBioseq_Handle bsh)
bool IsIdIn(const CSeq_id &id, const CBioseq &seq)
void ValidateMoltypeDescriptors(const CBioseq &seq)
size_t NumOfIntervals(const CSeq_loc &loc)
void x_ReportImproperPartial(const CSeq_feat &feat)
bool IsFlybaseDbxrefs(const TDbtags &dbxrefs)
void CheckSourceDescriptor(const CBioseq_Handle &bsh)
void x_ReportGeneOverlapError(const CSeq_feat &feat, const string &gene_label)
void x_CheckForMultipleComments(CBioseq_Handle bsh)
void ValidateIDSetAgainstDb(const CBioseq &seq)
static bool IsMaster(const CBioseq &seq)
bool x_IsActiveFin() const
bool x_ReportUTRPair(const CSeq_feat &utr5, const CSeq_feat &utr3)
void x_ValidateAbuttingRNA(const CBioseq_Handle &seq)
void x_ValidateSourceFeatures(const CBioseq_Handle &bsh)
void ValidateSeqId(const CSeq_id &id, const CBioseq &ctx, bool longer_general=false)
~CValidError_bioseq() override
void x_ValidateAbuttingUTR(const CBioseq_Handle &seq)
bool x_ReportDupOverlapFeaturePair(const CSeq_feat_Handle &f1, const CSeq_feat_Handle &f2, bool fruit_fly, bool viral, bool htgs)
void x_ValidateCDSagainstVDJC(const CBioseq_Handle &seq)
static bool IsWGS(const CBioseq &seq)
size_t x_CountAdjacentNs(const CSeq_literal &lit)
void ValidateSeqDescContext(const CBioseq &seq)
void x_ValidateOverlappingRNAFeatures(const CBioseq_Handle &bsh)
bool GetTSANStretchErrors(const CBioseq &seq)
const CCacheImpl::TFeatValue * m_GeneIt
void GapByGapInst(const CBioseq &seq)
void x_ValidatePubFeatures(const CBioseq_Handle &bsh)
void ValidateSeqDescr(const CSeq_descr &descr, const CSeq_entry &ctx)
bool ValidateStructuredComment(const CSeqdesc &desc, bool report)
void ValidateSeqFeatContext(const CSeq_feat &feat, const CBioseq &seq)
void SetScope(CScope &scope)
void SetTSE(CSeq_entry_Handle seh)
void ValidateGraphsOnBioseq(const CBioseq &seq)
const CSeq_entry_Handle & GetTSEH()
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
bool IsSyntheticConstruct(const CBioSource &src)
bool HasGiOrAccnVer() const
const SValidatorContext & GetContext() const
void AddBioseqWithNoBiosource(const CBioseq &seq)
CConstRef< CSeq_feat > GetCachedGene(const CSeq_feat *f)
bool IsValidateIdSet() const
void PostObjErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
static bool IsWGSIntermediate(const CBioseq &seq)
bool IsNoCitSubPubs() const
CConstRef< CSeq_feat > GetCDSGivenProduct(const CBioseq &seq)
CBioseq_Handle GetLocalBioseqHandle(const CSeq_id &id)
bool IsSeqSubmitParent() const
bool x_IsFarFetchFailure(const CSeq_loc &loc)
void AddBioseqWithNoPub(const CBioseq &seq)
bool IsGenomeSubmission() const
void AddProtWithoutFullRef(const CBioseq_Handle &seq)
bool IsArtificial(const CBioSource &src)
void ValidateBioSourceForSeq(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx, const CBioseq_Handle &bsh)
void IncrementTpaWithHistoryCount()
bool IsNoBioSource() const
bool IsLocalGeneralOnly() const
void SetFarFetchFailure()
void IncrementTpaWithoutHistoryCount()
bool IsRefSeqConventions() const
bool IsIndexerVersion() const
CGeneCache & GetGeneCache()
bool IsSmallGenomeSet() const
void ValidateSeqLoc(const CSeq_loc &loc, const CBioseq_Handle &seq, bool report_abutting, const string &prefix, const CSerialObject &obj, bool lowerSev=false)
bool DoCompareVDJCtoCDS() const
bool ShouldSubdivide() const
bool IsTransgenic(const CBioSource &bsrc)
vector< string > m_unpublished_labels
vector< string > m_published_labels
const TFeatValue & GetFeatFromCache(const SFeatKey &featKey)
AutoPtr< TFeatValue > GetFeatFromCacheMulti(const vector< SFeatKey > &featKeys)
const CPubdescInfo & GetPubdescToInfo(CConstRef< CPubdesc > pub)
static const CSeqFeatData::ESubtype kAnyFeatSubtype
static const CSeqFeatData::E_Choice kAnyFeatType
std::vector< CMappedFeat > TFeatValue
@ fLabel_Unique
Append a unique tag [V1].
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
const_iterator find(const key_type &key) const
const_iterator end() const
iterator insert(const value_type &val)
container_type::iterator iterator
container_type::value_type value_type
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static const char si[8][64]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
bool AllowOrphanedProtein(const CBioseq &seq, bool force_refseq=false)
#define FOR_EACH_USERFIELD_ON_USEROBJECT(Itr, Var)
FOR_EACH_USERFIELD_ON_USEROBJECT EDIT_EACH_USERFIELD_ON_USEROBJECT.
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define TAX_ID_FROM(T, value)
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
EDiagSev
Severity level for the posted diagnostics.
#define LOG_POST_XX(error_name, err_subcode, message)
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Fatal
Fatal error – guarantees exit (or abort)
@ eDiag_Critical
Critical error message.
void Critical(CExceptionArgs_Base &args)
void Error(CExceptionArgs_Base &args)
const string & GetMsg(void) const
Get message string.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
const string & FindName(TEnumValueType value, bool allowBadValue) const
Find name of the enum by its numeric value.
const TPrim & Get(void) const
#define ENUM_METHOD_NAME(EnumName)
const string AsFastaString(void) const
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
static const size_t kMaxLocalIDLength
ID length restrictions.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
CConstRef< CSeq_id > GetSeqId(void) const
EAccessionInfo
For IdentifyAccession (below)
int CompareOrdered(const CSeq_id &sid2) const
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
static bool IsValidLocalID(const CTempString &s)
Perform rudimentary validation on potential local IDs, whose contents should be pure ASCII and limite...
static const size_t kMaxGeneralTagLength
CSeq_id::E_Choice Which(void) const
string GetLabel(const CSeq_id &id)
static const size_t kMaxGeneralDBLength
@ e_YES
SeqIds compared, but are different.
@ eContent
Untagged human-readable accession or the like.
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
ENa_strand GetStrand(void) const
Get the location's strand.
TRange GetTotalRange(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
CConstRef< CSeq_loc > GetRangeAsSeq_loc(void) const
Get seq-loc for the current iterator position.
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
const CSeq_loc & GetEmbeddingSeq_loc(void) const
Get the nearest seq-loc containing the current range.
const CSeq_id * GetId(void) const
Get the id of the location; returns NULL if it has multiple ids or no id at all.
TRange GetRange(void) const
Get the range.
ENa_strand GetStrand(void) const
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages). label must point to an existing string ob...
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
CMappedFeat GetParent(const CMappedFeat &feat)
Return nearest parent of a feature.
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
@ fFGL_Content
Include its content if there is any.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope is not null; otherwise return the maximum possible TSeqPos.
Int8 TestForOverlapEx(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, CScope *scope=0, TOverlapFlags flags=fOverlap_Default)
Updated version of TestForOverlap64().
int SeqLocPartialCheck(const CSeq_loc &loc, CScope *scope)
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
bool IsOneBioseq(const CSeq_loc &loc, CScope *scope)
Returns true if all embedded CSeq_ids represent the same CBioseq, else false.
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
@ eSeqlocPartial_Nointernal
@ eSeqlocPartial_Complete
@ eSeqlocPartial_Limwrong
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eOverlap_SubsetRev
1st is a subset of 2nd ranges
@ eOverlap_CheckIntervals
2nd is a subset of 1st with matching boundaries
@ eOverlap_Contains
2nd contains 1st extremes
@ eOverlap_CheckIntRev
1st is a subset of 2nd with matching boundaries
@ eOverlap_Simple
any overlap of extremes
@ eOverlap_Contained
2nd contained within 1st extremes
@ eOverlap_Subset
2nd is a subset of 1st ranges
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
@ eNoOverlap
CSeq_locs do not overlap or abut.
const CSeq_feat * GetCDSForProduct(const CBioseq &product, CScope *scope)
Get the encoding CDS feature of a given protein sequence.
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether the given feature is pseudo, using the gene associated with the feature if necessary. Checks to...
CConstRef< CSeq_feat > GetOverlappingOperon(const CSeq_loc &loc, CScope &scope)
const CSeq_feat * GetPROTForProduct(const CBioseq &product, CScope *scope)
Get the mature peptide feature of a protein.
vector< TFeatScore > TFeatScores
void GetOverlappingFeatures(const CSeq_loc &loc, CSeqFeatData::E_Choice feat_type, CSeqFeatData::ESubtype feat_subtype, EOverlapType overlap_type, TFeatScores &feats, CScope &scope, const TBestFeatOpts opts=0, CGetOverlappingFeaturesPlugin *plugin=NULL)
Find all features overlapping the location.
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
vector< CSeq_id_Handle > TIds
@ eGetBioseq_All
Search bioseq, load if not loaded yet.
bool IsSetExcept(void) const
const CFeat_id & GetId(void) const
bool IsSetInst_Mol(void) const
bool IsSetComment(void) const
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
TClass GetClass(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
virtual CConstRef< CSeq_feat > GetSeq_feat(void) const
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
bool IsSetExcept_text(void) const
TInst_Mol GetInst_Mol(void) const
bool IsSetProduct(void) const
bool IsSetInst_Length(void) const
TInst_Topology GetInst_Topology(void) const
const string & GetComment(void) const
TInst_Length GetInst_Length(void) const
const string & GetExcept_text(void) const
bool IsSetInst(void) const
void Reset(void)
Reset handle and make it not to point to any bioseq.
bool IsSetInst_Repr(void) const
bool IsSetClass(void) const
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
TInst_Repr GetInst_Repr(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
CSeq_entry_Handle GetExactComplexityLevel(CBioseq_set::EClass cls) const
Return level with exact complexity, or empty handle if not found.
CSeqFeatData::ESubtype GetFeatSubtype(void) const
bool IsSetInst_Topology(void) const
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
const TId & GetId(void) const
TMol GetBioseqMolType(void) const
Get some values from core:
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
bool CanGetInst_Mol(void) const
const TInst & GetInst(void) const
@ eCoding_Ncbi
Set coding to binary coding (Ncbi4na or Ncbistdaa)
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
TSeqPos GetEndPosition(void) const
return end position of current segment in sequence (exclusive)
SSeqMapSelector & SetResolveCount(size_t res_cnt)
Set max depth of resolving seq-map.
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
bool IsSetPartial(void) const
const CSeq_loc & GetLocation(void) const
bool GetPartial(void) const
SSeqMapSelector & SetFlags(TFlags flags)
Select segment type(s)
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
const CSeq_feat_Handle & GetSeq_feat_Handle(void) const
Get original feature handle.
const CSeq_loc & GetProduct(void) const
SAnnotSelector & SetFeatSubtype(TFeatSubtype subtype)
Set feature subtype (also set annotation and feat type)
TSeqPos GetPosition(void) const
return position of current segment in sequence
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
TCoding GetCoding(void) const
Target sequence coding.
bool IsInGap(TSeqPos pos) const
True if the sequence at 0-based position 'pos' has a gap. Note: this method is not MT-safe,...
@ eSeqData
real sequence data
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
int32_t Int4
4-byte (32-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
NCBI_NS_STD::string::size_type SIZE_TYPE
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
CTempString literal(const char(&str)[Size])
Templatized initialization from a string literal.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
size_type length(void) const
Return the length of the represented array.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
@ eNocase
Case insensitive compare.
static const char label[]
const TKeywords & GetKeywords(void) const
Get the Keywords member data.
bool IsSetExtra_acc(void) const
Check if a value has been assigned to Extra_acc data member.
const TExtra_acc & GetExtra_acc(void) const
Get the Extra_acc member data.
bool IsSetKeywords(void) const
Check if a value has been assigned to Keywords data member.
bool IsSetExtra_accessions(void) const
Check if a value has been assigned to Extra_accessions data member.
const TExtra_accessions & GetExtra_accessions(void) const
Get the Extra_accessions member data.
const TKeywords & GetKeywords(void) const
Get the Keywords member data.
bool IsSetKeywords(void) const
Check if a value has been assigned to Keywords data member.
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
TGenome GetGenome(void) const
Get the Genome member data.
TOrigin GetOrigin(void) const
Get the Origin member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsSetOrigin(void) const
Check if a value has been assigned to Origin data member.
TSubtype GetSubtype(void) const
Get the Subtype member data.
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
const TName & GetName(void) const
Get the Name member data.
bool IsSetIs_focus(void) const
to distinguish biological focus Check if a value has been assigned to Is_focus data member.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
@ eOrigin_synthetic
purely synthetic
@ eOrigin_mut
artificially mutagenized
@ eOrigin_artificial
artificially engineered
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
const TDesc & GetDesc(void) const
Get the Desc member data.
bool IsSetPseudo(void) const
pseudogene Check if a value has been assigned to Pseudo data member.
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
bool IsSetDesc(void) const
descriptive name Check if a value has been assigned to Desc data member.
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
const TLocus & GetLocus(void) const
Get the Locus member data.
TPseudo GetPseudo(void) const
Get the Pseudo member data.
const TStr & GetStr(void) const
Get the variant data.
bool IsSetData(void) const
the object itself Check if a value has been assigned to Data data member.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetDb(void) const
name of database or system Check if a value has been assigned to Db data member.
bool CanGetType(void) const
Check if it is safe to call GetType method.
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
bool IsLim(void) const
Check if variant Lim is selected.
bool IsSetYear(void) const
full year (including 1900) Check if a value has been assigned to Year data member.
bool IsStd(void) const
Check if variant Std is selected.
const TTag & GetTag(void) const
Get the Tag member data.
bool IsStrs(void) const
Check if variant Strs is selected.
const TStrs & GetStrs(void) const
Get the variant data.
bool IsId(void) const
Check if variant Id is selected.
const TData & GetData(void) const
Get the Data member data.
bool IsSetTag(void) const
appropriate tag Check if a value has been assigned to Tag data member.
const TDb & GetDb(void) const
Get the Db member data.
TLim GetLim(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetLabel(void) const
field label Check if a value has been assigned to Label data member.
const TStr & GetStr(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
const TStr & GetStr(void) const
Get the variant data.
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
vector< CStringUTF8 > TStrs
const TStd & GetStd(void) const
Get the variant data.
vector< CRef< CUser_field > > TData
TId GetId(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
const TMod & GetMod(void) const
Get the Mod member data.
bool IsSetDb(void) const
ids in taxonomic or culture dbases Check if a value has been assigned to Db data member.
const TLineage & GetLineage(void) const
Get the Lineage member data.
TSubtype GetSubtype(void) const
Get the Subtype member data.
bool CanGetDiv(void) const
Check if it is safe to call GetDiv method.
const TDiv & GetDiv(void) const
Get the Div member data.
const TSubname & GetSubname(void) const
Get the Subname member data.
bool IsSetLineage(void) const
lineage with semicolon separators Check if a value has been assigned to Lineage data member.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
const TDb & GetDb(void) const
Get the Db member data.
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
bool IsSetTaxname(void) const
preferred formal name Check if a value has been assigned to Taxname data member.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
const TName & GetName(void) const
Get the Name member data.
TProcessed GetProcessed(void) const
Get the Processed member data.
bool IsSetProcessed(void) const
Check if a value has been assigned to Processed data member.
bool IsSetName(void) const
protein name Check if a value has been assigned to Name data member.
list< CRef< CPub > > Tdata
const Tdata & Get(void) const
Get the member data.
const TEquiv & GetEquiv(void) const
Get the variant data.
bool IsEquiv(void) const
Check if variant Equiv is selected.
bool IsSub(void) const
Check if variant Sub is selected.
TType GetType(void) const
Get the Type member data.
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
bool CanGetExt(void) const
Check if it is safe to call GetExt method.
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
const TName & GetName(void) const
Get the variant data.
const TExt & GetExt(void) const
Get the Ext member data.
bool IsName(void) const
Check if variant Name is selected.
const TKey & GetKey(void) const
Get the Key member data.
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
bool IsSetQual(void) const
qualifiers Check if a value has been assigned to Qual data member.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetCode(void) const
genetic code used Check if a value has been assigned to Code data member.
bool IsSetExt(void) const
user defined structure extension Check if a value has been assigned to Ext data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
bool IsImp(void) const
Check if variant Imp is selected.
const TQual & GetQual(void) const
Get the Qual member data.
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
const TId & GetId(void) const
Get the Id member data.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsLocal(void) const
Check if variant Local is selected.
bool IsGene(void) const
Check if variant Gene is selected.
TFrame GetFrame(void) const
Get the Frame member data.
const TData & GetData(void) const
Get the Data member data.
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
const TCode & GetCode(void) const
Get the Code member data.
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
list< CRef< C_E > > Tdata
const TCdregion & GetCdregion(void) const
Get the variant data.
const TBiosrc & GetBiosrc(void) const
Get the variant data.
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
TPseudo GetPseudo(void) const
Get the Pseudo member data.
const TProduct & GetProduct(void) const
Get the Product member data.
const Tdata & Get(void) const
Get the member data.
bool IsSetPseudo(void) const
annotated on pseudogene? Check if a value has been assigned to Pseudo data member.
bool IsBiosrc(void) const
Check if variant Biosrc is selected.
void SetPseudo(TPseudo value)
Assign a value to Pseudo data member.
const TGene & GetGene(void) const
Get the variant data.
TPartial GetPartial(void) const
Get the Partial member data.
const TProt & GetProt(void) const
Get the variant data.
TExcept GetExcept(void) const
Get the Except member data.
vector< CRef< CGb_qual > > TQual
const TRna & GetRna(void) const
Get the variant data.
bool IsSetDbxref(void) const
support for xref to other databases Check if a value has been assigned to Dbxref data member.
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
const TExt & GetExt(void) const
Get the Ext member data.
bool IsRna(void) const
Check if variant Rna is selected.
bool IsRegion(void) const
Check if variant Region is selected.
const TImp & GetImp(void) const
Get the variant data.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
bool IsSetLocation(void) const
feature made from Check if a value has been assigned to Location data member.
@ e_Pub
publication applies to this seq
bool IsGenbank(void) const
Check if variant Genbank is selected.
TChain GetChain(void) const
Get the Chain member data.
bool IsSetChain_id(void) const
chain identifier; length-independent generalization of 'chain' Check if a value has been assigned to ...
bool IsSetChain(void) const
Deprecated: 'chain' can't support multiple character PDB chain identifiers (introduced in 2015).
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
bool IsTpg(void) const
Check if variant Tpg is selected.
const TName & GetName(void) const
Get the Name member data.
list< CRef< CSeq_interval > > Tdata
ENa_strand
strand of nucleic acid
const Tdata & Get(void) const
Get the member data.
const TId & GetId(void) const
Get the Id member data.
const TPnt & GetPnt(void) const
Get the variant data.
bool IsTpd(void) const
Check if variant Tpd is selected.
TPoint GetPoint(void) const
Get the Point member data.
bool IsOther(void) const
Check if variant Other is selected.
TFrom GetFrom(void) const
Get the From member data.
bool IsGeneral(void) const
Check if variant General is selected.
bool IsEmbl(void) const
Check if variant Embl is selected.
E_Choice Which(void) const
Which variant is currently selected.
TGi GetGi(void) const
Get the variant data.
TVersion GetVersion(void) const
Get the Version member data.
const TOther & GetOther(void) const
Get the variant data.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
Tdata & Set(void)
Assign a value to data member.
const TChain_id & GetChain_id(void) const
Get the Chain_id member data.
const TGeneral & GetGeneral(void) const
Get the variant data.
bool IsGi(void) const
Check if variant Gi is selected.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
bool IsTpe(void) const
Check if variant Tpe is selected.
bool IsPnt(void) const
Check if variant Pnt is selected.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
const TPacked_int & GetPacked_int(void) const
Get the variant data.
const TAccession & GetAccession(void) const
Get the Accession member data.
bool IsDdbj(void) const
Check if variant Ddbj is selected.
@ e_Other
for historical reasons, 'other' = 'refseq'
@ e_Tpe
Third Party Annot/Seq EMBL.
@ e_Tpd
Third Party Annot/Seq DDBJ.
@ e_General
for other databases
@ e_Gi
GenInfo Integrated Database.
@ e_Tpg
Third Party Annot/Seq Genbank.
const TSeq & GetSeq(void) const
Get the variant data.
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
bool IsSet(void) const
Check if variant Set is selected.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
list< CRef< CSeq_entry > > TSeq_set
@ eClass_parts
parts for 2 or 3
@ eClass_pop_set
population study
@ eClass_phy_set
phylogenetic study
@ eClass_mut_set
set of mutations
@ eClass_eco_set
ecological sample study
@ eClass_nuc_prot
nuc acid and coded proteins
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
@ eClass_segset
segmented sequence + parts
const TIupacaa & GetIupacaa(void) const
Get the variant data.
bool IsSetLinkage(void) const
Check if a value has been assigned to Linkage data member.
TRepr GetRepr(void) const
Get the Repr member data.
bool IsMap(void) const
Check if variant Map is selected.
const TSeg & GetSeg(void) const
Get the variant data.
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
list< CRef< CSeqdesc > > Tdata
bool IsRef(void) const
Check if variant Ref is selected.
bool IsSetReplaced_by(void) const
these seqs make this one obsolete Check if a value has been assigned to Replaced_by data member.
const TUser & GetUser(void) const
Get the variant data.
bool IsSetSeq_data(void) const
the sequence Check if a value has been assigned to Seq_data data member.
TLinkage GetLinkage(void) const
Get the Linkage member data.
TStrand GetStrand(void) const
Get the Strand member data.
ERepr
representation class
const TInst & GetInst(void) const
Get the Inst member data.
const TGap & GetGap(void) const
Get the variant data.
bool IsSetAssembly(void) const
how was this assembled? Check if a value has been assigned to Assembly data member.
TTopology GetTopology(void) const
Get the Topology member data.
const TIupacna & GetIupacna(void) const
Get the variant data.
const TUpdate_date & GetUpdate_date(void) const
Get the variant data.
const TNcbipna & GetNcbipna(void) const
Get the variant data.
bool IsSetRepr(void) const
Check if a value has been assigned to Repr data member.
const TNcbipaa & GetNcbipaa(void) const
Get the variant data.
TType GetType(void) const
Get the Type member data.
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
const TTitle & GetTitle(void) const
Get the variant data.
const TSource & GetSource(void) const
Get the variant data.
const TPub & GetPub(void) const
Get the variant data.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
const TNcbi8aa & GetNcbi8aa(void) const
Get the variant data.
const TLiteral & GetLiteral(void) const
Get the variant data.
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
bool IsLoc(void) const
Check if variant Loc is selected.
const TId & GetId(void) const
Get the Id member data.
bool IsSetHist(void) const
sequence history Check if a value has been assigned to Hist data member.
bool IsNcbi4na(void) const
Check if variant Ncbi4na is selected.
TTech GetTech(void) const
Get the Tech member data.
bool IsSetExt(void) const
extensions for special types Check if a value has been assigned to Ext data member.
const Tdata & Get(void) const
Get the member data.
bool IsSetReplaces(void) const
seq makes these seqs obsolete Check if a value has been assigned to Replaces data member.
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
bool IsNcbi8na(void) const
Check if variant Ncbi8na is selected.
TLength GetLength(void) const
Get the Length member data.
const TOrg & GetOrg(void) const
Get the variant data.
TLength GetLength(void) const
Get the Length member data.
const TAssembly & GetAssembly(void) const
Get the Assembly member data.
list< CRef< CSeq_id > > TId
const TGenbank & GetGenbank(void) const
Get the variant data.
bool IsSeg(void) const
Check if variant Seg is selected.
list< CRef< CSeq_id > > TIds
bool CanGetLength(void) const
Check if it is safe to call GetLength method.
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
TMol GetMol(void) const
Get the Mol member data.
const TIds & GetIds(void) const
Get the Ids member data.
const TLinkage_evidence & GetLinkage_evidence(void) const
Get the Linkage_evidence member data.
bool IsName(void) const
Check if variant Name is selected.
const TNcbieaa & GetNcbieaa(void) const
Get the variant data.
bool IsSetFuzz(void) const
could be unsure Check if a value has been assigned to Fuzz data member.
TType GetType(void) const
Get the Type member data.
bool IsDelta(void) const
Check if variant Delta is selected.
const TNcbistdaa & GetNcbistdaa(void) const
Get the variant data.
bool IsSetLength(void) const
length of sequence in residues Check if a value has been assigned to Length data member.
bool CanGetHist(void) const
Check if it is safe to call GetHist method.
const THist & GetHist(void) const
Get the Hist member data.
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
const TExt & GetExt(void) const
Get the Ext member data.
bool CanGetRepr(void) const
Check if it is safe to call GetRepr method.
bool IsSetDescr(void) const
descriptors Check if a value has been assigned to Descr data member.
TMol_type GetMol_type(void) const
Get the variant data.
const TEmbl & GetEmbl(void) const
Get the variant data.
TBiomol GetBiomol(void) const
Get the Biomol member data.
bool CanGetType(void) const
Check if it is safe to call GetType method.
EMol
molecule class in living organism
bool IsSetLength(void) const
must give a length in residues Check if a value has been assigned to Length data member.
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
const TDelta & GetDelta(void) const
Get the variant data.
bool IsSetPub(void) const
the citation(s) Check if a value has been assigned to Pub data member.
const TNcbi4na & GetNcbi4na(void) const
Get the variant data.
const TLoc & GetLoc(void) const
Get the variant data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
const TModif & GetModif(void) const
Get the variant data.
bool IsSet(void) const
Check if a value has been assigned to data member.
bool CanGetSeq_data(void) const
Check if it is safe to call GetSeq_data method.
const TNcbi2na & GetNcbi2na(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetDate(void) const
Check if a value has been assigned to Date data member.
bool CanGetExt(void) const
Check if it is safe to call GetExt method.
bool IsSetId(void) const
equivalent identifiers Check if a value has been assigned to Id data member.
const TCreate_date & GetCreate_date(void) const
Get the variant data.
bool IsLiteral(void) const
Check if variant Literal is selected.
bool IsSetSeq_data(void) const
may have the data Check if a value has been assigned to Seq_data data member.
list< CRef< CDelta_seq > > Tdata
const TReplaces & GetReplaces(void) const
Get the Replaces member data.
const Tdata & Get(void) const
Get the member data.
bool IsGap(void) const
Check if variant Gap is selected.
const TPub & GetPub(void) const
Get the Pub member data.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
bool IsNcbi2na(void) const
Check if variant Ncbi2na is selected.
const TReplaced_by & GetReplaced_by(void) const
Get the Replaced_by member data.
list< CRef< CSeq_loc > > Tdata
const TNcbi8na & GetNcbi8na(void) const
Get the variant data.
const TDescr & GetDescr(void) const
Get the Descr member data.
const TComment & GetComment(void) const
Get the variant data.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
bool IsIupacna(void) const
Check if variant Iupacna is selected.
const TName & GetName(void) const
Get the variant data.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
list< CRef< CLinkage_evidence > > TLinkage_evidence
const TRef & GetRef(void) const
Get the variant data.
bool CanGetInst(void) const
Check if it is safe to call GetInst method.
bool IsSetLinkage_evidence(void) const
Check if a value has been assigned to Linkage_evidence data member.
bool IsSetTopology(void) const
Check if a value has been assigned to Topology data member.
bool IsSetFuzz(void) const
Length uncertainty. Check if a value has been assigned to Fuzz data member.
E_Choice Which(void) const
Which variant is currently selected.
@ eRepr_const
constructed sequence
@ eRepr_ref
reference to another sequence
@ eRepr_seg
segmented sequence
@ eRepr_delta
sequence made by changes (delta) to others
@ eRepr_map
ordered map of any kind
@ eRepr_raw
continuous sequence
@ eRepr_virtual
no seq data
@ eCompleteness_complete
complete biological entity
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_partial
partial but no details given
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ eTech_htgs_2
ordered High Throughput sequence contig
@ eTech_physmap
from physical mapping techniques
@ eTech_htc
high throughput cDNA
@ eTech_both
concept transl. w/ partial pept. seq.
@ eTech_targeted
targeted locus sets/studies
@ eTech_seq_pept_homol
sequenced peptide, ordered by homology
@ eTech_composite_wgs_htgs
composite of WGS and HTGS
@ eTech_sts
Sequence Tagged Site.
@ eTech_htgs_3
finished High Throughput sequence
@ eTech_seq_pept_overlap
sequenced peptide, ordered by overlap
@ eTech_htgs_1
unordered High Throughput sequence contig
@ eTech_concept_trans
conceptual translation
@ eTech_tsa
transcriptome shotgun assembly
@ eTech_standard
standard sequencing
@ eTech_wgs
whole genome shotgun sequencing
@ eTech_seq_pept
peptide was sequenced
@ eTech_survey
one-pass genomic sequence
@ eTech_barcode
barcode of life project
@ eTech_htgs_0
single genomic reads for coordination
@ eTech_fli_cdna
full length insert cDNA
@ eTech_est
Expressed Sequence Tag.
@ eTech_concept_trans_a
conceptual transl. supplied by author
@ eTech_genemap
from genetic mapping techniques
@ e_not_set
No variant selected.
@ e_Ncbipna
nucleic acid probabilities
@ e_Ncbieaa
extended ASCII 1 letter aa codes
@ e_Ncbistdaa
consecutive codes for std aas
@ e_Ncbi2na
2 bit nucleic acid code
@ e_Iupacna
IUPAC 1 letter nuc acid code.
@ e_Ncbipaa
amino acid probabilities
@ e_Ncbi8na
8 bit extended nucleic acid code
@ e_Ncbi4na
4 bit nucleic acid code
@ e_Iupacaa
IUPAC 1 letter amino acid code.
@ e_Ncbi8aa
8 bit extended amino acid codes
@ eBiomol_pre_RNA
precursor RNA of any sort really
@ eBiomol_cRNA
viral RNA genome copy intermediate
@ eBiomol_snoRNA
small nucleolar RNA
@ eBiomol_transcribed_RNA
transcribed RNA other than existing classes
@ eBiomol_other_genetic
other genetic material
@ eGIBB_mod_no_right
missing right end (3' or COOH)
@ eGIBB_mod_mitochondrial
@ eGIBB_mod_no_left
missing left end (5' for na, NH2 for aa)
@ e_Embl
EMBL specific information.
@ e_Org
if all from one organism
@ e_User
user defined object
@ e_Update_date
date of last update
@ e_Pub
a reference to the publication
@ e_Pir
PIR specific info.
@ e_Genbank
GenBank specific info.
@ e_Prf
PRF specific information.
@ e_Mol_type
type of molecule
@ e_Sp
SWISSPROT specific info.
@ e_Comment
a more extensive comment
@ e_Method
sequencing method
@ e_Molinfo
info on the molecule and techniques
@ e_Create_date
date entry first created/released
@ e_Title
a title for this sequence
@ e_Pdb
PDB specific information.
@ e_Name
a name for this sequence
@ e_Source
source of materials, includes Org-ref
@ eType_clone
Deprecated. Used only for AGP 1.1.
@ eType_fragment
Deprecated. Used only for AGP 1.1.
@ eMol_not_set
> cdna = rna
@ eMol_na
just a nucleic acid
@ eStrand_ss
single strand
@ e_Literal
a piece of sequence
@ e_Loc
point to a sequence
unsigned int
A callback function used to compare two keys in a database.
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
if(yy_accept[yy_current_state])
static void text(MDB_val *v)
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
const CharType(& source)[N]
Miscellaneous common-use basic types and functionality.
Defines: CTimeFormat - storage class for time format.
Int4 delta(size_t dimension_, const Int4 *score_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
double f(double x_, const double &y_)
double df(double x_, const double &y_)
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
static const char * prefix[]
#define FOR_EACH_PUB_ON_PUBDESC(Itr, Var)
FOR_EACH_PUB_ON_PUBDESC EDIT_EACH_PUB_ON_PUBDESC.
#define FOR_EACH_DESCRIPTOR_ON_BIOSEQ
#define FOR_EACH_ANNOT_ON_BIOSEQ
#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.
#define IF_EXISTS_CLOSEST_BIOSOURCE(Cref, Var, Lvl)
IF_EXISTS_CLOSEST_BIOSOURCE.
#define FOR_EACH_KEYWORD_ON_GENBANKBLOCK(Itr, Var)
FOR_EACH_KEYWORD_ON_GENBANKBLOCK EDIT_EACH_KEYWORD_ON_GENBANKBLOCK.
CSubSource::TSubtype TSUBSOURCE_SUBTYPE
#define NCBI_GENOME(Type)
@NAME Convenience macros for NCBI objects
#define FOR_EACH_SYNONYM_ON_GENEREF(Itr, Var)
FOR_EACH_SYNONYM_ON_GENEREF EDIT_EACH_SYNONYM_ON_GENEREF.
#define NCBI_ORGMOD(Type)
COrgMod definitions.
#define FOR_EACH_GBQUAL_ON_SEQFEAT(Itr, Var)
FOR_EACH_GBQUAL_ON_SEQFEAT EDIT_EACH_GBQUAL_ON_SEQFEAT.
#define FOR_EACH_SUBSOURCE_ON_BIOSOURCE(Itr, Var)
FOR_EACH_SUBSOURCE_ON_BIOSOURCE EDIT_EACH_SUBSOURCE_ON_BIOSOURCE.
COrgMod::TSubtype TORGMOD_SUBTYPE
#define FOR_EACH_SEQFEATXREF_ON_SEQFEAT(Itr, Var)
FOR_EACH_SEQFEATXREF_ON_SEQFEAT EDIT_EACH_SEQFEATXREF_ON_SEQFEAT.
#define FOR_EACH_DBXREF_ON_FEATURE
#define NCBI_SEQID(Type)
@NAME Convenience macros for NCBI objects
#define FOR_EACH_SEQENTRY_ON_SEQSET(Itr, Var)
FOR_EACH_SEQENTRY_ON_SEQSET EDIT_EACH_SEQENTRY_ON_SEQSET.
#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)
FIELD_IS_SET_AND_IS base macro.
#define FOR_EACH_STRING_IN_LIST(Itr, Var)
FOR_EACH_STRING_IN_LIST EDIT_EACH_STRING_IN_LIST.
#define RAW_FIELD_IS_EMPTY_OR_UNSET(Var, Fld)
RAW_FIELD_IS_EMPTY_OR_UNSET macro.
#define GET_FIELD(Var, Fld)
GET_FIELD base macro.
#define FOR_EACH_CHAR_IN_STRING(Itr, Var)
FOR_EACH_CHAR_IN_STRING EDIT_EACH_CHAR_IN_STRING.
bool seq_mac_is_unique(Iterator iter1, Iterator iter2, Predicate pred)
#define BEGIN_COMMA_END(container)
static const char * str(char *buf, int n)
CSeqFeatData::ESubtype feat_subtype
bool operator()(const CTempString &lhs, const CTempString &rhs) const
bool operator()(const CTempString &lhs, const CTempString &rhs) const
Selector used in CSeqMap methods returning iterators.
map< string, string > TViralMap
bool HasExcludedAnnotation(const CSeq_loc &loc, CBioseq_Handle far_bsh)
static bool s_NotPeptideException(const CSeq_feat &curr, const CSeq_feat &prev)
static char CheckForBadFileIDSeqIdChars(const string &id)
bool s_ContainedIn(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
bool s_FieldHasLabel(const CUser_field &field, const string &label)
bool s_AfterIsGapORN(TSeqPos pos, TSeqPos after, TSeqPos len, const CSeqVector &vec)
bool s_CheckIntervals(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
MAKE_CONST_MAP(kViralStrandMap, string, string, { {"root", "dsDNA"}, {"Alphasatellitidae", "ssDNA"}, {"Anelloviridae", "ssDNA(-)"}, {"Bacilladnaviridae", "ssDNA"}, {"Bidnaviridae", "ssDNA"}, {"Circoviridae", "ssDNA(+/-)"}, {"Geminiviridae", "ssDNA(+/-)"}, {"Genomoviridae", "ssDNA"}, {"Hepadnaviridae", "dsDNA-RT"}, {"Inoviridae", "ssDNA(+)"}, {"Microviridae", "ssDNA(+)"}, {"Nanoviridae", "ssDNA(+)"}, {"Ortervirales", "ssRNA-RT"}, {"Caulimoviridae", "dsDNA-RT"}, {"Parvoviridae", "ssDNA(+/-)"}, {"Alphapleolipovirus", "dsDNA; ssDNA"}, {"Riboviria", "RNA"}, {"Albetovirus", "ssRNA(+)"}, {"Alphatetraviridae", "ssRNA(+)"}, {"Alvernaviridae", "ssRNA(+)"}, {"Amalgaviridae", "dsRNA"}, {"Astroviridae", "ssRNA(+)"}, {"Aumaivirus", "ssRNA(+)"}, {"Avsunviroidae", "ssRNA"}, {"Barnaviridae", "ssRNA(+)"}, {"Benyviridae", "ssRNA(+)"}, {"Birnaviridae", "dsRNA"}, {"Botourmiaviridae", "ssRNA(+)"}, {"Botybirnavirus", "dsRNA"}, {"Bromoviridae", "ssRNA(+)"}, {"Caliciviridae", "ssRNA(+)"}, {"Carmotetraviridae", "ssRNA(+)"}, {"Chrysoviridae", "dsRNA"}, {"Closteroviridae", "ssRNA(+)"}, {"Cystoviridae", "dsRNA"}, {"Deltavirus", "ssRNA(-)"}, {"dsRNA viruses", "dsRNA"}, {"Endornaviridae", "dsRNA"}, {"Flaviviridae", "ssRNA(+)"}, {"Hepeviridae", "ssRNA(+)"}, {"Hypoviridae", "ssRNA(+)"}, {"Idaeovirus", "ssRNA(+)"}, {"Kitaviridae", "ssRNA(+)"}, {"Leviviridae", "ssRNA(+)"}, {"Luteoviridae", "ssRNA(+)"}, {"Matonaviridae", "ssRNA(+)"}, {"Megabirnaviridae", "dsRNA"}, {"Narnaviridae", "ssRNA(+)"}, {"Haploviricotina", "ssRNA(-)"}, {"Arenaviridae", "ssRNA(+/-)"}, {"Coguvirus", "ssRNA(-)"}, {"Cruliviridae", "ssRNA(-)"}, {"Fimoviridae", "ssRNA(-)"}, {"Hantaviridae", "ssRNA(-)"}, {"Leishbuviridae", "ssRNA(-)"}, {"Mypoviridae", "ssRNA(-)"}, {"Nairoviridae", "ssRNA(-)"}, {"Peribunyaviridae", "ssRNA(-)"}, {"Phasmaviridae", "ssRNA(-)"}, {"Banyangvirus", "ssRNA(+/-)"}, {"Beidivirus", "ssRNA(-)"}, {"Goukovirus", "ssRNA(-)"}, {"Horwuvirus", "ssRNA(-)"}, {"Hudivirus", "ssRNA(-)"}, {"Hudovirus", "ssRNA(-)"}, 
{"Kabutovirus", "ssRNA(-)"}, {"Laulavirus", "ssRNA(-)"}, {"Mobuvirus", "ssRNA(-)"}, {"Phasivirus", "ssRNA(-)"}, {"Phlebovirus", "ssRNA(+/-)"}, {"Pidchovirus", "ssRNA(-)"}, {"Tenuivirus", "ssRNA(-)"}, {"Wenrivirus", "ssRNA(-)"}, {"Wubeivirus", "ssRNA(-)"}, {"Tospoviridae", "ssRNA(+/-)"}, {"Wupedeviridae", "ssRNA(-)"}, {"Insthoviricetes", "ssRNA(-)"}, {"Nidovirales", "ssRNA(+)"}, {"Nodaviridae", "ssRNA(+)"}, {"Papanivirus", "ssRNA(+)"}, {"Partitiviridae", "dsRNA"}, {"Permutotetraviridae", "ssRNA(+)"}, {"Picobirnaviridae", "dsRNA"}, {"Picornavirales", "ssRNA(+)"}, {"Pospiviroidae", "ssRNA"}, {"Potyviridae", "ssRNA(+)"}, {"Quadriviridae", "dsRNA"}, {"Reoviridae", "dsRNA"}, {"Sarthroviridae", "ssRNA(+)"}, {"Sinaivirus", "ssRNA(+)"}, {"Solemoviridae", "ssRNA(+)"}, {"Solinviviridae", "ssRNA(+)"}, {"Togaviridae", "ssRNA(+)"}, {"Tombusviridae", "ssRNA(+)"}, {"Totiviridae", "dsRNA"}, {"Tymovirales", "ssRNA(+)"}, {"Virgaviridae", "ssRNA(+)"}, {"Virtovirus", "ssRNA(+)"}, {"ssRNA viruses", "ssRNA"}, {"unclassified ssRNA viruses", "ssRNA"}, {"unclassified ssRNA negative-strand viruses", "ssRNA(-)"}, {"unclassified ssRNA positive-strand viruses", "ssRNA(+)"}, {"unclassified viroids", "ssRNA"}, {"DNA satellites", "DNA"}, {"RNA satellites", "RNA"}, {"Smacoviridae", "ssDNA"}, {"Spiraviridae", "ssDNA(+)"}, {"Tolecusatellitidae", "ssDNA"}, {"unclassified viruses", "unknown"}, {"unclassified DNA viruses", "DNA"}, {"unclassified archaeal dsDNA viruses", "dsDNA"}, {"unclassified dsDNA phages", "dsDNA"}, {"unclassified dsDNA viruses", "dsDNA"}, {"unclassified ssDNA bacterial viruses", "ssDNA"}, {"unclassified ssDNA viruses", "ssDNA"}, {"environmental samples", "unknown"}, })
static TViralMap s_InitializeViralMap()
static bool s_MatchPartialType(const CSeq_loc &loc1, const CSeq_loc &loc2, unsigned int partial_type)
@ e_RnaPosition_MIDDLE_RIBOSOMAL_SUBUNIT
@ e_RnaPosition_INTERNAL_SPACER_X
@ e_RnaPosition_LEFT_RIBOSOMAL_SUBUNIT
@ e_RnaPosition_INTERNAL_SPACER_2
@ e_RnaPosition_RIGHT_RIBOSOMAL_SUBUNIT
@ e_RnaPosition_INTERNAL_SPACER_1
static bool s_SubsequentIntron(CFeat_CI feat_ci_dup, Int4 start, Int4 stop, Int4 max)
#define FOR_EACH_SEQID_ON_BIOSEQ_HANDLE(Itr, Var)
static bool s_GetFlankingGapTypes(const CSeq_inst &inst, CSeq_gap::TType &fst, CSeq_gap::TType &lst)
static bool s_SeqIdMatch(const CConstRef< CSeq_id > &q1, const CConstRef< CSeq_id > &q2)
bool x_IsPseudo(const CGene_ref &ref)
static bool s_LocSortCompare(const CConstRef< CSeq_loc > &q1, const CConstRef< CSeq_loc > &q2)
static int CountNs(const CSeq_data &seq_data, TSeqPos len)
bool s_BeforeIsGapOrN(TSeqPos pos, TSeqPos before, const CSeqVector &vec)
static bool x_BadCDSinVDJC(const CSeq_loc &cdsloc, const CSeq_loc &vdjcloc, CScope *scope)
static bool x_FeatIsVDJC(const CSeq_feat &ft)
static int s_MaxNsInSeqLitForTech(CMolInfo::TTech tech)
unsigned int s_IdXrefsNotReciprocal(const CSeq_feat &cds, const CSeq_feat &mrna)
bool s_IsCDDFeat(const CMappedFeat &feat)
static EDiagSev GetBioseqEndWarning(const CBioseq &seq, bool is_circular, EBioseqEndIsType end_is_char)
bool s_CheckPosNOrGap(TSeqPos pos, const CSeqVector &vec)
bool s_DbtagEqual(const CRef< CDbtag > &dbt1, const CRef< CDbtag > &dbt2)
static bool x_FeatIsCDS(const CSeq_feat &ft)
bool s_HasGI(const CBioseq &seq)
bool s_AfterIsGap(TSeqPos pos, TSeqPos after, TSeqPos len, const CSeqVector &vec)
static optional< int > s_MaxSeqStretchIfLessThanThreshold(const CSeqVector &vec, int threshold)
bool HasUnverified(CBioseq_Handle bsh)
static bool s_OrgModEqual(const CRef< COrgMod > &om1, const CRef< COrgMod > &om2)
string s_GetMrnaProductString(const CSeq_feat &mrna)
static bool s_SubsourceEquivalent(const CRef< CSubSource > &st1, const CRef< CSubSource > &st2)
bool x_HasNamedQual(const CSeq_feat &feat, const string &qual)
static char CheckForBadSeqIdChars(const string &id)
static string s_GetKeywordForStructuredComment(const CUser_object &obj)
TGi GetGIForSeqId(const CSeq_id &id, CScope &scope)
bool StrandsMatch(ENa_strand s1, ENa_strand s2)
static CBioseq_Handle s_GetParent(const CBioseq_Handle &part)
static ERnaPosition s_RnaPosition(const CSeq_feat &feat)
bool s_AreAdjacent(ERnaPosition pos1, ERnaPosition pos2)
bool lists_match(Iterator iter1, Iterator iter1_stop, Iterator iter2, Iterator iter2_stop, Predicate pred)
static bool s_IsConWithGaps(const CBioseq &seq)
static bool s_BiosrcFullLengthIsOk(const CBioSource &src)
static bool s_StandaloneProt(const CBioseq_Handle &bsh)
static TSeqPos s_GetDeltaLen(const CDelta_seq &seg, CScope *scope)
bool s_IdXrefsAreReciprocal(const CSeq_feat &cds, const CSeq_feat &mrna)
static bool HasAssemblyOrNullGap(const CBioseq &seq)
static bool s_IsTPAAssemblyOkForBioseq(const CBioseq &seq, bool has_refseq)
bool s_HasTpaUserObject(CBioseq_Handle bsh)
static bool s_OrgrefEquivalent(const COrg_ref &org1, const COrg_ref &org2)
bool s_GeneralTagsMatch(const string &protein_id, const CDbtag &dbtag)
static bool s_WillReportTerminalGap(const CBioseq &seq, CBioseq_Handle bsh)
string s_GetMrnaProteinLink(const CUser_field &field)
static bool s_ReportableCollision(const CGene_ref &g1, const CGene_ref &g2)
static char CheckForBadLocalIdChars(const string &id)
bool s_BeforeIsGap(TSeqPos pos, TSeqPos before, const CSeqVector &vec)
static bool s_IsSkippableDbtag(const CDbtag &dbt)
static void s_MakePubLabelString(const CPubdesc &pd, string &label)
static void s_GetGeneTextLabel(const CSeq_feat &feat, string &label)
static vector< int > s_LocationToStartStopPairs(const CSeq_loc &loc)
static void GetDateString(string &out_date_str, const CDate &date)
static bool s_SeqIdCompare(const CConstRef< CSeq_id > &q1, const CConstRef< CSeq_id > &q2)
static bool s_IsSwissProt(const CBioseq &seq)
bool s_FieldHasNonBlankValue(const CUser_field &field)
static string linkEvStrings[]
static bool s_IsUnspecified(const CSeq_gap &gap)
static bool s_SuppressMultipleEquivBioSources(const CBioSource &src)
bool s_OverlapOrAbut(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
static bool x_IsWgsSecondary(const CBioseq &seq)