53 using namespace sequence;
96 const CGene_ref& gene = m_Feat.GetData().GetGene();
98 m_Imp.IncrementGeneCount();
108 "There is a gene feature where all fields are empty");
112 "Comment has same value as gene locus");
118 for (
auto it : locus_tag) {
119 if (
isspace((
unsigned char)(it)) != 0 ) {
122 "' should be a single word without any spaces");
130 "Gene locus and locus_tag '" + gene.
GetLocus() +
"' match");
132 if (m_Feat.IsSetComment() &&
NStr::EqualCase (m_Feat.GetComment(), locus_tag)) {
134 "Comment has same value as gene locus_tag");
136 if (m_Feat.IsSetQual()) {
137 for (
auto it : m_Feat.GetQual()) {
138 if (it->IsSetQual() &&
NStr::EqualNocase(it->GetQual(),
"old_locus_tag") && it->IsSetVal()) {
141 "old_locus_tag has same value as gene locus_tag");
143 "Gene locus_tag and old_locus_tag '" + locus_tag +
"' match");
145 if (
NStr::Find(it->GetVal(),
",") != string::npos) {
147 "old_locus_tag has comma, multiple old_locus_tags should be split into separate qualifiers");
152 }
else if (m_Imp.DoesAnyGeneHaveLocusTag() &&
155 "Missing gene locus tag");
159 m_Imp.ValidateDbxref(gene.
GetDb(), m_Feat);
162 if (m_Imp.IsRefSeq() && gene.
IsSetSyn()) {
163 for (
auto it : gene.
GetSyn()) {
164 if (sc_BadGeneSyn.find (it.c_str()) != sc_BadGeneSyn.end()) {
166 "Uninformative gene synonym '" + it +
"'");
171 "gene synonym has same value as gene locus");
178 && !m_Imp.IsGpipe()) {
180 "gene description has same value as gene locus");
185 "gene synonym without gene locus or description");
189 ValidateCharactersInField (gene.
GetLocus(),
"Gene locus");
195 "gene locus " + gene.
GetLocus() +
" has SGML");
205 "gene description " + gene.
GetDesc() +
" has SGML");
209 for (
auto it : gene.
GetSyn()) {
212 "gene synonym " + it +
" has SGML");
218 x_ValidateMultiIntervalGene();
227 (!m_Feat.GetData().GetGene().IsSetLocus_tag() ||
NStr::IsBlank(m_Feat.GetData().GetGene().GetLocus_tag()))) {
243 if (
label.empty() ) {
247 for (
auto qual_iter : operon->
GetQual()) {
253 "Operon is same as gene - " + qual.
GetVal());
265 loc->
SetInt().SetFrom(from);
268 loc->
SetInt().SetFrom(to);
269 loc->
SetInt().SetTo(from);
284 mobile_elements.clear();
300 const CSeq_loc& loc = m_Feat.GetLocation();
309 gap_start =
si.GetRange().GetFrom() + 1;
311 gap_start =
si.GetRange().GetTo() + 1;
317 gap_end =
si.GetRange().GetTo();
319 gap_end =
si.GetRange().GetFrom();
340 if (
si.GetRange().GetFrom() != 0) {
353 if (!
si ||
si.GetRange().GetFrom() != 0) {
369 const CSeq_loc& loc = m_Feat.GetLocation();
375 if (m_Feat.IsSetExcept() && m_Feat.IsSetExcept_text()
376 &&
NStr::FindNoCase (m_Feat.GetExcept_text(),
"trans-splicing") != string::npos) {
381 if (x_AllIntervalGapsAreMobileElements()) {
392 }
else if (m_Imp.IsSmallGenomeSet()) {
394 "Multiple interval gene feature in small genome set - "
395 "set trans-splicing exception if appropriate");
398 "Gene feature on non-segmented sequence should not "
399 "have multiple intervals");
401 }
catch (
const exception& e ) {
402 if (
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
404 string(
"Exception while validating multi-interval genes. EXCEPTION: ") +
420 for ( CSeq_loc_CI
si(loc);
si; ++
si ) {
429 if (loc.GetId()->IsOther()) {
440 if ((*it)->IsOther()) {
456 bool completeness =
false;
459 if (diter->GetMolinfo().IsSetTech() && diter->GetMolinfo().GetTech() ==
CMolInfo::eTech_wgs) {
470 if (diter->GetSource().IsSetDivision()
static CRef< CScope > m_Scope
@ eErr_GENERIC_SgmlPresentInText
@ eErr_SEQ_FEAT_InvalidOperonMatchesGene
@ eErr_SEQ_FEAT_ExceptionRequiresLocusTag
@ eErr_SEQ_FEAT_MissingGeneLocusTag
@ eErr_SEQ_FEAT_LocusTagHasSpace
@ eErr_SEQ_FEAT_MultiIntervalGene
@ eErr_INTERNAL_Exception
@ eErr_SEQ_FEAT_OldLocusTagBadFormat
@ eErr_SEQ_FEAT_RedundantFields
@ eErr_SEQ_FEAT_LocusTagProblem
@ eErr_SEQ_FEAT_GeneRefHasNoData
@ eErr_SEQ_FEAT_LocusTagGeneLocusMatch
@ eErr_SEQ_FEAT_UndesiredGeneSynonym
@Gb_qual.hpp User-defined methods of the data storage class.
bool x_AllIntervalGapsAreMobileElements()
void x_ValidateMultiIntervalGene()
void x_ValidateExceptText(const string &text) override
@ eSubtype_mobile_element
virtual void x_ValidateExceptText(const string &text)
Cache various information for one validation run.
CBioseq_Handle GetBioseqHandleFromLocation(CScope *scope, const CSeq_loc &loc, const CTSE_Handle &tse)
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static const char si[8][64]
Public API for finding the gene(s) on a given feature using the same criteria as the flatfile generator.
static bool s_LocIntervalsSpanOrigin(const CSeq_loc &loc, CBioseq_Handle bsh)
static const char *const sc_BadGeneSynText[]
static bool s_IsLocDirSub(const CSeq_loc &loc, const CTSE_Handle &tse, CCacheImpl &cache, CScope *scope)
DEFINE_STATIC_ARRAY_MAP(TBadGeneSynSet, sc_BadGeneSyn, sc_BadGeneSynText)
CStaticArraySet< const char *, PCase_CStr > TBadGeneSynSet
bool s_HasMobileElementForInterval(TSeqPos from, TSeqPos to, CBioseq_Handle bsh)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
string GetLabel(const CSeq_id &id)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
@ fFGL_Content
Include its content if there is any.
bool IsOneBioseq(const CSeq_loc &loc, CScope *scope)
Returns true if all embedded CSeq_ids represent the same CBioseq, else false.
@ eOverlap_Contained
2nd contained within 1st extremes
CConstRef< CSeq_feat > GetOverlappingOperon(const CSeq_loc &loc, CScope &scope)
vector< TFeatScore > TFeatScores
void GetOverlappingFeatures(const CSeq_loc &loc, CSeqFeatData::E_Choice feat_type, CSeqFeatData::ESubtype feat_subtype, EOverlapType overlap_type, TFeatScores &feats, CScope &scope, const TBestFeatOpts opts=0, CGetOverlappingFeaturesPlugin *plugin=NULL)
Find all features overlapping the location.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
TSeqPos GetBioseqLength(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle. Throws an exception if none is available.
CScope & GetScope(void) const
Get scope this handle belongs to.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static const char label[]
bool IsSetSyn(void) const
Synonyms for locus. Check if a value has been assigned to Syn data member.
const TSyn & GetSyn(void) const
Get the Syn member data.
const TDesc & GetDesc(void) const
Get the Desc member data.
bool IsSetLocus_tag(void) const
Systematic gene name (e.g., MI0001, ORF0069). Check if a value has been assigned to Locus_tag data member.
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to Locus data member.
bool IsSetDesc(void) const
Descriptive name. Check if a value has been assigned to Desc data member.
bool IsSetDb(void) const
IDs in other databases. Check if a value has been assigned to Db data member.
bool IsSetAllele(void) const
Official allele designation. Check if a value has been assigned to Allele data member.
const TDb & GetDb(void) const
Get the Db member data.
bool IsSetMaploc(void) const
Descriptive map location. Check if a value has been assigned to Maploc data member.
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
const TLocus & GetLocus(void) const
Get the Locus member data.
const TAllele & GetAllele(void) const
Get the Allele member data.
const TMaploc & GetMaploc(void) const
Get the Maploc member data.
const TVal & GetVal(void) const
Get the Val member data.
bool IsSetQual(void) const
Qualifiers. Check if a value has been assigned to Qual data member.
const TQual & GetQual(void) const
Get the Qual member data.
bool CanGetVal(void) const
Check if it is safe to call GetVal method.
const TQual & GetQual(void) const
Get the Qual member data.
bool CanGetQual(void) const
Check if it is safe to call GetQual method.
ENa_strand
strand of nucleic acid
@ eCompleteness_complete
complete biological entity
@ eTech_wgs
whole genome shotgun sequencing
@ e_Genbank
GenBank specific info.
@ e_Molinfo
info on the molecule and techniques
@ e_Source
source of materials, includes Org-ref
static void text(MDB_val *v)
#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQID_ON_BIOSEQ / EDIT_EACH_SEQID_ON_BIOSEQ.
#define FOR_EACH_KEYWORD_ON_GENBANKBLOCK(Itr, Var)
FOR_EACH_KEYWORD_ON_GENBANKBLOCK / EDIT_EACH_KEYWORD_ON_GENBANKBLOCK.
bool ContainsSgml(const string &str)