82 unsigned char chu = ch;
83 if (chu > 31 && chu < 128) {
107 const string&
str = *it;
109 const char& ch = *c_it;
110 unsigned char chu = ch;
111 if (ch > 127 || (ch < 32 && ch !=
'\t' && ch !=
'\r' && ch !=
'\n')) {
122 switch ( desc.
Which() ) {
125 "Modif descriptor is obsolete", *
m_Ctx, desc);
126 CSeqdesc::TModif::const_iterator it2 = desc.
GetModif().begin();
127 while (it2 != desc.
GetModif().end()) {
137 "MolType descriptor is obsolete", *
m_Ctx, desc);
142 "Method descriptor is obsolete", *
m_Ctx, desc);
170 "Name descriptor needs text",
ctx, desc);
178 "OrgRef descriptor is obsolete", *
m_Ctx, desc);
191 "Region descriptor needs text",
ctx, desc);
231 const string& comment,
236 "Comment may refer to reference by serial number - "
237 "attach reference specific comments to the reference "
238 "REMARK instead.", *
m_Ctx, desc);
242 "Comment descriptor needs text", *
m_Ctx, desc);
244 if (
NStr::Find (comment,
"::") != string::npos) {
246 "Comment may be formatted to look like a structured comment.", *
m_Ctx, desc);
256 "Title descriptor needs text",
ctx, desc);
260 "Title descriptor has internal PMID",
ctx, desc);
264 char end = cpy.c_str()[cpy.length() - 1];
266 if (end ==
'.' && cpy.length() > 4) {
267 end = cpy.c_str()[cpy.length() - 2];
274 "Title descriptor ends in bad punctuation",
ctx, desc);
322 if (
NStr::Find(msg,
"is not a valid value") != string::npos) {
324 }
else if (
NStr::Find(msg,
"field is out of order") != string::npos) {
328 }
else if (
NStr::Find(msg,
"is not a valid field name") != string::npos
329 ||
NStr::Find(msg,
"field without label") != string::npos) {
350 if (errors.size() > 0) {
379 if (errors.size() > 0) {
394 "Evidence-For-Name-Assignment",
396 "Genome-Annotation-Data",
397 "Genome-Assembly-Data",
398 "GISAID_EpiFlu(TM)Data",
402 "International Barcode of Life (iBOL)Data",
420 "SymbiotaSpecimenReference",
421 "Taxonomic-Update-Statistics",
441 if ((*it)->IsSetLabel() && (*it)->GetLabel().IsStr() &&
443 (*it)->IsSetData() && (*it)->GetData().IsStr()) {
444 const string&
val = (*it)->GetData().GetStr();
460 const bool report =
false;
481 report_prefix +
" is not a valid value for StructuredCommentPrefix", *
m_Ctx, desc);
499 string report_sfx =
suffix.GetData().GetStr();
500 string sfx = report_sfx;
509 "StructuredCommentSuffix '" + report_sfx +
"' does not match prefix", *
m_Ctx, desc);
527 auto& fields =
tmp.SetData();
547 "Structured Comment user object descriptor is empty", *
m_Ctx, desc);
558 "Structured Comment lacks prefix and/or suffix", *
m_Ctx, desc);
573 const bool isV2Prefix =
574 (
prefix ==
"HumanSTR" && usr.
HasField(
"Bracketed record seq.",
""));
575 const string queryPrefix = isV2Prefix ?
"HumanSTRv2" :
prefix;
588 if (
auto pSuffix = usr.
GetFieldRef(
"StructuredCommentSuffix"); pSuffix) {
600 "Structured Comment lacks prefix and/or suffix", *
m_Ctx, desc);
610 "Assembly Name should not start with 'NCBI' or 'GenBank' in structured comment", *
m_Ctx, desc);
617 "Structured Comment invalid; the field value and/or name are incorrect", *
m_Ctx, desc);
626 unsigned int skip = 4;
628 if (
str.length() < 5)
return true;
630 if (
str [0] !=
'S')
return true;
631 if (
str [1] !=
'A')
return true;
632 if (
str [2] !=
'M')
return true;
633 if (
str [3] !=
'E' &&
str [3] !=
'N' &&
str [3] !=
'D')
return true;
635 if (
str [3] ==
'E') {
642 for (
i = skip;
i <
str.length();
i++) {
644 if (!
isdigit (ch))
return true;
655 if (
str.length() < 9)
return true;
657 if (
str [0] !=
'S')
return true;
658 if (
str [1] !=
'R')
return true;
659 if (
str [2] !=
'S')
return true;
661 for (
i = 3;
i <
str.length();
i++) {
663 if (!
isdigit (ch))
return true;
674 if (
str.length() < 9)
return true;
677 if (ch !=
'S' && ch !=
'D' && ch !=
'E')
return true;
679 if (!
isupper (ch))
return true;
681 if (!
isupper (ch))
return true;
683 for (
i = 3;
i <
str.length();
i++) {
685 if (!
isdigit (ch))
return true;
697 if (
str.length() < 6)
return true;
699 if (
str [0] !=
'P')
return true;
700 if (
str [1] !=
'R')
return true;
701 if (
str [2] !=
'J')
return true;
702 if (
str [3] !=
'E' &&
str [3] !=
'N' &&
str [3] !=
'D')
return true;
703 if (
str [4] !=
'A' &&
str [4] !=
'B')
return true;
705 for (
i = 5;
i <
str.length();
i++) {
707 if (!
isdigit (ch))
return true;
714 "Trace Assembly Archive",
718 "Sequence Read Archive",
735 "DBLink user object descriptor is empty", *
m_Ctx, desc);
746 const auto& fdata = fld.
GetData();
747 if (fdata.IsStrs()) {
750 const string&
str = *st_itr;
754 "Bad BioSample format - " +
str, *
m_Ctx, desc);
757 "Old BioSample format - " +
str, *
m_Ctx, desc);
761 }
else if (fdata.IsStr()) {
762 const string&
str = fdata.GetStr();
766 "Bad BioSample format - " + fdata.GetStr(), *
m_Ctx, desc);
769 "Old BioSample format - " + fdata.GetStr(), *
m_Ctx, desc);
778 const string&
str = *st_itr;
781 "Bad Sequence Read Archive format - " +
str, *
m_Ctx, desc);
789 const string&
str = *st_itr;
792 "Bad BioProject format - " +
str, *
m_Ctx, desc);
800 const string&
str = *st_itr;
803 "Trace Asssembly Archive accession " +
str +
" does not begin with TI prefix", *
m_Ctx, desc);
812 "Bad DBLink capitalization - " + label_str, *
m_Ctx, desc);
828 "User object with no type", *
m_Ctx, desc);
834 "User object with no type", *
m_Ctx, desc);
840 "User object with no data", *
m_Ctx, desc);
844 bool has_ref_track_status =
false;
846 if ( (*field)->CanGetLabel() ) {
847 const CObject_id& obj_id = (*field)->GetLabel();
848 if ( !obj_id.
IsStr() ) {
852 has_ref_track_status =
true;
853 if ((*field)->IsSetData() && (*field)->GetData().IsStr()) {
856 "RefGeneTracking object has illegal Status '"
857 + (*field)->GetData().GetStr() +
"'",
864 if ( !has_ref_track_status ) {
866 "RefGeneTracking object needs to have Status set", *
m_Ctx, desc);
883 "Molinfo-biomol unknown used", *
m_Ctx, desc);
927 "Biomol \"" + p +
"\" is not appropriate for sequences that use the TSA technique.",
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eErr_SEQ_DESCR_DBLinkBadBioSample
@ eErr_SEQ_DESCR_DBLinkBadCapitalization
@ eErr_SEQ_DESCR_BadStrucCommInvalidSuffix
@ eErr_SEQ_DESCR_BadAssemblyName
@ eErr_SEQ_DESCR_StrucCommMissingUserObject
@ eErr_SEQ_DESCR_BadStrucCommInvalidFieldName
@ eErr_SEQ_DESCR_BadStrucCommInvalidFieldValue
@ eErr_SEQ_DESCR_SerialInComment
@ eErr_SEQ_DESCR_BadPunctuation
@ eErr_SEQ_DESCR_RefGeneTrackingIllegalStatus
@ eErr_SEQ_DESCR_TitleMissingText
@ eErr_SEQ_DESCR_InvalidForType
@ eErr_SEQ_DESCR_MoltypeUnknown
@ eErr_GENERIC_NonAsciiAsn
@ eErr_SEQ_DESCR_TitleHasPMID
@ eErr_SEQ_FEAT_RefSeqInText
@ eErr_SEQ_DESCR_BadStrucCommMultipleFields
@ eErr_SEQ_DESCR_StrucCommMissingPrefixOrSuffix
@ eErr_SEQ_DESCR_WrongBiomolForTSA
@ eErr_SEQ_DESCR_BadStrucCommMissingField
@ eErr_SEQ_DESCR_DBLinkBadFormat
@ eErr_SEQ_DESCR_UserObjectNoType
@ eErr_SEQ_DESCR_DBLinkMissingUserObject
@ eErr_SEQ_DESCR_BadStrucCommFieldOutOfOrder
@ eErr_SEQ_DESCR_BadStrucCommInvalidPrefix
@ eErr_SEQ_DESCR_DBLinkBadBioProject
@ eErr_SEQ_DESCR_DBLinkBadSRAaccession
@ eErr_SEQ_DESCR_MissingText
@ eErr_SEQ_DESCR_FakeStructuredComment
@ eErr_SEQ_DESCR_UserObjectNoData
@ eErr_SEQ_DESCR_RegionMissingText
@ eErr_SEQ_DESCR_RefGeneTrackingWithoutStatus
@ eErr_SEQ_DESCR_CommentMissingText
int Compare(const CObject_id &oid2) const
Template class for iteration on objects of class C (non-modifiable version)
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
bool IsRefGeneTracking() const
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
bool IsStructuredComment() const
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
static bool IsWGSMaster(const CBioseq &seq, CScope &scope)
bool x_ValidateStructuredComment(const CUser_object &usr, const CSeqdesc &desc, bool report=true)
CConstRef< CSeq_entry > m_Ctx
void ValidateSeqDesc(const CSeqdesc &desc, const CSeq_entry &ctx)
Validate descriptors as stand alone objects (no context)
void ValidateTitle(const string &title, const CSeqdesc &desc, const CSeq_entry &ctx)
void x_ReportStructuredCommentErrors(const CSeqdesc &desc, const CComment_rule::TErrorList &errors)
void ValidateMolInfo(const CMolInfo &minfo, const CSeqdesc &desc)
~CValidError_desc() override
bool x_ValidateStructuredCommentPrefix(const string &prefix, const CSeqdesc &desc, bool report)
void ValidateUser(const CUser_object &usr, const CSeqdesc &desc)
bool ValidateStructuredCommentGeneric(const CUser_object &usr, const CSeqdesc &desc, bool report)
bool IsValidStructuredComment(const CSeqdesc &desc)
void ValidateComment(const string &comment, const CSeqdesc &desc)
bool ValidateStructuredCommentInternal(const CSeqdesc &desc, bool report=true)
bool ValidateDblink(const CUser_object &usr, const CSeqdesc &desc, bool report=true)
bool x_ValidateStructuredCommentUsingRule(const CComment_rule &rule, const CSeqdesc &desc, bool report)
bool x_ValidateStructuredCommentSuffix(const string &prefix, const CUser_field &suffix, const CSeqdesc &desc, bool report)
bool ValidateStructuredComment(const CUser_object &usr, const CSeqdesc &desc, const CComment_rule &rule, bool report)
bool IsSerialNumberInComment(const string &comment)
void PostBadDateError(EDiagSev sv, const string &msg, int flags, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void ValidateBioSource(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void ValidatePubdesc(const CPubdesc &pub, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
static bool is_valid(const char *num, int type, CONV_RESULT *cr)
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static const char * str(char *buf, int n)
#define FOR_EACH_USERFIELD_ON_USEROBJECT(Itr, Var)
FOR_EACH_USERFIELD_ON_USEROBJECT EDIT_EACH_USERFIELD_ON_USEROBJECT.
constexpr size_t ArraySize(const Element(&)[Size])
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Fatal
Fatal error – guarantees exit (or abort)
@ eDiag_Critical
Critical error message.
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
@ eNocase
Case insensitive compare.
bool IsSetData(void) const
The object itself. Check if a value has been assigned to Data data member.
bool IsStr(void) const
Check if variant Str is selected.
bool CanGetType(void) const
Check if it is safe to call GetType method.
bool IsSetType(void) const
Type of object within class. Check if a value has been assigned to Type data member.
bool IsStrs(void) const
Check if variant Strs is selected.
const TStrs & GetStrs(void) const
Get the variant data.
bool IsId(void) const
Check if variant Id is selected.
const TData & GetData(void) const
Get the Data member data.
bool IsSetLabel(void) const
Field label. Check if a value has been assigned to Label data member.
const TStr & GetStr(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
vector< CStringUTF8 > TStrs
vector< CRef< CUser_field > > TData
const TUser & GetUser(void) const
Get the variant data.
const TUpdate_date & GetUpdate_date(void) const
Get the variant data.
const TTitle & GetTitle(void) const
Get the variant data.
const TSource & GetSource(void) const
Get the variant data.
const TPub & GetPub(void) const
Get the variant data.
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
TTech GetTech(void) const
Get the Tech member data.
TBiomol GetBiomol(void) const
Get the Biomol member data.
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
const TModif & GetModif(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
const TCreate_date & GetCreate_date(void) const
Get the variant data.
const TComment & GetComment(void) const
Get the variant data.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
const TName & GetName(void) const
Get the variant data.
const TRegion & GetRegion(void) const
Get the variant data.
bool IsUser(void) const
Check if variant User is selected.
@ eTech_tsa
transcriptome shotgun assembly
@ eBiomol_pre_RNA
precursor RNA of any sort really
@ eBiomol_cRNA
viral RNA genome copy intermediate
@ eBiomol_snoRNA
small nucleolar RNA
@ eBiomol_genomic_mRNA
reported a mix of genomic and cdna sequence
@ eBiomol_other_genetic
other genetic material
@ e_Embl
EMBL specific information.
@ e_Het
cofactor, etc associated but not bound
@ e_Org
if all from one organism
@ e_Num
a numbering system
@ e_User
user defined object
@ e_Update_date
date of last update
@ e_Pub
a reference to the publication
@ e_Pir
PIR specific info.
@ e_Genbank
GenBank specific info.
@ e_Prf
PRF specific information.
@ e_Mol_type
type of molecule
@ e_Sp
SWISSPROT specific info.
@ e_Dbxref
xref to other databases
@ e_Comment
a more extensive comment
@ e_Method
sequencing method
@ e_Region
overall region (globin locus)
@ e_Molinfo
info on the molecule and techniques
@ e_Maploc
map location of this sequence
@ e_Create_date
date entry first created/released
@ e_Title
a title for this sequence
@ e_Pdb
PDB specific information.
@ e_not_set
No variant selected.
@ e_Name
a name for this sequence
@ e_Source
source of materials, includes Org-ref
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
static const char * suffix[]
static const char * prefix[]
#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)
FIELD_IS_SET_AND_IS base macro.
#define GET_FIELD(Var, Fld)
GET_FIELD base macro.
#define FOR_EACH_CHAR_IN_STRING(Itr, Var)
FOR_EACH_CHAR_IN_STRING EDIT_EACH_CHAR_IN_STRING.
static bool x_IsBadBioSampleFormat(const string &str)
static string s_AsciiString(const string &src)
EErrType s_GetErrTypeFromString(const string &msg)
static bool x_IsBadSRAFormat(const string &str)
static bool s_IsAllowedPrefix(const string &val)
static string s_legalDblinkNames[]
bool HasBadGenomeAssemblyName(const CUser_object &usr)
static string s_OfficialPrefixList[]
bool s_UserFieldCompare(const CRef< CUser_field > &f1, const CRef< CUser_field > &f2)
static EDiagSev s_ErrorLevelFromFieldRuleSev(CField_rule::TSeverity severity)
static bool x_IsBadBioProjectFormat(const string &str)
static bool x_IsNotAltBioSampleFormat(const string &str)