34 #ifndef VALIDATOR___VALIDERROR_BIOSEQ__HPP
35 #define VALIDATOR___VALIDERROR_BIOSEQ__HPP
119 void ValidateBioseq(
const CBioseq& seq);
120 void ValidateSeqIds(
const CBioseq& seq);
121 void ValidateSeqId(
const CSeq_id&
id,
const CBioseq&
ctx,
bool longer_general =
false);
122 void ValidateInst(
const CBioseq& seq);
123 void ValidateBioseqContext(
const CBioseq& seq);
124 void ValidateHistory(
const CBioseq& seq);
126 bool GetTSANStretchErrors(
const CBioseq& seq);
127 bool GetTSAConflictingBiomolTechErrors(
const CBioseq& seq);
129 static bool IsSelfReferential(
const CBioseq& seq);
133 static bool IsMaster(
const CBioseq& seq);
135 static bool IsWGSMaster(
const CSeq_entry& entry);
136 static bool IsWGS(
const CBioseq& seq);
141 static bool IsTSAAccession(
const CSeq_id&
id);
142 static bool IsTSAAccession(
const CBioseq& seq);
144 static bool IsEmblOrDdbj(
const CBioseq& seq);
145 static bool IsGenbank(
const CBioseq& seq);
146 static bool IsRefSeq(
const CBioseq& seq);
147 static bool IsPdb(
const CBioseq& seq);
166 void ValidateSeqLen(
const CBioseq& seq);
167 void ValidateSegRef(
const CBioseq& seq);
168 void ValidateDelta(
const CBioseq& seq);
173 void ValidateSeqParts(
const CBioseq& seq);
174 void x_ValidateTitle(
const CBioseq& seq);
175 void x_ValidateBarcode(
const CBioseq& seq);
176 void ValidateRawConst(
const CBioseq& seq);
177 void x_CalculateNsStretchAndTotal(
const CSeqVector& seqvec,
TSeqPos& num_ns,
TSeqPos& max_stretch,
bool& n5,
bool& n3);
178 void ValidateNsAndGaps(
const CBioseq& seq);
179 void GapByGapInst (
const CBioseq& seq);
180 void ReportBadAssemblyGap (
const CBioseq& seq);
181 static bool HasBadWGSGap(
const CBioseq& seq);
182 void ReportBadWGSGap(
const CBioseq& seq);
183 void ReportBadTSAGap(
const CBioseq& seq);
184 void ReportBadGenomeGap(
const CBioseq& seq);
188 void ValidateBadGeneOverlap(
const CSeq_feat& feat);
189 void x_ReportGeneOverlapError(
const CSeq_feat& feat,
const string& gene_label);
190 void x_ReportImproperPartial(
const CSeq_feat& feat);
191 void x_ReportInternalPartial(
const CSeq_feat& feat);
192 bool x_PartialAdjacentToIntron(
const CSeq_loc& loc);
193 void ValidateFeatPartialInContext (
const CMappedFeat& feat,
bool is_complete);
194 void x_ReportStartStopPartialProblem(
int partial_type,
bool at_splice_or_gap,
bool abuts_n,
const CSeq_feat& feat);
196 bool x_IsPartialAtSpliceSiteOrGap (
const CSeq_loc& loc,
unsigned int tag,
bool& bad_seq,
bool& is_gap,
bool& abuts_n);
197 bool x_MatchesOverlappingFeaturePartial (
const CMappedFeat& feat,
unsigned int partial_type);
199 void ValidateSeqFeatContext(
const CBioseq& seq,
bool is_complete);
205 void ValidateDupOrOverlapFeats(
const CBioseq& seq);
206 void ValidateTwintrons(
const CBioseq& seq);
207 void ValidateCollidingGenes(
const CBioseq& seq);
208 void ValidateCompleteGenome(
const CBioseq& seq);
209 void x_CompareStrings(
const TStrFeatMap& str_feat_map,
const string&
type);
211 void x_ReportSuspiciousUseOfComplete(
const CBioseq& seq,
EDiagSev sev);
213 bool x_IsRangeGap (
const CBioseq_Handle& seq,
int start,
int stop);
215 void x_ValidateGeneCDSmRNACounts();
221 void x_CheckOrigProteinAndTranscriptIds(
const CCdsMatchInfo& cds_match);
222 void x_TranscriptIDsMatch(
const string& protein_id,
const CSeq_feat& cds);
226 void ValidateSeqDescContext(
const CBioseq& seq);
228 void CheckForMultipleStructuredComments(
const CBioseq& seq);
231 void ValidateMolInfoContext(
const CMolInfo& minfo,
int& seq_biomol,
int& tech,
int& completeness,
233 void x_ValidateMolInfoForBioSource(
238 void x_CheckSingleStrandedRNAViruses(
240 const string& lineage,
241 const string& stranded_mol,
250 eStrandedMoltype_unknown = 0,
251 eStrandedMoltype_ssRNA = 1,
252 eStrandedMoltype_dsRNA = 2,
253 eStrandedMoltype_ssDNA = 4,
254 eStrandedMoltype_dsDNA = 8
256 static string s_GetStrandedMolStringFromLineage(
const string& lineage);
258 void x_ReportLineageConflictWithMol(
259 const string& lineage,
260 const string& stranded_mol,
268 void ValidateUpdateDateContext(
const CDate& update,
const CDate& create,
270 void ValidateOrgContext(
const COrg_ref& this_org,
272 void ReportModifInconsistentError (
int new_mod,
int& old_mod,
const CSeqdesc& desc,
const CSeq_entry&
ctx);
273 void ValidateModifDescriptors (
const CBioseq& seq);
274 void ValidateMoltypeDescriptors (
const CBioseq& seq);
276 void ValidateSecondaryAccConflict(
const string& primary_acc,
277 const CBioseq& seq,
int choice);
278 void ValidateIDSetAgainstDb(
const CBioseq& seq);
282 void x_ReportDuplicatePubLabels (
const CBioseq& seq,
const vector<CTempString>& labels);
283 void x_ValidateMultiplePubs(
286 void CheckForPubOnBioseq(
const CBioseq& seq);
289 static size_t x_BadMetazoanMitochondrialLength(
const CBioSource& src,
const CSeq_inst& inst);
290 void CheckForMolinfoOnBioseq(
const CBioseq& seq);
291 void CheckTpaHistory(
const CBioseq& seq);
293 size_t GetDataLen(
const CSeq_inst& inst);
296 size_t NumOfIntervals(
const CSeq_loc& loc);
300 bool SuppressTrailingXMsg(
const CBioseq& seq);
302 bool IsHistAssemblyMissing(
const CBioseq& seq);
303 bool IsFlybaseDbxrefs(
const TDbtags& dbxrefs);
304 bool GraphsOnBioseq()
const;
305 bool IsSynthetic()
const;
307 bool x_IsActiveFin()
const;
308 bool x_IsMicroRNA()
const;
309 bool x_IsDeltaLitOnly(
const CSeq_inst& inst)
const;
310 bool x_ShowBioProjectWarning(
const CBioseq& seq);
312 static bool x_HasCitSub(
const CPub_equiv& pub);
313 static bool x_HasCitSub(
const CPub& pub);
315 void ValidateCDSUTR();
319 void x_CheckGeneralIDs(
const CBioseq& seq);
321 static bool x_HasGap(
const CBioseq& seq);
@Auth_list.hpp User-defined methods of the data storage class.
@Imp_feat.hpp User-defined methods of the data storage class.
@RNA_ref.hpp User-defined methods of the data storage class.
namespace ncbi::objects::
Base class for all serializable objects.
CBioseq_Handle m_CurrentHandle
void ValidateMolTypeContext(const EGIBB_mol &gibb, EGIBB_mol &seq_biomol, const CBioseq &seq, const CSeqdesc &desc)
CValidError_descr m_DescrValidator
bool m_report_missing_chromosome
CValidError_annot m_AnnotValidator
bool m_splicing_not_expected
bool x_IsArtificial(const CBioseq &seq) const
const CCacheImpl::TFeatValue * m_AllFeatIt
vector< CMappedFeat > TMappedFeatVec
CValidError_feat m_FeatValidator
multimap< string, const CSeq_feat *, PNocase > TStrFeatMap
const CCacheImpl::TFeatValue * m_GeneIt
std::vector< CMappedFeat > TFeatValue
Include a standard set of the NCBI C++ Toolkit most basic headers.
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
unsigned int TSeqPos
Type for sequence locations and lengths.
EDiagSev
Severity level for the posted diagnostics.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
#define NCBI_VALIDATOR_EXPORT
EMol
molecule class in living organism
EGIBB_mol
type of molecule represented
const CharType(& source)[N]
CValidator::CCache CCache
map< const CSeq_feat *, CRef< CMrnaMatchInfo > > TmRNAList
static bool IsWGSAccession(const string &acc, const CTextseq_id &id, TAllowSeqType allow_seq_type)