30 #ifndef _MISC_DISCREPANCY_DISCREPANCY_CORE_H_
31 #define _MISC_DISCREPANCY_DISCREPANCY_CORE_H_
// Optional pointer to a list of alternative names for a test case.
// DISCREPANCY_CASE1 passes the address of a static initializer_list built from
// its variadic arguments; DISCREPANCY_CASE and DISCREPANCY_CASE0 pass nullptr,
// so readers of this field presumably need to handle a null pointer (confirm).
68 const std::initializer_list<const char*>*
Aliases;
71 template<eTestNames _Name>
96 const string& msg,
const string&
xml,
const string& unit,
size_t count));
98 CDiscrepancyItem(string_view title,
const string& name,
const string& msg,
const string&
xml,
const string& unit,
size_t count)
// Returns the enum identifier of this test, read from the shared case
// properties record (m_props->Name).
208 eTestNames
GetName()
const override {
return m_props->Name; }
// Returns the textual name of this test, read from the shared case
// properties record (m_props->sName).
209 string_view
GetSName()
const override {
return m_props->sName; }
// True when the m_Objs container holds no elements.
214 bool Empty()
const {
return m_Objs.empty(); }
232 template<eTestNames _Name>
236 template<eTestNames _Name>
// List of (reference-counted discrepancy object, string) pairs.
// NOTE(review): what the string component carries is not visible in this
// excerpt -- confirm against the code that fills the list.
300 typedef list<pair<CRef<CDiscrepancyObject>,
string>>
TGenesList;
// Overrides a virtual Summarize(); only the declaration is visible in this
// excerpt.
317 void Summarize()
override;
330 eHasRearranged = 1 << 0,
331 eHasSatFeat = 1 << 1,
332 eHasNonSatFeat = 1 << 2
// Entry points of the context for registering tests and feeding it data.
// Except for Parse(), only declarations are visible here; the notes below
// describe the signatures, not the (unseen) implementations.

// Adds a test selected by its enum identifier.
339 void AddTest(eTestNames name)
override;
// Overload selecting the test by textual name.
341 void AddTest(string_view name)
override;
// Takes a serial object together with a file name.
// NOTE(review): presumably installs `root` as the data that Parse() walks --
// confirm against the implementation.
342 void Push(
const CSerialObject& root,
const string& fname)
override;
// Runs ParseAll() on the root parse node. m_RootNode is dereferenced without
// a check, so it must already be set when this is called.
343 void Parse()
override { ParseAll(*m_RootNode); }
// Takes a single Bioseq as the root object.
345 void ParseObject(
const CBioseq& root);
// Takes an object input stream and its file name; `default_header`
// defaults to kEmptyStr. The meaning of `skip` is not visible here.
349 void ParseStream(
CObjectIStream& stream,
const string& fname,
bool skip,
const string& default_header =
kEmptyStr)
override;
// Takes only a file name.
350 void ParseStrings(
const string& fname)
override;
// Returns an unsigned value; its meaning is not visible in this excerpt.
352 unsigned Summarize()
override;
// Takes the list of objects to fix (non-const reference to a vector of raw
// pointers) and a default header string.
355 void AutofixFile(vector<CDiscrepancyObject*>&fixes,
const string& default_header);
// Takes a reference node and returns a parse-node pointer.
// NOTE(review): presumably nullptr when no node matches -- confirm.
360 CParseNode* FindNode(
const CRefNode& obj);
// Returns, by const reference, the m_Text string of the reference node
// attached to the current parse node (m_CurrentNode->m_Ref->m_Text).
// Neither m_CurrentNode nor its m_Ref is checked before being dereferenced.
368 const string&
CurrentText()
const {
return m_CurrentNode->m_Ref->m_Text; }
// Returns the m_Flags byte stored on the current parse node.
372 unsigned char ReadFlags()
const {
return m_CurrentNode->m_Flags; }
// Returns a non-const reference to the scope held in m_Scope, even though the
// method itself is const: the const_cast removes whatever const qualification
// *m_Scope carries inside a const member function.
375 objects::CScope&
GetScope()
const {
return const_cast<objects::CScope&
>(*m_Scope); }
// Declarations only; none of the bodies are visible in this excerpt.

// Takes a name string; `read` defaults to true.
// NOTE(review): presumably `name` identifies a rules file and `read` controls
// whether it is loaded immediately -- confirm.
378 void SetSuspectRules(
const string& name,
bool read =
true)
override;
// Static predicate over a taxonomy name string.
383 static bool IsUnculturedNonOrganelleName(
const string& taxname);
// Static overload: takes the biosource by reference plus two strings.
384 static bool HasLineage(
const CBioSource& biosrc,
const string& def_lineage,
const string&
type);
// Member overload: takes the biosource by pointer plus one lineage string.
385 bool HasLineage(
const CBioSource* biosrc,
const string& lineage)
const;
// Predicates over a biosource pointer.
// NOTE(review): whether a null `biosrc` is accepted is not visible here.
387 bool IsEukaryotic(
const CBioSource* biosrc)
const;
388 bool IsBacterial(
const CBioSource* biosrc)
const;
// Returns, by const reference, the m_SetBiosources vector stored on the
// current parse node.
396 const vector<CConstRef<CSeqdesc>>&
GetSetBiosources()
const {
return m_CurrentNode->m_SetBiosources; }
// Declarations only; none of the bodies are visible in this excerpt.

// Static: maps an unsigned value to a string.
// NOTE(review): presumably `n` is a genome enum value -- confirm.
398 static string GetGenomeName(
unsigned n);
// Static: derives a string from a feature.
399 static string GetAminoacidName(
const CSeq_feat& feat);
// Predicate over a locus-tag string.
400 bool IsBadLocusTagFormat(
const string& locus_tag)
const;
// Predicate taking no arguments.
401 bool IsRefseq()
const;
// Derives a string from a feature.
405 string GetProdForFeature(
const CSeq_feat& feat);
// NOTE(review): CollectFeature/ClearFeatureList presumably fill and empty the
// m_Feat* vectors exposed by the Feat*() accessors -- confirm.
406 void CollectFeature(
const CSeq_feat& feat);
407 void ClearFeatureList();
// Read-only accessors for the per-category feature vectors.
// Each one returns its backing member by const reference and touches nothing
// else, so they are const-qualified. That makes them callable through a const
// context and matches the other getters of this class that only return
// members (GetBiosources(), GetPubdescs(), GetAuthors()).
// The vectors hold non-owning raw pointers; the features themselves are owned
// elsewhere.
408 const vector<const CSeq_feat*>&
FeatAll() const {
return m_FeatAll; }
409 const vector<const CSeq_feat*>&
FeatGenes() const {
return m_FeatGenes; }
410 const vector<const CSeq_feat*>&
FeatPseudo() const {
return m_FeatPseudo; }
411 const vector<const CSeq_feat*>&
FeatCDS() const {
return m_FeatCDS; }
412 const vector<const CSeq_feat*>&
FeatMRNAs() const {
return m_FeatMRNAs; }
413 const vector<const CSeq_feat*>&
FeatRRNAs() const {
return m_FeatRRNAs; }
414 const vector<const CSeq_feat*>&
FeatTRNAs() const {
return m_FeatTRNAs; }
415 const vector<const CSeq_feat*>&
Feat_RNAs() const {
return m_Feat_RNAs; }
416 const vector<const CSeq_feat*>&
FeatExons() const {
return m_FeatExons; }
417 const vector<const CSeq_feat*>&
FeatIntrons() const {
return m_FeatIntrons; }
418 const vector<const CSeq_feat*>&
FeatMisc() const {
return m_FeatMisc; }
// Declaration only: takes a reference node and returns a reference-node
// pointer. NOTE(review): presumably nullptr when `ref` is not inside any set
// -- confirm against the implementation.
430 CRefNode* ContainingSet(CRefNode& ref);
// Looks up the publication associated with an author list on the current
// parse node. Returns the pointer stored in m_AuthorPubMap for `a`, or
// nullptr when the map has no entry for it.
442 const CPub* AuthPub(const CAuth_list* a) const {
    const auto& pubs_by_author = m_CurrentNode->m_AuthorPubMap;
    const auto found = pubs_by_author.find(a);
    if (found == pubs_by_author.end()) {
        return nullptr;
    }
    return found->second;
}
522 node = follow ? node->
m_Parent :
nullptr;
529 node = follow ? node->
m_Parent :
nullptr;
// Const accessors for collections stored on the current parse node. Each
// returns the node's vector by const reference; the vectors hold non-owning
// raw pointers. m_CurrentNode is dereferenced without a check.

// The current node's m_Biosources.
545 const vector<const CBioSource*>&
GetBiosources()
const {
return m_CurrentNode->m_Biosources; }
// The current node's m_Pubdescs.
546 const vector<const CPubdesc*>&
GetPubdescs()
const {
return m_CurrentNode->m_Pubdescs; }
// The current node's m_Authors.
547 const vector<const CAuth_list*>&
GetAuthors()
const {
return m_CurrentNode->m_Authors; }
// Autofix helpers -- declarations only; no bodies are visible in this excerpt.
// NOTE(review): the CanFix* predicates presumably report whether a pending fix
// applies to the named object kind, and the Autofix* routines presumably apply
// the fixes for that kind -- confirm against the implementation.

// Overloads without arguments.
589 bool CanFixBioseq_set();
591 bool CanFixSeq_annot();
592 bool CanFixSeqdesc();
593 bool CanFixSubmit_block();
// Overloads taking an explicit reference node.
594 bool CanFixBioseq(
CRefNode& refnode);
595 bool CanFixBioseq_set(
CRefNode& refnode);
598 bool CanFixSubmit_block(
CRefNode& refnode);
// One routine per object kind.
599 void AutofixBioseq();
600 void AutofixBioseq_set();
601 void AutofixSeq_annot();
602 void AutofixSeq_descr();
603 void AutofixSubmit_block();
// Expands to a member declaration: a vector of raw CDiscrepancyCore pointers
// named m_All_<type>, where <type> is the token passed to the macro.
611 #define ADD_DISCREPANCY_TYPE(type) vector<CDiscrepancyCore*> m_All_##type;
654 case eSeqSet_NucProt:
655 case eSeqSet_GenProd:
665 static const char*
names[] =
// Declarations only: two const methods that each return a string by value.
// Their bodies are not visible in this excerpt.
694 string GetText()
const;
695 string GetBioseqLabel()
const;
703 eKnownPseudo = 1 << 1,
705 eKnownProduct = 1 << 3
711 m_Ref->m_Parent.Reset(parent->m_Ref);
717 new_node->m_Obj = &seqdesc;
718 m_Descriptors.push_back(new_node);
719 m_DescriptorMap[&seqdesc] = new_node;
722 m_Biosources.push_back(biosrc);
723 m_BiosourceMap[biosrc] = new_node;
725 if (seqdesc.
IsPub()) {
727 m_Pubdescs.push_back(pub);
728 m_PubdescMap[pub] = new_node;
731 if (it->IsSetAuthors()) {
733 m_Authors.push_back(auth);
734 m_AuthorMap[auth] = new_node;
735 m_AuthorPubMap[auth] = &*it;
745 new_node->m_Obj = &feat;
746 m_Features.push_back(new_node);
747 m_FeatureMap[&feat] = new_node;
750 m_Biosources.push_back(biosrc);
751 m_BiosourceMap[biosrc] = new_node;
755 m_Pubdescs.push_back(pub);
756 m_PubdescMap[pub] = new_node;
759 if (it->IsSetAuthors()) {
761 m_Authors.push_back(auth);
762 m_AuthorMap[auth] = new_node;
763 m_AuthorPubMap[auth] = &*it;
// Reports whether this node, or any node above it in the parent chain, has
// the type eSeqSet_GenProd. The parent chain is followed recursively and the
// answer is false once a node without a parent is reached.
776 bool InGenProdSet() const {
    if (m_Type == eSeqSet_GenProd) {
        return true;
    }
    if (m_Parent) {
        return m_Parent->InGenProdSet();
    }
    return false;
}
// Builds a human-readable path from the topmost ancestor down to this node.
// Each node contributes "<TypeName>: <index>"; consecutive nodes are joined
// with " => ". A node without a parent yields only its own label.
807 string Path() {
    const string own_label = TypeName(m_Type) + ": " + to_string(m_Index);
    if (m_Parent) {
        return m_Parent->Path() + " => " + own_label;
    }
    return own_label;
}
// Parse-tree helpers. Apart from PopNode(), only declarations are visible in
// this excerpt.

// Each takes the parse node to operate on by non-const reference.
812 void ParseAll(CParseNode& node);
813 void Populate(CParseNode& node);
814 void PopulateBioseq(CParseNode& node);
815 void PopulateSeqSet(CParseNode& node);
816 void PopulateSubmit(CParseNode& node);
// Takes an object type; the parameter is unnamed in the declaration.
819 void PushNode(EObjType);
// Re-points m_CurrentNode at the parent of the node it currently refers to.
// m_CurrentNode is dereferenced without a check, so it must be set.
820 void PopNode() { m_CurrentNode.Reset(m_CurrentNode->m_Parent); }
// Derives a string from a parse node.
827 string ProdForFeature(
const CParseNode& node);
837 : m_Ref(ref), m_Fix(fix), m_More(more) {}
// Both overrides forward to the reference node held in m_Ref.

// Returns m_Ref->GetBioseqLabel().
845 string GetShort()
const override {
return m_Ref->GetBioseqLabel(); }
// Returns m_Ref->GetText().
848 string GetText()
const override {
return m_Ref->GetText(); }
851 for (
auto ref = m_Ref; ref; ref = ref->m_Parent)
// Declarations only: two const overrides that each return a string by value.
// Their bodies are not visible in this excerpt.
855 string GetFeatureType()
const override;
857 string GetLocation()
const override;
871 switch (m_Ref->m_Type) {
// True only when a fix node is attached (m_Fix is set) and the object has not
// already been marked as fixed (m_Fixed is false).
883 bool CanAutofix()
const override {
return m_Fix && !m_Fixed; }
// Returns the m_Fixed flag.
884 bool IsFixed()
const override {
return m_Fixed; }
// Flag read by CanAutofix() and IsFixed(); starts out false.
904 bool m_Fixed {
false };
925 #define DISCREPANCY_CASE_FULL(name, sname, type, group, descr, aliases_ptr) \
926 static constexpr CDiscrepancyCaseProps s_testcase_props_##name = { \
927 CDiscrepancyVisitorImpl<eTestNames::name>::Create, \
928 eTestTypes::type, eTestNames::name, sname, descr, group, aliases_ptr}; \
930 const CDiscrepancyCaseProps* \
931 CDiscrepancyCasePropsRef<eTestNames::name>::props = &s_testcase_props_##name; \
932 template<> void CDiscrepancyVisitorImpl<eTestNames::name>::Visit(NCBI_UNUSED CDiscrepancyContext& context)
// Convenience wrappers around DISCREPANCY_CASE_FULL.

// Uses the stringized enum name (#name) as the test's textual name and
// registers no aliases (nullptr).
934 #define DISCREPANCY_CASE(name, type, group, descr) \
935 DISCREPANCY_CASE_FULL(name, #name, type, group, descr, nullptr)
// Same, but the textual name is supplied explicitly as `sname`.
937 #define DISCREPANCY_CASE0(name, sname, type, group, descr) \
938 DISCREPANCY_CASE_FULL(name, sname, type, group, descr, nullptr)
// Like DISCREPANCY_CASE, but first defines a static constexpr
// initializer_list g_aliases_<name> from the variadic arguments and passes
// its address as the alias list.
940 #define DISCREPANCY_CASE1(name, type, group, descr, ...) \
941 static constexpr std::initializer_list<const char*> g_aliases_ ##name = { __VA_ARGS__ }; \
942 DISCREPANCY_CASE_FULL(name, #name, type, group, descr, &g_aliases_ ##name)
// Opens the explicit specialization of
// CDiscrepancyVisitorImpl<eTestNames::name>::Summarize(); the function body
// follows the macro invocation at the point of use.
945 #define DISCREPANCY_SUMMARIZE(name) \
946 template<> void CDiscrepancyVisitorImpl<eTestNames::name>::Summarize()
949 #define DISCREPANCY_AUTOFIX(name) \
951 CRef<CAutofixReport> \
952 CDiscrepancyVisitorImpl<eTestNames::name>::Autofix(CDiscrepancyObject* obj, CDiscrepancyContext& context) const
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@Auth_list.hpp User-defined methods of the data storage class.
static const CDiscrepancyCaseProps * props
virtual TReportObjectList GetObjects() const =0
const vector< const CPubdesc * > & GetPubdescs() const
CRef< CBioseq > m_AF_Bioseq
vector< CDiscrepancyObject * > * m_Fixes
CSeqdesc_run GetAllSeqdesc()
CConstRef< CSeqdesc > m_Current_Seqdesc
CRef< CSeq_descr > m_AF_Seq_descr
const vector< const CSeq_feat * > & FeatTRNAs()
vector< const CSeq_feat * > m_Feat_RNAs
const vector< const CSeq_feat * > & FeatExons()
unsigned char ReadFlags() const
CRef< objects::CScope > m_Scope
const string & CurrentText() const
ct::const_bitset< static_cast< size_t >(eTestTypes::max_num_types), eTestTypes > m_Enabled
CDiscrepancyContext(objects::CScope &scope)
TDiscrepancyCoreMap m_Tests
CRef< CBioseq_set > m_AF_Bioseq_set
const vector< CConstRef< CSeqdesc > > & GetSetBiosources() const
CRef< CSimpleTypeObject< string > > m_Current_Submit_block_StringObj
CRef< CParseNode > m_RootNode
CRef< feature::CFeatTree > m_FeatTree
CConstRef< CPub > m_Current_Pub
const vector< const CSeq_feat * > & FeatGenes()
CRef< CParseNode > m_CurrentNode
vector< const CSeq_feat * > m_FeatAll
CConstRef< CSubmit_block > m_Current_Submit_block
bool InGenProdSet() const
vector< const CSeq_feat * > m_FeatRRNAs
CConstRef< CSeqdesc > GetTitle() const
objects::CScope & GetScope() const
CConstRef< CSeq_feat > m_Current_Seq_feat
const vector< const CBioSource * > & GetBiosources() const
vector< const CSeq_feat * > m_FeatCDS
const vector< const CSeq_feat * > & FeatMRNAs()
const CPub * AuthPub(const CAuth_list *a) const
CConstRef< CBioseq > m_Current_Bioseq
const CSeqSummary & GetSeqSummary()
CBioseq_Handle m_Current_Bioseq_Handle
static bool InGenProdSet(const CParseNode *node)
const vector< const CAuth_list * > & GetAuthors() const
const vector< const CSeq_feat * > & Feat_RNAs()
CSeq_feat_run GetAllFeat()
const vector< const CSeq_feat * > & FeatPseudo()
const CBioseq & CurrentBioseq() const
vector< const CSeq_feat * > m_FeatGenes
static string TypeName(EObjType n)
CBioseq_set_Handle GetBioseq_setHandle(const CBioseq_set &bss)
CConstRef< CSeqdesc > GetBiosource() const
const vector< const CSeq_feat * > & FeatCDS()
vector< const CSeq_feat * > m_FeatMRNAs
map< const CRefNode *, CParseNode * > m_NodeMap
const vector< const CSeq_feat * > & FeatAll()
const vector< const CSeq_feat * > & FeatIntrons()
CRef< CSimpleTypeObject< string > > m_Current_Cit_sub_StringObj
CRef< CSeq_annot > m_AF_Seq_annot
static bool InNucProtSet(const CParseNode *node)
CConstRef< CSuspect_rule_set > m_OrganelleProductRules
CBioseq_EditHandle GetBioseqHandle(const CBioseq &bs)
vector< const CSeq_feat * > m_FeatTRNAs
const vector< const CSeq_feat * > & FeatRRNAs()
vector< const CSeq_feat * > m_FeatExons
CConstRef< CSeqdesc > GetMolinfo() const
CSeqdesc_vec GetSeqdesc()
const CBioseq_set & CurrentBioseq_set() const
const vector< const CSeq_feat * > & FeatMisc()
const CObject * GetMore(CReportObj &obj)
vector< CConstRef< CBioseq_set > > m_Bioseq_set_Stack
CRef< CSubmit_block > m_AF_Submit_block
CConstRef< CPub_equiv > m_Current_Pub_equiv
CConstRef< CSuspect_rule_set > m_ProductRules
void PropagateFlags(unsigned char f)
vector< const CSeq_feat * > m_FeatIntrons
vector< const CSeq_feat * > m_FeatPseudo
static bool IsSeqSet(EObjType n)
vector< const CSeq_feat * > m_FeatMisc
eTestTypes GetType() const override
virtual CRef< CAutofixReport > Autofix(CDiscrepancyObject *obj, CDiscrepancyContext &context) const =0
virtual void Visit(CDiscrepancyContext &context)=0
eTestNames GetName() const override
const CDiscrepancyCaseProps * m_props
CDiscrepancyCore(const CDiscrepancyCaseProps *props)
string_view GetSName() const override
TReportItemList m_ReportItems
virtual void Summarize()=0
string_view GetDescription() const override
const TReportItemList & GetReport() const override
CDiscrepancyItem(string_view title, const string &name, const string &msg, const string &xml, const string &unit, size_t count)
string_view GetTitle() const override
bool IsExtended() const override
size_t GetCount() const override
TReportObjectList & SetDetails()
TReportObjectList GetDetails() const override
bool IsInfo() const override
bool IsReal() const override
void SetAutofix(bool value)
bool IsFatal() const override
string GetUnit() const override
TReportItemList GetSubitems() const override
CDiscrepancyItem(const string &msg)
string GetStr() const override
string GetXml() const override
string GetMsg() const override
bool CanAutofix() const override
ESeverity GetSeverity() const override
bool IsSummary() const override
friend bool operator<(const CReportObjPtr &one, const CReportObjPtr &another)
CRef< CDiscrepancyContext::CRefNode > m_Ref
string GetBioseqLabel() const override
string GetPath() const override
void SetMoreInfo(CObject *data) override
string GetShort() const override
bool IsFixed() const override
bool CanAutofix() const override
CConstRef< CObject > m_More
CConstRef< CObject > GetMoreInfo()
string GetText() const override
CRef< CDiscrepancyContext::CRefNode > m_Fix
CRef< CDiscrepancyCore > m_Case
EType GetType() const override
CDiscrepancyObject(CDiscrepancyContext::CRefNode *ref, CDiscrepancyContext::CRefNode *fix=nullptr, const CObject *more=nullptr)
TDiscrepancyCoreMap m_Tests
void Visit(CDiscrepancyContext &context) override
CRef< CAutofixReport > Autofix(CDiscrepancyObject *, CDiscrepancyContext &) const override
void Summarize() override
CDiscrepancyPrivateData< _Name > m_private
static CRef< CDiscrepancyCore > Create()
@Pubdesc.hpp User-defined methods of the data storage class.
static void Add(TReportObjectList &list, TReportObjectSet &hash, CReportObj &obj, bool unique=true)
CReportNode(const string &name)
bool Exist(CReportObj &obj)
CReportNode & Merge(CReportNode &other)
CReportNode & Add(CReportObj &obj, bool unique=true)
CReportNode & NoRec(bool b=true)
map< string, CRef< CReportNode > > TNodeMap
CReportNode & Summ(bool b=true)
TReportObjectList & GetObjects()
CReportNode & Ext(bool b=true)
CReportNode & operator[](const string &name)
CReportNode & Severity(CReportItem::ESeverity s)
CRef< CReportItem > Export(CDiscrepancyCore &test, bool unique=true) const
CReportItem::ESeverity m_Severity
static bool Exist(TReportObjectSet &hash, CReportObj &obj)
bool Exist(const string &name) const
CReportNode & Add(TReportObjectList &objs, bool unique=true)
void Copy(CRef< CReportNode > other)
static CRef< CReportObj > Create(CRef< CDiscrepancyCore > disc_core, const CReportObj &obj, bool autofix)
namespace ncbi::objects::
Base class for all serializable objects.
const_iterator end() const
const_iterator find(const key_type &key) const
static const char location[]
static const struct name_t names[]
vector< CRef< CReportItem > > TReportItemList
vector< CRef< CReportObj > > TReportObjectList
CConstRef< objects::CSuspect_rule_set > GetProductRules(const string &name="")
CConstRef< objects::CSuspect_rule_set > GetOrganelleProductRules(const string &name="")
void UnitTest_FLATFILE_FIND()
Checking that FLATFILE_FIND.inc is in sync with kSpellFixes. If the array is changed,...
map< eTestNames, CRef< CDiscrepancyCore > > TDiscrepancyCoreMap
list< pair< CRef< CDiscrepancyObject >, string > > TGenesList
CDiscrepancyContext - manage and run the list of tests.
set< CReportObjPtr > TReportObjectSet
map< string, TGenesList > TGeneLocusMap
#define ADD_DISCREPANCY_TYPE(type)
std::ofstream out("events_result.xml")
main entry point for tests
#define NCBI_DEPRECATED_CTOR(decl)
Macro used to mark a constructor as deprecated.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether given feature is pseudo, using gene associated with feature if necessary. Checks to...
CConstRef< CSeq_feat > GetGeneForFeature(const CSeq_feat &feat, CScope &scope)
Finds gene for feature, but obeys SeqFeatXref directives.
CBioseq_set_Handle GetBioseq_setHandle(const CBioseq_set &seqset, EMissing action=eMissing_Default)
CBioseq_EditHandle GetBioseqEditHandle(const CBioseq &bioseq)
Get edit handle for the specified object Throw an exception if object is not found,...
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::streampos CNcbiStreampos
Portable alias for streampos.
#define NCBI_DISCREPANCY_EXPORT
const Tdata & Get(void) const
Get the member data.
const TPub & GetPub(void) const
Get the variant data.
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
const TData & GetData(void) const
Get the Data member data.
bool IsPub(void) const
Check if variant Pub is selected.
const TBiosrc & GetBiosrc(void) const
Get the variant data.
bool IsBiosrc(void) const
Check if variant Biosrc is selected.
const TTitle & GetTitle(void) const
Get the variant data.
const TSource & GetSource(void) const
Get the variant data.
const TPub & GetPub(void) const
Get the variant data.
bool IsSource(void) const
Check if variant Source is selected.
bool IsPub(void) const
Check if variant Pub is selected.
bool IsSetPub(void) const
The citation(s). Check if a value has been assigned to Pub data member.
const TPub & GetPub(void) const
Get the Pub member data.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
static string GetProductName(const CProt_ref &prot)
double f(double x_, const double &y_)
string GetLocusTag(const CSeq_feat &f, const LocMap &loc_map)
string GetProduct(const CProt_ref &prot_ref)
const std::initializer_list< const char * > * Aliases
CConstRef< CSeqdesc > m_Molinfo
CParseNode & AddDescriptor(const CSeqdesc &seqdesc)
CConstRef< CSeqdesc > m_Title
vector< CRef< CParseNode > > m_Children
CParseNode(EObjType type, unsigned index, CParseNode *parent=nullptr)
map< const CBioSource *, CParseNode * > m_BiosourceMap
vector< const CBioSource * > m_Biosources
vector< CRef< CParseNode > > m_Features
void SetType(EObjType type)
bool InGenProdSet() const
vector< const CPubdesc * > m_Pubdescs
vector< CRef< CParseNode > > m_Descriptors
CConstRef< CSerialObject > m_Obj
CParseNode & AddFeature(const CSeq_feat &feat)
vector< CConstRef< CSeqdesc > > m_SetBiosources
map< const CSeq_feat *, CParseNode * > m_FeatureMap
CConstRef< CSeqdesc > m_Biosource
const CParseNode * m_Gene
map< const CSeqdesc *, CParseNode * > m_DescriptorMap
map< const CAuth_list *, CParseNode * > m_AuthorMap
shared_ptr< CSeqSummary > m_BioseqSummary
map< const CPubdesc *, CParseNode * > m_PubdescMap
CConstRef< CSeqdesc > GetBiosource() const
CConstRef< CSeqdesc > GetMolinfo() const
CConstRef< CSeqdesc > GetTitle() const
vector< const CAuth_list * > m_Authors
map< const CAuth_list *, const CPub * > m_AuthorPubMap
CRef< CRefNode > m_Parent
CRefNode(EObjType type, unsigned index)
const CSeq_feat & operator*()
vector< CRef< CParseNode > >::iterator it
bool operator!=(const iterator &x) const
bool operator==(const iterator &x) const
CSeq_feat_run(CParseNode &n)
bool operator==(const iterator &x) const
bool operator!=(const iterator &x) const
const CSeq_feat & operator*()
vector< CRef< CParseNode > >::iterator it
iterator(vector< CRef< CParseNode >>::iterator x)
CSeq_feat_vec(CParseNode &n)
bool operator==(const iterator &x) const
const CSeqdesc & operator*()
bool operator!=(const iterator &x) const
vector< CRef< CParseNode > >::iterator it
CSeqdesc_run(CParseNode &n)
iterator(vector< CRef< CParseNode >>::iterator x)
bool operator==(const iterator &x) const
vector< CRef< CParseNode > >::iterator it
bool operator!=(const iterator &x) const
const CSeqdesc & operator*()
CSeqdesc_vec(CParseNode &n)
friend bool operator<(const CReportObjPtr &one, const CReportObjPtr &another)
CReportObjPtr(const CReportObj *p)
vector< pair< size_t, size_t > > NRuns
size_t _CBposition[WINDOW_SIZE]
static const size_t WINDOW_SIZE
size_t _CBscore[WINDOW_SIZE]
CSimpleTypeObject(const T &v)
int test(int srctype, const void *srcdata, int srclen, int dsttype, int dstlen)
static int RunTests(void)
Code to iterate through all tests to run.
string GetTextObjectDescription(const CSeq_feat &sf, CScope &scope)
void Merge(wxMenu &menu_1, const wxMenu &menu_2)
Merges all items from menu_2 into menu_1, preserving the structure if possible.