NCBI C++ ToolKit
|
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>
#include <serial/serialbase.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Seq_annot.hpp>
#include <objects/pub/Pub.hpp>
#include <objects/pub/Pub_equiv.hpp>
#include <objects/seq/Pubdesc.hpp>
#include <objects/biblio/Author.hpp>
#include <objects/biblio/Auth_list.hpp>
#include <objects/general/Person_id.hpp>
#include <objects/general/Name_std.hpp>
#include <objects/misc/sequence_macros.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objects/seqset/Bioseq_set.hpp>
#include <objects/seqset/seqset_macros.hpp>
#include <objects/seqfeat/Org_ref.hpp>
#include <objects/seqfeat/Seq_feat.hpp>
#include <objects/seqfeat/SeqFeatXref.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/general/User_object.hpp>
#include <objects/submit/Seq_submit.hpp>
#include <objects/taxon3/taxon3.hpp>
#include <objmgr/object_manager.hpp>
#include <objmgr/util/sequence.hpp>
#include <objmgr/util/feature.hpp>
#include <objmgr/util/autodef.hpp>
#include <objmgr/seq_annot_ci.hpp>
#include <objmgr/seqdesc_ci.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/seq_vector_ci.hpp>
#include <objtools/edit/cds_fix.hpp>
#include <objtools/cleanup/cleanup.hpp>
#include "cleanup_utils.hpp"
#include <objtools/cleanup/cleanup_message.hpp>
#include <util/strsearch.hpp>
#include "newcleanupp.hpp"
#include <objtools/logging/listener.hpp>
#include <objtools/cleanup/influenza_set.hpp>
Go to the source code of this file.
Go to the SVN repository for this file.
Macros
#define CLEANUP_SETUP
Typedefs
typedef pair< size_t, bool > TRNALength
typedef map< string, TRNALength > TRNALengthMap
typedef SStaticPair< CSeqdesc::E_Choice, int > TSeqdescOrderElem
typedef CStaticPairArrayMap< CSeqdesc::E_Choice, int > TSeqdescOrderMap
Enumerations
enum EChangeType { eChange_UNKNOWN }
Variables
static constexpr std::array< string_view, CCleanupChangeCore::eNumberofChangeTypes > sm_ChangeDesc
static const TRNALengthMap kTrnaLengthMap
const string kLowQualitySequence = "low-quality sequence region"
static const TSeqdescOrderElem sc_seqdesc_order_map []
const unsigned int methionine_encoded = 'M' - 'A'
#define CLEANUP_SETUP |
Definition at line 127 of file cleanup.cpp.
typedef pair<size_t, bool> TRNALength |
Definition at line 2603 of file cleanup.cpp.
typedef map<string, TRNALength > TRNALengthMap |
Definition at line 2604 of file cleanup.cpp.
typedef SStaticPair<CSeqdesc::E_Choice, int> TSeqdescOrderElem |
Definition at line 2942 of file cleanup.cpp.
typedef CStaticPairArrayMap<CSeqdesc::E_Choice, int> TSeqdescOrderMap
Definition at line 2973 of file cleanup.cpp.
enum EChangeType |
Enumerator | |
---|---|
eChange_UNKNOWN |
Definition at line 82 of file cleanup.cpp.
void AddIRDMiscFeature(CBioseq_Handle bh, const CDbtag& tag)
Definition at line 4619 of file cleanup.cpp.
References CSeq_annot_EditHandle::AddFeat(), CSerialObject::Assign(), CBioseq_EditHandle::AttachAnnot(), f, ftable, CBioseq_Handle::GetBioseqLength(), CBioseq_Handle::GetEditHandle(), CBioseq_Handle::GetSeqId(), and tag.
Referenced by CCleanup::MakeIRDFeatsFromSourceXrefs().
bool ConvertProteinToImp(CSeq_feat_Handle fh)
Definition at line 481 of file cleanup.cpp.
References CSerialObject::Assign(), CSeq_feat_Handle::GetData(), CProt_ref_Base::GetName(), CProt_ref_Base::GetProcessed(), CSeqFeatData_Base::GetProt(), CSeq_feat_Handle::GetSeq_feat(), NStr::IsBlank(), CSeqFeatData_Base::IsProt(), CProt_ref_Base::IsSetName(), CProt_ref_Base::IsSetProcessed(), ncbi::grid::netcache::search::fields::key, CSeq_feat_EditHandle::Replace(), s_KeyFromProcessed(), CSeq_feat_Base::SetData(), CGb_qual_Base::SetQual(), CSeq_feat_Base::SetQual(), and CGb_qual_Base::SetVal().
Referenced by CCleanup::MoveFeatToProtein().
DEFINE_STATIC_ARRAY_MAP(TSeqdescOrderMap, sc_SeqdescOrderMap, sc_seqdesc_order_map)
void GetSourceDescriptors(const CSeq_entry& se, vector<const CSeqdesc*>& src_descs)
Definition at line 1951 of file cleanup.cpp.
References CSeq_descr_Base::Get(), CSeq_entry::GetDescr(), CBioseq_set_Base::GetSeq_set(), CSeq_entry_Base::GetSet(), CSeq_entry_Base::IsSet(), CSeq_entry::IsSetDescr(), CBioseq_set_Base::IsSetSeq_set(), and ITERATE.
Referenced by CCleanup::CleanupCollectionDates(), and CCleanup::TaxonomyLookup().
Definition at line 3712 of file cleanup.cpp.
References NStr::Equal(), COrg_ref_Base::GetMod(), COrg_ref_Base::IsSetMod(), ITERATE, and mod().
Referenced by CCleanup::x_MergeDupOrgRefs().
Definition at line 2279 of file cleanup.cpp.
References IsCrossKingdom().
Definition at line 2251 of file cleanup.cpp.
References NStr::EqualNocase(), CPartialOrgName_Base::Get(), CTaxElement_Base::GetFixed_level(), CTaxElement_Base::GetLevel(), COrgName_Base::GetName(), CTaxElement_Base::GetName(), COrg_ref_Base::GetOrgname(), COrgName_Base::C_Name::GetPartial(), NStr::IsBlank(), COrgName_Base::C_Name::IsPartial(), CPartialOrgName_Base::IsSet(), CTaxElement_Base::IsSetFixed_level(), CTaxElement_Base::IsSetLevel(), COrgName_Base::IsSetName(), CTaxElement_Base::IsSetName(), COrg_ref_Base::IsSetOrgname(), ITERATE, and kEmptyStr.
Referenced by CCleanup::AddPartialToProteinTitle(), IsCrossKingdom(), and s_TitleEndsInOrganism().
Definition at line 3414 of file cleanup.cpp.
References NStr::Equal(), CSeq_feat_Base::GetData(), CSeqFeatData_Base::GetImp(), CImp_feat_Base::GetKey(), CSeqFeatData_Base::IsImp(), and CImp_feat_Base::IsSetKey().
Referenced by CCleanup::RescueSiteRefPubs().
|
static |
Definition at line 118 of file cleanup.cpp.
References CCleanup::eClean_NoReporting, and CRef< C, Locker >::Reset().
Referenced by CCleanup::BasicCleanup(), CCleanup::BioSrcFromFeat(), CCleanup::ExtendedCleanup(), CCleanup::MergeDupBioSources(), CCleanup::MoveFeatToProtein(), and CCleanup::RescueSiteRefPubs().
void RescueProtProductQual(CSeq_feat& feat)
Definition at line 522 of file cleanup.cpp.
References NStr::Equal(), CSeq_feat_Base::GetData(), CSeqFeatData_Base::GetProt(), NStr::IsBlank(), CSeqFeatData_Base::IsProt(), CSeq_feat_Base::IsSetData(), CProt_ref_Base::IsSetName(), CSeq_feat_Base::IsSetQual(), CSeq_feat_Base::ResetQual(), CSeq_feat_Base::SetData(), and CSeq_feat_Base::SetQual().
Referenced by CCleanup::MoveFeatToProtein().
Definition at line 2622 of file cleanup.cpp.
References CSeqFeatData::eSubtype_rRNA, f, NStr::FindNoCase(), GetLength(), CGb_qual_Base::GetQual(), CRNA_ref::GetRnaProductName(), CGb_qual_Base::GetVal(), CGb_qual_Base::IsSetQual(), ITERATE, kTrnaLengthMap, and len.
Referenced by CCleanup::WGSCleanup().
Definition at line 3304 of file cleanup.cpp.
References CSerialObject::Equals(), CPub_equiv_Base::Get(), CPubdesc_Base::GetPub(), CPubdesc_Base::IsSetPub(), and ITERATE.
Referenced by CCleanup::PubAlreadyInSet().
void s_GetAuthorsString(string* out_authors, const CAuth_list& auth_list)
Definition at line 3077 of file cleanup.cpp.
References BEGIN_COMMA_END, copy(), CAuth_list_Base::C_Names::GetMl(), CAuth_list_Base::GetNames(), CAuth_list_Base::C_Names::GetStd(), CAuth_list_Base::C_Names::GetStr(), CAuth_list_Base::C_Names::IsMl(), CAuth_list_Base::IsSetNames(), CAuth_list_Base::C_Names::IsStd(), CAuth_list_Base::C_Names::IsStr(), ITERATE, NStr::Join(), and label.
Referenced by CCleanup::GetPubdescLabels(), and s_GetAuthorsString().
Definition at line 3124 of file cleanup.cpp.
References FOR_EACH_PUB_ON_PUBDESC, and s_GetAuthorsString().
Definition at line 560 of file cleanup.cpp.
References CSeqFeatData_Base::e_Cdregion, eExtreme_Biological, eLocationInFrame_InFrame, eOverlap_Contained, CSeqFeatData::eSubtype_cdregion, GetOverlappingFeatures(), CScope::GetSeq_featHandle(), IsLocationInFrame(), and CSeq_loc::IsPartialStart().
Referenced by CCleanup::MoveFeatToProtein().
Definition at line 549 of file cleanup.cpp.
References ConstRef(), CSeqFeatData_Base::e_Cdregion, and CMappedFeat::GetOriginalFeature().
Referenced by CCleanup::MoveFeatToProtein().
Definition at line 1477 of file cleanup.cpp.
References NStr::EqualNocase(), CCleanup::GetProteinName(), CSeq_feat_Base::GetQual(), CSeq_feat_Base::GetXref(), CSeq_feat_Base::IsSetQual(), CSeq_feat_Base::IsSetXref(), ITERATE, and kEmptyStr.
Referenced by CCleanup::GetProteinName().
bool s_IsLocationEndAtOtherLocationInternalEndpoint(const CSeq_loc& loc, const CSeq_loc& other_loc)
Definition at line 1268 of file cleanup.cpp.
References eExtreme_Biological, eNa_strand_minus, CRange_Base::GetFrom(), CSeq_loc_CI::GetRange(), CSeq_loc::GetStop(), CSeq_loc::GetStrand(), CSeq_loc_CI::GetStrand(), CRange_Base::GetTo(), CSeq_loc::IsSetStrand(), and CSeq_loc_CI::IsSetStrand().
Referenced by CCleanup::ExtendToStopIfShortAndNotPartial().
bool s_IsPreprotein(CSeq_feat_Handle fh)
Definition at line 504 of file cleanup.cpp.
References CProt_ref_Base::eProcessed_preprotein, CSeq_feat_Handle::GetData(), CSeqFeatData_Base::GetImp(), CImp_feat_Base::GetKey(), CProt_ref_Base::GetProcessed(), CSeqFeatData_Base::GetProt(), CSeqFeatData_Base::IsImp(), CSeqFeatData_Base::IsProt(), CSeq_feat_Handle::IsSetData(), CImp_feat_Base::IsSetKey(), CProt_ref_Base::IsSetProcessed(), and s_ProcessedFromKey().
Referenced by CCleanup::MoveFeatToProtein().
string s_KeyFromProcessed(CProt_ref::EProcessed processed)
Definition at line 455 of file cleanup.cpp.
References CProt_ref_Base::eProcessed_mature, CProt_ref_Base::eProcessed_not_set, CProt_ref_Base::eProcessed_preprotein, CProt_ref_Base::eProcessed_propeptide, CProt_ref_Base::eProcessed_signal_peptide, CProt_ref_Base::eProcessed_transit_peptide, and kEmptyStr.
Referenced by ConvertProteinToImp().
|
static |
Definition at line 3583 of file cleanup.cpp.
References NStr::Equal(), CSubSource_Base::GetName(), and CSubSource_Base::IsSetName().
Referenced by s_SubSourceListUniqued().
CProt_ref::EProcessed s_ProcessedFromKey(const string& key)
Definition at line 438 of file cleanup.cpp.
References CProt_ref_Base::eProcessed_mature, CProt_ref_Base::eProcessed_not_set, CProt_ref_Base::eProcessed_preprotein, CProt_ref_Base::eProcessed_propeptide, CProt_ref_Base::eProcessed_signal_peptide, CProt_ref_Base::eProcessed_transit_peptide, NStr::Equal(), and ncbi::grid::netcache::search::fields::key.
Referenced by CCleanup::MoveFeatToProtein(), and s_IsPreprotein().
Definition at line 2336 of file cleanup.cpp.
References Asn2gnbkCompressSpaces(), data, NStr::eCase, EDIT_EACH_FEATURE_ON_ANNOT, EDIT_EACH_NAME_ON_PROTREF, EDIT_EACH_SEQANNOT_ON_BIOSEQ, NStr::eReverseSearch, NStr::Find(), CSeq_annot::IsFtable(), len, NPOS, CSeq_feat_Base::SetData(), NStr::StartsWith(), and str().
Referenced by CCleanup::AddPartialToProteinTitle().
|
static |
Definition at line 3571 of file cleanup.cpp.
References CSubSource_Base::GetSubtype(), and CSubSource_Base::IsSetSubtype().
Referenced by s_SubSourceListUniqued().
|
static |
Definition at line 2990 of file cleanup.cpp.
References s_SeqDescToOrdering(), and CSeqdesc_Base::Which().
Referenced by CCleanup::NormalizeDescriptorOrder().
|
static |
Definition at line 2977 of file cleanup.cpp.
Referenced by s_SeqDescLessThan().
|
static |
Definition at line 3548 of file cleanup.cpp.
References NStr::CompareNocase(), FIELD_IS_SET, and GET_FIELD.
Referenced by s_SubSourceListUniqued().
bool s_SubSourceListUniqued(CBioSource& biosrc)
Definition at line 3601 of file cleanup.cpp.
References CBioSource_Base::GetSubtype(), CBioSource_Base::IsSetSubtype(), s_NameCloseEnoughC(), s_SameSubtypeC(), s_SubsourceCompareC(), CBioSource_Base::SetSubtype(), SORT_SUBSOURCE_ON_BIOSOURCE, and SUBSOURCE_ON_BIOSOURCE_IS_SORTED.
Referenced by CCleanup::MergeDupBioSources().
|
static |
Definition at line 2286 of file cleanup.cpp.
References COrgMod_Base::eSubtype_old_name, NStr::Find(), COrgName_Base::GetMod(), COrgName_Base::GetName(), COrg_ref_Base::GetOrgname(), COrg_ref_Base::GetTaxname(), COrgName_Base::C_Name::IsBinomial(), NStr::IsBlank(), IsCrossKingdom(), COrgName_Base::IsSetName(), COrg_ref::IsSetOrgMod(), COrg_ref_Base::IsSetOrgname(), COrg_ref_Base::IsSetTaxname(), ITERATE, NPOS, and s_TitleEndsInOrganism().
|
static |
Definition at line 2231 of file cleanup.cpp.
References COrgName_Base::C_Name::GetBinomial(), CBinomialOrgName_Base::GetGenus(), CBinomialOrgName_Base::GetSpecies(), COrgName_Base::C_Name::IsBinomial(), NStr::IsBlank(), CBinomialOrgName_Base::IsSetGenus(), CBinomialOrgName_Base::IsSetSpecies(), NPOS, and s_TitleEndsInOrganism().
|
static |
Definition at line 2180 of file cleanup.cpp.
References CBioSource_Base::eGenome_chloroplast, CBioSource_Base::eGenome_chromatophore, CBioSource_Base::eGenome_chromosome, CBioSource_Base::eGenome_extrachrom, CBioSource_Base::eGenome_insertion_seq, CBioSource_Base::eGenome_proviral, CBioSource_Base::eGenome_transposon, CBioSource_Base::eGenome_virion, NStr::EndsWith(), NStr::eNocase, NStr::eReverseSearch, NStr::Find(), CBioSource::GetOrganelleByGenome(), and NPOS.
Referenced by CCleanup::AddPartialToProteinTitle(), and s_TitleEndsInOrganism().
Definition at line 2886 of file cleanup.cpp.
Referenced by CCleanup::x_AddLowQualityException().
|
static |
Definition at line 2606 of file cleanup.cpp.
Referenced by s_CleanupIsShortrRNA().
Definition at line 4686 of file cleanup.cpp.
Referenced by CCleanup::IsMethionine().
|
static |
Definition at line 2943 of file cleanup.cpp.
|
static constexpr
Definition at line 312 of file cleanup.cpp.
Referenced by CCleanupChangeCore::GetDescription().