NCBI C++ ToolKit
Macros | Typedefs | Enumerations | Functions | Variables
cleanup.cpp File Reference
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>
#include <serial/serialbase.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Seq_annot.hpp>
#include <objects/pub/Pub.hpp>
#include <objects/pub/Pub_equiv.hpp>
#include <objects/seq/Pubdesc.hpp>
#include <objects/biblio/Author.hpp>
#include <objects/biblio/Auth_list.hpp>
#include <objects/general/Person_id.hpp>
#include <objects/general/Name_std.hpp>
#include <objects/misc/sequence_macros.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objects/seqset/Bioseq_set.hpp>
#include <objects/seqset/seqset_macros.hpp>
#include <objects/seqfeat/Org_ref.hpp>
#include <objects/seqfeat/Seq_feat.hpp>
#include <objects/seqfeat/SeqFeatXref.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/general/User_object.hpp>
#include <objects/submit/Seq_submit.hpp>
#include <objects/taxon3/taxon3.hpp>
#include <objmgr/object_manager.hpp>
#include <objmgr/util/sequence.hpp>
#include <objmgr/util/feature.hpp>
#include <objmgr/util/autodef.hpp>
#include <objmgr/seq_annot_ci.hpp>
#include <objmgr/seqdesc_ci.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/seq_vector_ci.hpp>
#include <objtools/edit/cds_fix.hpp>
#include <objtools/cleanup/cleanup.hpp>
#include "cleanup_utils.hpp"
#include <objtools/cleanup/cleanup_message.hpp>
#include <util/strsearch.hpp>
#include "newcleanupp.hpp"
#include <objtools/logging/listener.hpp>
#include <objtools/cleanup/influenza_set.hpp>
+ Include dependency graph for cleanup.cpp:

Go to the source code of this file.

Go to the SVN repository for this file.




typedef pair< size_t, bool > TRNALength
typedef map< string, TRNALength > TRNALengthMap
typedef SStaticPair< CSeqdesc::E_Choice, int > TSeqdescOrderElem
typedef CStaticPairArrayMap< CSeqdesc::E_Choice, int > TSeqdescOrderMap


enum  EChangeType { eChange_UNKNOWN }


static CRef< CCleanupChange > makeCleanupChange (Uint4 options)
CProt_ref::EProcessed s_ProcessedFromKey (const string &key)
string s_KeyFromProcessed (CProt_ref::EProcessed processed)
bool ConvertProteinToImp (CSeq_feat_Handle fh)
bool s_IsPreprotein (CSeq_feat_Handle fh)
void RescueProtProductQual (CSeq_feat &feat)
static CConstRef< CSeq_feat > s_GetCdsByProduct (CScope &scope, const CSeq_loc &product)
static CConstRef< CSeq_feat > s_GetCdsByLocation (CScope &scope, const CSeq_loc &feat_loc)
bool s_IsLocationEndAtOtherLocationInternalEndpoint (const CSeq_loc &loc, const CSeq_loc &other_loc)
static const string & s_GetProteinNameFromXrefOrQual (const CSeq_feat &cds)
void GetSourceDescriptors (const CSeq_entry &se, vector< const CSeqdesc * > &src_descs)
static SIZE_TYPE s_TitleEndsInOrganism (const string &sTitle, const string &sOrganism, SIZE_TYPE &OrganellePos)
static SIZE_TYPE s_TitleEndsInOrganism (const string &sTitle, const COrgName::TName &orgname, SIZE_TYPE &organelle_pos)
bool IsCrossKingdom (const COrg_ref &org, string &first_kingdom, string &second_kingdom)
bool IsCrossKingdom (const COrg_ref &org)
static SIZE_TYPE s_TitleEndsInOrganism (const string &sTitle, const COrg_ref &org, SIZE_TYPE &organelle_pos)
static void s_RemoveOrgFromEndOfProtein (CBioseq &seq, string taxname)
static bool s_CleanupIsShortrRNA (const CSeq_feat &f, CScope *scope)
 DEFINE_STATIC_ARRAY_MAP (TSeqdescOrderMap, sc_SeqdescOrderMap, sc_seqdesc_order_map)
static int s_SeqDescToOrdering (CSeqdesc::E_Choice chs)
static bool s_SeqDescLessThan (const CRef< CSeqdesc > &desc1, const CRef< CSeqdesc > &desc2)
void s_GetAuthorsString (string *out_authors, const CAuth_list &auth_list)
void s_GetAuthorsString (string *out_authors_string, const CPubdesc &pd)
bool s_FirstPubMatchesSecond (const CPubdesc &pd1, const CPubdesc &pd2)
bool IsSiteRef (const CSeq_feat &sf)
static bool s_SubsourceCompareC (const CRef< CSubSource > &st1, const CRef< CSubSource > &st2)
static bool s_SameSubtypeC (const CSubSource &s1, const CSubSource &s2)
static bool s_NameCloseEnoughC (const CSubSource &s1, const CSubSource &s2)
bool s_SubSourceListUniqued (CBioSource &biosrc)
bool HasMod (const COrg_ref &org, const string &mod)
void AddIRDMiscFeature (CBioseq_Handle bh, const CDbtag &tag)


static constexpr std::array< string_view, CCleanupChangeCore::eNumberofChangeTypes > sm_ChangeDesc
static const TRNALengthMap kTrnaLengthMap
const string kLowQualitySequence = "low-quality sequence region"
static const TSeqdescOrderElem sc_seqdesc_order_map []
const unsigned int methionine_encoded = 'M' - 'A'

Macro Definition Documentation


auto changes = makeCleanupChange(options); \
CNewCleanup_imp clean_i(changes, options); \
static CRef< CScope > m_Scope
static CRef< CCleanupChange > makeCleanupChange(Uint4 options)
Definition: cleanup.cpp:118

Definition at line 127 of file cleanup.cpp.

Typedef Documentation

◆ TRNALength

typedef pair<size_t, bool> TRNALength

Definition at line 2603 of file cleanup.cpp.

◆ TRNALengthMap

Definition at line 2604 of file cleanup.cpp.

◆ TSeqdescOrderElem

Definition at line 2942 of file cleanup.cpp.

◆ TSeqdescOrderMap

Definition at line 2973 of file cleanup.cpp.

Enumeration Type Documentation

◆ EChangeType


Definition at line 82 of file cleanup.cpp.

Function Documentation

◆ AddIRDMiscFeature()

void AddIRDMiscFeature ( CBioseq_Handle  bh,
const CDbtag &  tag )

◆ ConvertProteinToImp()

bool ConvertProteinToImp ( CSeq_feat_Handle  fh)


◆ DEFINE_STATIC_ARRAY_MAP()

DEFINE_STATIC_ARRAY_MAP ( TSeqdescOrderMap  ,
sc_SeqdescOrderMap  ,
sc_seqdesc_order_map 
)

◆ GetSourceDescriptors()

void GetSourceDescriptors ( const CSeq_entry &  se,
vector< const CSeqdesc * > &  src_descs )

◆ HasMod()

bool HasMod ( const COrg_ref &  org,
const string &  mod )

◆ IsCrossKingdom() [1/2]

bool IsCrossKingdom ( const COrg_ref &  org)

Definition at line 2279 of file cleanup.cpp.

References IsCrossKingdom().

◆ IsCrossKingdom() [2/2]

bool IsCrossKingdom ( const COrg_ref &  org,
string &  first_kingdom,
string &  second_kingdom )

◆ IsSiteRef()

bool IsSiteRef ( const CSeq_feat &  sf)

◆ makeCleanupChange()

static CRef<CCleanupChange> makeCleanupChange ( Uint4  options)

◆ RescueProtProductQual()

void RescueProtProductQual ( CSeq_feat &  feat)

◆ s_CleanupIsShortrRNA()

static bool s_CleanupIsShortrRNA ( const CSeq_feat &  f,
CScope *  scope )

◆ s_FirstPubMatchesSecond()

bool s_FirstPubMatchesSecond ( const CPubdesc &  pd1,
const CPubdesc &  pd2 )

◆ s_GetAuthorsString() [1/2]

void s_GetAuthorsString ( string *  out_authors,
const CAuth_list &  auth_list )

◆ s_GetAuthorsString() [2/2]

void s_GetAuthorsString ( string *  out_authors_string,
const CPubdesc &  pd )

Definition at line 3124 of file cleanup.cpp.

References FOR_EACH_PUB_ON_PUBDESC, and s_GetAuthorsString().

◆ s_GetCdsByLocation()

static CConstRef<CSeq_feat> s_GetCdsByLocation ( CScope &  scope,
const CSeq_loc &  feat_loc )

◆ s_GetCdsByProduct()

static CConstRef<CSeq_feat> s_GetCdsByProduct ( CScope &  scope,
const CSeq_loc &  product )

Definition at line 549 of file cleanup.cpp.

References ConstRef(), CSeqFeatData_Base::e_Cdregion, and fi.

Referenced by CCleanup::MoveFeatToProtein().

◆ s_GetProteinNameFromXrefOrQual()

static const string& s_GetProteinNameFromXrefOrQual ( const CSeq_feat &  cds)

◆ s_IsLocationEndAtOtherLocationInternalEndpoint()

bool s_IsLocationEndAtOtherLocationInternalEndpoint ( const CSeq_loc &  loc,
const CSeq_loc &  other_loc )

◆ s_IsPreprotein()

bool s_IsPreprotein ( CSeq_feat_Handle  fh)

◆ s_KeyFromProcessed()

string s_KeyFromProcessed ( CProt_ref::EProcessed  processed)

◆ s_NameCloseEnoughC()

static bool s_NameCloseEnoughC ( const CSubSource &  s1,
const CSubSource &  s2 )

◆ s_ProcessedFromKey()

CProt_ref::EProcessed s_ProcessedFromKey ( const string &  key)

◆ s_RemoveOrgFromEndOfProtein()

static void s_RemoveOrgFromEndOfProtein ( CBioseq &  seq,
string  taxname )

◆ s_SameSubtypeC()

static bool s_SameSubtypeC ( const CSubSource &  s1,
const CSubSource &  s2 )

Definition at line 3571 of file cleanup.cpp.

References CSubSource_Base::GetSubtype(), and CSubSource_Base::IsSetSubtype().

Referenced by s_SubSourceListUniqued().

◆ s_SeqDescLessThan()

static bool s_SeqDescLessThan ( const CRef< CSeqdesc > &  desc1,
const CRef< CSeqdesc > &  desc2 )

Definition at line 2990 of file cleanup.cpp.

References s_SeqDescToOrdering(), and CSeqdesc_Base::Which().

Referenced by CCleanup::NormalizeDescriptorOrder().

◆ s_SeqDescToOrdering()

static int s_SeqDescToOrdering ( CSeqdesc::E_Choice  chs)

Definition at line 2977 of file cleanup.cpp.

Referenced by s_SeqDescLessThan().

◆ s_SubsourceCompareC()

static bool s_SubsourceCompareC ( const CRef< CSubSource > &  st1,
const CRef< CSubSource > &  st2 )

Definition at line 3548 of file cleanup.cpp.

References NStr::CompareNocase(), FIELD_IS_SET, and GET_FIELD.

Referenced by s_SubSourceListUniqued().

◆ s_SubSourceListUniqued()

bool s_SubSourceListUniqued ( CBioSource &  biosrc)

◆ s_TitleEndsInOrganism() [1/3]

static SIZE_TYPE s_TitleEndsInOrganism ( const string &  sTitle,
const COrg_ref &  org,
SIZE_TYPE &  organelle_pos )

◆ s_TitleEndsInOrganism() [2/3]

static SIZE_TYPE s_TitleEndsInOrganism ( const string &  sTitle,
const COrgName::TName &  orgname,
SIZE_TYPE &  organelle_pos )

◆ s_TitleEndsInOrganism() [3/3]

static SIZE_TYPE s_TitleEndsInOrganism ( const string &  sTitle,
const string &  sOrganism,
SIZE_TYPE &  OrganellePos )

Variable Documentation

◆ kLowQualitySequence

const string kLowQualitySequence = "low-quality sequence region"

Definition at line 2886 of file cleanup.cpp.

Referenced by CCleanup::x_AddLowQualityException().

◆ kTrnaLengthMap

const TRNALengthMap kTrnaLengthMap
Initial value:
{ "16S", { 1000, false } },
{ "18S", { 1000, false } },
{ "23S", { 2000, false } },
{ "25S", { 1000, false } },
{ "26S", { 1000, false } },
{ "28S", { 3300, false } },
{ "small", { 1000, false } },
{ "large", { 1000, false } },
{ "5.8S", { 130, true } },
{ "5S", { 90, true } }

Definition at line 2606 of file cleanup.cpp.

Referenced by s_CleanupIsShortrRNA().

◆ methionine_encoded

const unsigned int methionine_encoded = 'M' - 'A'

Definition at line 4686 of file cleanup.cpp.

Referenced by CCleanup::IsMethionine().

◆ sc_seqdesc_order_map

const TSeqdescOrderElem sc_seqdesc_order_map[]
Initial value:
= {
{ CSeqdesc::e_Org, 16 },
{ CSeqdesc::e_Num, 11 },
{ CSeqdesc::e_Pir, 18 },
{ CSeqdesc::e_Sp, 17 },
{ CSeqdesc::e_Prf, 19 },
{ CSeqdesc::e_Pdb, 20 },
@ e_Embl
EMBL specific information.
Definition: Seqdesc_.hpp:127
@ e_Het
cofactor, etc associated but not bound
Definition: Seqdesc_.hpp:132
@ e_Org
if all from one organism
Definition: Seqdesc_.hpp:116
@ e_Num
a numbering system
Definition: Seqdesc_.hpp:118
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Update_date
date of last update
Definition: Seqdesc_.hpp:129
@ e_Pub
a reference to the publication
Definition: Seqdesc_.hpp:122
@ e_Pir
PIR specific info.
Definition: Seqdesc_.hpp:120
@ e_Genbank
GenBank specific info.
Definition: Seqdesc_.hpp:121
@ e_Prf
PRF specific information.
Definition: Seqdesc_.hpp:130
@ e_Mol_type
type of molecule
Definition: Seqdesc_.hpp:111
@ e_Sp
SWISSPROT specific info.
Definition: Seqdesc_.hpp:125
@ e_Dbxref
xref to other databases
Definition: Seqdesc_.hpp:126
@ e_Comment
a more extensive comment
Definition: Seqdesc_.hpp:117
@ e_Method
sequencing method
Definition: Seqdesc_.hpp:113
@ e_Modelev
model evidence for XM records
Definition: Seqdesc_.hpp:135
@ e_Region
overall region (globin locus)
Definition: Seqdesc_.hpp:123
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
@ e_Modif
Definition: Seqdesc_.hpp:112
@ e_Maploc
map location of this sequence
Definition: Seqdesc_.hpp:119
@ e_Create_date
date entry first created/released
Definition: Seqdesc_.hpp:128
@ e_Title
a title for this sequence
Definition: Seqdesc_.hpp:115
@ e_Pdb
PDB specific information.
Definition: Seqdesc_.hpp:131
@ e_Name
a name for this sequence
Definition: Seqdesc_.hpp:114
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133

Definition at line 2943 of file cleanup.cpp.

◆ sm_ChangeDesc

constexpr std::array<string_view, CCleanupChangeCore::eNumberofChangeTypes> sm_ChangeDesc

Definition at line 312 of file cleanup.cpp.

Referenced by CCleanupChangeCore::GetDescription().

Modified on Sat Dec 09 04:47:52 2023 by rev. 669887