NCBI C++ ToolKit
Public Types | Public Member Functions | Static Public Member Functions | Static Private Member Functions | Private Attributes | List of all members
CCleanup Class Reference

Search Toolkit Book for CCleanup

#include <objtools/cleanup/cleanup.hpp>

+ Inheritance diagram for CCleanup:
+ Collaboration diagram for CCleanup:

Public Types

enum  EValidOptions {
  eClean_NoReporting = 0x1 , eClean_GpipeMode = 0x2 , eClean_NoNcbiUserObjects = 0x4 , eClean_SyncGenCodes = 0x8 ,
  eClean_NoProteinTitles = 0x10 , eClean_KeepTopSet = 0x20 , eClean_KeepSingleSeqSet = 0x40 , eClean_InHugeSeqSet = 0x80
}
 
enum  EScopeOptions { eScope_Copy , eScope_UseInPlace }
 
using TChanges = CConstRef< CCleanupChange >
 
typedef pair< CSeq_feat_Handle, CSeq_feat_Handle > TFeatGenePair
 
- Public Types inherited from CObject
enum  EAllocFillMode { eAllocFillNone = 1 , eAllocFillZero , eAllocFillPattern }
 Control filling of newly allocated memory. More...
 
typedef CObjectCounterLocker TLockerType
 Default locker type for CRef. More...
 
typedef atomic< Uint8 > TCounter
 Counter type is CAtomicCounter. More...
 
typedef Uint8 TCount
 Alias for value type of counter. More...
 

Public Member Functions

 CCleanup (CScope *scope=nullptr, EScopeOptions scope_handling=eScope_Copy)
 
 CCleanup (const CCleanup &)=delete
 
CCleanup & operator= (const CCleanup &)=delete
 
 ~CCleanup ()
 
void SetScope (CScope *scope)
 
TChanges BasicCleanup (CSeq_entry &se, Uint4 options=0)
 
TChanges BasicCleanup (CSeq_submit &ss, Uint4 options=0)
 Cleanup a Seq-submit. More...
 
TChanges BasicCleanup (CBioseq_set &bss, Uint4 options=0)
 Cleanup a Bioseq_set. More...
 
TChanges BasicCleanup (CSeq_annot &sa, Uint4 options=0)
 Cleanup a Seq-Annot. More...
 
TChanges BasicCleanup (CSeq_feat &sf, Uint4 options=0)
 Cleanup a Seq-feat. More...
 
TChanges BasicCleanup (CBioSource &src, Uint4 options=0)
 Cleanup a BioSource. More...
 
TChanges BasicCleanup (CSubmit_block &block, Uint4 options=0)
 
TChanges BasicCleanup (CSeqdesc &desc, Uint4 options=0)
 
TChanges BasicCleanup (CSeq_descr &desc, Uint4 options=0)
 
TChanges BasicCleanup (CSeq_entry_Handle &seh, Uint4 options=0)
 
TChanges BasicCleanup (CBioseq_Handle &bsh, Uint4 options=0)
 
TChanges BasicCleanup (CBioseq_set_Handle &bssh, Uint4 options=0)
 
TChanges BasicCleanup (CSeq_annot_Handle &sak, Uint4 options=0)
 
TChanges BasicCleanup (CSeq_feat_Handle &sfh, Uint4 options=0)
 
TChanges ExtendedCleanup (CSeq_entry &se, Uint4 options=0)
 Cleanup a Seq-entry. More...
 
TChanges ExtendedCleanup (CSeq_submit &ss, Uint4 options=0)
 Cleanup a Seq-submit. More...
 
TChanges ExtendedCleanup (CSeq_annot &sa, Uint4 options=0)
 Cleanup a Seq-Annot. More...
 
- Public Member Functions inherited from CObject
 CObject (void)
 Constructor. More...
 
 CObject (const CObject &src)
 Copy constructor. More...
 
virtual ~CObject (void)
 Destructor. More...
 
CObject & operator= (const CObject &src) THROWS_NONE
 Assignment operator. More...
 
bool CanBeDeleted (void) const THROWS_NONE
 Check if object can be deleted. More...
 
bool IsAllocatedInPool (void) const THROWS_NONE
 Check if object is allocated in memory pool (not system heap) More...
 
bool Referenced (void) const THROWS_NONE
 Check if object is referenced. More...
 
bool ReferencedOnlyOnce (void) const THROWS_NONE
 Check if object is referenced only once. More...
 
void AddReference (void) const
 Add reference to object. More...
 
void RemoveReference (void) const
 Remove reference to object. More...
 
void ReleaseReference (void) const
 Remove reference without deleting object. More...
 
virtual void DoNotDeleteThisObject (void)
 Mark this object as not allocated in heap – do not delete this object. More...
 
virtual void DoDeleteThisObject (void)
 Mark this object as allocated in heap – object can be deleted. More...
 
void * operator new (size_t size)
 Define new operator for memory allocation. More...
 
void * operator new[] (size_t size)
 Define new[] operator for 'array' memory allocation. More...
 
void operator delete (void *ptr)
 Define delete operator for memory deallocation. More...
 
void operator delete[] (void *ptr)
 Define delete[] operator for memory deallocation. More...
 
void * operator new (size_t size, void *place)
 Define new operator. More...
 
void operator delete (void *ptr, void *place)
 Define delete operator. More...
 
void * operator new (size_t size, CObjectMemoryPool *place)
 Define new operator using memory pool. More...
 
void operator delete (void *ptr, CObjectMemoryPool *place)
 Define delete operator. More...
 
virtual void DebugDump (CDebugDumpContext ddc, unsigned int depth) const
 Define method for dumping debug information. More...
 
- Public Member Functions inherited from CDebugDumpable
 CDebugDumpable (void)
 
virtual ~CDebugDumpable (void)
 
void DebugDumpText (ostream &out, const string &bundle, unsigned int depth) const
 
void DebugDumpFormat (CDebugDumpFormatter &ddf, const string &bundle, unsigned int depth) const
 
void DumpToConsole (void) const
 

Static Public Member Functions

static TChanges ExtendedCleanup (CSeq_entry_Handle &seh, Uint4 options=0)
 
static bool ShouldStripPubSerial (const CBioseq &bs)
 
static bool MoveProteinSpecificFeats (CSeq_entry_Handle seh)
 Moves protein-specific features from nucleotide sequences in the Seq-entry to the appropriate protein sequence. More...
 
static bool MoveFeatToProtein (CSeq_feat_Handle fh)
 Moves one feature from nucleotide bioseq to the appropriate protein sequence. More...
 
static bool IsGeneXrefUnnecessary (const CSeq_feat &sf, CScope &scope, const CGene_ref &gene_xref)
 Calculates whether a Gene-xref is unnecessary (because it refers to the same gene as would be calculated using overlap) More...
 
static bool RemoveUnnecessaryGeneXrefs (CSeq_feat &f, CScope &scope)
 Removes unnecessary Gene-xrefs. More...
 
static bool RemoveUnnecessaryGeneXrefs (CSeq_entry_Handle seh)
 Removes unnecessary Gene-xrefs on features in Seq-entry. More...
 
static bool RemoveNonsuppressingGeneXrefs (CSeq_feat &f)
 Removes non-suppressing Gene-xrefs. More...
 
static bool RepairXrefs (const CSeq_feat &f, const CTSE_Handle &tse)
 Repairs non-reciprocal xref pairs for specified feature if xrefs between subtypes are permitted and feature with missing xref does not have an xref to a different feature of the same subtype. More...
 
static bool RepairXrefs (const CSeq_feat &src, CSeq_feat_Handle &dst, const CTSE_Handle &tse)
 Repairs non-reciprocal xref pairs for specified feature pair if xrefs between subtypes are permitted and feature with missing xref does not have an xref to a different feature of the same subtype. More...
 
static bool RepairXrefs (CSeq_entry_Handle seh)
 Repairs non-reciprocal xref pairs in specified seq-entry. More...
 
static bool FindMatchingLocusGene (CSeq_feat &f, const CGene_ref &gene_xref, CBioseq_Handle bsh)
 Detects gene features with matching locus. More...
 
static bool RemoveOrphanLocusGeneXrefs (CSeq_feat &f, CBioseq_Handle bsh)
 Removes orphaned locus Gene-xrefs. More...
 
static bool FindMatchingLocus_tagGene (CSeq_feat &f, const CGene_ref &gene_xref, CBioseq_Handle bsh)
 Detects gene features with matching locus_tag. More...
 
static bool RemoveOrphanLocus_tagGeneXrefs (CSeq_feat &f, CBioseq_Handle bsh)
 Removes orphaned locus_tag Gene-xrefs. More...
 
static bool SeqLocExtend (CSeq_loc &loc, size_t pos, CScope &scope)
 Extends a location to the specified position. More...
 
static bool ExtendToStopIfShortAndNotPartial (CSeq_feat &f, CBioseq_Handle bsh, bool check_for_stop=true)
 Extends a coding region up to 50 nt. More...
 
static bool LocationMayBeExtendedToMatch (const CSeq_loc &orig, const CSeq_loc &improved)
 Checks whether it is possible to extend the original location up to improved one. More...
 
static bool ExtendToStopCodon (CSeq_feat &f, CBioseq_Handle bsh, size_t limit)
 Extends a feature up to limit nt to a stop codon, or to the end of the sequence if limit == 0 (partial will be set if location extends to end of sequence but no stop codon is found) More...
 
static bool ExtendStopPosition (CSeq_feat &f, const CSeq_feat *cdregion, size_t extension=0)
 
static bool SetBestFrame (CSeq_feat &cds, CScope &scope)
 Translates coding region and selects best frame (without stops, or longest) More...
 
static bool SetFrameFromLoc (CCdregion &cdregion, const CSeq_loc &loc, CScope &scope)
 Chooses best frame based on location. More...
 
static bool SetFrameFromLoc (CCdregion::EFrame &frame, const CSeq_loc &loc, CScope &scope)
 
static bool SetCDSPartialsByFrameAndTranslation (CSeq_feat &cds, CScope &scope)
 1. More...
 
static bool ClearInternalPartials (CSeq_loc &loc, bool is_first=true, bool is_last=true)
 Clear internal partials. More...
 
static bool ClearInternalPartials (CSeq_loc_mix &mix, bool is_first=true, bool is_last=true)
 
static bool ClearInternalPartials (CPacked_seqint &pint, bool is_first=true, bool is_last=true)
 
static bool ClearInternalPartials (CSeq_entry_Handle seh)
 
static bool SetFeaturePartial (CSeq_feat &f)
 Set feature partial based on feature location. More...
 
static bool UpdateECNumbers (CProt_ref::TEc &ec_num_list)
 Update EC numbers. More...
 
static bool RemoveBadECNumbers (CProt_ref::TEc &ec_num_list)
 Delete EC numbers. More...
 
static bool FixECNumbers (CSeq_entry_Handle entry)
 Fix EC numbers. More...
 
static bool SetGenePartialByLongestContainedFeature (CSeq_feat &gene, CScope &scope)
 Set partialness of gene to match longest feature contained in gene. More...
 
static void SetProteinName (CProt_ref &prot, const string &protein_name, bool append)
 
static void SetProteinName (CSeq_feat &cds, const string &protein_name, bool append, CScope &scope)
 
static void SetMrnaName (CSeq_feat &mrna, const string &protein_name)
 
static const string & GetProteinName (const CProt_ref &prot)
 
static const string & GetProteinName (const CSeq_feat &cds, CSeq_entry_Handle seh)
 
static bool SetMolinfoTech (CBioseq_Handle seq, CMolInfo::ETech tech)
 Sets MolInfo::tech for a sequence. More...
 
static bool SetMolinfoBiomol (CBioseq_Handle seq, CMolInfo::EBiomol biomol)
 Sets MolInfo::biomol for a sequence. More...
 
static bool AddMissingMolInfo (CBioseq &seq, bool is_product)
 Adds missing MolInfo descriptor to sequence. More...
 
static bool AddProteinTitle (CBioseq_Handle bsh)
 Creates missing protein title descriptor. More...
 
static bool RemoveNcbiCleanupObject (CSeq_entry &seq_entry)
 Removes NcbiCleanup User Objects in the Seq-entry. More...
 
static void AddNcbiCleanupObject (int ncbi_cleanup_version, CSeq_descr &descr)
 Adds NcbiCleanup User Object to Seq-descr. More...
 
static bool TaxonomyLookup (CSeq_entry_Handle seh)
 Looks up Org-refs in the Seq-entry. More...
 
static bool SetGeneticCodes (CBioseq_Handle bsh)
 Sets genetic codes for coding regions on Bioseq-Handle. More...
 
static bool AddPartialToProteinTitle (CBioseq &bioseq)
 Adjusts protein title to reflect partialness. More...
 
static bool RemovePseudoProduct (CSeq_feat &cds, CScope &scope)
 Removes protein product from pseudo coding region. More...
 
static CRef< CSeq_entry > AddProtein (const CSeq_feat &cds, CScope &scope)
 
static bool ExpandGeneToIncludeChildren (CSeq_feat &gene, CTSE_Handle &tse)
 Expands gene to include features it cross-references. More...
 
static bool WGSCleanup (CSeq_entry_Handle entry, bool instantiate_missing_proteins=true, Uint4 options=0, bool run_extended_cleanup=true)
 Performs WGS specific cleanup. More...
 
static bool AddLowQualityException (CSeq_entry_Handle entry)
 For table2asn -c s: adds an exception of "low-quality sequence region" to coding regions and mRNAs that are not pseudo and have an intron <11bp in length. More...
 
static bool NormalizeDescriptorOrder (CSeq_descr &descr)
 Normalize Descriptor Order on a specific Seq-entry. More...
 
static bool NormalizeDescriptorOrder (CSeq_entry_Handle seh)
 Normalize Descriptor Order on a specific Seq-entry. More...
 
static bool RemoveUnseenTitles (CSeq_entry_EditHandle::TSeq seq)
 Remove all titles in Seqdescr except the last, because it is the only one that would be displayed in the flatfile. More...
 
static bool RemoveUnseenTitles (CSeq_entry_EditHandle::TSet set)
 Remove all titles in Seqdescr except the last, because it is the only one that would be displayed in the flatfile. More...
 
static bool AddGenBankWrapper (CSeq_entry_Handle seh)
 Add GenBank Wrapper Set. More...
 
static void GetPubdescLabels (const CPubdesc &pd, vector< TEntrezId > &pmids, vector< TEntrezId > &muids, vector< int > &serials, vector< string > &published_labels, vector< string > &unpublished_labels)
 For publication citations: get labels for a pubdesc. More...
 
static vector< CConstRef< CPub > > GetCitationList (CBioseq_Handle bsh)
 Get list of pubs that can be used for citations for Seq-feat on a Bioseq-handle. More...
 
static bool RemoveDuplicatePubs (CSeq_descr &descr)
 Remove duplicate publications. More...
 
static bool OkToPromoteNpPub (const CPubdesc &pd)
 Some pubs should not be promoted to nuc-prot set from sequence. More...
 
static bool OkToPromoteNpPub (const CBioseq &b)
 For some sequences, pubs should not be promoted to nuc-prot set from sequence. More...
 
static bool PubAlreadyInSet (const CPubdesc &pd, const CSeq_descr &descr)
 
static bool ConvertPubFeatsToPubDescs (CSeq_entry_Handle seh)
 Convert full-length publication features to publication descriptors. More...
 
static bool RescueSiteRefPubs (CSeq_entry_Handle seh)
 Rescue pubs from Site-ref features. More...
 
static bool IsMinPub (const CPubdesc &pd, bool is_refseq_prot)
 Is this a "minimal" pub? (If yes, do not rescue from a Seq-feat.cit) More...
 
static void MoveOneFeatToPubdesc (CSeq_feat_Handle feat, CRef< CSeqdesc > d, CBioseq_Handle b, bool remove_feat=true)
 
static bool RemoveDupBioSource (CSeq_descr &descr)
 Remove duplicate biosource descriptors. More...
 
static CRef< CBioSource > BioSrcFromFeat (const CSeq_feat &f)
 Get BioSource from feature to use for source descriptor. More...
 
static bool AreBioSourcesMergeable (const CBioSource &src1, const CBioSource &src2)
 
static bool MergeDupBioSources (CSeq_descr &descr)
 
static bool MergeDupBioSources (CBioSource &src1, const CBioSource &add)
 
static bool ConvertSrcFeatsToSrcDescs (CSeq_entry_Handle seh)
 Convert full-length source features to source descriptors. More...
 
static bool FixGeneXrefSkew (CSeq_entry_Handle seh)
 Examine all genes and gene xrefs in the Seq-entry. More...
 
static bool RenormalizeNucProtSets (CSeq_entry_Handle seh)
 Convert nuc-prot sets with just one sequence to just the sequence. This can't be done during the explore phase because it changes a seq to a set. More...
 
static bool DecodeXMLMarkChanged (std::string &str)
 decodes various tags, including carriage-return-line-feed constructs More...
 
static CRef< CSeq_loc > GetProteinLocationFromNucleotideLocation (const CSeq_loc &nuc_loc, CScope &scope)
 
static CRef< CSeq_loc > GetProteinLocationFromNucleotideLocation (const CSeq_loc &nuc_loc, const CSeq_feat &cds, CScope &scope, bool require_inframe=false)
 
static bool RepackageProteins (CSeq_entry_Handle seh)
 Find proteins that are not packaged in the same nuc-prot set as the coding region for which they are a product, and move them to that nuc-prot set. More...
 
static bool RepackageProteins (const CSeq_feat &cds, CBioseq_set_Handle np)
 
static bool ConvertDeltaSeqToRaw (CSeq_entry_Handle seh, CSeq_inst::EMol filter=CSeq_inst::eMol_not_set)
 
static bool ParseCodeBreak (const CSeq_feat &feat, CCdregion &cds, const CTempString &str, CScope &scope, IObjtoolsListener *pMessageListener=nullptr)
 Parse string into code break and add to coding region. More...
 
static bool ParseCodeBreaks (CSeq_feat &feat, CScope &scope)
 Parses all valid transl_except Gb-quals into code-breaks for cdregion, then removes the transl_except Gb-quals that were successfully parsed. More...
 
static size_t MakeSmallGenomeSet (CSeq_entry_Handle entry)
 
static bool MakeIRDFeatsFromSourceXrefs (CSeq_entry_Handle entry)
 From SQD-4329: for each sequence with a source that has an IRD db_xref, create a misc_feature across the entire span and move the IRD db_xref from the source to the misc_feature. More...
 
static bool FixRNAEditingCodingRegion (CSeq_feat &cds)
 From GB-7563 An action has been requested that will do the following: 1. More...
 
static void SetCodeBreakLocation (CCode_break &cb, size_t pos, const CSeq_feat &cds)
 Utility function for setting code break location given offset. pos is the position of the amino acid where the translation exception occurs (starts with 1). More...
 
static bool IsMethionine (const CCode_break &cb)
 
static CConstRef< CCode_break > GetCodeBreakForLocation (size_t pos, const CSeq_feat &cds)
 Utility function for finding the code break for a given amino acid position. pos is the position of the amino acid where the translation exception occurs (starts with 1). More...
 
static bool NormalizeGeneQuals (CSeq_feat &cds, CSeq_feat &gene)
 
static bool NormalizeGeneQuals (CBioseq_Handle bsh)
 
static bool NormalizeGeneQuals (CSeq_entry_Handle seh)
 
static vector< TFeatGenePair > GetNormalizableGeneQualPairs (CBioseq_Handle bsh)
 
static bool CleanupUserObject (CUser_object &object)
 
static bool CleanupAuthor (CAuthor &author, bool fix_initials=true)
 
static bool CleanupAuthList (CAuth_list &al, bool fix_initials=true)
 
static void ResetAuthorNames (CAuth_list::TNames &names)
 
static bool CleanupAffil (CAffil &af)
 
static bool IsEmpty (const CAuth_list::TAffil &affil)
 
static bool CleanupCollectionDates (CSeq_entry_Handle seh, bool month_first)
 
static void AutodefId (CSeq_entry_Handle seh)
 
static char ValidAminoAcid (string_view abbrev)
 
- Static Public Member Functions inherited from CObject
static NCBI_XNCBI_EXPORT void ThrowNullPointerException (void)
 Define method to throw null pointer exception. More...
 
static NCBI_XNCBI_EXPORT void ThrowNullPointerException (const type_info &type)
 
static EAllocFillMode GetAllocFillMode (void)
 
static void SetAllocFillMode (EAllocFillMode mode)
 
static void SetAllocFillMode (const string &value)
 Set mode from configuration parameter value. More...
 
- Static Public Member Functions inherited from CDebugDumpable
static void EnableDebugDump (bool on)
 

Static Private Member Functions

static bool x_CleanupUserField (CUser_field &field)
 
static bool x_MergeDupOrgNames (COrgName &on1, const COrgName &add)
 
static bool x_MergeDupOrgRefs (COrg_ref &org1, const COrg_ref &add)
 
static bool x_HasShortIntron (const CSeq_loc &loc, size_t min_len=11)
 
static bool x_AddLowQualityException (CSeq_feat &feat)
 
static bool x_AddLowQualityException (CSeq_entry_Handle entry, CSeqFeatData::ESubtype subtype)
 
static bool s_IsProductOnFeat (const CSeq_feat &cds)
 
static void s_SetProductOnFeat (CSeq_feat &feat, const string &protein_name, bool append)
 
static bool s_CleanupGeneOntology (CUser_object &obj)
 
static bool s_CleanupStructuredComment (CUser_object &obj)
 
static bool s_RemoveEmptyFields (CUser_object &obj)
 
static bool s_CleanupGenomeAssembly (CUser_object &obj)
 
static bool s_CleanupDBLink (CUser_object &obj)
 
static bool s_AddNumToUserField (CUser_field &field)
 
static bool s_CleanupNameStdBC (CName_std &name, bool fix_initials)
 
static void s_ExtractSuffixFromInitials (CName_std &name)
 
static void s_FixEtAl (CName_std &name)
 
static bool s_Flatten (CPub_equiv &pub_equiv)
 

Private Attributes

CRef< CScope > m_Scope
 

Additional Inherited Members

- Static Public Attributes inherited from CObject
static const TCount eCounterBitsCanBeDeleted = 1 << 0
 Define possible object states. More...
 
static const TCount eCounterBitsInPlainHeap = 1 << 1
 Heap signature was found. More...
 
static const TCount eCounterBitsPlaceMask
 Mask for 'in heap' state flags. More...
 
static const int eCounterStep = 1 << 2
 Skip over the "in heap" bits. More...
 
static const TCount eCounterValid = TCount(1) << (sizeof(TCount) * 8 - 2)
 Minimal value for valid objects (reference counter is zero). Must be a single bit value. More...
 
static const TCount eCounterStateMask
 Valid object, and object in heap. More...
 
- Protected Member Functions inherited from CObject
virtual void DeleteThis (void)
 Virtual method "deleting" this object. More...
 

Detailed Description

Definition at line 68 of file cleanup.hpp.

Member Typedef Documentation

◆ TChanges

Definition at line 97 of file cleanup.hpp.

◆ TFeatGenePair

Definition at line 590 of file cleanup.hpp.

Member Enumeration Documentation

◆ EScopeOptions

Enumerator
eScope_Copy 
eScope_UseInPlace 

Definition at line 83 of file cleanup.hpp.

◆ EValidOptions

Enumerator
eClean_NoReporting 
eClean_GpipeMode 
eClean_NoNcbiUserObjects 
eClean_SyncGenCodes 
eClean_NoProteinTitles 
eClean_KeepTopSet 
eClean_KeepSingleSeqSet 
eClean_InHugeSeqSet 

Definition at line 72 of file cleanup.hpp.

Constructor & Destructor Documentation

◆ CCleanup() [1/2]

CCleanup::CCleanup ( CScope * scope = nullptr,
EScopeOptions  scope_handling = eScope_Copy 
)

◆ CCleanup() [2/2]

CCleanup::CCleanup ( const CCleanup & )
delete

◆ ~CCleanup()

CCleanup::~CCleanup ( void  )

Definition at line 103 of file cleanup.cpp.

Member Function Documentation

◆ AddGenBankWrapper()

bool CCleanup::AddGenBankWrapper ( CSeq_entry_Handle  seh)
static

Add GenBank Wrapper Set.

Parameters
entry: Seq-entry to edit
Returns
Boolean return value indicates whether object changed

Definition at line 3065 of file cleanup.cpp.

References CSeq_entry_EditHandle::ConvertSeqToSet(), CBioseq_set_Base::eClass_genbank, CBioseq_set_Handle::GetClass(), CSeq_entry_Handle::GetSet(), CSeq_entry_Handle::IsSet(), and CBioseq_set_Handle::IsSetClass().

◆ AddLowQualityException()

bool CCleanup::AddLowQualityException ( CSeq_entry_Handle  entry)
static

For table2asn -c s: adds an exception of "low-quality sequence region" to coding regions and mRNAs that are not pseudo and have an intron <11bp in length.

Parameters
entry: Seq-entry to edit
Returns
Boolean return value indicates whether object was updated

Definition at line 2931 of file cleanup.cpp.

References CSeqFeatData::eSubtype_cdregion, CSeqFeatData::eSubtype_mRNA, and x_AddLowQualityException().

Referenced by CTable2AsnValidator::Cleanup().

◆ AddMissingMolInfo()

bool CCleanup::AddMissingMolInfo ( CBioseq & seq,
bool  is_product 
)
static

◆ AddNcbiCleanupObject()

void CCleanup::AddNcbiCleanupObject ( int  ncbi_cleanup_version,
CSeq_descr & descr
)
static

◆ AddPartialToProteinTitle()

bool CCleanup::AddPartialToProteinTitle ( CBioseq & bioseq)
static

◆ AddProtein()

CRef< CSeq_entry > CCleanup::AddProtein ( const CSeq_feat & cds,
CScope & scope
)
static

◆ AddProteinTitle()

bool CCleanup::AddProteinTitle ( CBioseq_Handle  bsh)
static

◆ AreBioSourcesMergeable()

bool CCleanup::AreBioSourcesMergeable ( const CBioSource & src1,
const CBioSource & src2
)
static

◆ AutodefId()

void CCleanup::AutodefId ( CSeq_entry_Handle  seh)
static

◆ BasicCleanup() [1/14]

CCleanup::TChanges CCleanup::BasicCleanup ( CBioseq_Handle & bsh,
Uint4  options = 0 
)

◆ BasicCleanup() [2/14]

CCleanup::TChanges CCleanup::BasicCleanup ( CBioseq_set & bss,
Uint4  options = 0 
)

Cleanup a Bioseq_set.

Definition at line 156 of file cleanup.cpp.

References CLEANUP_SETUP.

◆ BasicCleanup() [3/14]

CCleanup::TChanges CCleanup::BasicCleanup ( CBioseq_set_Handle & bssh,
Uint4  options = 0 
)

◆ BasicCleanup() [4/14]

CCleanup::TChanges CCleanup::BasicCleanup ( CBioSource & src,
Uint4  options = 0 
)

Cleanup a BioSource.

Definition at line 180 of file cleanup.cpp.

References CLEANUP_SETUP.

◆ BasicCleanup() [5/14]

CCleanup::TChanges CCleanup::BasicCleanup ( CSeq_annot & sa,
Uint4  options = 0 
)

Cleanup a Seq-Annot.

Definition at line 164 of file cleanup.cpp.

References CLEANUP_SETUP.

◆ BasicCleanup() [6/14]

CCleanup::TChanges CCleanup::BasicCleanup ( CSeq_annot_Handle & sak,
Uint4  options = 0 
)

◆ BasicCleanup() [7/14]

CCleanup::TChanges CCleanup::BasicCleanup ( CSeq_descr & desc,
Uint4  options = 0 
)

Definition at line 247 of file cleanup.cpp.

References CLEANUP_SETUP, and CSeq_descr_Base::Set().

◆ BasicCleanup() [8/14]

CCleanup::TChanges CCleanup::BasicCleanup ( CSeq_entry & se,
Uint4  options = 0 
)

◆ BasicCleanup() [9/14]

CCleanup::TChanges CCleanup::BasicCleanup ( CSeq_entry_Handle & seh,
Uint4  options = 0 
)

◆ BasicCleanup() [10/14]

CCleanup::TChanges CCleanup::BasicCleanup ( CSeq_feat & sf,
Uint4  options = 0 
)

Cleanup a Seq-feat.

Definition at line 172 of file cleanup.cpp.

References CLEANUP_SETUP.

◆ BasicCleanup() [11/14]

CCleanup::TChanges CCleanup::BasicCleanup ( CSeq_feat_Handle & sfh,
Uint4  options = 0 
)

◆ BasicCleanup() [12/14]

CCleanup::TChanges CCleanup::BasicCleanup ( CSeq_submit & ss,
Uint4  options = 0 
)

Cleanup a Seq-submit.

Definition at line 140 of file cleanup.cpp.

References CLEANUP_SETUP.

◆ BasicCleanup() [13/14]

CCleanup::TChanges CCleanup::BasicCleanup ( CSeqdesc & desc,
Uint4  options = 0 
)

Definition at line 238 of file cleanup.cpp.

References CLEANUP_SETUP.

◆ BasicCleanup() [14/14]

CCleanup::TChanges CCleanup::BasicCleanup ( CSubmit_block & block,
Uint4  options = 0 
)

Definition at line 148 of file cleanup.cpp.

References CLEANUP_SETUP.

◆ BioSrcFromFeat()

CRef< CBioSource > CCleanup::BioSrcFromFeat ( const CSeq_feat & f)
static

◆ CleanupAffil()

bool CCleanup::CleanupAffil ( CAffil & af)
static

◆ CleanupAuthList()

bool CCleanup::CleanupAuthList ( CAuth_list & al,
bool  fix_initials = true 
)
static

◆ CleanupAuthor()

bool CCleanup::CleanupAuthor ( CAuthor & author,
bool  fix_initials = true 
)
static

◆ CleanupCollectionDates()

bool CCleanup::CleanupCollectionDates ( CSeq_entry_Handle  seh,
bool  month_first 
)
static

◆ CleanupUserObject()

bool CCleanup::CleanupUserObject ( CUser_object & object)
static

◆ ClearInternalPartials() [1/4]

bool CCleanup::ClearInternalPartials ( CPacked_seqint & pint,
bool  is_first = true,
bool  is_last = true 
)
static

Definition at line 1595 of file cleanup.cpp.

References eExtreme_Biological, NON_CONST_ITERATE, and CPacked_seqint_Base::Set().

◆ ClearInternalPartials() [2/4]

bool CCleanup::ClearInternalPartials ( CSeq_entry_Handle  seh)
static

◆ ClearInternalPartials() [3/4]

bool CCleanup::ClearInternalPartials ( CSeq_loc & loc,
bool  is_first = true,
bool  is_last = true 
)
static

◆ ClearInternalPartials() [4/4]

bool CCleanup::ClearInternalPartials ( CSeq_loc_mix & mix,
bool  is_first = true,
bool  is_last = true 
)
static

◆ ConvertDeltaSeqToRaw()

bool CCleanup::ConvertDeltaSeqToRaw ( CSeq_entry_Handle  seh,
CSeq_inst::EMol  filter = CSeq_inst::eMol_not_set 
)
static

◆ ConvertPubFeatsToPubDescs()

bool CCleanup::ConvertPubFeatsToPubDescs ( CSeq_entry_Handle  seh)
static

◆ ConvertSrcFeatsToSrcDescs()

bool CCleanup::ConvertSrcFeatsToSrcDescs ( CSeq_entry_Handle  seh)
static

◆ DecodeXMLMarkChanged()

bool CCleanup::DecodeXMLMarkChanged ( std::string & str)
static

◆ ExpandGeneToIncludeChildren()

bool CCleanup::ExpandGeneToIncludeChildren ( CSeq_feat & gene,
CTSE_Handle & tse
)
static

Expands gene to include features it cross-references.

Parameters
gene: Seq-feat to adjust
tse: Top-level Seq-entry in which to find other features
Returns
Boolean indicates whether anything changed

Definition at line 2571 of file cleanup.cpp.

References eExtreme_Positional, CSeqFeatData::eSubtype_any, f, CTSE_Handle::GetFeaturesWithId(), CObject_id_Base::GetId(), CSeq_feat_Base::GetLocation(), CSeq_loc::GetStart(), CSeq_loc::GetStop(), CSeq_feat_Base::GetXref(), CSeq_loc_Base::IsInt(), CSeq_feat_Base::IsSetLocation(), CSeq_feat_Base::IsSetXref(), ITERATE, and CSeq_feat_Base::SetLocation().

Referenced by WGSCleanup().

◆ ExtendedCleanup() [1/4]

CCleanup::TChanges CCleanup::ExtendedCleanup ( CSeq_annot & sa,
Uint4  options = 0 
)

Cleanup a Seq-Annot.

Definition at line 276 of file cleanup.cpp.

References CLEANUP_SETUP.

◆ ExtendedCleanup() [2/4]

CCleanup::TChanges CCleanup::ExtendedCleanup ( CSeq_entry & se,
Uint4  options = 0 
)

Cleanup a Seq-entry.

Definition at line 259 of file cleanup.cpp.

References CLEANUP_SETUP.

Referenced by CTable2AsnValidator::Cleanup(), and WGSCleanup().

◆ ExtendedCleanup() [3/4]

CCleanup::TChanges CCleanup::ExtendedCleanup ( CSeq_entry_Handle & seh,
Uint4  options = 0 
)
static

◆ ExtendedCleanup() [4/4]

CCleanup::TChanges CCleanup::ExtendedCleanup ( CSeq_submit & ss,
Uint4  options = 0 
)

Cleanup a Seq-submit.

Definition at line 268 of file cleanup.cpp.

References CLEANUP_SETUP.

◆ ExtendStopPosition()

bool CCleanup::ExtendStopPosition ( CSeq_feat & f,
const CSeq_feat * cdregion,
size_t  extension = 0 
)
static

◆ ExtendToStopCodon()

bool CCleanup::ExtendToStopCodon ( CSeq_feat & f,
CBioseq_Handle  bsh,
size_t  limit 
)
static

Extends a feature up to limit nt to a stop codon, or to the end of the sequence if limit == 0 (partial will be set if location extends to end of sequence but no stop codon is found)

Parameters
f: Seq-feat to edit
bsh: CBioseq_Handle on which the feature is located
limit: maximum number of nt to extend, or 0 if unlimited
Returns
Boolean return value indicates whether the feature was extended

Definition at line 1113 of file cleanup.cpp.

References CSeqVector::begin(), CBioseq_Handle::eCoding_Iupac, eExtreme_Biological, CCdregion_Base::eFrame_not_set, CCdregion_Base::eFrame_three, CCdregion_Base::eFrame_two, eNa_strand_minus, ExtendStopPosition(), f, CBioseq_Handle::GetBioseqLength(), CTrans_table::GetCodonResidue(), CBioseq_Handle::GetId(), CBioseq_Handle::GetInst_Length(), GetLength(), CBioseq_Handle::GetScope(), CSeq_loc::GetStop(), CSeq_loc::GetStrand(), CGen_code_table::GetTransTable(), i, CSeq_loc::IsSetStrand(), len, mod(), CTrans_table::NextCodonState(), CSeq_loc::SetInt(), CSeq_loc::SetStrand(), and CSeqVector::size().

Referenced by ExtendToStopIfShortAndNotPartial(), and CCleanupApp::x_BatchExtendCDS().

◆ ExtendToStopIfShortAndNotPartial()

bool CCleanup::ExtendToStopIfShortAndNotPartial ( CSeq_feat & f,
CBioseq_Handle  bsh,
bool  check_for_stop = true 
)
static

Extends a coding region up to 50 nt.

The coding region is extended if it: (1) does not end with a stop codon; (2) is adjacent to a stop codon; (3) is not pseudo.

Parameters
f: Seq-feat to edit
bsh: CBioseq_Handle on which the feature is located
Returns
Boolean return value indicates whether the feature was extended

Definition at line 1291 of file cleanup.cpp.

References eExtreme_Biological, NStr::EndsWith(), ExtendToStopCodon(), f, CSeq_feat_Base::GetLocation(), GetmRNAforCDS(), CBioseq_Handle::GetScope(), CSeq_loc::GetStop(), IsPseudo(), s_IsLocationEndAtOtherLocationInternalEndpoint(), and CSeqTranslator::Translate().

Referenced by CNewCleanup_imp::CdRegionEC(), CFeatTableEdit::xGenerate_mRNA_Product(), and CFeatureTableReader::xTranslateProtein().

◆ FindMatchingLocus_tagGene()

bool CCleanup::FindMatchingLocus_tagGene ( CSeq_feat & f,
const CGene_ref & gene_xref,
CBioseq_Handle  bsh 
)
static

Detects gene features with matching locus_tag.

Parameters
f: Seq-feat parent feature of gene_xref [in]
gene_xref: Gene-ref of gene-xref [in]
bsh: CBioseq_Handle parent bioseq in which to search for genes [in]
Returns
Boolean return value indicates whether a gene feature with matching locus_tag has been found

Definition at line 990 of file cleanup.cpp.

References CSeqFeatData::eSubtype_gene, f, CGene_ref_Base::GetLocus_tag(), CGene_ref_Base::IsSetLocus_tag(), and match().

Referenced by RemoveOrphanLocus_tagGeneXrefs().

◆ FindMatchingLocusGene()

bool CCleanup::FindMatchingLocusGene ( CSeq_feat & f,
const CGene_ref & gene_xref,
CBioseq_Handle  bsh 
)
static

Detects gene features with matching locus.

Parameters
f: Seq-feat parent feature of gene_xref [in]
gene_xref: Gene-ref of gene-xref [in]
bsh: CBioseq_Handle parent bioseq in which to search for genes [in]
Returns
Boolean return value indicates whether a gene feature with matching locus has been found

Definition at line 942 of file cleanup.cpp.

References CSeqFeatData::eSubtype_gene, f, CGene_ref_Base::GetLocus(), CGene_ref_Base::IsSetLocus(), and match().

Referenced by RemoveOrphanLocusGeneXrefs().

◆ FixECNumbers()

bool CCleanup::FixECNumbers ( CSeq_entry_Handle  entry)
static

Fix EC numbers.

Parameters
entrySeq-entry-handle to clean
Returns
Boolean value indicates whether any changes were made

Definition at line 1713 of file cleanup.cpp.

References CSerialObject::Assign(), CSeqFeatData_Base::e_Prot, f, CSeq_feat_Base::GetData(), CProt_ref_Base::GetEc(), CSeqFeatData_Base::GetProt(), RemoveBadECNumbers(), CSeq_feat_EditHandle::Replace(), CSeq_feat_Base::SetData(), and UpdateECNumbers().

Referenced by CCleanupApp::x_ProcessFeatureOptions().

◆ FixGeneXrefSkew()

bool CCleanup::FixGeneXrefSkew ( CSeq_entry_Handle  seh)
static

Examine all genes and gene xrefs in the Seq-entry.

If no genes have locus and some have locus tag AND no gene xrefs have locus-tag and some gene xrefs have locus, change all gene xrefs to use locus tag. If no genes have locus tag and some have locus AND no gene xrefs have locus and some gene xrefs have locus tag, change all gene xrefs to use locus.

Parameters
sehSeq-entry to edit
Returns
bool indicates whether any changes were made

Definition at line 3915 of file cleanup.cpp.

References CSerialObject::Assign(), g(), CSeq_feat_Handle::GetData(), CSeqFeatData_Base::GetGene(), CSeq_feat_Handle::GetGeneXref(), CMappedFeat::GetSeq_feat(), CSeqFeatData_Base::IsGene(), CGene_ref_Base::IsSetLocus(), CGene_ref_Base::IsSetLocus_tag(), CSeq_feat_Handle::IsSetXref(), NON_CONST_ITERATE, CSeq_feat_EditHandle::Replace(), CFeat_CI::Rewind(), and CSeq_feat_Base::SetXref().

Referenced by CNewCleanup_imp::x_ExtendedCleanupExtra().

◆ FixRNAEditingCodingRegion()

bool CCleanup::FixRNAEditingCodingRegion ( CSeq_feat cds)
static

From GB-7563: An action has been requested that will do the following:

1. This action should be limited to protein sequences where the product is an exact match to a specified text (the usual string constraint is not needed). 2. Protein sequences for which the coding region is 5' partial should not be affected. 3. When the protein name matches, the following actions should be taken if and only if the first amino acid of the protein sequence is not M (methionine): a. The first amino acid of the protein sequence should be changed to methionine. b. The coding region should have the text "RNA editing" added to Seq-feat.except_text (separated from any existing text by a semicolon). If Seq-feat.except is not already true, it should be set to true. c. A code-break should be added to Cdregion.code-break where the Code-break.loc is the location of the first codon of the coding region and Code-break.aa is ncbieaa 'M'. (Indexers will refer to "code-breaks" as "translation exceptions" because these appear in the flatfile as a /transl_except qualifier.)

It will be the responsibility of the caller to only invoke this function for coding regions where the product name is a match, and the protein sequence does not already start with an M.

Definition at line 4849 of file cleanup.cpp.

References eExtreme_Biological, NStr::Find(), GetCodeBreakForLocation(), CSeq_feat_Base::GetData(), CSeq_feat_Base::GetExcept(), CSeq_feat_Base::GetExcept_text(), CSeq_feat_Base::GetLocation(), NStr::IsBlank(), CSeqFeatData_Base::IsCdregion(), IsMethionine(), CSeq_loc::IsPartialStart(), CSeq_feat_Base::IsSetData(), CSeq_feat_Base::IsSetExcept(), CSeq_feat_Base::IsSetExcept_text(), CSeq_feat_Base::IsSetLocation(), CSeq_feat_Base::SetExcept(), and CSeq_feat_Base::SetExcept_text().

Referenced by CRestoreRNAediting::OnApply().

◆ GetCitationList()

vector< CConstRef< CPub > > CCleanup::GetCitationList ( CBioseq_Handle  bsh)
static

Get list of pubs that can be used for citations for Seq-feat on a Bioseq-handle.

Parameters
bshBioseq-handle to search
Returns
vector<CConstRef<CPub> > ordered list of pubs. Note that Seq-feat.cit entries appear in the flatfile using the position in the list.

Definition at line 3207 of file cleanup.cpp.

References CSeqdesc_Base::e_Pub, CSeqFeatData_Base::e_Pub, CSeq_feat_Handle::GetData(), CSeqdesc_Base::GetPub(), CSeqFeatData_Base::GetPub(), GetPubdescLabels(), CAliasBase< TPrim >::Set(), CCit_gen_Base::SetCit(), CPub_Base::SetGen(), CPub_Base::SetMuid(), CPub_Base::SetPmid(), and CCit_gen_Base::SetSerial_number().

Referenced by CNewCleanup_imp::MoveCitationQuals().

◆ GetCodeBreakForLocation()

CConstRef< CCode_break > CCleanup::GetCodeBreakForLocation ( size_t  pos,
const CSeq_feat cds 
)
static

◆ GetNormalizableGeneQualPairs()

vector< CCleanup::TFeatGenePair > CCleanup::GetNormalizableGeneQualPairs ( CBioseq_Handle  bsh)
static

◆ GetProteinLocationFromNucleotideLocation() [1/2]

CRef< CSeq_loc > CCleanup::GetProteinLocationFromNucleotideLocation ( const CSeq_loc nuc_loc,
const CSeq_feat cds,
CScope scope,
bool  require_inframe = false 
)
static

◆ GetProteinLocationFromNucleotideLocation() [2/2]

CRef< CSeq_loc > CCleanup::GetProteinLocationFromNucleotideLocation ( const CSeq_loc nuc_loc,
CScope scope 
)
static

◆ GetProteinName() [1/2]

const string & CCleanup::GetProteinName ( const CProt_ref prot)
static

◆ GetProteinName() [2/2]

const string & CCleanup::GetProteinName ( const CSeq_feat cds,
CSeq_entry_Handle  seh 
)
static

◆ GetPubdescLabels()

void CCleanup::GetPubdescLabels ( const CPubdesc pd,
vector< TEntrezId > &  pmids,
vector< TEntrezId > &  muids,
vector< int > &  serials,
vector< string > &  published_labels,
vector< string > &  unpublished_labels 
)
static

◆ IsEmpty()

bool CCleanup::IsEmpty ( const CAuth_list::TAffil affil)
static

◆ IsGeneXrefUnnecessary()

bool CCleanup::IsGeneXrefUnnecessary ( const CSeq_feat sf,
CScope scope,
const CGene_ref gene_xref 
)
static

Calculates whether a Gene-xref is unnecessary (because it refers to the same gene as would be calculated using overlap)

Parameters
sfSeq-feat with the xref [in]
scopeScope in which to search for location [in]
gene_xrefGene-ref of gene-xref [in]
Returns
Boolean return value indicates whether gene-xref is unnecessary

Definition at line 744 of file cleanup.cpp.

References Compare(), CSeqFeatData_Base::e_Gene, eOverlap_Contained, eSame, CSeqFeatData::eSubtype_gene, fCompareOverlapping, g(), CSeq_feat_Base::GetData(), CSeqFeatData_Base::GetGene(), CSeq_feat_Base::GetLocation(), GetOverlappingFeatures(), GetOverlappingGene(), CConstRef< C, Locker >::GetPointer(), CSeqFeatData_Base::IsGene(), CSeq_feat_Base::IsSetData(), CGene_ref::IsSuppressed(), ITERATE, and CGene_ref::RefersToSameGene().

Referenced by RemoveUnnecessaryGeneXrefs(), and CMacroFunction_RemoveGeneXref::s_GeneXrefMatchesNecessary().

◆ IsMethionine()

bool CCleanup::IsMethionine ( const CCode_break cb)
static

◆ IsMinPub()

bool CCleanup::IsMinPub ( const CPubdesc pd,
bool  is_refseq_prot 
)
static

Is this a "minimal" pub? (If yes, do not rescue from a Seq-feat.cit)

Definition at line 3426 of file cleanup.cpp.

References gen, CPub_equiv_Base::Get(), CPubdesc_Base::GetPub(), CPubdesc_Base::IsSetPub(), and ITERATE.

Referenced by RescueSiteRefPubs().

◆ LocationMayBeExtendedToMatch()

bool CCleanup::LocationMayBeExtendedToMatch ( const CSeq_loc orig,
const CSeq_loc improved 
)
static

Checks whether it is possible to extend the original location up to improved one.

It is possible only if the original location is less than the improved one.

Parameters
origSeq-loc to check
improvedSeq-loc original location may be extended to
Returns
Boolean return value indicates whether the extension is possible

Definition at line 1333 of file cleanup.cpp.

References eExtreme_Biological, eNa_strand_minus, CSeq_loc::GetStop(), and orig.

Referenced by CNewCleanup_imp::CdRegionEC(), CCleanupApp::x_FixCDS(), CFeatTableEdit::xGenerate_mRNA_Product(), and CFeatureTableReader::xTranslateProtein().

◆ MakeIRDFeatsFromSourceXrefs()

bool CCleanup::MakeIRDFeatsFromSourceXrefs ( CSeq_entry_Handle  entry)
static

From SQD-4329: For each sequence with a source that has an IRD db_xref, create a misc_feature across the entire span and move the IRD db_xref from the source to the misc_feature.

Create a suppressing gene xref for the misc_feature.

Parameters
entrySeq-entry on which to search for sources and create features
Returns
bool indicates changes were made

Definition at line 4654 of file cleanup.cpp.

References AddIRDMiscFeature(), CSeqdesc_Base::e_Source, CSeq_inst_Base::eMol_na, NStr::Equal(), COrg_ref_Base::GetDb(), CBioSource_Base::GetOrg(), CSeqdesc_Base::GetSource(), COrg_ref_Base::IsSetDb(), CBioSource_Base::IsSetOrg(), COrg_ref_Base::ResetDb(), and COrg_ref_Base::SetDb().

Referenced by CCleanupApp::x_ProcessXOptions().

◆ MakeSmallGenomeSet()

size_t CCleanup::MakeSmallGenomeSet ( CSeq_entry_Handle  entry)
static

◆ MergeDupBioSources() [1/2]

bool CCleanup::MergeDupBioSources ( CBioSource src1,
const CBioSource add 
)
static

◆ MergeDupBioSources() [2/2]

bool CCleanup::MergeDupBioSources ( CSeq_descr descr)
static

◆ MoveFeatToProtein()

bool CCleanup::MoveFeatToProtein ( CSeq_feat_Handle  fh)
static

Moves one feature from nucleotide bioseq to the appropriate protein sequence.

Parameters
fhFeature to edit
Returns
Boolean return value indicates whether any changes were made

Definition at line 589 of file cleanup.cpp.

References CSerialObject::Assign(), CBioseq_EditHandle::AttachAnnot(), CNewCleanup_imp::BasicCleanupSeqFeat(), ConvertProteinToImp(), CSeqFeatData_Base::e_Imp, CProt_ref_Base::eProcessed_mature, CProt_ref_Base::eProcessed_not_set, CProt_ref_Base::eProcessed_preprotein, ftable, CBioseq_Base::GetAnnot(), CSeq_feat_Handle::GetAnnot(), CBioseq_Handle::GetBioseqCore(), CScope::GetBioseqHandle(), CSeq_feat_Base::GetComment(), CBioseq_Handle::GetCompleteBioseq(), CSeq_annot_Handle::GetCompleteSeq_annot(), CSeq_feat_Base::GetData(), CSeq_feat_Handle::GetData(), CBioseq_Handle::GetEditHandle(), CBioseq_Base::GetId(), CSeqFeatData_Base::GetImp(), CImp_feat_Base::GetKey(), CSeq_feat_Base::GetLocation(), CSeq_feat_Handle::GetLocation(), CProt_ref_Base::GetProcessed(), CSeq_feat_Handle::GetProduct(), CSeqFeatData_Base::GetProt(), GetProteinLocationFromNucleotideLocation(), CSeq_feat_Handle::GetScope(), CScope::GetSeq_annotHandle(), CSeq_feat_Handle::GetSeq_feat(), CBioseq_Handle::IsAa(), NStr::IsBlank(), CSeqFeatData_Base::IsImp(), CSeqFeatData_Base::IsProt(), CBioseq_Base::IsSetAnnot(), CSeq_feat_Base::IsSetComment(), CImp_feat_Base::IsSetKey(), CProt_ref_Base::IsSetName(), CProt_ref_Base::IsSetProcessed(), CSeq_feat_Base::IsSetProduct(), CSeq_feat_Handle::IsSetProduct(), ITERATE, makeCleanupChange(), orig, CSeq_feat_EditHandle::Replace(), RescueProtProductQual(), CSeq_feat_Base::ResetComment(), CSeq_feat_Base::ResetLocation(), CSeq_feat_Base::ResetProduct(), s_GetCdsByLocation(), s_GetCdsByProduct(), s_IsPreprotein(), s_ProcessedFromKey(), CSeq_feat_Base::SetData(), SetFeaturePartial(), CSeq_feat_Base::SetLocation(), CNewCleanup_imp::SetScope(), CNewCleanup_imp::ShouldRemoveAnnot(), CSeq_annot_EditHandle::TakeFeat(), and CSeqFeatData_Base::Which().

Referenced by MoveProteinSpecificFeats().

◆ MoveOneFeatToPubdesc()

void CCleanup::MoveOneFeatToPubdesc ( CSeq_feat_Handle  feat,
CRef< CSeqdesc d,
CBioseq_Handle  b,
bool  remove_feat = true 
)
static

◆ MoveProteinSpecificFeats()

bool CCleanup::MoveProteinSpecificFeats ( CSeq_entry_Handle  seh)
static

Moves protein-specific features from nucleotide sequences in the Seq-entry to the appropriate protein sequence.

Parameters
sehSeq-entry Handle to edit [in]
Returns
Boolean return value indicates whether any changes were made

Definition at line 724 of file cleanup.cpp.

References CSeqFeatData_Base::e_Bond, CSeqFeatData_Base::e_Imp, CSeqFeatData_Base::e_Prot, CSeqFeatData_Base::e_Psec_str, CSeq_inst_Base::eMol_na, SAnnotSelector::IncludeFeatType(), and MoveFeatToProtein().

Referenced by s_MoveProteinSpecificFeats(), and CNewCleanup_imp::x_ExtendedCleanupExtra().

◆ NormalizeDescriptorOrder() [1/2]

bool CCleanup::NormalizeDescriptorOrder ( CSeq_descr descr)
static

Normalize Descriptor Order on a specific Seq-entry.

Parameters
entrySeq-entry to edit
Returns
Boolean return value indicates whether object was updated

Definition at line 3000 of file cleanup.cpp.

References s_SeqDescLessThan(), seq_mac_is_sorted(), and CSeq_descr_Base::Set().

Referenced by ConvertSrcFeatsToSrcDescs(), CCleanupApp::HandleSeqEntry(), MoveOneFeatToPubdesc(), NormalizeDescriptorOrder(), RenormalizeNucProtSets(), WGSCleanup(), CCleanupHugeAsnReader::x_CleanupTopLevelDescriptors(), CCleanupApp::x_ProcessXOptions(), and CNewCleanup_imp::x_SortSeqDescs().

◆ NormalizeDescriptorOrder() [2/2]

bool CCleanup::NormalizeDescriptorOrder ( CSeq_entry_Handle  seh)
static

Normalize Descriptor Order on a specific Seq-entry.

Parameters
sehSeq-entry-Handle to edit
Returns
Boolean return value indicates whether object was updated

Definition at line 3010 of file cleanup.cpp.

References CSeq_entry_CI::fIncludeGivenEntry, CSeq_entry_CI::fRecursive, and NormalizeDescriptorOrder().

◆ NormalizeGeneQuals() [1/3]

bool CCleanup::NormalizeGeneQuals ( CBioseq_Handle  bsh)
static

◆ NormalizeGeneQuals() [2/3]

bool CCleanup::NormalizeGeneQuals ( CSeq_entry_Handle  seh)
static

Definition at line 224 of file gene_qual_normalization.cpp.

References CSeq_inst_Base::eMol_na, and NormalizeGeneQuals().

◆ NormalizeGeneQuals() [3/3]

bool CCleanup::NormalizeGeneQuals ( CSeq_feat cds,
CSeq_feat gene 
)
static

◆ OkToPromoteNpPub() [1/2]

bool CCleanup::OkToPromoteNpPub ( const CBioseq b)
static

For some sequences, pubs should not be promoted to nuc-prot set from sequence.

Definition at line 3330 of file cleanup.cpp.

References b, and ITERATE.

◆ OkToPromoteNpPub() [2/2]

bool CCleanup::OkToPromoteNpPub ( const CPubdesc pd)
static

Some pubs should not be promoted to nuc-prot set from sequence.

Definition at line 3343 of file cleanup.cpp.

References CPubdesc_Base::IsSetComment(), CPubdesc_Base::IsSetFig(), CPubdesc_Base::IsSetName(), and CPubdesc_Base::IsSetNum().

Referenced by MoveOneFeatToPubdesc(), and CNewCleanup_imp::x_MoveNpPub().

◆ operator=()

CCleanup& CCleanup::operator= ( const CCleanup )
delete

◆ ParseCodeBreak()

bool CCleanup::ParseCodeBreak ( const CSeq_feat feat,
CCdregion cds,
const CTempString str,
CScope scope,
IObjtoolsListener pMessageListener = nullptr 
)
static

Parse string into code break and add to coding region.

Parameters
featfeature that contains coding region - necessary to determine codon boundaries
cdscoding region to which code breaks will be added
strstring from which to parse code break
scopescope in which to find sequences referenced (used for location comparisons)
Returns
bool indicates string was successfully parsed and code break was added

Definition at line 4425 of file cleanup.cpp.

References Compare(), CCleanupMessage::eCodeBreak, eContained, eDiag_Error, eNa_strand_minus, eNa_strand_plus, fCompareOverlapping, FIELD_IS_SET, NStr::Find(), GET_FIELD, CSeq_loc::GetId(), GetLength(), CSeq_feat_Base::GetLocation(), isalpha(), CSeq_loc_Base::IsInt(), CSeq_loc_Base::IsPnt(), CSeq_feat_Base::IsSetLocation(), isspace(), len, msg(), NPOS, IObjtoolsListener::PutMessage(), ReadLocFromText(), RESET_FIELD, CCode_break_Base::SetAa(), CCdregion_Base::SetCode_break(), CCode_break_Base::SetLoc(), CCode_break_Base::C_Aa::SetNcbieaa(), CSeq_loc::SetStrand(), str(), NStr::TruncateSpaces_Unsafe(), and x_ValidAminoAcid().

Referenced by ParseCodeBreaks().

◆ ParseCodeBreaks()

bool CCleanup::ParseCodeBreaks ( CSeq_feat feat,
CScope scope 
)
static

Parses all valid transl_except Gb-quals into code-breaks for cdregion, then removes the transl_except Gb-quals that were successfully parsed.

Parameters
featfeature that contains coding region
scopescope in which to find sequences referenced (used for location comparisons)
Returns
bool indicates changes were made

Definition at line 4556 of file cleanup.cpp.

References NStr::EqualNocase(), CSeq_feat_Base::GetData(), CSeq_feat_Base::GetQual(), CSeqFeatData_Base::IsCdregion(), CSeq_feat_Base::IsSetData(), CSeq_feat_Base::IsSetLocation(), CSeq_feat_Base::IsSetQual(), ParseCodeBreak(), CSeq_feat_Base::ResetQual(), CSeq_feat_Base::SetData(), and CSeq_feat_Base::SetQual().

Referenced by CNewCleanup_imp::x_CleanSeqFeatQuals(), CImportFeatTable::x_DoImportCDS(), and CFeatureTableReader::xMoveCdRegions().

◆ PubAlreadyInSet()

bool CCleanup::PubAlreadyInSet ( const CPubdesc pd,
const CSeq_descr descr 
)
static

◆ RemoveBadECNumbers()

bool CCleanup::RemoveBadECNumbers ( CProt_ref::TEc ec_num_list)
static

Delete EC numbers.

Parameters
ec_num_listProt-ref ec number list to clean
Returns
Boolean value indicates whether any changes were made

Definition at line 1689 of file cleanup.cpp.

References CleanVisStringJunk(), CProt_ref::eEC_deleted, CProt_ref::eEC_unknown, CProt_ref::GetECNumberStatus(), and CProt_ref::IsECNumberSplit().

Referenced by FixECNumbers().

◆ RemoveDupBioSource()

bool CCleanup::RemoveDupBioSource ( CSeq_descr descr)
static

Remove duplicate biosource descriptors.

Definition at line 3794 of file cleanup.cpp.

References ITERATE, and CSeq_descr_Base::Set().

Referenced by ConvertSrcFeatsToSrcDescs().

◆ RemoveDuplicatePubs()

bool CCleanup::RemoveDuplicatePubs ( CSeq_descr descr)
static

Remove duplicate publications.

Definition at line 3281 of file cleanup.cpp.

References CSeq_descr_Base::Set().

Referenced by MoveOneFeatToPubdesc(), and CNewCleanup_imp::x_RemoveDupPubs().

◆ RemoveNcbiCleanupObject()

bool CCleanup::RemoveNcbiCleanupObject ( CSeq_entry seq_entry)
static

◆ RemoveNonsuppressingGeneXrefs()

bool CCleanup::RemoveNonsuppressingGeneXrefs ( CSeq_feat f)
static

Removes non-suppressing Gene-xrefs.

Parameters
fSeq-feat to edit [in]
Returns
Boolean return value indicates whether gene-xrefs were removed

Definition at line 828 of file cleanup.cpp.

References f.

Referenced by CRemoveGeneXrefs::GetCommand(), and CRemoveGeneXrefs::RemoveNonsuppressing().

◆ RemoveOrphanLocus_tagGeneXrefs()

bool CCleanup::RemoveOrphanLocus_tagGeneXrefs ( CSeq_feat f,
CBioseq_Handle  bsh 
)
static

Removes orphaned locus_tag Gene-xrefs.

Parameters
fSeq-feat to edit [in]
bshCBioseq_Handle in which to search for gene features [in]
Returns
Boolean return value indicates whether gene-xrefs were removed

Definition at line 1013 of file cleanup.cpp.

References f, and FindMatchingLocus_tagGene().

Referenced by CRemoveGeneXrefs::RemoveOrphanLocus_tag().

◆ RemoveOrphanLocusGeneXrefs()

bool CCleanup::RemoveOrphanLocusGeneXrefs ( CSeq_feat f,
CBioseq_Handle  bsh 
)
static

Removes orphaned locus Gene-xrefs.

Parameters
fSeq-feat to edit [in]
bshCBioseq_Handle in which to search for gene features [in]
Returns
Boolean return value indicates whether gene-xrefs were removed

Definition at line 965 of file cleanup.cpp.

References f, and FindMatchingLocusGene().

Referenced by CRemoveGeneXrefs::RemoveOrphanLocus().

◆ RemovePseudoProduct()

bool CCleanup::RemovePseudoProduct ( CSeq_feat cds,
CScope scope 
)
static

Removes protein product from pseudo coding region.

Parameters
cdsSeq-feat to adjust
scopeScope in which to find protein sequence and remove it
Returns
Boolean indicates whether anything changed

Definition at line 2537 of file cleanup.cpp.

References CSeqFeatData::eSubtype_prot, CScope::GetBioseqHandle(), CSeq_feat_Base::GetComment(), CSeq_feat_Base::GetData(), CSeq_feat_Base::GetProduct(), NStr::IsBlank(), CSeqFeatData_Base::IsCdregion(), IsPseudo(), CSeq_feat_Base::IsSetComment(), CSeq_feat_Base::IsSetData(), CSeq_feat_Base::IsSetProduct(), label, prot, CBioseq_EditHandle::Remove(), CSeq_feat_Base::ResetProduct(), and CSeq_feat_Base::SetComment().

Referenced by CNewCleanup_imp::CdRegionEC(), and WGSCleanup().

◆ RemoveUnnecessaryGeneXrefs() [1/2]

bool CCleanup::RemoveUnnecessaryGeneXrefs ( CSeq_entry_Handle  seh)
static

Removes unnecessary Gene-xrefs on features in Seq-entry.

Parameters
sehSeq-entry-Handle to edit [in]
Returns
Boolean return value indicates whether gene-xrefs were removed

Definition at line 804 of file cleanup.cpp.

References CSerialObject::Assign(), CSeq_entry_Handle::GetScope(), RemoveUnnecessaryGeneXrefs(), and CSeq_feat_EditHandle::Replace().

◆ RemoveUnnecessaryGeneXrefs() [2/2]

bool CCleanup::RemoveUnnecessaryGeneXrefs ( CSeq_feat f,
CScope scope 
)
static

Removes unnecessary Gene-xrefs.

Parameters
fSeq-feat to edit [in]
scopeScope in which to search for locations [in]
Returns
Boolean return value indicates whether gene-xrefs were removed

Definition at line 779 of file cleanup.cpp.

References f, and IsGeneXrefUnnecessary().

Referenced by CRemoveGeneXrefs::GetCommand(), CRemoveGeneXrefs::RemoveUnnecessary(), RemoveUnnecessaryGeneXrefs(), and CCleanupApp::x_ProcessFeatureOptions().

◆ RemoveUnseenTitles() [1/2]

bool CCleanup::RemoveUnseenTitles ( CSeq_entry_EditHandle::TSeq  seq)
static

Remove all titles in Seqdescr except the last, because it is the only one that would be displayed in the flatfile.

Parameters
seqBioseq-Handle to edit
Returns
Boolean return value indicates whether any titles were removed

Definition at line 3027 of file cleanup.cpp.

References CSeq_descr_Base::Get(), CBioseq_Handle::GetDescr(), CBioseq_Handle::IsSetDescr(), ITERATE, CBioseq_EditHandle::RemoveSeqdesc(), and CConstRef< C, Locker >::Reset().

Referenced by RenormalizeNucProtSets().

◆ RemoveUnseenTitles() [2/2]

bool CCleanup::RemoveUnseenTitles ( CSeq_entry_EditHandle::TSet  set)
static

Remove all titles in Seqdescr except the last, because it is the only one that would be displayed in the flatfile.

Parameters
setBioseq-set-Handle to edit
Returns
Boolean return value indicates whether any titles were removed

Definition at line 3046 of file cleanup.cpp.

References ITERATE, and CConstRef< C, Locker >::Reset().

◆ RenormalizeNucProtSets()

bool CCleanup::RenormalizeNucProtSets ( CSeq_entry_Handle  seh)
static

◆ RepackageProteins() [1/2]

bool CCleanup::RepackageProteins ( const CSeq_feat cds,
CBioseq_set_Handle  np 
)
static

◆ RepackageProteins() [2/2]

bool CCleanup::RepackageProteins ( CSeq_entry_Handle  seh)
static

Find proteins that are not packaged in the same nuc-prot set as the coding region for which they are a product, and move them to that nuc-prot set.

Ignore coding regions that are in gen-prod-sets.

Parameters
sehSeq-entry to edit
Returns
bool indicates whether any changes were made

Definition at line 4385 of file cleanup.cpp.

References CSeq_entry_Base::e_Set, CBioseq_set_Base::eClass_nuc_prot, CSeq_entry_CI::fIncludeGivenEntry, CSeq_entry_CI::fRecursive, ITERATE, and si.

Referenced by CNewCleanup_imp::x_ExtendedCleanupExtra().

◆ RepairXrefs() [1/3]

bool CCleanup::RepairXrefs ( const CSeq_feat f,
const CTSE_Handle tse 
)
static

Repairs non-reciprocal xref pairs for specified feature if xrefs between subtypes are permitted and feature with missing xref does not have an xref to a different feature of the same subtype.

Parameters
fSeq-feat to edit [in]
tsetop-level Seq-entry in which to search for the other half of the xref pair
Returns
Boolean return value indicates whether xrefs were created

Definition at line 905 of file cleanup.cpp.

References CSeqFeatData_Base::e_not_set, f, CTSE_Handle::GetFeaturesWithId(), CObject_id_Base::GetId(), and ITERATE.

Referenced by CSequenceEditingEventHandler::FixNonReciprocalLinks(), RepairXrefs(), and CNewCleanup_imp::x_ExtendedCleanupExtra().

◆ RepairXrefs() [2/3]

bool CCleanup::RepairXrefs ( const CSeq_feat src,
CSeq_feat_Handle dst,
const CTSE_Handle tse 
)
static

Repairs non-reciprocal xref pairs for specified feature pair if xrefs between subtypes are permitted and feature with missing xref does not have an xref to a different feature of the same subtype.

Parameters
fSeq-feat to edit [in]
tsetop-level Seq-entry in which to search for the other half of the xref pair
Returns
Boolean return value indicates whether xrefs were created

Definition at line 854 of file cleanup.cpp.

References CSeq_feat::AddSeqFeatXref(), CSeqFeatData::AllowXref(), CSerialObject::Assign(), CSeqFeatData_Base::e_not_set, CSeq_feat_Handle::GetAnnot(), CSeq_feat_Base::GetData(), CSeq_feat_Handle::GetData(), CSeq_annot_Handle::GetEditHandle(), CTSE_Handle::GetFeaturesWithId(), CObject_id_Base::GetId(), CSeq_feat_Base::GetId(), CSeq_feat_Handle::GetSeq_feat(), CSeqFeatData::GetSubtype(), CSeq_feat_Handle::GetXref(), CSeqFeatData_Base::IsGene(), CFeat_id_Base::IsLocal(), CSeq_feat_Base::IsSetId(), CSeq_feat_Handle::IsSetXref(), ITERATE, and CSeq_feat_EditHandle::Replace().

◆ RepairXrefs() [3/3]

bool CCleanup::RepairXrefs ( CSeq_entry_Handle  seh)
static

Repairs non-reciprocal xref pairs in specified seq-entry.

Parameters
sehSeq-entry to edit [in]
Returns
Boolean return value indicates whether xrefs were created

Definition at line 926 of file cleanup.cpp.

References CMappedFeat::GetSeq_feat(), CSeq_entry_Handle::GetTSE_Handle(), and RepairXrefs().

◆ RescueSiteRefPubs()

bool CCleanup::RescueSiteRefPubs ( CSeq_entry_Handle  seh)
static

◆ ResetAuthorNames()

void CCleanup::ResetAuthorNames ( CAuth_list::TNames names)
static

Definition at line 567 of file cleanup_author.cpp.

References names.

Referenced by CleanupAuthList().

◆ s_AddNumToUserField()

bool CCleanup::s_AddNumToUserField ( CUser_field field)
staticprivate

◆ s_CleanupDBLink()

bool CCleanup::s_CleanupDBLink ( CUser_object obj)
staticprivate

◆ s_CleanupGeneOntology()

bool CCleanup::s_CleanupGeneOntology ( CUser_object obj)
staticprivate

◆ s_CleanupGenomeAssembly()

bool CCleanup::s_CleanupGenomeAssembly ( CUser_object obj)
staticprivate

◆ s_CleanupNameStdBC()

bool CCleanup::s_CleanupNameStdBC ( CName_std name,
bool  fix_initials 
)
staticprivate

◆ s_CleanupStructuredComment()

bool CCleanup::s_CleanupStructuredComment ( CUser_object obj)
staticprivate

◆ s_ExtractSuffixFromInitials()

void CCleanup::s_ExtractSuffixFromInitials ( CName_std name)
staticprivate

◆ s_FixEtAl()

void CCleanup::s_FixEtAl ( CName_std name)
staticprivate

◆ s_Flatten()

static bool CCleanup::s_Flatten ( CPub_equiv pub_equiv)
staticprivate

◆ s_IsProductOnFeat()

bool CCleanup::s_IsProductOnFeat ( const CSeq_feat cds)
staticprivate

◆ s_RemoveEmptyFields()

bool CCleanup::s_RemoveEmptyFields ( CUser_object obj)
staticprivate

◆ s_SetProductOnFeat()

void CCleanup::s_SetProductOnFeat ( CSeq_feat feat,
const string protein_name,
bool  append 
)
staticprivate

◆ SeqLocExtend()

bool CCleanup::SeqLocExtend ( CSeq_loc loc,
size_t  pos,
CScope scope 
)
static

Extends a location to the specified position.

Parameters
locSeq-loc to extend
posposition of new end of location
scopeScope in which to look for sequences
Returns
Boolean return value indicates whether the location was extended

Definition at line 1038 of file cleanup.cpp.

References CSeq_loc::Assign(), eExtreme_Positional, CSeq_loc::fMerge_AbuttingOnly, CSeq_loc::fSort, CSeq_loc::GetId(), CSeq_loc::GetStart(), CSeq_loc::GetStop(), CSeq_loc::GetStrand(), CSeq_loc::IsPartialStart(), CSeq_loc::IsPartialStop(), Seq_loc_Add(), CSeq_loc::SetPartialStart(), and CSeq_loc::SetPartialStop().

Referenced by ExtendToGapsOrEnds().

◆ SetBestFrame()

bool CCleanup::SetBestFrame ( CSeq_feat cds,
CScope scope 
)
static

Translates coding region and selects best frame (without stops, or longest)

Parameters
cdsCoding region Seq-feat to edit
scopeScope in which to find coding region
Returns
Boolean return value indicates whether the coding region was changed

Definition at line 1194 of file cleanup.cpp.

References CCdregion_Base::eFrame_not_set, CSeqTranslator::FindBestFrame(), CSeqFeatData_Base::GetCdregion(), CSeq_feat_Base::GetData(), CCdregion_Base::GetFrame(), CCdregion_Base::IsSetFrame(), and CSeq_feat_Base::SetData().

Referenced by WGSCleanup().

◆ SetCDSPartialsByFrameAndTranslation()

bool CCleanup::SetCDSPartialsByFrameAndTranslation ( CSeq_feat cds,
CScope scope 
)
static

1. Set the partial flags when the CDS is partial and codon_start is 2 or 3.

2. Make the CDS partial at the 5' end if there is no start codon. 3. Make the CDS partial at the 3' end if there is no stop codon.

Parameters
cdsCoding region Seq-feat to edit
scopeScope in which to find coding region and coding region's protein product sequence
Returns
Boolean return value indicates whether the coding region changed

Definition at line 1513 of file cleanup.cpp.

References AdjustFeaturePartialFlagForLocation(), eExtreme_Biological, CCdregion_Base::eFrame_not_set, CCdregion_Base::eFrame_one, NStr::EndsWith(), CSeqFeatData_Base::GetCdregion(), CSeq_feat_Base::GetData(), CCdregion_Base::GetFrame(), CSeq_feat_Base::GetLocation(), NStr::IsBlank(), CSeq_loc::IsPartialStart(), CSeq_loc::IsPartialStop(), CCdregion_Base::IsSetFrame(), CSeq_feat_Base::SetLocation(), NStr::StartsWith(), and CSeqTranslator::Translate().

Referenced by WGSCleanup().

◆ SetCodeBreakLocation()

void CCleanup::SetCodeBreakLocation ( CCode_break cb,
size_t  pos,
const CSeq_feat cds 
)
static

◆ SetFeaturePartial()

bool CCleanup::SetFeaturePartial ( CSeq_feat f)
static

Set feature partial based on feature location.

Definition at line 1633 of file cleanup.cpp.

References f, CSeq_loc_CI::GetFuzzFrom(), and CSeq_loc_CI::GetFuzzTo().

Referenced by MoveFeatToProtein().

◆ SetFrameFromLoc() [1/2]

bool CCleanup::SetFrameFromLoc ( CCdregion cdregion,
const CSeq_loc loc,
CScope scope 
)
static

Chooses best frame based on location.

1. If the location is 5' complete, then the frame must be one. 2. If the location is 5' partial and 3' complete, select a frame using the value of the location length modulo 3.

Parameters
cdregionCoding Region in which to set frame
locLocation to use for setting frame
scopeScope in which to find location sequence(s)
Returns
Boolean return value indicates whether the frame was changed

Definition at line 1253 of file cleanup.cpp.

References CCdregion_Base::eFrame_not_set, CCdregion_Base::GetFrame(), CCdregion_Base::IsSetFrame(), and CCdregion_Base::SetFrame().

Referenced by CkCdRegion(), CNewCleanup_imp::ImpFeatBC(), and CCleanupApp::x_FixCDS().

◆ SetFrameFromLoc() [2/2]

bool CCleanup::SetFrameFromLoc ( CCdregion::EFrame frame,
const CSeq_loc loc,
CScope scope 
)
static

◆ SetGenePartialByLongestContainedFeature()

bool CCleanup::SetGenePartialByLongestContainedFeature ( CSeq_feat gene,
CScope scope 
)
static

Set partialness of gene to match longest feature contained in gene.

Parameters
geneSeq-feat to edit
scopeScope in which to find gene
Returns
Boolean return value indicates whether the gene changed

Definition at line 1739 of file cleanup.cpp.

References Compare(), CopyFeaturePartials(), eContains, eSame, fCompareOverlapping, CScope::GetBioseqHandle(), CSeq_feat_Handle::GetData(), GetLength(), CSeq_feat_Base::GetLocation(), CMappedFeat::GetLocation(), CMappedFeat::GetSeq_feat(), CSeqFeatData_Base::IsGene(), len, and CConstRef< C, Locker >::Reset().

Referenced by WGSCleanup().

◆ SetGeneticCodes()

bool CCleanup::SetGeneticCodes ( CBioseq_Handle  bsh)
static

◆ SetMolinfoBiomol()

bool CCleanup::SetMolinfoBiomol ( CBioseq_Handle  seq,
CMolInfo::EBiomol  biomol 
)
static

Sets MolInfo::biomol for a sequence.

Parameters
seqBioseq to edit
biomolbiomol value to set
Returns
Boolean return value indicates whether biomol was changed

Definition at line 1802 of file cleanup.cpp.

References CBioseq_EditHandle::AddSeqdesc(), CSeqdesc_Base::e_Molinfo, CMolInfo_Base::GetBiomol(), CBioseq_Handle::GetEditHandle(), CSeqdesc_Base::GetMolinfo(), CMolInfo_Base::IsSetTech(), CMolInfo_Base::SetBiomol(), and CSeqdesc_Base::SetMolinfo().

◆ SetMolinfoTech()

bool CCleanup::SetMolinfoTech ( CBioseq_Handle  seq,
CMolInfo::ETech  tech 
)
static

◆ SetMrnaName()

void CCleanup::SetMrnaName ( CSeq_feat mrna,
const string protein_name 
)
static

◆ SetProteinName() [1/2]

void CCleanup::SetProteinName ( CProt_ref prot,
const string protein_name,
bool  append 
)
static

◆ SetProteinName() [2/2]

void CCleanup::SetProteinName ( CSeq_feat cds,
const string protein_name,
bool  append,
CScope scope 
)
static

◆ SetScope()

void CCleanup::SetScope ( CScope scope)

◆ ShouldStripPubSerial()

bool CCleanup::ShouldStripPubSerial ( const CBioseq bs)
static

◆ TaxonomyLookup()

bool CCleanup::TaxonomyLookup ( CSeq_entry_Handle  seh)
static

◆ UpdateECNumbers()

bool CCleanup::UpdateECNumbers ( CProt_ref::TEc ec_num_list)
static

Update EC numbers.

Parameters
ec_num_list: Prot-ref EC number list to clean
Returns
Boolean value indicates whether any changes were made

Definition at line 1663 of file cleanup.cpp.

References CleanVisStringJunk(), CProt_ref::eEC_replaced, CProt_ref::GetECNumberReplacement(), CProt_ref::GetECNumberStatus(), NStr::IsBlank(), CProt_ref::IsECNumberSplit(), and NON_CONST_ITERATE.

Referenced by FixECNumbers(), and CNewCleanup_imp::x_CleanupECNumberListEC().

◆ ValidAminoAcid()

char CCleanup::ValidAminoAcid ( string_view  abbrev)
static

Definition at line 4974 of file cleanup.cpp.

References x_ValidAminoAcid().

Referenced by CkQualPosSeqaa(), and GetQualValueAa().

◆ WGSCleanup()

bool CCleanup::WGSCleanup ( CSeq_entry_Handle  entry,
bool  instantiate_missing_proteins = true,
Uint4  options = 0,
bool  run_extended_cleanup = true 
)
static

Performs WGS specific cleanup.

Parameters
entry: Seq-entry to edit
Returns
Boolean return value indicates whether object was updated

Definition at line 2653 of file cleanup.cpp.

References AddProtein(), AddProteinFeature(), AdjustForCDSPartials(), CSerialObject::Assign(), CopyFeaturePartials(), CSeqFeatData_Base::e_Cdregion, CSeqFeatData_Base::e_Gene, CSeqFeatData_Base::e_Rna, eExtreme_Biological, CSeq_inst_Base::eMol_na, eNa_strand_minus, NStr::Equal(), NStr::EqualNocase(), CSeqFeatData::eSubtype_prot, CSeqFeatData::eSubtype_rRNA, ExpandGeneToIncludeChildren(), ExtendedCleanup(), CAliasBase< TPrim >::Get(), CScope::GetBioseqHandle(), CScope::GetBioseqHandleFromTSE(), CBioseq_Handle::GetCompleteBioseq(), CSeq_feat_Base::GetData(), CSeq_loc::GetId(), CBioseq_Handle::GetInst(), CSeq_data_Base::GetIupacaa(), GetLength(), CSeq_feat_Base::GetLocation(), GetmRNAforCDS(), GetNewProtId(), CSeq_feat_Base::GetProduct(), GetProteinName(), CSeq_feat_Base::GetQual(), CSeqFeatData_Base::GetRna(), CRNA_ref::GetRnaProductName(), CSeq_entry_Handle::GetScope(), CSeq_inst_Base::GetSeq_data(), CScope::GetSeq_featHandle(), CSeq_loc::GetStart(), CSeq_loc::GetStop(), CSeq_loc::GetStrand(), CSeqFeatData::GetSubtype(), CSeq_entry_Handle::GetTopLevelEntry(), CSeq_entry_Handle::GetTSE_Handle(), NStr::IsBlank(), IsGeneralIdProtPresent(), CSeq_data_Base::IsIupacaa(), IsPseudo(), CSeq_feat_Base::IsSetData(), CBioseq_Handle::IsSetInst(), CSeq_feat_Base::IsSetProduct(), CSeq_feat_Base::IsSetQual(), CSeq_inst_Base::IsSetSeq_data(), CSeq_loc::IsSetStrand(), NormalizeDescriptorOrder(), prot, RemovePseudoProduct(), CSeq_feat_EditHandle::Replace(), RetranslateCDS(), s_CleanupIsShortrRNA(), SetBestFrame(), SetCDSPartialsByFrameAndTranslation(), SetGenePartialByLongestContainedFeature(), SetGeneticCodes(), CBioseq_EditHandle::SetInst(), CSeq_feat_Base::SetLocation(), SetMrnaName(), CSeq_feat_Base::SetProduct(), SetProteinName(), and CSeq_inst_Base::SetSeq_data().

Referenced by CTable2AsnValidator::Cleanup(), and CCleanupApp::x_ProcessXOptions().

◆ x_AddLowQualityException() [1/2]

bool CCleanup::x_AddLowQualityException ( CSeq_entry_Handle  entry,
CSeqFeatData::ESubtype  subtype 
)
static private

◆ x_AddLowQualityException() [2/2]

bool CCleanup::x_AddLowQualityException ( CSeq_feat feat)
static private

◆ x_CleanupUserField()

bool CCleanup::x_CleanupUserField ( CUser_field field)
static private

◆ x_HasShortIntron()

bool CCleanup::x_HasShortIntron ( const CSeq_loc loc,
size_t  min_len = 11 
)
static private

◆ x_MergeDupOrgNames()

bool CCleanup::x_MergeDupOrgNames ( COrgName on1,
const COrgName add 
)
static private

◆ x_MergeDupOrgRefs()

bool CCleanup::x_MergeDupOrgRefs ( COrg_ref org1,
const COrg_ref add 
)
static private

Member Data Documentation

◆ m_Scope

CRef<CScope> CCleanup::m_Scope
private

Definition at line 613 of file cleanup.hpp.

Referenced by CCleanup(), and SetScope().


The documentation for this class was generated from the following files:
Modified on Fri Sep 20 14:58:18 2024 by modify_doxy.py rev. 669887