NCBI C++ ToolKit
|
Search Toolkit Book for CCleanup
#include <objtools/cleanup/cleanup.hpp>
Public Types | |
enum | EValidOptions { eClean_NoReporting = 0x1 , eClean_GpipeMode = 0x2 , eClean_NoNcbiUserObjects = 0x4 , eClean_SyncGenCodes = 0x8 , eClean_NoProteinTitles = 0x10 , eClean_KeepTopSet = 0x20 , eClean_KeepSingleSeqSet = 0x40 , eClean_InHugeSeqSet = 0x80 } |
enum | EScopeOptions { eScope_Copy , eScope_UseInPlace } |
using | TChanges = CConstRef< CCleanupChange > |
typedef pair< CSeq_feat_Handle, CSeq_feat_Handle > | TFeatGenePair |
Public Types inherited from CObject | |
enum | EAllocFillMode { eAllocFillNone = 1 , eAllocFillZero , eAllocFillPattern } |
Control filling of newly allocated memory. More... | |
typedef CObjectCounterLocker | TLockerType |
Default locker type for CRef. More... | |
typedef atomic< Uint8 > | TCounter |
Counter type is CAtomicCounter. More... | |
typedef Uint8 | TCount |
Alias for value type of counter. More... | |
Public Member Functions | |
CCleanup (CScope *scope=nullptr, EScopeOptions scope_handling=eScope_Copy) | |
CCleanup (const CCleanup &)=delete | |
CCleanup & | operator= (const CCleanup &)=delete |
~CCleanup () | |
void | SetScope (CScope *scope) |
TChanges | BasicCleanup (CSeq_entry &se, Uint4 options=0) |
TChanges | BasicCleanup (CSeq_submit &ss, Uint4 options=0) |
Cleanup a Seq-submit. More... | |
TChanges | BasicCleanup (CBioseq_set &bss, Uint4 options=0) |
Cleanup a Bioseq_set. More... | |
TChanges | BasicCleanup (CSeq_annot &sa, Uint4 options=0) |
Cleanup a Seq-Annot. More... | |
TChanges | BasicCleanup (CSeq_feat &sf, Uint4 options=0) |
Cleanup a Seq-feat. More... | |
TChanges | BasicCleanup (CBioSource &src, Uint4 options=0) |
Cleanup a BioSource. More... | |
TChanges | BasicCleanup (CSubmit_block &block, Uint4 options=0) |
TChanges | BasicCleanup (CSeqdesc &desc, Uint4 options=0) |
TChanges | BasicCleanup (CSeq_descr &desc, Uint4 options=0) |
TChanges | BasicCleanup (CSeq_entry_Handle &seh, Uint4 options=0) |
TChanges | BasicCleanup (CBioseq_Handle &bsh, Uint4 options=0) |
TChanges | BasicCleanup (CBioseq_set_Handle &bssh, Uint4 options=0) |
TChanges | BasicCleanup (CSeq_annot_Handle &sak, Uint4 options=0) |
TChanges | BasicCleanup (CSeq_feat_Handle &sfh, Uint4 options=0) |
TChanges | ExtendedCleanup (CSeq_entry &se, Uint4 options=0) |
Cleanup a Seq-entry. More... | |
TChanges | ExtendedCleanup (CSeq_submit &ss, Uint4 options=0) |
Cleanup a Seq-submit. More... | |
TChanges | ExtendedCleanup (CSeq_annot &sa, Uint4 options=0) |
Cleanup a Seq-Annot. More... | |
Public Member Functions inherited from CObject | |
CObject (void) | |
Constructor. More... | |
CObject (const CObject &src) | |
Copy constructor. More... | |
virtual | ~CObject (void) |
Destructor. More... | |
CObject & | operator= (const CObject &src) THROWS_NONE |
Assignment operator. More... | |
bool | CanBeDeleted (void) const THROWS_NONE |
Check if object can be deleted. More... | |
bool | IsAllocatedInPool (void) const THROWS_NONE |
Check if object is allocated in memory pool (not system heap) More... | |
bool | Referenced (void) const THROWS_NONE |
Check if object is referenced. More... | |
bool | ReferencedOnlyOnce (void) const THROWS_NONE |
Check if object is referenced only once. More... | |
void | AddReference (void) const |
Add reference to object. More... | |
void | RemoveReference (void) const |
Remove reference to object. More... | |
void | ReleaseReference (void) const |
Remove reference without deleting object. More... | |
virtual void | DoNotDeleteThisObject (void) |
Mark this object as not allocated in heap – do not delete this object. More... | |
virtual void | DoDeleteThisObject (void) |
Mark this object as allocated in heap – object can be deleted. More... | |
void * | operator new (size_t size) |
Define new operator for memory allocation. More... | |
void * | operator new[] (size_t size) |
Define new[] operator for 'array' memory allocation. More... | |
void | operator delete (void *ptr) |
Define delete operator for memory deallocation. More... | |
void | operator delete[] (void *ptr) |
Define delete[] operator for memory deallocation. More... | |
void * | operator new (size_t size, void *place) |
Define new operator. More... | |
void | operator delete (void *ptr, void *place) |
Define delete operator. More... | |
void * | operator new (size_t size, CObjectMemoryPool *place) |
Define new operator using memory pool. More... | |
void | operator delete (void *ptr, CObjectMemoryPool *place) |
Define delete operator. More... | |
virtual void | DebugDump (CDebugDumpContext ddc, unsigned int depth) const |
Define method for dumping debug information. More... | |
Public Member Functions inherited from CDebugDumpable | |
CDebugDumpable (void) | |
virtual | ~CDebugDumpable (void) |
void | DebugDumpText (ostream &out, const string &bundle, unsigned int depth) const |
void | DebugDumpFormat (CDebugDumpFormatter &ddf, const string &bundle, unsigned int depth) const |
void | DumpToConsole (void) const |
Static Public Member Functions | |
static TChanges | ExtendedCleanup (CSeq_entry_Handle &seh, Uint4 options=0) |
static bool | ShouldStripPubSerial (const CBioseq &bs) |
static bool | MoveProteinSpecificFeats (CSeq_entry_Handle seh) |
Moves protein-specific features from nucleotide sequences in the Seq-entry to the appropriate protein sequence. More... | |
static bool | MoveFeatToProtein (CSeq_feat_Handle fh) |
Moves one feature from nucleotide bioseq to the appropriate protein sequence. More... | |
static bool | IsGeneXrefUnnecessary (const CSeq_feat &sf, CScope &scope, const CGene_ref &gene_xref) |
Calculates whether a Gene-xref is unnecessary (because it refers to the same gene as would be calculated using overlap) More... | |
static bool | RemoveUnnecessaryGeneXrefs (CSeq_feat &f, CScope &scope) |
Removes unnecessary Gene-xrefs. More... | |
static bool | RemoveUnnecessaryGeneXrefs (CSeq_entry_Handle seh) |
Removes unnecessary Gene-xrefs on features in Seq-entry. More... | |
static bool | RemoveNonsuppressingGeneXrefs (CSeq_feat &f) |
Removes non-suppressing Gene-xrefs. More... | |
static bool | RepairXrefs (const CSeq_feat &f, const CTSE_Handle &tse) |
Repairs non-reciprocal xref pairs for specified feature if xrefs between subtypes are permitted and feature with missing xref does not have an xref to a different feature of the same subtype. More... | |
static bool | RepairXrefs (const CSeq_feat &src, CSeq_feat_Handle &dst, const CTSE_Handle &tse) |
Repairs non-reciprocal xref pairs for specified feature pair if xrefs between subtypes are permitted and feature with missing xref does not have an xref to a different feature of the same subtype. More... | |
static bool | RepairXrefs (CSeq_entry_Handle seh) |
Repairs non-reciprocal xref pairs in specified seq-entry. More... | |
static bool | FindMatchingLocusGene (CSeq_feat &f, const CGene_ref &gene_xref, CBioseq_Handle bsh) |
Detects gene features with matching locus. More... | |
static bool | RemoveOrphanLocusGeneXrefs (CSeq_feat &f, CBioseq_Handle bsh) |
Removes orphaned locus Gene-xrefs. More... | |
static bool | FindMatchingLocus_tagGene (CSeq_feat &f, const CGene_ref &gene_xref, CBioseq_Handle bsh) |
Detects gene features with matching locus_tag. More... | |
static bool | RemoveOrphanLocus_tagGeneXrefs (CSeq_feat &f, CBioseq_Handle bsh) |
Removes orphaned locus_tag Gene-xrefs. More... | |
static bool | SeqLocExtend (CSeq_loc &loc, size_t pos, CScope &scope) |
Extends a location to the specified position. More... | |
static bool | ExtendToStopIfShortAndNotPartial (CSeq_feat &f, CBioseq_Handle bsh, bool check_for_stop=true) |
Extends a coding region up to 50 nt. More... | |
static bool | LocationMayBeExtendedToMatch (const CSeq_loc &orig, const CSeq_loc &improved) |
Checks whether it is possible to extend the original location up to improved one. More... | |
static bool | ExtendToStopCodon (CSeq_feat &f, CBioseq_Handle bsh, size_t limit) |
Extends a feature up to limit nt to a stop codon, or to the end of the sequence if limit == 0 (partial will be set if location extends to end of sequence but no stop codon is found) More... | |
static bool | ExtendStopPosition (CSeq_feat &f, const CSeq_feat *cdregion, size_t extension=0) |
static bool | SetBestFrame (CSeq_feat &cds, CScope &scope) |
Translates coding region and selects best frame (without stops, or longest) More... | |
static bool | SetFrameFromLoc (CCdregion &cdregion, const CSeq_loc &loc, CScope &scope) |
Chooses best frame based on location 1. More... | |
static bool | SetFrameFromLoc (CCdregion::EFrame &frame, const CSeq_loc &loc, CScope &scope) |
static bool | SetCDSPartialsByFrameAndTranslation (CSeq_feat &cds, CScope &scope) |
1. More... | |
static bool | ClearInternalPartials (CSeq_loc &loc, bool is_first=true, bool is_last=true) |
Clear internal partials. More... | |
static bool | ClearInternalPartials (CSeq_loc_mix &mix, bool is_first=true, bool is_last=true) |
static bool | ClearInternalPartials (CPacked_seqint &pint, bool is_first=true, bool is_last=true) |
static bool | ClearInternalPartials (CSeq_entry_Handle seh) |
static bool | SetFeaturePartial (CSeq_feat &f) |
Set feature partial based on feature location. More... | |
static bool | UpdateECNumbers (CProt_ref::TEc &ec_num_list) |
Update EC numbers. More... | |
static bool | RemoveBadECNumbers (CProt_ref::TEc &ec_num_list) |
Delete EC numbers. More... | |
static bool | FixECNumbers (CSeq_entry_Handle entry) |
Fix EC numbers. More... | |
static bool | SetGenePartialByLongestContainedFeature (CSeq_feat &gene, CScope &scope) |
Set partialness of gene to match longest feature contained in gene. More... | |
static void | SetProteinName (CProt_ref &prot, const string &protein_name, bool append) |
static void | SetProteinName (CSeq_feat &cds, const string &protein_name, bool append, CScope &scope) |
static void | SetMrnaName (CSeq_feat &mrna, const string &protein_name) |
static const string & | GetProteinName (const CProt_ref &prot) |
static const string & | GetProteinName (const CSeq_feat &cds, CSeq_entry_Handle seh) |
static bool | SetMolinfoTech (CBioseq_Handle seq, CMolInfo::ETech tech) |
Sets MolInfo::tech for a sequence. More... | |
static bool | SetMolinfoBiomol (CBioseq_Handle seq, CMolInfo::EBiomol biomol) |
Sets MolInfo::biomol for a sequence. More... | |
static bool | AddMissingMolInfo (CBioseq &seq, bool is_product) |
Adds missing MolInfo descriptor to sequence. More... | |
static bool | AddProteinTitle (CBioseq_Handle bsh) |
Creates missing protein title descriptor. More... | |
static bool | RemoveNcbiCleanupObject (CSeq_entry &seq_entry) |
Removes NcbiCleanup User Objects in the Seq-entry. More... | |
static void | AddNcbiCleanupObject (int ncbi_cleanup_version, CSeq_descr &descr) |
Adds NcbiCleanup User Object to Seq-descr. More... | |
static bool | TaxonomyLookup (CSeq_entry_Handle seh) |
Looks up Org-refs in the Seq-entry. More... | |
static bool | SetGeneticCodes (CBioseq_Handle bsh) |
Sets genetic codes for coding regions on Bioseq-Handle. More... | |
static bool | AddPartialToProteinTitle (CBioseq &bioseq) |
Adjusts protein title to reflect partialness. More... | |
static bool | RemovePseudoProduct (CSeq_feat &cds, CScope &scope) |
Removes protein product from pseudo coding region. More... | |
static CRef< CSeq_entry > | AddProtein (const CSeq_feat &cds, CScope &scope) |
static bool | ExpandGeneToIncludeChildren (CSeq_feat &gene, CTSE_Handle &tse) |
Expands gene to include features it cross-references. More... | |
static bool | WGSCleanup (CSeq_entry_Handle entry, bool instantiate_missing_proteins=true, Uint4 options=0, bool run_extended_cleanup=true) |
Performs WGS specific cleanup. More... | |
static bool | AddLowQualityException (CSeq_entry_Handle entry) |
For table2asn -c s: adds an exception of "low-quality sequence region" to coding regions and mRNAs that are not pseudo and have an intron <11bp in length. More... | |
static bool | NormalizeDescriptorOrder (CSeq_descr &descr) |
Normalize Descriptor Order on a specific Seq-entry. More... | |
static bool | NormalizeDescriptorOrder (CSeq_entry_Handle seh) |
Normalize Descriptor Order on a specific Seq-entry. More... | |
static bool | RemoveUnseenTitles (CSeq_entry_EditHandle::TSeq seq) |
Remove all titles in Seqdescr except the last, because it is the only one that would be displayed in the flatfile. More... | |
static bool | RemoveUnseenTitles (CSeq_entry_EditHandle::TSet set) |
Remove all titles in Seqdescr except the last, because it is the only one that would be displayed in the flatfile. More... | |
static bool | AddGenBankWrapper (CSeq_entry_Handle seh) |
Add GenBank Wrapper Set. More... | |
static void | GetPubdescLabels (const CPubdesc &pd, vector< TEntrezId > &pmids, vector< TEntrezId > &muids, vector< int > &serials, vector< string > &published_labels, vector< string > &unpublished_labels) |
For publication citations: get labels for a pubdesc. More... | |
static vector< CConstRef< CPub > > | GetCitationList (CBioseq_Handle bsh) |
Get list of pubs that can be used for citations for Seq-feat on a Bioseq-handle. More... | |
static bool | RemoveDuplicatePubs (CSeq_descr &descr) |
Remove duplicate publications. More... | |
static bool | OkToPromoteNpPub (const CPubdesc &pd) |
Some pubs should not be promoted to nuc-prot set from sequence. More... | |
static bool | OkToPromoteNpPub (const CBioseq &b) |
For some sequences, pubs should not be promoted to nuc-prot set from sequence. More... | |
static bool | PubAlreadyInSet (const CPubdesc &pd, const CSeq_descr &descr) |
static bool | ConvertPubFeatsToPubDescs (CSeq_entry_Handle seh) |
Convert full-length publication features to publication descriptors. More... | |
static bool | RescueSiteRefPubs (CSeq_entry_Handle seh) |
Rescue pubs from Site-ref features. More... | |
static bool | IsMinPub (const CPubdesc &pd, bool is_refseq_prot) |
Is this a "minimal" pub? (If yes, do not rescue from a Seq-feat.cit) More... | |
static void | MoveOneFeatToPubdesc (CSeq_feat_Handle feat, CRef< CSeqdesc > d, CBioseq_Handle b, bool remove_feat=true) |
static bool | RemoveDupBioSource (CSeq_descr &descr) |
Remove duplicate biosource descriptors. More... | |
static CRef< CBioSource > | BioSrcFromFeat (const CSeq_feat &f) |
Get BioSource from feature to use for source descriptor. More... | |
static bool | AreBioSourcesMergeable (const CBioSource &src1, const CBioSource &src2) |
static bool | MergeDupBioSources (CSeq_descr &descr) |
static bool | MergeDupBioSources (CBioSource &src1, const CBioSource &add) |
static bool | ConvertSrcFeatsToSrcDescs (CSeq_entry_Handle seh) |
Convert full-length source features to source descriptors. More... | |
static bool | FixGeneXrefSkew (CSeq_entry_Handle seh) |
Examine all genes and gene xrefs in the Seq-entry. More... | |
static bool | RenormalizeNucProtSets (CSeq_entry_Handle seh) |
Convert nuc-prot sets with just one sequence to just the sequence. This can't be done during the explore phase because it changes a seq to a set. More... | |
static bool | DecodeXMLMarkChanged (std::string &str) |
Decodes various tags, including carriage-return-line-feed constructs. More... | |
static CRef< CSeq_loc > | GetProteinLocationFromNucleotideLocation (const CSeq_loc &nuc_loc, CScope &scope) |
static CRef< CSeq_loc > | GetProteinLocationFromNucleotideLocation (const CSeq_loc &nuc_loc, const CSeq_feat &cds, CScope &scope, bool require_inframe=false) |
static bool | RepackageProteins (CSeq_entry_Handle seh) |
Find proteins that are not packaged in the same nuc-prot set as the coding region for which they are a product, and move them to that nuc-prot set. More... | |
static bool | RepackageProteins (const CSeq_feat &cds, CBioseq_set_Handle np) |
static bool | ConvertDeltaSeqToRaw (CSeq_entry_Handle seh, CSeq_inst::EMol filter=CSeq_inst::eMol_not_set) |
static bool | ParseCodeBreak (const CSeq_feat &feat, CCdregion &cds, const CTempString &str, CScope &scope, IObjtoolsListener *pMessageListener=nullptr) |
Parse string into code break and add to coding region. More... | |
static bool | ParseCodeBreaks (CSeq_feat &feat, CScope &scope) |
Parses all valid transl_except Gb-quals into code-breaks for cdregion, then removes the transl_except Gb-quals that were successfully parsed. More... | |
static size_t | MakeSmallGenomeSet (CSeq_entry_Handle entry) |
static bool | MakeIRDFeatsFromSourceXrefs (CSeq_entry_Handle entry) |
From SQD-4329: for each sequence with a source that has an IRD db_xref, create a misc_feature across the entire span and move the IRD db_xref from the source to the misc_feature. More... | |
static bool | FixRNAEditingCodingRegion (CSeq_feat &cds) |
From GB-7563 An action has been requested that will do the following: 1. More... | |
static void | SetCodeBreakLocation (CCode_break &cb, size_t pos, const CSeq_feat &cds) |
Utility function for setting code break location given offset. pos is the position of the amino acid where the translation exception occurs (starts with 1). More... | |
static bool | IsMethionine (const CCode_break &cb) |
static CConstRef< CCode_break > | GetCodeBreakForLocation (size_t pos, const CSeq_feat &cds) |
Utility function for finding the code break for a given amino acid position. pos is the position of the amino acid where the translation exception occurs (starts with 1). More... | |
static bool | NormalizeGeneQuals (CSeq_feat &cds, CSeq_feat &gene) |
static bool | NormalizeGeneQuals (CBioseq_Handle bsh) |
static bool | NormalizeGeneQuals (CSeq_entry_Handle seh) |
static vector< TFeatGenePair > | GetNormalizableGeneQualPairs (CBioseq_Handle bsh) |
static bool | CleanupUserObject (CUser_object &object) |
static bool | CleanupAuthor (CAuthor &author, bool fix_initials=true) |
static bool | CleanupAuthList (CAuth_list &al, bool fix_initials=true) |
static void | ResetAuthorNames (CAuth_list::TNames &names) |
static bool | CleanupAffil (CAffil &af) |
static bool | IsEmpty (const CAuth_list::TAffil &affil) |
static bool | CleanupCollectionDates (CSeq_entry_Handle seh, bool month_first) |
static void | AutodefId (CSeq_entry_Handle seh) |
static char | ValidAminoAcid (string_view abbrev) |
Static Public Member Functions inherited from CObject | |
static NCBI_XNCBI_EXPORT void | ThrowNullPointerException (void) |
Define method to throw null pointer exception. More... | |
static NCBI_XNCBI_EXPORT void | ThrowNullPointerException (const type_info &type) |
static EAllocFillMode | GetAllocFillMode (void) |
static void | SetAllocFillMode (EAllocFillMode mode) |
static void | SetAllocFillMode (const string &value) |
Set mode from configuration parameter value. More... | |
Static Public Member Functions inherited from CDebugDumpable | |
static void | EnableDebugDump (bool on) |
Private Attributes | |
CRef< CScope > | m_Scope |
Additional Inherited Members | |
Static Public Attributes inherited from CObject | |
static const TCount | eCounterBitsCanBeDeleted = 1 << 0 |
Define possible object states. More... | |
static const TCount | eCounterBitsInPlainHeap = 1 << 1 |
Heap signature was found. More... | |
static const TCount | eCounterBitsPlaceMask |
Mask for 'in heap' state flags. More... | |
static const int | eCounterStep = 1 << 2 |
Skip over the "in heap" bits. More... | |
static const TCount | eCounterValid = TCount(1) << (sizeof(TCount) * 8 - 2) |
Minimal value for valid objects (reference counter is zero). Must be a single bit value. More... | |
static const TCount | eCounterStateMask |
Valid object, and object in heap. More... | |
Protected Member Functions inherited from CObject | |
virtual void | DeleteThis (void) |
Virtual method "deleting" this object. More... | |
Definition at line 68 of file cleanup.hpp.
using CCleanup::TChanges = CConstRef<CCleanupChange> |
Definition at line 97 of file cleanup.hpp.
typedef pair<CSeq_feat_Handle, CSeq_feat_Handle> CCleanup::TFeatGenePair |
Definition at line 590 of file cleanup.hpp.
Enumerator | |
---|---|
eScope_Copy | |
eScope_UseInPlace |
Definition at line 83 of file cleanup.hpp.
Enumerator | |
---|---|
eClean_NoReporting | |
eClean_GpipeMode | |
eClean_NoNcbiUserObjects | |
eClean_SyncGenCodes | |
eClean_NoProteinTitles | |
eClean_KeepTopSet | |
eClean_KeepSingleSeqSet | |
eClean_InHugeSeqSet |
Definition at line 72 of file cleanup.hpp.
CCleanup::CCleanup | ( | CScope * | scope = nullptr , |
EScopeOptions | scope_handling = eScope_Copy |
||
) |
Definition at line 89 of file cleanup.cpp.
References CScope::AddScope(), eScope_UseInPlace, CObjectManager::GetInstance(), and m_Scope.
CCleanup::~CCleanup | ( | void | ) |
Definition at line 103 of file cleanup.cpp.
|
static |
Add GenBank Wrapper Set.
entry | Seq-entry to edit |
Definition at line 3065 of file cleanup.cpp.
References CSeq_entry_EditHandle::ConvertSeqToSet(), CBioseq_set_Base::eClass_genbank, CBioseq_set_Handle::GetClass(), CSeq_entry_Handle::GetSet(), CSeq_entry_Handle::IsSet(), and CBioseq_set_Handle::IsSetClass().
|
static |
For table2asn -c s: adds an exception of "low-quality sequence region" to coding regions and mRNAs that are not pseudo and have an intron <11bp in length.
entry | Seq-entry to edit |
Definition at line 2931 of file cleanup.cpp.
References CSeqFeatData::eSubtype_cdregion, CSeqFeatData::eSubtype_mRNA, and x_AddLowQualityException().
Referenced by CTable2AsnValidator::Cleanup().
Adds missing MolInfo descriptor to sequence.
seq | Bioseq to edit |
Definition at line 1824 of file cleanup.cpp.
References CMolInfo_Base::eBiomol_mRNA, CMolInfo_Base::eBiomol_peptide, CMolInfo_Base::eBiomol_unknown, CSeq_inst_Base::eMol_rna, CMolInfo_Base::eTech_concept_trans, CMolInfo_Base::eTech_standard, CBioseq_Base::GetInst(), CSeq_inst_Base::GetMol(), CBioseq::IsAa(), CBioseq_Base::IsSetDescr(), CBioseq_Base::IsSetInst(), CSeq_inst_Base::IsSetMol(), NON_CONST_ITERATE, CMolInfo_Base::SetBiomol(), CBioseq_Base::SetDescr(), CSeqdesc_Base::SetMolinfo(), and CMolInfo_Base::SetTech().
Referenced by CNewCleanup_imp::CreateMissingMolInfo().
|
static |
Adds NcbiCleanup User Object to Seq-descr.
Definition at line 1929 of file cleanup.cpp.
References CUser_object::eObjectType_Cleanup, CSeq_descr_Base::IsSet(), Ref(), and CSeq_descr_Base::Set().
Referenced by CNewCleanup_imp::AddNcbiCleanupObject(), and CCleanupHugeAsnReader::x_CleanupTopLevelDescriptors().
Adjusts protein title to reflect partialness.
bioseq | Bioseq to adjust |
Definition at line 2371 of file cleanup.cpp.
References CBioSource_Base::eGenome_chloroplast, CBioSource_Base::eGenome_chromatophore, CBioSource_Base::eGenome_chromosome, CBioSource_Base::eGenome_extrachrom, CBioSource_Base::eGenome_insertion_seq, CBioSource_Base::eGenome_proviral, CBioSource_Base::eGenome_transposon, CBioSource_Base::eGenome_unknown, CBioSource_Base::eGenome_virion, NStr::Find(), FOR_EACH_SEQDESC_ON_BIOSEQ, FOR_EACH_SEQDESC_ON_SEQSET, FOR_EACH_SEQID_ON_BIOSEQ, CMolInfo_Base::GetCompleteness(), CBioSource_Base::GetGenome(), CBioseq_Base::GetInst(), CSeqdesc_Base::GetMolinfo(), CBioSource_Base::GetOrg(), CBioSource::GetOrganelleByGenome(), CBioseq::GetParentSet(), CBioseq_set::GetParentSet(), CSeqdesc_Base::GetSource(), CSeq_inst::IsAa(), NStr::IsBlank(), IsCrossKingdom(), CSeqdesc_Base::IsMolinfo(), CMolInfo_Base::IsSetCompleteness(), CBioseq_Base::IsSetDescr(), CBioSource_Base::IsSetGenome(), CBioseq_Base::IsSetInst(), CSeq_inst_Base::IsSetMol(), CBioSource_Base::IsSetOrg(), CSeqdesc_Base::IsSource(), NCBI_COMPLETENESS, NON_CONST_ITERATE, NPOS, CConstRef< C, Locker >::Reset(), s_RemoveOrgFromEndOfProtein(), s_TitleEndsInOrganism(), CBioseq_Base::SetDescr(), CSeqdesc_Base::SetTitle(), string, and NStr::TruncateSpacesInPlace().
Referenced by CNewCleanup_imp::x_AddPartialToProteinTitle().
|
static |
Definition at line 2051 of file cleanup.cpp.
References CSeq_entry_EditHandle::AddSeqdesc(), CSeq_id::Assign(), CSeq_entry_EditHandle::AttachEntry(), CSeq_entry_EditHandle::ConvertSeqToSet(), copy(), CMolInfo_Base::eBiomol_peptide, CBioseq_set_Base::eClass_nuc_prot, CRef< C, Locker >::Empty(), CMolInfo_Base::eTech_concept_trans, CScope::GetBioseqHandle(), CBioseq_set_Handle::GetClass(), CBioseq_set_Handle::GetCompleteBioseq_set(), CSeq_entry_Handle::GetEditHandle(), CSeq_loc::GetId(), CSeq_feat_Base::GetLocation(), CSeq_entry_EditHandle::GetParentBioseq_set(), CBioseq_set_Handle::GetParentEntry(), CSeq_feat_Base::GetProduct(), CSeq_entry_Handle::GetScope(), CBioseq_Handle::GetSeq_entry_Handle(), CScope::GetSeq_entryEditHandle(), CSeq_entry_Handle::GetSet(), CSeq_entry_Handle::IsSet(), CBioseq_set_Handle::IsSetClass(), CSeq_feat_Base::IsSetProduct(), nuc, CMolInfo_Base::SetBiomol(), CBioseq_Base::SetDescr(), CBioseq_Base::SetId(), CSeqdesc_Base::SetMolinfo(), CSeq_entry_Base::SetSeq(), CSeq_entry_EditHandle::SetSet(), CMolInfo_Base::SetTech(), and CSeqTranslator::TranslateToProtein().
Referenced by g_InstantiateMissingProteins(), and WGSCleanup().
|
static |
Creates missing protein title descriptor.
seq | Bioseq to edit |
Definition at line 1865 of file cleanup.cpp.
References CSeq_id_Base::e_Patent, CSeq_id_Base::e_Pdb, CSeq_id_Base::e_Pir, CSeq_id_Base::e_Prf, CSeq_id_Base::e_Swissprot, CSeqdesc_Base::e_Title, CBioseq_Handle::GetEditHandle(), CBioseq_Handle::GetId(), CBioseq_Handle::GetInst(), CBioseq_Handle::IsAa(), CBioseq_Handle::IsSetId(), CBioseq_Handle::IsSetInst(), CSeq_inst_Base::IsSetMol(), ITERATE, CAutoAddDesc::Set(), CBioseq_EditHandle::SetDescr(), and CSeqdesc_Base::SetTitle().
Referenced by CNewCleanup_imp::AddProteinTitles(), and CTable2AsnValidator::Cleanup().
|
static |
Definition at line 3536 of file cleanup.cpp.
References NStr::Equal(), CBioSource_Base::GetOrg(), COrg_ref_Base::GetTaxname(), CBioSource_Base::IsSetOrg(), and COrg_ref_Base::IsSetTaxname().
Referenced by CMergeBiosources::apply(), MergeDupBioSources(), and CNewCleanup_imp::x_MoveNpSrc().
|
static |
Definition at line 4942 of file cleanup.cpp.
References CSeq_entry_EditHandle::AddSeqdesc(), CSerialObject::Assign(), b, CAutoDef::CreateIDOptions(), CSeqdesc_Base::e_User, CSeq_entry_Handle::GetEditHandle(), CAutoDef::RegenerateSequenceDefLines(), CSeq_entry_EditHandle::RemoveSeqdesc(), CSeqdesc_Base::SetUser(), and ud().
Referenced by CCleanupApp::x_ProcessXOptions().
CCleanup::TChanges CCleanup::BasicCleanup | ( | CBioseq_Handle & | bsh, |
Uint4 | options = 0 |
||
) |
Definition at line 198 of file cleanup.cpp.
References CNewCleanup_imp::BasicCleanupBioseqHandle(), CBioseq_Handle::GetScope(), makeCleanupChange(), and CNewCleanup_imp::SetScope().
CCleanup::TChanges CCleanup::BasicCleanup | ( | CBioseq_set & | bss, |
Uint4 | options = 0 |
||
) |
CCleanup::TChanges CCleanup::BasicCleanup | ( | CBioseq_set_Handle & | bssh, |
Uint4 | options = 0 |
||
) |
Definition at line 208 of file cleanup.cpp.
References CNewCleanup_imp::BasicCleanupBioseqSetHandle(), CBioseq_set_Handle::GetScope(), makeCleanupChange(), and CNewCleanup_imp::SetScope().
CCleanup::TChanges CCleanup::BasicCleanup | ( | CBioSource & | src, |
Uint4 | options = 0 |
||
) |
CCleanup::TChanges CCleanup::BasicCleanup | ( | CSeq_annot & | sa, |
Uint4 | options = 0 |
||
) |
CCleanup::TChanges CCleanup::BasicCleanup | ( | CSeq_annot_Handle & | sak, |
Uint4 | options = 0 |
||
) |
Definition at line 218 of file cleanup.cpp.
References CNewCleanup_imp::BasicCleanupSeqAnnotHandle(), CSeq_annot_Handle::GetScope(), makeCleanupChange(), and CNewCleanup_imp::SetScope().
CCleanup::TChanges CCleanup::BasicCleanup | ( | CSeq_descr & | desc, |
Uint4 | options = 0 |
||
) |
Definition at line 247 of file cleanup.cpp.
References CLEANUP_SETUP, and CSeq_descr_Base::Set().
CCleanup::TChanges CCleanup::BasicCleanup | ( | CSeq_entry & | se, |
Uint4 | options = 0 |
||
) |
Definition at line 132 of file cleanup.cpp.
References CLEANUP_SETUP.
Referenced by CMytestApplication::DoProcess(), CAsnvalThreadState::ProcessSeqAnnot(), CAsnvalThreadState::ProcessSeqEntry(), CAsnvalThreadState::ProcessSeqFeat(), CAsnvalThreadState::ProcessSeqSubmit(), and CAsnvalThreadState::ReadClassMember().
CCleanup::TChanges CCleanup::BasicCleanup | ( | CSeq_entry_Handle & | seh, |
Uint4 | options = 0 |
||
) |
Definition at line 188 of file cleanup.cpp.
References CNewCleanup_imp::BasicCleanupSeqEntryHandle(), CSeq_entry_Handle::GetScope(), makeCleanupChange(), and CNewCleanup_imp::SetScope().
CCleanup::TChanges CCleanup::BasicCleanup | ( | CSeq_feat & | sf, |
Uint4 | options = 0 |
||
) |
CCleanup::TChanges CCleanup::BasicCleanup | ( | CSeq_feat_Handle & | sfh, |
Uint4 | options = 0 |
||
) |
Definition at line 228 of file cleanup.cpp.
References CNewCleanup_imp::BasicCleanupSeqFeatHandle(), CSeq_feat_Handle::GetScope(), makeCleanupChange(), and CNewCleanup_imp::SetScope().
CCleanup::TChanges CCleanup::BasicCleanup | ( | CSeq_submit & | ss, |
Uint4 | options = 0 |
||
) |
CCleanup::TChanges CCleanup::BasicCleanup | ( | CSeqdesc & | desc, |
Uint4 | options = 0 |
||
) |
Definition at line 238 of file cleanup.cpp.
References CLEANUP_SETUP.
CCleanup::TChanges CCleanup::BasicCleanup | ( | CSubmit_block & | block, |
Uint4 | options = 0 |
||
) |
Definition at line 148 of file cleanup.cpp.
References CLEANUP_SETUP.
|
static |
Get BioSource from feature to use for source descriptor.
Definition at line 3824 of file cleanup.cpp.
References a, CSerialObject::Assign(), CSubSource_Base::eSubtype_other, CNewCleanup_imp::ExtendedCleanup(), f, ITERATE, makeCleanupChange(), CSubSource_Base::SetName(), CBioSource_Base::SetOrg(), CSubSource_Base::SetSubtype(), and CBioSource_Base::SetSubtype().
Referenced by CMergeBiosources::apply(), and ConvertSrcFeatsToSrcDescs().
Definition at line 585 of file cleanup_author.cpp.
References Asn2gnbkCompressSpaces(), CLEAN_AFFIL_MEMBER, CleanVisString(), CAffil_Base::e_Std, CAffil_Base::e_Str, NStr::EqualCase(), NStr::EqualNocase(), CAffil_Base::C_Std::GetCountry(), CAffil_Base::C_Std::GetSub(), CAffil_Base::C_Std::IsSetCountry(), CAffil_Base::C_Std::IsSetSub(), NStr::Replace(), CAffil_Base::C_Std::SetCountry(), CAffil_Base::SetStd(), CAffil_Base::SetStr(), CAffil_Base::C_Std::SetSub(), and CAffil_Base::Which().
Referenced by CCitSubCleaner::Clean(), and CleanupAuthList().
|
static |
Definition at line 491 of file cleanup_author.cpp.
References Asn2gnbkCompressSpaces(), CleanupAffil(), CleanupAuthor(), CleanVisStringContainer(), CAuth_list::ConvertMlToStandard(), CAuth_list_Base::GetAffil(), CAuth_list_Base::GetNames(), IsEmpty(), CAuth_list_Base::IsSetAffil(), CAuth_list_Base::IsSetNames(), names, NCBI_FALLTHROUGH, remove_if(), CAuth_list_Base::ResetAffil(), ResetAuthorNames(), CAuth_list_Base::SetAffil(), CAuth_list_Base::SetNames(), and CAuth_list_Base::C_Names::Which().
Referenced by CCitGenCleaner::Clean(), CCitSubCleaner::Clean(), CCitBookCleaner::Clean(), CCitPatCleaner::Clean(), CMedlineEntryCleaner::Clean(), CCitArtCleaner::CleanArticle(), and CNewCleanup_imp::x_AuthListBCWithFixInitials().
Definition at line 51 of file cleanup_author.cpp.
References CPerson_id_Base::e_Consortium, CPerson_id_Base::e_Ml, CPerson_id_Base::e_Name, CPerson_id_Base::e_Str, NStr::IsBlank(), n, and NStr::TruncateSpacesInPlace().
Referenced by CleanupAuthList().
|
static |
Definition at line 4883 of file cleanup.cpp.
References CSerialObject::Assign(), CSeqFeatData_Base::e_Biosrc, NStr::Equal(), CSubSource_Base::eSubtype_collection_date, CSubSource::FixDateFormat(), CSeqFeatData_Base::GetBiosrc(), CSeq_entry_Handle::GetCompleteSeq_entry(), CSeq_feat_Handle::GetData(), CSeq_feat_Handle::GetOriginalSeq_feat(), GetSourceDescriptors(), CBioSource_Base::IsSetSubtype(), CSeq_feat_EditHandle::Replace(), CSeq_feat_Base::SetData(), CSeqdesc_Base::SetSource(), and CBioSource_Base::SetSubtype().
Referenced by CTable2AsnValidator::Cleanup().
|
static |
Definition at line 50 of file cleanup_user_object.cpp.
References CleanVisString().
Referenced by CLoadStructComments::LoadWithAccessions(), CStructuredCommentPanel::OnExport(), CMacroFunction_ParseToStructComm::TheFunction(), CStructuredCommentPanel::UpdateSeqdesc(), CNewCleanup_imp::UserObjectBC(), and x_CleanupUserField().
|
static |
Definition at line 1595 of file cleanup.cpp.
References eExtreme_Biological, NON_CONST_ITERATE, and CPacked_seqint_Base::Set().
|
static |
Definition at line 1615 of file cleanup.cpp.
References CSerialObject::Assign(), ClearInternalPartials(), f, CSeq_feat_EditHandle::Replace(), and CSeq_feat_Base::SetLocation().
|
static |
Clear internal partials.
Definition at line 1553 of file cleanup.cpp.
References CSeq_loc_Base::e_Mix, CSeq_loc_Base::e_Packed_int, CSeq_loc::SetMix(), CSeq_loc::SetPacked_int(), and CSeq_loc_Base::Which().
Referenced by ClearInternalPartials(), and CCleanupApp::x_ProcessFeatureOptions().
|
static |
Definition at line 1570 of file cleanup.cpp.
References ClearInternalPartials(), eExtreme_Biological, NON_CONST_ITERATE, and CSeq_loc_mix_Base::Set().
|
static |
Definition at line 4408 of file cleanup.cpp.
References CSerialObject::Assign(), CSeq_inst::ConvertDeltaToRaw(), CBioseq_Handle::GetInst(), and CBioseq_EditHandle::SetInst().
Referenced by CCleanupApp::x_ProcessXOptions().
|
static |
Convert full-length publication features to publication descriptors.
seh | Seq-entry to edit |
Definition at line 3388 of file cleanup.cpp.
References CSerialObject::Assign(), b, CSeqFeatData_Base::e_Pub, eExtreme_Biological, CPubdesc_Base::GetComment(), CSeqdesc_Base::GetPub(), NStr::IsBlank(), CPubdesc_Base::IsSetComment(), MoveOneFeatToPubdesc(), CPubdesc_Base::SetComment(), and CSeqdesc_Base::SetPub().
Referenced by CTbl2AsnApp::ProcessOneEntry(), CTbl2AsnApp::ProcessSingleEntry(), and CNewCleanup_imp::x_ExtendedCleanupExtra().
|
static |
Convert full-length source features to source descriptors.
seh | Seq-entry to edit |
Definition at line 3857 of file cleanup.cpp.
References CBioseq_EditHandle::AddSeqdesc(), CBioseq_set_EditHandle::AddSeqdesc(), CSerialObject::Assign(), b, BioSrcFromFeat(), CSeqFeatData_Base::e_Biosrc, CSeqdesc_Base::e_Source, CBioseq_set_Base::eClass_nuc_prot, eExtreme_Biological, CSubSource_Base::eSubtype_transgenic, CSeq_feat_EditHandle::GetAnnot(), CBioseq_set_Handle::GetClass(), CSeq_annot_Handle::GetCompleteSeq_annot(), CSeqdesc_Base::GetSource(), CBioSource::HasSubtype(), CBioseq_set_Handle::IsSetClass(), CBioSource_Base::IsSetIs_focus(), MergeDupBioSources(), NormalizeDescriptorOrder(), CSeq_annot_EditHandle::Remove(), CSeq_feat_EditHandle::Remove(), RemoveDupBioSource(), CBioseq_EditHandle::SetDescr(), CBioseq_set_EditHandle::SetDescr(), CSeqdesc_Base::SetSource(), and CNewCleanup_imp::ShouldRemoveAnnot().
Referenced by CNewCleanup_imp::x_ExtendedCleanupExtra().
|
static |
Decodes various tags, including carriage-return-line-feed constructs.
Definition at line 4099 of file cleanup.cpp.
References _ASSERT, CTextFsm< MatchType >::AddWord(), ArraySize(), copy(), CTextFsm< MatchType >::GetInitialState(), CTextFsm< MatchType >::GetMatches(), CTextFsm< MatchType >::GetNextState(), CTextFsm< MatchType >::IsMatchFound(), CTextFsm< MatchType >::IsPrimed(), NPOS, CTextFsm< MatchType >::Prime(), result, and str().
Referenced by CNewCleanup_imp::x_DecodeXMLMarkChanged().
|
static |
Expands gene to include features it cross-references.
gene | Seq-feat to adjust |
tse | Top-level Seq-entry in which to find other features |
Definition at line 2571 of file cleanup.cpp.
References eExtreme_Positional, CSeqFeatData::eSubtype_any, f, CTSE_Handle::GetFeaturesWithId(), CObject_id_Base::GetId(), CSeq_feat_Base::GetLocation(), CSeq_loc::GetStart(), CSeq_loc::GetStop(), CSeq_feat_Base::GetXref(), CSeq_loc_Base::IsInt(), CSeq_feat_Base::IsSetLocation(), CSeq_feat_Base::IsSetXref(), ITERATE, and CSeq_feat_Base::SetLocation().
Referenced by WGSCleanup().
CCleanup::TChanges CCleanup::ExtendedCleanup | ( | CSeq_annot & | sa, |
Uint4 | options = 0 |
||
) |
CCleanup::TChanges CCleanup::ExtendedCleanup | ( | CSeq_entry & | se, |
Uint4 | options = 0 |
||
) |
Cleanup a Seq-entry.
Definition at line 259 of file cleanup.cpp.
References CLEANUP_SETUP.
Referenced by CTable2AsnValidator::Cleanup(), and WGSCleanup().
|
static |
Definition at line 283 of file cleanup.cpp.
References CNewCleanup_imp::ExtendedCleanupSeqEntryHandle(), and makeCleanupChange().
CCleanup::TChanges CCleanup::ExtendedCleanup | ( | CSeq_submit & | ss, |
Uint4 | options = 0 |
||
) |
|
static |
Definition at line 1072 of file cleanup.cpp.
References eExtreme_Biological, eExtreme_Positional, eNa_strand_minus, f, CSeq_loc::GetId(), CSeq_feat_Base::GetLocation(), CSeq_loc::GetStart(), CSeq_loc::GetStop(), CSeq_loc::GetStrand(), CSeq_loc_Base::IsMix(), CSeq_loc::IsSetStrand(), CSeq_loc::SetInt(), CSeq_loc::SetMix(), and CSeq_loc::SetPartialStop().
Referenced by CNewCleanup_imp::CdRegionEC(), ExtendToStopCodon(), CCleanupApp::x_FixCDS(), CFeatTableEdit::xGenerate_mRNA_Product(), and CFeatureTableReader::xTranslateProtein().
|
static |
Extends a feature up to limit nt to a stop codon, or to the end of the sequence if limit == 0. (Partial will be set if the location extends to the end of the sequence but no stop codon is found.)
f | Seq-feat to edit |
bsh | CBioseq_Handle on which the feature is located |
limit | maximum number of nt to extend, or 0 if unlimited |
Definition at line 1113 of file cleanup.cpp.
References CSeqVector::begin(), CBioseq_Handle::eCoding_Iupac, eExtreme_Biological, CCdregion_Base::eFrame_not_set, CCdregion_Base::eFrame_three, CCdregion_Base::eFrame_two, eNa_strand_minus, ExtendStopPosition(), f, CBioseq_Handle::GetBioseqLength(), CTrans_table::GetCodonResidue(), CBioseq_Handle::GetId(), CBioseq_Handle::GetInst_Length(), GetLength(), CBioseq_Handle::GetScope(), CSeq_loc::GetStop(), CSeq_loc::GetStrand(), CGen_code_table::GetTransTable(), i, CSeq_loc::IsSetStrand(), len, mod(), CTrans_table::NextCodonState(), CSeq_loc::SetInt(), CSeq_loc::SetStrand(), and CSeqVector::size().
Referenced by ExtendToStopIfShortAndNotPartial(), and CCleanupApp::x_BatchExtendCDS().
|
static |
Extends a coding region up to 50 nt.
Applies only if the coding region: (1) does not end with a stop codon, (2) is adjacent to a stop codon, and (3) is not pseudo.
f | Seq-feat to edit |
bsh | CBioseq_Handle on which the feature is located |
Definition at line 1291 of file cleanup.cpp.
References eExtreme_Biological, NStr::EndsWith(), ExtendToStopCodon(), f, CSeq_feat_Base::GetLocation(), GetmRNAforCDS(), CBioseq_Handle::GetScope(), CSeq_loc::GetStop(), IsPseudo(), s_IsLocationEndAtOtherLocationInternalEndpoint(), and CSeqTranslator::Translate().
Referenced by CNewCleanup_imp::CdRegionEC(), CFeatTableEdit::xGenerate_mRNA_Product(), and CFeatureTableReader::xTranslateProtein().
|
static |
Detects gene features with matching locus_tag.
f | Seq-feat parent feature of gene_xref [in] |
gene_xref | Gene-ref of gene-xref [in] |
bsh | CBioseq_Handle parent bioseq in which to search for genes [in] |
Definition at line 990 of file cleanup.cpp.
References CSeqFeatData::eSubtype_gene, f, CGene_ref_Base::GetLocus_tag(), CGene_ref_Base::IsSetLocus_tag(), and match().
Referenced by RemoveOrphanLocus_tagGeneXrefs().
|
static |
Detects gene features with matching locus.
f | Seq-feat parent feature of gene_xref [in] |
gene_xref | Gene-ref of gene-xref [in] |
bsh | CBioseq_Handle parent bioseq in which to search for genes [in] |
Definition at line 942 of file cleanup.cpp.
References CSeqFeatData::eSubtype_gene, f, CGene_ref_Base::GetLocus(), CGene_ref_Base::IsSetLocus(), and match().
Referenced by RemoveOrphanLocusGeneXrefs().
|
static |
Fix EC numbers.
entry | Seq-entry-handle to clean |
Definition at line 1713 of file cleanup.cpp.
References CSerialObject::Assign(), CSeqFeatData_Base::e_Prot, f, CSeq_feat_Base::GetData(), CProt_ref_Base::GetEc(), CSeqFeatData_Base::GetProt(), RemoveBadECNumbers(), CSeq_feat_EditHandle::Replace(), CSeq_feat_Base::SetData(), and UpdateECNumbers().
Referenced by CCleanupApp::x_ProcessFeatureOptions().
|
static |
Examine all genes and gene xrefs in the Seq-entry.
If no genes have locus and some have locus tag AND no gene xrefs have locus-tag and some gene xrefs have locus, change all gene xrefs to use locus tag. If no genes have locus tag and some have locus AND no gene xrefs have locus and some gene xrefs have locus tag, change all gene xrefs to use locus.
seh | Seq-entry to edit |
Definition at line 3915 of file cleanup.cpp.
References CSerialObject::Assign(), g(), CSeq_feat_Handle::GetData(), CSeqFeatData_Base::GetGene(), CSeq_feat_Handle::GetGeneXref(), CMappedFeat::GetSeq_feat(), CSeqFeatData_Base::IsGene(), CGene_ref_Base::IsSetLocus(), CGene_ref_Base::IsSetLocus_tag(), CSeq_feat_Handle::IsSetXref(), NON_CONST_ITERATE, CSeq_feat_EditHandle::Replace(), CFeat_CI::Rewind(), and CSeq_feat_Base::SetXref().
Referenced by CNewCleanup_imp::x_ExtendedCleanupExtra().
From GB-7563: An action has been requested that will do the following:
1. This action should be limited to protein sequences where the product is an exact match to a specified text (the usual string constraint is not needed). 2. Protein sequences for which the coding region is 5' partial should not be affected. 3. When the protein name matches, the following actions should be taken if and only if the first amino acid of the protein sequence is not M (methionine): a. The first amino acid of the protein sequence should be changed to methionine. b. The coding region should have the text "RNA editing" added to Seq-feat.except_text (separated from any existing text by a semicolon). If Seq-feat.except is not already true, it should be set to true. c. A code-break should be added to Cdregion.code-break where the Code-break.loc is the location of the first codon of the coding region and Code-break.aa is ncbieaa 'M'. (Indexers will refer to "code-breaks" as "translation exceptions" because these appear in the flatfile as a /transl_except qualifier.)
It will be the responsibility of the caller to only invoke this function for coding regions where the product name is a match, and the protein sequence does not already start with an M.
Definition at line 4849 of file cleanup.cpp.
References eExtreme_Biological, NStr::Find(), GetCodeBreakForLocation(), CSeq_feat_Base::GetData(), CSeq_feat_Base::GetExcept(), CSeq_feat_Base::GetExcept_text(), CSeq_feat_Base::GetLocation(), NStr::IsBlank(), CSeqFeatData_Base::IsCdregion(), IsMethionine(), CSeq_loc::IsPartialStart(), CSeq_feat_Base::IsSetData(), CSeq_feat_Base::IsSetExcept(), CSeq_feat_Base::IsSetExcept_text(), CSeq_feat_Base::IsSetLocation(), CSeq_feat_Base::SetExcept(), and CSeq_feat_Base::SetExcept_text().
Referenced by CRestoreRNAediting::OnApply().
|
static |
Get list of pubs that can be used for citations for Seq-feat on a Bioseq-handle.
bsh | Bioseq-handle to search |
Definition at line 3207 of file cleanup.cpp.
References CSeqdesc_Base::e_Pub, CSeqFeatData_Base::e_Pub, CSeq_feat_Handle::GetData(), CSeqdesc_Base::GetPub(), CSeqFeatData_Base::GetPub(), GetPubdescLabels(), CAliasBase< TPrim >::Set(), CCit_gen_Base::SetCit(), CPub_Base::SetGen(), CPub_Base::SetMuid(), CPub_Base::SetPmid(), and CCit_gen_Base::SetSerial_number().
Referenced by CNewCleanup_imp::MoveCitationQuals().
|
static |
Utility function for finding the code break for a given amino acid position. pos is the position of the amino acid where the translation exception occurs (positions start at 1).
Definition at line 4720 of file cleanup.cpp.
References CCdregion_Base::eFrame_not_set, CCdregion_Base::eFrame_one, CCdregion_Base::eFrame_three, CCdregion_Base::eFrame_two, CSeqFeatData_Base::GetCdregion(), CCdregion_Base::GetCode_break(), CSeq_feat_Base::GetData(), CCdregion_Base::GetFrame(), CSeq_feat_Base::GetLocation(), CSeqFeatData_Base::IsCdregion(), CCdregion_Base::IsSetCode_break(), CSeq_feat_Base::IsSetData(), CCdregion_Base::IsSetFrame(), CSeq_feat_Base::IsSetLocation(), LocationOffset(), and offset.
Referenced by FixRNAEditingCodingRegion().
|
static |
Definition at line 154 of file gene_qual_normalization.cpp.
References CSeqdesc_Base::e_Source, f, NStr::Find(), CBioSource::GetLineage(), CMappedFeat::GetOriginalFeature(), CSeqdesc_Base::GetSource(), CBioseq_Handle::IsAa(), IsMappablePair(), CBioSource::IsSetLineage(), CSeqdesc_Base::IsSource(), and NPOS.
Referenced by GetNormalizeGeneQualsCommand(), and NormalizeGeneQuals().
|
static |
Definition at line 4263 of file cleanup.cpp.
References CSeq_id::Assign(), eExtreme_Biological, eExtreme_Positional, eLocationInFrame_BadStart, eLocationInFrame_BadStartAndStop, eLocationInFrame_BadStop, eLocationInFrame_InFrame, eLocationInFrame_NotIn, CSeq_loc_Mapper_Base::eLocationToProduct, CSerialObject::Equals(), CSeq_loc::fMerge_All, CSeq_loc::fSort, CScope::GetBioseqHandle(), CSeq_loc::GetId(), CSeq_feat_Base::GetLocation(), CScope::GetSeq_featHandle(), CSeq_loc::GetStart(), CSeq_loc::GetStop(), CSeq_loc::GetStrand(), CSeq_loc_Base::IsInt(), IsLocationInFrame(), CSeq_loc::IsPartialStart(), CSeq_loc::IsPartialStop(), CSeq_loc_Base::IsPnt(), CSeq_loc_Mapper_Base::Map(), prot, CSeq_loc::ResetStrand(), Seq_loc_Merge(), Seq_loc_Subtract(), CSeq_loc::SetPartialStart(), CSeq_loc::SetPartialStop(), and tmp.
|
static |
Definition at line 4348 of file cleanup.cpp.
References GetOverlappingCDS(), and CSeq_feat_Base::IsSetProduct().
Referenced by MoveFeatToProtein(), CFeatureTableReader::MoveRegionsToProteins(), and s_MoveProteinSpecificFeats().
Definition at line 1467 of file cleanup.cpp.
References kEmptyStr, and prot.
Referenced by g_InstantiateMissingProteins(), GetProteinName(), s_GetProteinNameFromXrefOrQual(), WGSCleanup(), and CCleanupApp::x_FixCDS().
|
static |
Definition at line 1497 of file cleanup.cpp.
References CSeqFeatData::eSubtype_prot, f, CSeq_entry_Handle::GetBioseqHandle(), CSeq_loc::GetId(), CSeq_feat_Base::GetProduct(), GetProteinName(), CSeq_feat_Base::IsSetProduct(), prot, and s_GetProteinNameFromXrefOrQual().
|
static |
For publication citations: get labels for a pubdesc.
To be used in citations.
Definition at line 3139 of file cleanup.cpp.
References CPub::eContent, NStr::eNocase, IAbstractCitation::fLabel_Unique, CPub_equiv_Base::Get(), CPubdesc_Base::GetPub(), NStr::IsBlank(), CPubdesc_Base::IsSetPub(), ITERATE, label, s_GetAuthorsString(), and NStr::StartsWith().
Referenced by GetCitationList(), CValidator::CCacheImpl::GetPubdescToInfo(), s_CollectPubDescriptorLabels(), and CValidError_imp::ValidateCitations().
|
static |
Definition at line 424 of file cleanup_author.cpp.
References CAffil_Base::GetStd(), CAffil_Base::GetStr(), NStr::IsBlank(), CAffil_Base::C_Std::IsSetAffil(), CAffil_Base::C_Std::IsSetCity(), CAffil_Base::C_Std::IsSetCountry(), CAffil_Base::C_Std::IsSetDiv(), CAffil_Base::C_Std::IsSetEmail(), CAffil_Base::C_Std::IsSetFax(), CAffil_Base::C_Std::IsSetPhone(), CAffil_Base::C_Std::IsSetPostal_code(), CAffil_Base::C_Std::IsSetStreet(), CAffil_Base::C_Std::IsSetSub(), CAffil_Base::IsStd(), and CAffil_Base::IsStr().
Referenced by CCitSubCleaner::Clean(), and CleanupAuthList().
|
static |
Calculates whether a Gene-xref is unnecessary (because it refers to the same gene as would be calculated using overlap)
sf | Seq-feat with the xref [in] |
scope | Scope in which to search for location [in] |
gene_xref | Gene-ref of gene-xref [in] |
Definition at line 744 of file cleanup.cpp.
References Compare(), CSeqFeatData_Base::e_Gene, eOverlap_Contained, eSame, CSeqFeatData::eSubtype_gene, fCompareOverlapping, g(), CSeq_feat_Base::GetData(), CSeqFeatData_Base::GetGene(), CSeq_feat_Base::GetLocation(), GetOverlappingFeatures(), GetOverlappingGene(), CConstRef< C, Locker >::GetPointer(), CSeqFeatData_Base::IsGene(), CSeq_feat_Base::IsSetData(), CGene_ref::IsSuppressed(), ITERATE, and CGene_ref::RefersToSameGene().
Referenced by RemoveUnnecessaryGeneXrefs(), and CMacroFunction_RemoveGeneXref::s_GeneXrefMatchesNecessary().
|
static |
Definition at line 4688 of file cleanup.cpp.
References CCode_break_Base::C_Aa::e_Ncbi8aa, CCode_break_Base::C_Aa::e_Ncbieaa, CCode_break_Base::C_Aa::e_Ncbistdaa, CCode_break_Base::GetAa(), CCode_break_Base::C_Aa::GetNcbi8aa(), CCode_break_Base::C_Aa::GetNcbieaa(), CCode_break_Base::C_Aa::GetNcbistdaa(), CCode_break_Base::IsSetAa(), methionine_encoded, and CCode_break_Base::C_Aa::Which().
Referenced by FixRNAEditingCodingRegion().
Is this a "minimal" pub? (If yes, do not rescue from a Seq-feat.cit)
Definition at line 3426 of file cleanup.cpp.
References gen, CPub_equiv_Base::Get(), CPubdesc_Base::GetPub(), CPubdesc_Base::IsSetPub(), and ITERATE.
Referenced by RescueSiteRefPubs().
|
static |
Checks whether it is possible to extend the original location up to improved one.
It is possible only if the original location is less than the improved one.
orig | Seq-loc to check |
improved | Seq-loc original location may be extended to |
Definition at line 1333 of file cleanup.cpp.
References eExtreme_Biological, eNa_strand_minus, CSeq_loc::GetStop(), and orig.
Referenced by CNewCleanup_imp::CdRegionEC(), CCleanupApp::x_FixCDS(), CFeatTableEdit::xGenerate_mRNA_Product(), and CFeatureTableReader::xTranslateProtein().
|
static |
From SQD-4329: For each sequence with a source that has an IRD db_xref, create a misc_feature across the entire span and move the IRD db_xref from the source to the misc_feature.
Create a suppressing gene xref for the misc_feature.
entry | Seq-entry on which to search for sources and create features |
Definition at line 4654 of file cleanup.cpp.
References AddIRDMiscFeature(), CSeqdesc_Base::e_Source, CSeq_inst_Base::eMol_na, NStr::Equal(), COrg_ref_Base::GetDb(), CBioSource_Base::GetOrg(), CSeqdesc_Base::GetSource(), COrg_ref_Base::IsSetDb(), CBioSource_Base::IsSetOrg(), COrg_ref_Base::ResetDb(), and COrg_ref_Base::SetDb().
Referenced by CCleanupApp::x_ProcessXOptions().
|
static |
Definition at line 4583 of file cleanup.cpp.
References CSeqdesc_Base::e_Source, CSeq_inst_Base::eMol_na, map_checker< Container >::end(), map_checker< Container >::find(), CInfluenzaSet::GetKey(), CBioSource_Base::GetOrg(), CSeqdesc_Base::GetSource(), NStr::IsBlank(), CBioSource_Base::IsSetOrg(), and ncbi::grid::netcache::search::fields::key.
Referenced by CCleanupApp::x_ProcessXOptions().
|
static |
Definition at line 3629 of file cleanup.cpp.
References a, CBioSource_Base::eGenome_unknown, CBioSource_Base::eOrigin_unknown, CBioSource_Base::GetGenome(), CBioSource_Base::GetOrg(), CBioSource_Base::GetOrigin(), CBioSource_Base::GetSubtype(), CBioSource_Base::IsSetGenome(), CBioSource_Base::IsSetIs_focus(), CBioSource_Base::IsSetOrigin(), CBioSource_Base::IsSetSubtype(), ITERATE, s_SubSourceListUniqued(), CBioSource_Base::SetGenome(), CBioSource_Base::SetIs_focus(), CBioSource_Base::SetOrg(), CBioSource_Base::SetOrigin(), CBioSource_Base::SetSubtype(), and x_MergeDupOrgRefs().
|
static |
Definition at line 3765 of file cleanup.cpp.
References AreBioSourcesMergeable(), CNewCleanup_imp::ExtendedCleanup(), makeCleanupChange(), and CSeq_descr_Base::Set().
Referenced by CMergeBiosources::apply(), ConvertSrcFeatsToSrcDescs(), CNewCleanup_imp::x_MergeDupBioSources(), and CNewCleanup_imp::x_MoveNpSrc().
|
static |
Moves one feature from nucleotide bioseq to the appropriate protein sequence.
fh | Feature to edit |
Definition at line 589 of file cleanup.cpp.
References CSerialObject::Assign(), CBioseq_EditHandle::AttachAnnot(), CNewCleanup_imp::BasicCleanupSeqFeat(), ConvertProteinToImp(), CSeqFeatData_Base::e_Imp, CProt_ref_Base::eProcessed_mature, CProt_ref_Base::eProcessed_not_set, CProt_ref_Base::eProcessed_preprotein, ftable, CBioseq_Base::GetAnnot(), CSeq_feat_Handle::GetAnnot(), CBioseq_Handle::GetBioseqCore(), CScope::GetBioseqHandle(), CSeq_feat_Base::GetComment(), CBioseq_Handle::GetCompleteBioseq(), CSeq_annot_Handle::GetCompleteSeq_annot(), CSeq_feat_Base::GetData(), CSeq_feat_Handle::GetData(), CBioseq_Handle::GetEditHandle(), CBioseq_Base::GetId(), CSeqFeatData_Base::GetImp(), CImp_feat_Base::GetKey(), CSeq_feat_Base::GetLocation(), CSeq_feat_Handle::GetLocation(), CProt_ref_Base::GetProcessed(), CSeq_feat_Handle::GetProduct(), CSeqFeatData_Base::GetProt(), GetProteinLocationFromNucleotideLocation(), CSeq_feat_Handle::GetScope(), CScope::GetSeq_annotHandle(), CSeq_feat_Handle::GetSeq_feat(), CBioseq_Handle::IsAa(), NStr::IsBlank(), CSeqFeatData_Base::IsImp(), CSeqFeatData_Base::IsProt(), CBioseq_Base::IsSetAnnot(), CSeq_feat_Base::IsSetComment(), CImp_feat_Base::IsSetKey(), CProt_ref_Base::IsSetName(), CProt_ref_Base::IsSetProcessed(), CSeq_feat_Base::IsSetProduct(), CSeq_feat_Handle::IsSetProduct(), ITERATE, makeCleanupChange(), orig, CSeq_feat_EditHandle::Replace(), RescueProtProductQual(), CSeq_feat_Base::ResetComment(), CSeq_feat_Base::ResetLocation(), CSeq_feat_Base::ResetProduct(), s_GetCdsByLocation(), s_GetCdsByProduct(), s_IsPreprotein(), s_ProcessedFromKey(), CSeq_feat_Base::SetData(), SetFeaturePartial(), CSeq_feat_Base::SetLocation(), CNewCleanup_imp::SetScope(), CNewCleanup_imp::ShouldRemoveAnnot(), CSeq_annot_EditHandle::TakeFeat(), and CSeqFeatData_Base::Which().
Referenced by MoveProteinSpecificFeats().
|
static |
Definition at line 3353 of file cleanup.cpp.
References CBioseq_EditHandle::AddSeqdesc(), CBioseq_set_EditHandle::AddSeqdesc(), b, CBioseq_set_Base::eClass_nuc_prot, CBioseq_set_Handle::GetClass(), CBioseq_set_Handle::GetDescr(), CSeqdesc_Base::GetPub(), CBioseq_set_Handle::IsSetClass(), CBioseq_set_Handle::IsSetDescr(), NormalizeDescriptorOrder(), OkToPromoteNpPub(), PubAlreadyInSet(), CSeq_feat_EditHandle::Remove(), RemoveDuplicatePubs(), CBioseq_EditHandle::SetDescr(), and CBioseq_set_EditHandle::SetDescr().
Referenced by ConvertPubFeatsToPubDescs(), and RescueSiteRefPubs().
|
static |
Moves protein-specific features from nucleotide sequences in the Seq-entry to the appropriate protein sequence.
seh | Seq-entry Handle to edit [in] |
Definition at line 724 of file cleanup.cpp.
References CSeqFeatData_Base::e_Bond, CSeqFeatData_Base::e_Imp, CSeqFeatData_Base::e_Prot, CSeqFeatData_Base::e_Psec_str, CSeq_inst_Base::eMol_na, SAnnotSelector::IncludeFeatType(), and MoveFeatToProtein().
Referenced by s_MoveProteinSpecificFeats(), and CNewCleanup_imp::x_ExtendedCleanupExtra().
|
static |
Normalize Descriptor Order on a specific Seq-entry.
entry | Seq-entry to edit |
Definition at line 3000 of file cleanup.cpp.
References s_SeqDescLessThan(), seq_mac_is_sorted(), and CSeq_descr_Base::Set().
Referenced by ConvertSrcFeatsToSrcDescs(), CCleanupApp::HandleSeqEntry(), MoveOneFeatToPubdesc(), NormalizeDescriptorOrder(), RenormalizeNucProtSets(), WGSCleanup(), CCleanupHugeAsnReader::x_CleanupTopLevelDescriptors(), CCleanupApp::x_ProcessXOptions(), and CNewCleanup_imp::x_SortSeqDescs().
|
static |
Normalize Descriptor Order on a specific Seq-entry.
seh | Seq-entry-Handle to edit |
Definition at line 3010 of file cleanup.cpp.
References CSeq_entry_CI::fIncludeGivenEntry, CSeq_entry_CI::fRecursive, and NormalizeDescriptorOrder().
|
static |
Definition at line 203 of file gene_qual_normalization.cpp.
References CSerialObject::Assign(), GetNormalizableGeneQualPairs(), NormalizeGeneQuals(), and CSeq_feat_EditHandle::Replace().
|
static |
Definition at line 224 of file gene_qual_normalization.cpp.
References CSeq_inst_Base::eMol_na, and NormalizeGeneQuals().
Definition at line 98 of file gene_qual_normalization.cpp.
References NStr::Equal(), CSeq_feat_Base::GetData(), CSeqFeatData_Base::GetGene(), CGene_ref_Base::GetLocus_tag(), NStr::IsBlank(), CSeqFeatData_Base::IsGene(), CGene_ref_Base::IsSetLocus(), CGene_ref_Base::IsSetLocus_tag(), CSeq_feat_Base::IsSetXref(), CSeq_feat_Base::SetData(), and CSeq_feat_Base::SetXref().
Referenced by GetNormalizeGeneQualsCommand(), NormalizeGeneQuals(), and CNewCleanup_imp::x_ExtendedCleanupExtra().
For some sequences, pubs should not be promoted to nuc-prot set from sequence.
Definition at line 3330 of file cleanup.cpp.
Some pubs should not be promoted to nuc-prot set from sequence.
Definition at line 3343 of file cleanup.cpp.
References CPubdesc_Base::IsSetComment(), CPubdesc_Base::IsSetFig(), CPubdesc_Base::IsSetName(), and CPubdesc_Base::IsSetNum().
Referenced by MoveOneFeatToPubdesc(), and CNewCleanup_imp::x_MoveNpPub().
|
static |
Parse string into code break and add to coding region.
feat | feature that contains coding region - necessary to determine codon boundaries |
cds | coding region to which code breaks will be added |
str | string from which to parse code break |
scope | scope in which to find sequences referenced (used for location comparisons) |
Definition at line 4425 of file cleanup.cpp.
References Compare(), CCleanupMessage::eCodeBreak, eContained, eDiag_Error, eNa_strand_minus, eNa_strand_plus, fCompareOverlapping, FIELD_IS_SET, NStr::Find(), GET_FIELD, CSeq_loc::GetId(), GetLength(), CSeq_feat_Base::GetLocation(), isalpha(), CSeq_loc_Base::IsInt(), CSeq_loc_Base::IsPnt(), CSeq_feat_Base::IsSetLocation(), isspace(), len, msg(), NPOS, IObjtoolsListener::PutMessage(), ReadLocFromText(), RESET_FIELD, CCode_break_Base::SetAa(), CCdregion_Base::SetCode_break(), CCode_break_Base::SetLoc(), CCode_break_Base::C_Aa::SetNcbieaa(), CSeq_loc::SetStrand(), str(), NStr::TruncateSpaces_Unsafe(), and x_ValidAminoAcid().
Referenced by ParseCodeBreaks().
Parses all valid transl_except Gb-quals into code-breaks for cdregion, then removes the transl_except Gb-quals that were successfully parsed.
feat | feature that contains coding region |
scope | scope in which to find sequences referenced (used for location comparisons) |
Definition at line 4556 of file cleanup.cpp.
References NStr::EqualNocase(), CSeq_feat_Base::GetData(), CSeq_feat_Base::GetQual(), CSeqFeatData_Base::IsCdregion(), CSeq_feat_Base::IsSetData(), CSeq_feat_Base::IsSetLocation(), CSeq_feat_Base::IsSetQual(), ParseCodeBreak(), CSeq_feat_Base::ResetQual(), CSeq_feat_Base::SetData(), and CSeq_feat_Base::SetQual().
Referenced by CNewCleanup_imp::x_CleanSeqFeatQuals(), CImportFeatTable::x_DoImportCDS(), and CFeatureTableReader::xMoveCdRegions().
|
static |
Definition at line 3319 of file cleanup.cpp.
References CSeq_descr_Base::Get(), CPubdesc_Base::GetPub(), ITERATE, and s_FirstPubMatchesSecond().
Referenced by MoveOneFeatToPubdesc(), and CNewCleanup_imp::x_MoveNpPub().
|
static |
Delete EC numbers.
ec_num_list | Prot-ref ec number list to clean |
Definition at line 1689 of file cleanup.cpp.
References CleanVisStringJunk(), CProt_ref::eEC_deleted, CProt_ref::eEC_unknown, CProt_ref::GetECNumberStatus(), and CProt_ref::IsECNumberSplit().
Referenced by FixECNumbers().
|
static |
Remove duplicate biosource descriptors.
Definition at line 3794 of file cleanup.cpp.
References ITERATE, and CSeq_descr_Base::Set().
Referenced by ConvertSrcFeatsToSrcDescs().
|
static |
Remove duplicate publications.
Definition at line 3281 of file cleanup.cpp.
References CSeq_descr_Base::Set().
Referenced by MoveOneFeatToPubdesc(), and CNewCleanup_imp::x_RemoveDupPubs().
|
static |
Removes NcbiCleanup User Objects in the Seq-entry.
seq_entry | Seq-entry to edit |
Definition at line 1898 of file cleanup.cpp.
References CUser_object::eObjectType_Cleanup, CSeq_entry_Base::GetSet(), CSeq_entry_Base::IsSeq(), CSeq_entry_Base::IsSet(), CSeq_entry::IsSetDescr(), CBioseq_set_Base::IsSetSeq_set(), NON_CONST_ITERATE, CBioseq_Base::ResetDescr(), CBioseq_set_Base::ResetDescr(), CSeq_entry::SetDescr(), CSeq_entry_Base::SetSeq(), CBioseq_set_Base::SetSeq_set(), and CSeq_entry_Base::SetSet().
Referenced by CCleanupApp::HandleSeqEntry(), and CNewCleanup_imp::x_AddNcbiCleanupObject().
Removes non-suppressing Gene-xrefs.
f | Seq-feat to edit [in] |
Definition at line 828 of file cleanup.cpp.
References f.
Referenced by CRemoveGeneXrefs::GetCommand(), and CRemoveGeneXrefs::RemoveNonsuppressing().
|
static |
Removes orphaned locus_tag Gene-xrefs.
f | Seq-feat to edit [in] |
bsh | CBioseq_Handle in which to search for gene features [in] |
Definition at line 1013 of file cleanup.cpp.
References f, and FindMatchingLocus_tagGene().
Referenced by CRemoveGeneXrefs::RemoveOrphanLocus_tag().
|
static |
Removes orphaned locus Gene-xrefs.
f | Seq-feat to edit [in] |
bsh | CBioseq_Handle in which to search for gene features [in] |
Definition at line 965 of file cleanup.cpp.
References f, and FindMatchingLocusGene().
Referenced by CRemoveGeneXrefs::RemoveOrphanLocus().
Removes protein product from pseudo coding region.
cds | Seq-feat to adjust |
scope | Scope in which to find protein sequence and remove it |
Definition at line 2537 of file cleanup.cpp.
References CSeqFeatData::eSubtype_prot, CScope::GetBioseqHandle(), CSeq_feat_Base::GetComment(), CSeq_feat_Base::GetData(), CSeq_feat_Base::GetProduct(), NStr::IsBlank(), CSeqFeatData_Base::IsCdregion(), IsPseudo(), CSeq_feat_Base::IsSetComment(), CSeq_feat_Base::IsSetData(), CSeq_feat_Base::IsSetProduct(), label, prot, CBioseq_EditHandle::Remove(), CSeq_feat_Base::ResetProduct(), and CSeq_feat_Base::SetComment().
Referenced by CNewCleanup_imp::CdRegionEC(), and WGSCleanup().
|
static |
Removes unnecessary Gene-xrefs on features in Seq-entry.
seh | Seq-entry-Handle to edit [in] |
Definition at line 804 of file cleanup.cpp.
References CSerialObject::Assign(), CSeq_entry_Handle::GetScope(), RemoveUnnecessaryGeneXrefs(), and CSeq_feat_EditHandle::Replace().
Removes unnecessary Gene-xrefs.
f | Seq-feat to edit [in] |
scope | Scope in which to search for locations [in] |
Definition at line 779 of file cleanup.cpp.
References f, and IsGeneXrefUnnecessary().
Referenced by CRemoveGeneXrefs::GetCommand(), CRemoveGeneXrefs::RemoveUnnecessary(), RemoveUnnecessaryGeneXrefs(), and CCleanupApp::x_ProcessFeatureOptions().
|
static |
Remove all titles in Seqdescr except the last, because it is the only one that would be displayed in the flatfile.
seq | Bioseq-Handle to edit |
Definition at line 3027 of file cleanup.cpp.
References CSeq_descr_Base::Get(), CBioseq_Handle::GetDescr(), CBioseq_Handle::IsSetDescr(), ITERATE, CBioseq_EditHandle::RemoveSeqdesc(), and CConstRef< C, Locker >::Reset().
Referenced by RenormalizeNucProtSets().
|
static |
Remove all titles in Seqdescr except the last, because it is the only one that would be displayed in the flatfile.
set | Bioseq-set-Handle to edit |
Definition at line 3046 of file cleanup.cpp.
References ITERATE, and CConstRef< C, Locker >::Reset().
|
static |
Convert nuc-prot sets with just one sequence to just the sequence. This can't be done during the explore phase because it changes a seq to a set.
seh | Seq-entry to edit |
Definition at line 4063 of file cleanup.cpp.
References CSeq_entry_EditHandle::ConvertSetToSeq(), CBioseq_set_Base::eClass_eco_set, CBioseq_set_Base::eClass_gen_prod_set, CBioseq_set_Base::eClass_genbank, CBioseq_set_Base::eClass_mut_set, CBioseq_set_Base::eClass_nuc_prot, CBioseq_set_Base::eClass_phy_set, CBioseq_set_Base::eClass_pop_set, CBioseq_set_Base::eClass_small_genome_set, CBioseq_set_Base::eClass_wgs_set, CBioseq_set_Handle::GetClass(), CSeq_entry_Handle::GetCompleteSeq_entry(), CSeq_entry_Handle::GetEditHandle(), CSeq_entry_Handle::GetScope(), CSeq_entry_Handle::GetSeq(), CScope::GetSeq_entryHandle(), CBioseq_set_Base::GetSeq_set(), CSeq_entry_Base::GetSet(), CSeq_entry_Handle::GetSet(), CSeq_entry_Handle::IsSet(), CBioseq_set_Handle::IsSetClass(), CBioseq_Handle::IsSetDescr(), CBioseq_set_Base::IsSetSeq_set(), ITERATE, NormalizeDescriptorOrder(), RemoveUnseenTitles(), and CSeq_entry_EditHandle::SetSeq().
Referenced by CNewCleanup_imp::x_ExtendedCleanupExtra(), and CCleanupApp::x_RemoveDuplicateFeatures().
|
static |
Definition at line 4362 of file cleanup.cpp.
References CTSE_Handle::GetBioseqHandle(), CBioseq_Handle::GetParentBioseq_set(), CSeq_feat_Base::GetProduct(), CBioseq_Handle::GetSeq_entry_Handle(), CBioseq_set_Handle::GetTSE_Handle(), CSeq_loc_Base::GetWhole(), CSeq_feat_Base::IsSetProduct(), CSeq_loc_Base::IsWhole(), and CBioseq_set_EditHandle::TakeEntry().
|
static |
Find proteins that are not packaged in the same nuc-prot set as the coding region for which they are a product, and move them to that nuc-prot set.
Ignore coding regions that are in gen-prod-sets.
seh | Seq-entry to edit |
Definition at line 4385 of file cleanup.cpp.
References CSeq_entry_Base::e_Set, CBioseq_set_Base::eClass_nuc_prot, CSeq_entry_CI::fIncludeGivenEntry, CSeq_entry_CI::fRecursive, ITERATE, and si.
Referenced by CNewCleanup_imp::x_ExtendedCleanupExtra().
|
static |
Repairs non-reciprocal xref pairs for specified feature if xrefs between subtypes are permitted and feature with missing xref does not have an xref to a different feature of the same subtype.
f | Seq-feat to edit [in] |
tse | top-level Seq-entry in which to search for the other half of the xref pair |
Definition at line 905 of file cleanup.cpp.
References CSeqFeatData_Base::e_not_set, f, CTSE_Handle::GetFeaturesWithId(), CObject_id_Base::GetId(), and ITERATE.
Referenced by CSequenceEditingEventHandler::FixNonReciprocalLinks(), RepairXrefs(), and CNewCleanup_imp::x_ExtendedCleanupExtra().
|
static |
Repairs non-reciprocal xref pairs for specified feature pair if xrefs between subtypes are permitted and feature with missing xref does not have an xref to a different feature of the same subtype.
f | Seq-feat to edit [in] |
tse | top-level Seq-entry in which to search for the other half of the xref pair |
Definition at line 854 of file cleanup.cpp.
References CSeq_feat::AddSeqFeatXref(), CSeqFeatData::AllowXref(), CSerialObject::Assign(), CSeqFeatData_Base::e_not_set, CSeq_feat_Handle::GetAnnot(), CSeq_feat_Base::GetData(), CSeq_feat_Handle::GetData(), CSeq_annot_Handle::GetEditHandle(), CTSE_Handle::GetFeaturesWithId(), CObject_id_Base::GetId(), CSeq_feat_Base::GetId(), CSeq_feat_Handle::GetSeq_feat(), CSeqFeatData::GetSubtype(), CSeq_feat_Handle::GetXref(), CSeqFeatData_Base::IsGene(), CFeat_id_Base::IsLocal(), CSeq_feat_Base::IsSetId(), CSeq_feat_Handle::IsSetXref(), ITERATE, and CSeq_feat_EditHandle::Replace().
|
static |
Repairs non-reciprocal xref pairs in specified seq-entry.
seh | Seq-entry to edit [in] |
Definition at line 926 of file cleanup.cpp.
References CMappedFeat::GetSeq_feat(), CSeq_entry_Handle::GetTSE_Handle(), and RepairXrefs().
|
static |
Rescue pubs from Site-ref features.
seh | Seq-entry to edit |
Definition at line 3457 of file cleanup.cpp.
References CSerialObject::Assign(), b, CNewCleanup_imp::BasicCleanup(), CSeqFeatData_Base::e_Imp, CPub_set_Base::e_Pub, CPubdesc_Base::eReftype_feats, CPubdesc_Base::eReftype_sites, f, CSeq_feat_EditHandle::GetAnnot(), CSeq_annot_Handle::GetCompleteSeq_annot(), IsMinPub(), IsSiteRef(), ITERATE, makeCleanupChange(), MoveOneFeatToPubdesc(), CSeq_annot_EditHandle::Remove(), CSeq_feat_EditHandle::Remove(), CPubdesc_Base::SetPub(), CSeqdesc_Base::SetPub(), CPubdesc_Base::SetReftype(), CNewCleanup_imp::ShouldRemoveAnnot(), ShouldStripPubSerial(), and t.
Referenced by CNewCleanup_imp::x_ExtendedCleanupExtra().
|
static |
Definition at line 567 of file cleanup_author.cpp.
References names.
Referenced by CleanupAuthList().
|
staticprivate |
Definition at line 390 of file cleanup_user_object.cpp.
References CUser_field_Base::C_Data::e_Ints, CUser_field_Base::C_Data::e_Oss, CUser_field_Base::C_Data::e_Reals, CUser_field_Base::C_Data::e_Strs, CUser_field_Base::GetData(), CUser_field_Base::C_Data::GetInts(), CUser_field_Base::GetNum(), CUser_field_Base::C_Data::GetOss(), CUser_field_Base::C_Data::GetReals(), CUser_field_Base::C_Data::GetStrs(), CUser_field_Base::IsSetData(), CUser_field_Base::IsSetNum(), CUser_field_Base::SetNum(), and CUser_field_Base::C_Data::Which().
Referenced by x_CleanupUserField().
|
staticprivate |
Definition at line 370 of file cleanup_user_object.cpp.
References CUser_object::eObjectType_DBLink, CUser_object::GetObjectType(), CUser_object_Base::IsSetData(), CUser_object_Base::SetData(), and val.
|
staticprivate |
Definition at line 125 of file cleanup_user_object.cpp.
References DEFINE_STATIC_ARRAY_MAP(), NStr::eNocase, NStr::Equal(), CUser_field_Base::GetData(), CUser_field_Base::GetLabel(), CObject_id_Base::GetStr(), CUser_object_Base::GetType(), CUser_field_Base::C_Data::IsFields(), CUser_field_Base::IsSetData(), CUser_object_Base::IsSetData(), CUser_field_Base::IsSetLabel(), CUser_object_Base::IsSetType(), CObject_id_Base::IsStr(), CUser_field_Base::C_Data::IsStr(), CUser_field_Base::SetData(), CUser_object_Base::SetData(), and NStr::StartsWith().
|
staticprivate |
Definition at line 276 of file cleanup_user_object.cpp.
References CSubSource::DateFromCollectionDate(), NStr::Equal(), CSubSource::FixDateFormat(), CUser_field_Base::GetData(), CDate::GetDate(), CUser_field_Base::GetLabel(), CDate_Base::GetStd(), CObject_id_Base::GetStr(), CUser_field_Base::C_Data::GetStr(), NStr::IsBlank(), CUser_field_Base::IsSetData(), CDate_std_Base::IsSetDay(), CUser_field_Base::IsSetLabel(), CDate_std_Base::IsSetMonth(), CDate_std_Base::IsSetYear(), CDate_Base::IsStd(), CObject_id_Base::IsStr(), CUser_field_Base::C_Data::IsStr(), CUser_field_Base::SetData(), CUser_object_Base::SetData(), and NStr::ToUpper().
Referenced by s_CleanupStructuredComment().
Definition at line 98 of file cleanup_author.cpp.
References CleanVisString(), NStr::EndsWith(), NStr::Equal(), NStr::eTrunc_Begin, CName_std::ExtractSuffixFromLastName(), NStr::Find(), first(), CName_std::FixSuffix(), CName_std_Base::GetFirst(), CName_std_Base::GetInitials(), CName_std_Base::GetLast(), CName_std_Base::GetMiddle(), CName_std_Base::GetSuffix(), isalpha(), NStr::IsBlank(), islower(), CName_std_Base::IsSetFirst(), CName_std_Base::IsSetFull(), CName_std_Base::IsSetInitials(), CName_std_Base::IsSetLast(), CName_std_Base::IsSetMiddle(), CName_std_Base::IsSetSuffix(), CName_std_Base::IsSetTitle(), isupper(), kEmptyCStr, kEmptyStr, NStr::ReplaceInPlace(), CName_std_Base::ResetFirst(), CName_std_Base::ResetFull(), CName_std_Base::ResetInitials(), CName_std_Base::ResetLast(), CName_std_Base::ResetMiddle(), CName_std_Base::ResetSuffix(), CName_std_Base::ResetTitle(), s_ExtractSuffixFromInitials(), s_FixEtAl(), CName_std_Base::SetFirst(), CName_std_Base::SetInitials(), CName_std_Base::SetLast(), CName_std_Base::SetMiddle(), CName_std_Base::SetSuffix(), toupper(), and NStr::TruncateSpacesInPlace().
|
staticprivate |
Definition at line 177 of file cleanup_user_object.cpp.
References CUtf8::AsUTF8(), data, eEncoding_Ascii, CUser_object::eObjectType_StructuredComment, NStr::Equal(), CComment_set::GetCommentRules(), CUser_field_Base::GetData(), CUser_field_Base::GetLabel(), CUser_object::GetObjectType(), CObject_id_Base::GetStr(), CUser_field_Base::C_Data::GetStr(), CUser_field_Base::IsSetData(), CUser_object_Base::IsSetData(), CUser_field_Base::IsSetLabel(), CObject_id_Base::IsStr(), CUser_field_Base::C_Data::IsStr(), kGenomeAssemblyData, CComment_rule::MakePrefixFromRoot(), CComment_rule::MakeSuffixFromRoot(), CComment_rule::NormalizePrefix(), CComment_rule::ReorderFields(), s_CleanupGenomeAssembly(), s_RemoveEmptyFields(), CUser_field_Base::SetData(), CUser_object_Base::SetData(), CUser_field_Base::SetLabel(), and CUser_field::SetString().
|
staticprivate |
Definition at line 375 of file cleanup_author.cpp.
References _ASSERT, EXTRACTSUFFIXFROMINITIALS, CName_std_Base::IsSetInitials(), CName_std_Base::IsSetSuffix(), NPOS, and CName_std_Base::SetInitials().
Referenced by s_CleanupNameStdBC().
|
staticprivate |
Definition at line 403 of file cleanup_author.cpp.
References NStr::Equal(), CName_std_Base::GetFirst(), CName_std_Base::GetInitials(), CName_std_Base::GetLast(), init(), NStr::IsBlank(), CName_std_Base::IsSetFirst(), CName_std_Base::IsSetInitials(), CName_std_Base::IsSetLast(), CName_std_Base::ResetFirst(), CName_std_Base::ResetInitials(), and CName_std_Base::SetLast().
Referenced by s_CleanupNameStdBC().
|
staticprivate |
Definition at line 1379 of file cleanup.cpp.
References NStr::EqualNocase(), CSeq_feat_Base::GetQual(), CSeq_feat_Base::GetXref(), CSeq_feat_Base::IsSetQual(), and CSeq_feat_Base::IsSetXref().
|
staticprivate |
Definition at line 333 of file cleanup_user_object.cpp.
References CUser_field_Base::C_Data::e_not_set, CUser_object::eObjectType_StructuredComment, CUser_object::GetObjectType(), NStr::IsBlank(), CUser_object_Base::IsSetData(), CUser_object_Base::SetData(), and val.
Referenced by s_CleanupStructuredComment().
|
staticprivate |
Definition at line 1400 of file cleanup.cpp.
References append(), NStr::EqualNocase(), NStr::IsBlank(), CSeq_feat_Base::IsSetQual(), CSeq_feat_Base::IsSetXref(), SetProteinName(), CSeq_feat_Base::SetQual(), and CSeq_feat_Base::SetXref().
Referenced by SetProteinName().
Extends a location to the specified position.
loc | Seq-loc to extend |
pos | position of new end of location |
scope | Scope in which to look for sequences |
Definition at line 1038 of file cleanup.cpp.
References CSeq_loc::Assign(), eExtreme_Positional, CSeq_loc::fMerge_AbuttingOnly, CSeq_loc::fSort, CSeq_loc::GetId(), CSeq_loc::GetStart(), CSeq_loc::GetStop(), CSeq_loc::GetStrand(), CSeq_loc::IsPartialStart(), CSeq_loc::IsPartialStop(), Seq_loc_Add(), CSeq_loc::SetPartialStart(), and CSeq_loc::SetPartialStop().
Referenced by ExtendToGapsOrEnds().
Translates coding region and selects best frame (without stops, or longest)
cds | Coding region Seq-feat to edit |
scope | Scope in which to find coding region |
Definition at line 1194 of file cleanup.cpp.
References CCdregion_Base::eFrame_not_set, CSeqTranslator::FindBestFrame(), CSeqFeatData_Base::GetCdregion(), CSeq_feat_Base::GetData(), CCdregion_Base::GetFrame(), CCdregion_Base::IsSetFrame(), and CSeq_feat_Base::SetData().
Referenced by WGSCleanup().
1. Set the partial flags when the CDS is partial and codon_start is 2 or 3.
2. Make the CDS partial at the 5' end if there is no start codon.
3. Make the CDS partial at the 3' end if there is no stop codon.
cds | Coding region Seq-feat to edit |
scope | Scope in which to find coding region and coding region's protein product sequence |
Definition at line 1513 of file cleanup.cpp.
References AdjustFeaturePartialFlagForLocation(), eExtreme_Biological, CCdregion_Base::eFrame_not_set, CCdregion_Base::eFrame_one, NStr::EndsWith(), CSeqFeatData_Base::GetCdregion(), CSeq_feat_Base::GetData(), CCdregion_Base::GetFrame(), CSeq_feat_Base::GetLocation(), NStr::IsBlank(), CSeq_loc::IsPartialStart(), CSeq_loc::IsPartialStop(), CCdregion_Base::IsSetFrame(), CSeq_feat_Base::SetLocation(), NStr::StartsWith(), and CSeqTranslator::Translate().
Referenced by WGSCleanup().
|
static |
Utility function for setting code break location given an offset. pos is the position of the amino acid where the translation exception occurs (starts with 1).
Definition at line 4757 of file cleanup.cpp.
References CSeq_loc_Base::e_Packed_int, CCdregion_Base::eFrame_not_set, CCdregion_Base::eFrame_one, CCdregion_Base::eFrame_three, CCdregion_Base::eFrame_two, eNa_strand_minus, CPacked_seqint_Base::Get(), CSeqFeatData_Base::GetCdregion(), CSeq_feat_Base::GetData(), CCdregion_Base::GetFrame(), CSeq_feat_Base::GetLocation(), CSeq_loc_Base::GetPacked_int(), CSeqFeatData_Base::IsCdregion(), CSeq_feat_Base::IsSetData(), CCdregion_Base::IsSetFrame(), len, offset, CCode_break_Base::ResetLoc(), CCode_break_Base::SetLoc(), CSeq_loc::SetPacked_int(), tmp, and CSeq_loc_Base::Which().
Set feature partial based on feature location.
Definition at line 1633 of file cleanup.cpp.
References f, CSeq_loc_CI::GetFuzzFrom(), and CSeq_loc_CI::GetFuzzTo().
Referenced by MoveFeatToProtein().
|
static |
Chooses best frame based on location.
1. If the location is 5' complete, then the frame must be one.
2. If the location is 5' partial and 3' complete, select a frame using the value of the location length modulo 3.
cdregion | Coding Region in which to set frame |
loc | Location to use for setting frame |
scope | Scope in which to find location sequence(s) |
Definition at line 1253 of file cleanup.cpp.
References CCdregion_Base::eFrame_not_set, CCdregion_Base::GetFrame(), CCdregion_Base::IsSetFrame(), and CCdregion_Base::SetFrame().
Referenced by CkCdRegion(), CNewCleanup_imp::ImpFeatBC(), and CCleanupApp::x_FixCDS().
|
static |
Definition at line 1211 of file cleanup.cpp.
References _ASSERT, eExtreme_Biological, CCdregion_Base::eFrame_not_set, CCdregion_Base::eFrame_one, CCdregion_Base::eFrame_three, CCdregion_Base::eFrame_two, GetLength(), CSeq_loc::IsPartialStart(), and CSeq_loc::IsPartialStop().
Set partialness of gene to match longest feature contained in gene.
gene | Seq-feat to edit |
scope | Scope in which to find gene |
Definition at line 1739 of file cleanup.cpp.
References Compare(), CopyFeaturePartials(), eContains, eSame, fCompareOverlapping, CScope::GetBioseqHandle(), CSeq_feat_Handle::GetData(), GetLength(), CSeq_feat_Base::GetLocation(), CMappedFeat::GetLocation(), CMappedFeat::GetSeq_feat(), CSeqFeatData_Base::IsGene(), len, and CConstRef< C, Locker >::Reset().
Referenced by WGSCleanup().
|
static |
Sets genetic codes for coding regions on Bioseq-Handle.
Bioseq-Handle | to examine |
Definition at line 2122 of file cleanup.cpp.
References CSerialObject::Assign(), CSeqFeatData_Base::e_Cdregion, CSeqdesc_Base::e_Source, CSeqFeatData_Base::GetCdregion(), CCdregion_Base::GetCode(), CSeq_feat_Base::GetData(), CBioSource::GetGenCode(), CGenetic_code::GetId(), CBioSource_Base::GetOrg(), COrg_ref_Base::GetOrgname(), CMappedFeat::GetOriginalFeature(), CSeqdesc_Base::GetSource(), CSeq_feat::HasExceptionText(), CBioseq_Handle::IsNa(), CCdregion_Base::IsSetCode(), CSeq_feat_EditHandle::Replace(), CCdregion_Base::ResetCode(), CCdregion_Base::SetCode(), and CSeq_feat_Base::SetData().
Referenced by CNewCleanup_imp::SetGeneticCode(), WGSCleanup(), and CCleanupApp::x_FixCDS().
|
static |
Sets MolInfo::biomol for a sequence.
seq | Bioseq to edit |
biomol | biomol value to set |
Definition at line 1802 of file cleanup.cpp.
References CBioseq_EditHandle::AddSeqdesc(), CSeqdesc_Base::e_Molinfo, CMolInfo_Base::GetBiomol(), CBioseq_Handle::GetEditHandle(), CSeqdesc_Base::GetMolinfo(), CMolInfo_Base::IsSetTech(), CMolInfo_Base::SetBiomol(), and CSeqdesc_Base::SetMolinfo().
|
static |
Sets MolInfo::tech for a sequence.
seq | Bioseq to edit |
tech | tech value to set |
Definition at line 1776 of file cleanup.cpp.
References CBioseq_EditHandle::AddSeqdesc(), CSeqdesc_Base::e_Molinfo, CMolInfo_Base::eBiomol_peptide, CBioseq_Handle::GetEditHandle(), CBioseq_Handle::GetInst(), CSeqdesc_Base::GetMolinfo(), CMolInfo_Base::GetTech(), CBioseq_Handle::IsAa(), CBioseq_Handle::IsSetInst(), CSeq_inst_Base::IsSetMol(), CMolInfo_Base::IsSetTech(), CMolInfo_Base::SetBiomol(), CSeqdesc_Base::SetMolinfo(), and CMolInfo_Base::SetTech().
Referenced by CNewCleanup_imp::CdRegionEC().
Definition at line 1358 of file cleanup.cpp.
References NStr::EqualNocase(), CSeq_feat_Base::GetData(), CSeqFeatData_Base::GetRna(), CSeqFeatData_Base::IsRna(), CSeq_feat_Base::IsSetData(), CRNA_ref_Base::IsSetExt(), CSeq_feat_Base::IsSetQual(), CSeq_feat_Base::SetData(), and CSeq_feat_Base::SetQual().
Referenced by WGSCleanup().
Definition at line 1345 of file cleanup.cpp.
References append(), CProt_ref_Base::GetName(), NStr::IsBlank(), CProt_ref_Base::IsSetName(), and CProt_ref_Base::SetName().
Referenced by s_SetProductOnFeat(), SetProteinName(), WGSCleanup(), CCleanupApp::x_FixCDS(), and CNewCleanup_imp::x_ProtGBQualBC().
|
static |
Definition at line 1425 of file cleanup.cpp.
References AddProteinFeature(), append(), CSerialObject::Assign(), CSeqFeatData::eSubtype_prot, CScope::GetBioseqHandle(), CMappedFeat::GetOriginalFeature(), CSeq_feat_Base::GetProduct(), CMappedFeat::GetSeq_feat_Handle(), CSeq_feat_Base::IsSetProduct(), CSeq_feat_Base::IsSetXref(), NON_CONST_ITERATE, prot, CSeq_feat_EditHandle::Replace(), s_SetProductOnFeat(), CSeq_feat_Base::SetData(), SetProteinName(), and CSeq_feat_Base::SetXref().
void CCleanup::SetScope | ( | CScope * | scope | ) |
Definition at line 108 of file cleanup.cpp.
References CScope::AddScope(), CObjectManager::GetInstance(), m_Scope, and CRef< C, Locker >::Reset().
Referenced by CAsnvalThreadState::ProcessSeqAnnot(), CAsnvalThreadState::ProcessSeqEntry(), CAsnvalThreadState::ProcessSeqFeat(), CAsnvalThreadState::ProcessSeqSubmit(), and CAsnvalThreadState::ReadClassMember().
Definition at line 4016 of file cleanup.cpp.
References FIELD_IS_SET, GET_FIELD, CBioseq_Base::GetId(), ITERATE, NCBI_SEQID, and CSeq_id_Base::Which().
Referenced by RescueSiteRefPubs(), and CNewCleanup_imp::SetGlobalFlags().
|
static |
Looks up Org-refs in the Seq-entry.
seh | Seq-entry to edit |
Definition at line 1972 of file cleanup.cpp.
References CSerialObject::Assign(), CSeqFeatData_Base::e_Biosrc, CSeqFeatData_Base::GetBiosrc(), CSeq_entry_Handle::GetCompleteSeq_entry(), CSeq_feat_Handle::GetData(), CBioSource_Base::GetOrg(), CSeq_feat_Handle::GetOriginalSeq_feat(), CSeq_entry_Handle::GetScope(), CScope::GetSeq_featHandle(), GetSourceDescriptors(), CBioSource_Base::IsSetOrg(), CSeq_feat_EditHandle::Replace(), CTaxon3::SendOrgRefList(), CSeq_feat_Base::SetData(), CBioSource_Base::SetOrg(), CSeqdesc_Base::SetSource(), and CTaxon3::yes.
|
static |
Update EC numbers.
ec_num_list | Prot-ref ec number list to clean |
Definition at line 1663 of file cleanup.cpp.
References CleanVisStringJunk(), CProt_ref::eEC_replaced, CProt_ref::GetECNumberReplacement(), CProt_ref::GetECNumberStatus(), NStr::IsBlank(), CProt_ref::IsECNumberSplit(), and NON_CONST_ITERATE.
Referenced by FixECNumbers(), and CNewCleanup_imp::x_CleanupECNumberListEC().
|
static |
Definition at line 4974 of file cleanup.cpp.
References x_ValidAminoAcid().
Referenced by CkQualPosSeqaa(), and GetQualValueAa().
|
static |
Performs WGS specific cleanup.
entry | Seq-entry to edit |
Definition at line 2653 of file cleanup.cpp.
References AddProtein(), AddProteinFeature(), AdjustForCDSPartials(), CSerialObject::Assign(), CopyFeaturePartials(), CSeqFeatData_Base::e_Cdregion, CSeqFeatData_Base::e_Gene, CSeqFeatData_Base::e_Rna, eExtreme_Biological, CSeq_inst_Base::eMol_na, eNa_strand_minus, NStr::Equal(), NStr::EqualNocase(), CSeqFeatData::eSubtype_prot, CSeqFeatData::eSubtype_rRNA, ExpandGeneToIncludeChildren(), ExtendedCleanup(), CAliasBase< TPrim >::Get(), CScope::GetBioseqHandle(), CScope::GetBioseqHandleFromTSE(), CBioseq_Handle::GetCompleteBioseq(), CSeq_feat_Base::GetData(), CSeq_loc::GetId(), CBioseq_Handle::GetInst(), CSeq_data_Base::GetIupacaa(), GetLength(), CSeq_feat_Base::GetLocation(), GetmRNAforCDS(), GetNewProtId(), CSeq_feat_Base::GetProduct(), GetProteinName(), CSeq_feat_Base::GetQual(), CSeqFeatData_Base::GetRna(), CRNA_ref::GetRnaProductName(), CSeq_entry_Handle::GetScope(), CSeq_inst_Base::GetSeq_data(), CScope::GetSeq_featHandle(), CSeq_loc::GetStart(), CSeq_loc::GetStop(), CSeq_loc::GetStrand(), CSeqFeatData::GetSubtype(), CSeq_entry_Handle::GetTopLevelEntry(), CSeq_entry_Handle::GetTSE_Handle(), NStr::IsBlank(), IsGeneralIdProtPresent(), CSeq_data_Base::IsIupacaa(), IsPseudo(), CSeq_feat_Base::IsSetData(), CBioseq_Handle::IsSetInst(), CSeq_feat_Base::IsSetProduct(), CSeq_feat_Base::IsSetQual(), CSeq_inst_Base::IsSetSeq_data(), CSeq_loc::IsSetStrand(), NormalizeDescriptorOrder(), prot, RemovePseudoProduct(), CSeq_feat_EditHandle::Replace(), RetranslateCDS(), s_CleanupIsShortrRNA(), SetBestFrame(), SetCDSPartialsByFrameAndTranslation(), SetGenePartialByLongestContainedFeature(), SetGeneticCodes(), CBioseq_EditHandle::SetInst(), CSeq_feat_Base::SetLocation(), SetMrnaName(), CSeq_feat_Base::SetProduct(), SetProteinName(), and CSeq_inst_Base::SetSeq_data().
Referenced by CTable2AsnValidator::Cleanup(), and CCleanupApp::x_ProcessXOptions().
|
staticprivate |
Definition at line 2906 of file cleanup.cpp.
References CSerialObject::Assign(), CSeq_entry_Handle::GetScope(), IsPseudo(), CSeq_feat_EditHandle::Replace(), x_AddLowQualityException(), and x_HasShortIntron().
Definition at line 2888 of file cleanup.cpp.
References NStr::Find(), CSeq_feat_Base::GetExcept_text(), NStr::IsBlank(), CSeq_feat_Base::IsSetExcept(), CSeq_feat_Base::IsSetExcept_text(), kLowQualitySequence, CSeq_feat_Base::SetExcept(), and CSeq_feat_Base::SetExcept_text().
Referenced by AddLowQualityException(), and x_AddLowQualityException().
|
staticprivate |
Definition at line 74 of file cleanup_user_object.cpp.
References Asn2gnbkCompressSpaces(), CleanupUserObject(), CleanVisString(), CUser_field_Base::C_Data::e_Fields, CUser_field_Base::C_Data::e_Object, CUser_field_Base::C_Data::e_Objects, CUser_field_Base::C_Data::e_Str, CUser_field_Base::C_Data::e_Strs, CUser_field_Base::GetData(), CUser_field_Base::GetLabel(), CUser_field_Base::IsSetData(), CUser_field_Base::IsSetLabel(), CObject_id_Base::IsStr(), s_AddNumToUserField(), CUser_field_Base::SetData(), CUser_field_Base::SetLabel(), str(), and CUser_field_Base::C_Data::Which().
Definition at line 2841 of file cleanup.cpp.
References abs, eNa_strand_minus, eNa_strand_plus, CRange_Base::GetFrom(), CSeq_loc_CI::GetRange(), CSeq_loc_CI::GetStrand(), CRange_Base::GetTo(), CSeq_loc_CI::IsEmpty(), and CSeq_loc_CI::IsSetStrand().
Referenced by x_AddLowQualityException().
Definition at line 3670 of file cleanup.cpp.
References a, COrgName_Base::GetDiv(), COrgName_Base::GetGcode(), COrgName_Base::GetLineage(), COrgName_Base::GetMgcode(), COrgName_Base::GetMod(), COrgName_Base::IsSetDiv(), COrgName_Base::IsSetGcode(), COrgName_Base::IsSetLineage(), COrgName_Base::IsSetMgcode(), COrgName_Base::IsSetMod(), ITERATE, COrgName_Base::SetDiv(), COrgName_Base::SetGcode(), COrgName_Base::SetLineage(), COrgName_Base::SetMgcode(), and COrgName_Base::SetMod().
Referenced by x_MergeDupOrgRefs().
Definition at line 3726 of file cleanup.cpp.
References a, COrg_ref_Base::GetDb(), COrg_ref_Base::GetMod(), COrg_ref_Base::GetOrgname(), COrg_ref_Base::GetSyn(), HasMod(), COrg_ref_Base::IsSetDb(), COrg_ref_Base::IsSetMod(), COrg_ref_Base::IsSetOrgname(), COrg_ref_Base::IsSetSyn(), ITERATE, COrg_ref_Base::SetDb(), COrg_ref_Base::SetMod(), COrg_ref_Base::SetOrgname(), COrg_ref_Base::SetSyn(), and x_MergeDupOrgNames().
Referenced by MergeDupBioSources().
Definition at line 613 of file cleanup.hpp.
Referenced by CCleanup(), and SetScope().