1 #ifndef CLEANUP___CLEANUP__HPP
2 #define CLEANUP___CLEANUP__HPP
73 eClean_NoReporting = 0x1,
74 eClean_GpipeMode = 0x2,
75 eClean_NoNcbiUserObjects = 0x4,
76 eClean_SyncGenCodes = 0x8,
77 eClean_NoProteinTitles = 0x10,
78 eClean_KeepTopSet = 0x20,
79 eClean_KeepSingleSeqSet = 0x40,
80 eClean_InHugeSeqSet = 0x80,
89 CCleanup(
CScope* scope =
nullptr, EScopeOptions scope_handling = eScope_Copy);
95 void SetScope(
CScope* scope);
138 static bool ShouldStripPubSerial(
const CBioseq& bs);
175 static bool RemoveNonsuppressingGeneXrefs(
CSeq_feat&
f);
257 static bool ExtendStopPosition(
CSeq_feat&
f,
const CSeq_feat* cdregion,
size_t extension = 0);
283 static bool SetCDSPartialsByFrameAndTranslation(
CSeq_feat& cds,
CScope& scope);
287 static bool ClearInternalPartials(
CSeq_loc& loc,
bool is_first =
true,
bool is_last =
true);
288 static bool ClearInternalPartials(
CSeq_loc_mix& mix,
bool is_first =
true,
bool is_last =
true);
289 static bool ClearInternalPartials(
CPacked_seqint& pint,
bool is_first =
true,
bool is_last =
true);
314 static bool SetGenePartialByLongestContainedFeature(
CSeq_feat& gene,
CScope& scope);
318 static void SetMrnaName(
CSeq_feat& mrna,
const string& protein_name);
338 static bool AddMissingMolInfo(
CBioseq& seq,
bool is_product);
348 static bool RemoveNcbiCleanupObject(
CSeq_entry &seq_entry);
351 static void AddNcbiCleanupObject(
int ncbi_cleanup_version,
CSeq_descr& descr);
367 static bool AddPartialToProteinTitle(
CBioseq &bioseq);
386 static bool WGSCleanup(
CSeq_entry_Handle entry,
bool instantiate_missing_proteins =
true,
Uint4 options = 0,
387 bool run_extended_cleanup =
true);
399 static bool NormalizeDescriptorOrder(
CSeq_descr& descr);
428 vector<TEntrezId>& pmids, vector<TEntrezId>& muids, vector<int>& serials,
429 vector<string>& published_labels, vector<string>& unpublished_labels);
436 static vector<CConstRef<CPub> > GetCitationList(
CBioseq_Handle bsh);
439 static bool RemoveDuplicatePubs(
CSeq_descr& descr);
442 static bool OkToPromoteNpPub(
const CPubdesc& pd);
445 static bool OkToPromoteNpPub(
const CBioseq&
b);
460 static bool IsMinPub(
const CPubdesc& pd,
bool is_refseq_prot);
466 static bool RemoveDupBioSource(
CSeq_descr& descr);
472 static bool MergeDupBioSources(
CSeq_descr& descr);
518 static bool ParseCodeBreak(
const CSeq_feat& feat,
566 static bool FixRNAEditingCodingRegion(
CSeq_feat& cds);
591 static vector<TFeatGenePair> GetNormalizableGeneQualPairs(
CBioseq_Handle bsh);
598 static bool CleanupAuthor(
CAuthor& author,
bool fix_initials =
true);
599 static bool CleanupAuthList(
CAuth_list& al,
bool fix_initials =
true);
601 static bool CleanupAffil(
CAffil& af);
610 static char ValidAminoAcid(string_view abbrev);
615 static bool x_CleanupUserField(
CUser_field& field);
620 static bool x_HasShortIntron(
const CSeq_loc& loc,
size_t min_len = 11);
621 static bool x_AddLowQualityException(
CSeq_feat& feat);
624 static bool s_IsProductOnFeat(
const CSeq_feat& cds);
625 static void s_SetProductOnFeat(
CSeq_feat& feat,
const string& protein_name,
bool append);
628 static bool s_CleanupStructuredComment(
CUser_object& obj);
632 static bool s_AddNumToUserField(
CUser_field &field);
634 static bool s_CleanupNameStdBC(
CName_std& name,
bool fix_initials);
635 static void s_ExtractSuffixFromInitials(
CName_std& name);
User-defined methods of the data storage class.
static CRef< CSeq_loc > ExtendToStopCodon(CRef< CSeq_feat > feat, CScope *scope)
@Affil.hpp User-defined methods of the data storage class.
@Auth_list.hpp User-defined methods of the data storage class.
CBioseq_set_EditHandle –.
CCleanup(const CCleanup &)=delete
static bool s_Flatten(CPub_equiv &pub_equiv)
pair< CSeq_feat_Handle, CSeq_feat_Handle > TFeatGenePair
CCleanup & operator=(const CCleanup &)=delete
@Name_std.hpp User-defined methods of the data storage class.
@Pubdesc.hpp User-defined methods of the data storage class.
@Seq_descr.hpp User-defined methods of the data storage class.
namespace ncbi::objects::
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
static const struct name_t names[]
static void DLIST_NAME() append(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
string GetProteinName(const CBioseq_Handle &seq)
Return protein name from corresponding Prot-ref feature.
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
#define NCBI_CLEANUP_EXPORT
EMol
molecule class in living organism
@ eMol_not_set
> cdna = rna
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
CRef< CSeq_loc > SeqLocExtend(const CSeq_loc &loc, size_t pos, CScope *scope)