NCBI C++ ToolKit
newcleanupp.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef NEWCLEANUP__HPP
2 #define NEWCLEANUP__HPP
3 
4 /*
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Robert Smith, Jonathan Kans, Michael Kornbluh
30 *
31 * File Description:
32 * Basic and Extended Cleanup of CSeq_entries.
33 *
34 * ===========================================================================
35 */
36 
37 #include <stack>
38 
42 #include <objects/seq/MolInfo.hpp>
44 
46 
49 
50 class CSeq_entry;
51 class CSeq_submit;
52 class CBioseq;
53 class CBioseq_set;
54 class CSeq_annot;
55 class CSeq_feat;
56 class CSeqFeatData;
57 class CSeq_descr;
58 class CSeqdesc;
59 class CSeq_loc;
60 class CSeq_loc_mix;
61 class CGene_ref;
62 class CProt_ref;
63 class CRNA_ref;
64 class CImp_feat;
65 class CGb_qual;
66 class CDbtag;
67 class CUser_field;
68 class CUser_object;
69 class CObject_id;
70 class CGB_block;
71 class CMolInfo;
72 class CEMBL_block;
73 class CPubdesc;
74 class CPub_equiv;
75 class CPub;
76 class CCit_gen;
77 class CCit_sub;
78 class CCit_art;
79 class CCit_book;
80 class CCit_pat;
81 class CCit_let;
82 class CId_pat;
83 class CCit_proc;
84 class CCit_jour;
85 class CPubMedId;
86 class CAuth_list;
87 class CAuthor;
88 class CAffil;
89 class CPerson_id;
90 class CName_std;
91 class CBioSource;
92 class COrg_ref;
93 class COrgName;
94 class CSubSource;
95 class CMolInfo;
96 class CCdregion;
97 class CDate;
98 class CDate_std;
99 class CImprint;
100 class CSubmit_block;
101 class CSeq_align;
102 class CDense_diag;
103 class CDense_seg;
104 class CStd_seg;
105 class CMedline_entry;
106 class CPub_set;
107 class CTrna_ext;
108 class CPCRPrimerSet;
109 class CPCRReactionSet;
110 
111 class CSeq_entry_Handle;
112 class CBioseq_Handle;
113 class CBioseq_set_Handle;
114 class CSeq_annot_Handle;
115 class CSeq_feat_Handle;
116 
117 class CObjectManager;
118 class CScope;
119 
121 {
122 public:
123 
124  static const int NCBI_CLEANUP_VERSION = 1; ///< Version number of this cleanup code. NOTE(review): presumably the value recorded by AddNcbiCleanupObject -- confirm in the .cpp.
125 
126  // some cleanup functions will return a value telling you whether
127  // to erase the cleaned value ( or whatever action may be
128  // required ).
129  enum EAction {
132  };
133 
134  // Constructor. `options` defaults to 0. NOTE(review): `changes` and `options` presumably initialize m_Changes and m_Options -- confirm in the .cpp.
135  CNewCleanup_imp (CRef<CCleanupChange> changes, Uint4 options = 0);
136 
137  // Destructor (declared virtual)
138  virtual ~CNewCleanup_imp ();
139 
140  /// Main methods
141 
142  void SetScope(CScope& scope) { m_Scope.Reset(&scope); } ///< Makes m_Scope refer to the caller-supplied scope object.
143 
144  /// Basic Cleanup methods: one entry point per object type, each taking the object by non-const reference.
145 
146  void BasicCleanupSeqEntry (
147  CSeq_entry& se ///< Seq-entry to run basic cleanup on
148  );
149 
150  void BasicCleanupSeqSubmit (
151  CSeq_submit& ss ///< Seq-submit to run basic cleanup on
152  );
153 
155 
156  void BasicCleanupSeqAnnot (
157  CSeq_annot& sa ///< Seq-annot to run basic cleanup on
158  );
159 
160  void BasicCleanupBioseq (
161  CBioseq& bs ///< Bioseq to run basic cleanup on
162  );
163 
164  void BasicCleanupBioseqSet (
165  CBioseq_set& bss ///< Bioseq-set to run basic cleanup on
166  );
167 
168  void BasicCleanupSeqFeat (
169  CSeq_feat& sf ///< Seq-feat to run basic cleanup on
170  );
171 
172  void BasicCleanupBioSource (
173  CBioSource& src ///< BioSource to run basic cleanup on
174  );
175 
177  CSeq_entry_Handle& seh
178  );
179 
181  CBioseq_Handle& bsh
182  );
183 
185  CBioseq_set_Handle& bssh
186  );
187 
189  CSeq_annot_Handle& sah
190  );
191 
193  CSeq_feat_Handle& sfh
194  );
195 
196  void BasicCleanup(CPubdesc& pd, bool strip_serial); ///< Basic cleanup of one Pubdesc. NOTE(review): strip_serial presumably has the same meaning as m_StripSerial (references should NOT keep serial numbers) -- confirm.
197 
198  void BasicCleanup(CSeqdesc& desc); ///< Basic cleanup of a single Seqdesc, taken by non-const reference.
199 
200  /// Extended Cleanup methods
201 
203  CSeq_entry& se
204  );
205 
207  CSeq_submit& ss
208  );
209 
211  CSeq_annot& sa
212  );
213 
215  CSeq_entry_Handle& seh
216  );
217 
219 
220  void ExtendedCleanup(CBioSource& biosrc); ///< Extended cleanup entry point for a single BioSource.
221 
222  static bool ShouldRemoveAnnot(const CSeq_annot& annot); ///< Static predicate over a read-only annot. NOTE(review): true presumably means the annot should be removed -- confirm the criteria in the .cpp.
223 
224  static void AddNcbiCleanupObject(CSeq_descr& descr); ///< Static helper operating on a descriptor list. NOTE(review): presumably adds a cleanup-marker object to descr -- confirm.
225 
226 private:
227 
228  // many more methods and variables ...
229 
230  // We do not include the usual "x_" prefix for private functions
231  // because we want to be able to distinguish between higher-level
232  // functions like those just below, and the lower-level
233  // functions like those farther below.
234 
236 
237  void EnteringEntry(CSeq_entry& se);
238  void LeavingEntry (CSeq_entry& se);
239 
240  void SetGeneticCode (CBioseq& bs);
241 
242  void SubmitblockBC (CSubmit_block& sb);
243 
244  void SeqsetBC (CBioseq_set& bss);
245  void ProtSeqBC (CBioseq& bs);
246 
247  void SeqIdBC( CSeq_id &seq_id );
248 
249  void GBblockOriginBC( string& str);
250  void GBblockBC (CGB_block& gbk);
251  void EMBLblockBC (CEMBL_block& emb);
252 
253  void BiosourceFeatBC (CBioSource& biosrc, CSeq_feat & seqfeat);
254  void BiosourceBC (CBioSource& bsc);
255  void OrgrefModBC (string& str);
256  void OrgrefBC (COrg_ref& org);
257  void x_MovedNamedValuesInStrain(COrgName& orgname);
258  void x_MovedNamedValuesInStrain(COrgName& orgname, COrgMod::ESubtype stype, const string& prefix);
259  void OrgnameBC (COrgName& onm, COrg_ref &org_ref);
260  void OrgmodBC (COrgMod& omd);
261 
262  void DbtagBC (CDbtag& dbt);
263 
264  void PubdescBC (CPubdesc& pub);
265  void PubSetBC( CPub_set &pub_set );
266 
267  void ImpFeatBC( CSeq_feat& sf );
268 
269  void SiteFeatBC( const CSeqFeatData::ESite &site, CSeq_feat& sf );
270 
271  void SeqLocBC( CSeq_loc &loc );
272  void ConvertSeqLocWholeToInt( CSeq_loc &loc );
273  void SeqLocMixBC( CSeq_loc_mix & loc_mix );
274 
275  void SeqfeatBC (CSeq_feat& sf);
276 
277  void GBQualBC (CGb_qual& gbq);
278  void Except_textBC (string& except_text);
279 
280  void GenerefBC (CGene_ref& gr);
281  void ProtNameBC ( std::string & str );
282  void ProtActivityBC ( std::string & str );
283  void ProtrefBC (CProt_ref& pr);
284  void RnarefBC (CRNA_ref& rr);
285  void RnarefGenBC(CRNA_ref& rr);
286 
287  void GeneFeatBC (CGene_ref& gr, CSeq_feat& sf);
288  void ProtFeatfBC (CProt_ref& pr, CSeq_feat& sf);
289  void PostProtFeatfBC (CProt_ref& pr);
290  void RnaFeatBC (CRNA_ref& rr, CSeq_feat& sf);
291  void CdregionFeatBC (CCdregion& cds, CSeq_feat& seqfeat);
292 
293  static bool x_IsCommentRedundantWithEC(const CSeq_feat& seqfeat, CScope& scope);
294 
295  void DeltaExtBC( CDelta_ext & delta_ext, CSeq_inst &seq_inst );
296 
297  void UserObjectBC( CUser_object &user_object );
298 
299  void PCRReactionSetBC( CPCRReactionSet &pcr_reaction_set );
300  void SubSourceListBC(CBioSource& biosrc);
301 
302  void MolInfoBC( CMolInfo &molinfo );
303  void CreateMissingMolInfo( CBioseq& seq );
304 
305  static bool IsInternalTranscribedSpacer(const string& name);
306  static bool TranslateITSName( string &in_out_name );
307 
308 
309  // Extended Cleanup functions
310  void BioSourceEC ( CBioSource& biosrc );
311  void AddProteinTitles (CBioseq& seq);
312  void ProtRefEC( CProt_ref& pr);
313  void CdRegionEC( CSeq_feat& sf);
314  bool x_FixParentPartials(const CSeq_feat& sf, CSeq_feat& parent);
318  static bool IsSyntheticConstruct(const CBioSource& src);
319 
320  void MoveDbxrefs(CSeq_feat& sf);
321  void MoveStandardName(CSeq_feat& sf);
322  void ResynchProteinPartials ( CSeq_feat& feat );
323  void ResynchPeptidePartials( CBioseq& seq );
324  void x_SetPartialsForProtein(CBioseq& prot, bool partial5, bool partial3, bool feat_partial);
325  void RemoveBadProteinTitle(CBioseq& seq);
326  void MoveCitationQuals(CBioseq& seq);
327  void x_RemoveUnseenTitles(CSeq_descr& seq_descr);
328  void KeepLatestDateDesc(CSeq_descr & seq_descr);
330  void x_MergeDupBioSources(CSeq_descr & seq_descr);
331 
332  // void XxxxxxBC (Cxxxxx& xxx);
333 
334  // Prohibit copy constructor & assignment operator
337 
338 private:
339 
340  // data structures used for post-processing
341 
342  // recorded by x_NotePubdescOrAnnotPubs
343  typedef std::map<int, int> TMuidToPmidMap; // int -> int map; per its name, muid -> pmid (type of m_MuidToPmidMap)
345  // recorded by x_RememberMuidThatMightBeConvertibleToPmid
346  typedef std::vector< CRef<CPub> > TMuidPubContainer; // list of pubs (type of m_MuidPubContainer)
348  // m_OldLabelToPubMap and m_PubToNewPubLabelMap work together.
349  // They supply "old_label -> node" and "node -> new_label", respectively,
350  // so together we can get a mapping of "old_label -> new_label".
351  // m_OldLabelToPubMap is a multimap because a node's address may change as we do our cleaning, and
352  // at least one should remain so we can make the "old_label -> new_label" connection.
353  typedef std::multimap< string, CRef<CPub> > TOldLabelToPubMap; // old label -> pub node; several nodes may share one label
355  // remember label changes
356  typedef std::map< CRef<CPub>, string > TPubToNewPubLabelMap; // pub node -> new label
358  // remember all Seq-feat CPubs so we remember to change them later
359  typedef std::vector< CRef<CPub> > TSeqFeatCitPubContainer; // type of m_SeqFeatCitPubContainer
361  // note all Pubdesc/annot cit-gen labels
362  typedef std::vector<string> TPubdescCitGenLabelVec; // type of m_PubdescCitGenLabelVec
364 
365  enum EGBQualOpt {
368  };
369 
370  // Gb_qual cleanup.
372  void x_CleanSeqFeatQuals(CSeq_feat& sf);
373  EAction GBQualSeqFeatBC(CGb_qual& gbq, CSeq_feat& seqfeat);
374 
375  void x_AddNcbiCleanupObject( CSeq_entry &seq_entry );
376 
377 
378  // for rpt_unit and replace GenBank qualifiers
379  bool x_CleanupRptUnit(CGb_qual& gbq); // works on the qualifier in place. NOTE(review): bool presumably reports whether anything changed -- confirm.
380  bool x_CleanupRptUnitRange(string& val); // works on the value string in place. NOTE(review): return meaning not visible here -- confirm.
381  static bool x_IsBaseRange(const string& val); // static, read-only test of val
382  static bool x_IsDotBaseRange(const string& val); // static, read-only test of val. NOTE(review): presumably the "dot"-separated range form -- confirm.
383  static bool x_IsHyphenBaseRange(const string& val); // static, read-only test of val. NOTE(review): presumably the hyphen-separated range form -- confirm.
384 
385 
389  EAction x_GeneGBQualBC( CGene_ref& gene, const CGb_qual& gb_qual );
390  EAction x_SeqFeatCDSGBQualBC(CSeq_feat& feat, CCdregion& cds, const CGb_qual& gb_qual);
391  EAction x_HandleTrnaProductGBQual(CSeq_feat& feat, CRNA_ref& rna, const string& product);
392  EAction x_HandleStandardNameRnaGBQual(CSeq_feat& feat, CRNA_ref& rna, const string& standard_name);
394  EAction x_ProtGBQualBC(CProt_ref& prot, const CGb_qual& gb_qual, EGBQualOpt opt );
396  void x_CleanupOldName(COrg_ref& org);
397  void x_CleanupOrgModNoteEC(COrg_ref& org);
398  void x_AddToComment(CSeq_feat& feat, const string& comment);
399 
400  // publication-related cleanup
401 // void x_FlattenPubEquiv(CPub_equiv& pe);
402 
403  // Date-related
404  void x_DateStdBC( CDate_std& date );
405 
406  void x_AddReplaceQual(CSeq_feat& feat, const string& str);
407 
408  void x_SeqIntervalBC( CSeq_interval & seq_interval );
409  void x_BothStrandBC( CSeq_loc &loc );
410  void x_BothStrandBC( CSeq_interval & seq_interval );
411 
412  void x_SplitDbtag( CDbtag &dbt, vector< CRef< CDbtag > > & out_new_dbtags );
413 
414  void x_SeqFeatTRNABC( CSeq_feat& feat, CTrna_ext & tRNA );
415 
416  // modernize PCR Primer
417  void x_ModernizePCRPrimers( CBioSource &biosrc );
418 
419  void x_CleanupOrgModAndSubSourceOther( COrgName &orgname, CBioSource &biosrc );
420 
421  void x_OrgnameModBC( COrgName &orgname, const string &org_ref_common );
422 
423  void x_SubSourceBC( CSubSource & subsrc );
424  void x_OrgModBC( COrgMod & orgmod );
425 
426  void x_FixUnsetMolFromBiomol( CMolInfo& molinfo, CBioseq &bioseq );
427  void FixUnsetMolFromBiomol(CMolInfo::TBiomol biomol, CBioseq& bioseq);
428 
429  void x_AddPartialToProteinTitle( CBioseq &bioseq );
430 
431  string x_ExtractSatelliteFromComment( string &comment );
432 
433  void x_RRNANameBC( string &name );
434 
435  void x_CleanupECNumber( string &ec_num );
436  void x_CleanupECNumberList( CProt_ref::TEc & ec_num_list );
437  void x_CleanupECNumberListEC( CProt_ref::TEc & ec_num_list );
438 
439  void x_CleanupAndRepairInference( string &inference );
440 
441  void x_MendSatelliteQualifier( string &val );
442 
443  // e.g. if ends with ",..", turn into "..."
444  void x_FixUpEllipsis( string &str );
445 
446  void x_RemoveFlankingQuotes( string &val );
447 
448  void x_MoveCdregionXrefsToProt (CCdregion& cds, CSeq_feat& seqfeat);
449  bool x_InGpsGenomic( const CSeq_feat& seqfeat );
450 
451  void x_AddNonCopiedQual(
452  vector< CRef< CGb_qual > > &out_quals,
453  const char *qual,
454  const char *val );
455 
456  void x_GBQualToOrgRef( COrg_ref &org, CSeq_feat &seqfeat );
457  void x_MoveSeqdescOrgToSourceOrg( CSeqdesc &seqdesc );
458  void x_MoveSeqfeatOrgToSourceOrg( CSeq_feat &seqfeat );
459 
461 
462  // string cleanup funcs
466  bool x_CompressSpaces( string &str );
472 
473  void x_PostSeqFeat( CSeq_feat& seq_feat );
474  void x_PostOrgRef( COrg_ref& org );
475  void x_PostBiosource( CBioSource& biosrc );
476 
477  void x_TranslateITSNameAndFlag( string &in_out_name ) ;
478 
479  void x_PCRPrimerSetBC( CPCRPrimerSet &primer_set );
480 
481  void x_CopyGBBlockDivToOrgnameDiv( CSeq_entry &seq_entry);
482 
484 
485  // After we've traversed the hierarchy of objects, there may be some
486  // processing that can only be done after the traversal is complete.
487  // This function does that processing.
488  void x_PostProcessing(void);
489 
490  // after cleaning bioseq and bioseq-set, need to clear empty descriptors
491  void x_ClearEmptyDescr( CBioseq_set& bioseq_set );
492  void x_ClearEmptyDescr( CBioseq& bioseq );
493 
494  // removes single-strandedness from non-viral nucleotide sequences
495  void x_RemoveSingleStrand( CBioseq& bioseq );
496 
497  // functions that prepare for post-processing while traversing
498  void x_NotePubdescOrAnnotPubs( const CPub_equiv &pub_equiv );
500  const CPub_equiv &pub_equiv, int &muid, int &pmid );
501  void x_RememberPubOldLabel( CPub &pub );
503  void x_RememberSeqFeatCitPubs( CPub &pub );
504 
506  void AddMolInfo(CBioseq_set& set, const CMolInfo& mol);
507  void AddMolInfo(CBioseq& seq, const CMolInfo& mol);
508 
509 private:
510  void x_SortSeqDescs( CSeq_entry & seq_entry );
511 
513 
514  void x_RemoveDupBioSource( CBioseq_set & bioseq_set );
515  void x_RemoveDupBioSource(CSeq_entry& se, const CBioSource& src);
516 
517  void x_RemoveDupPubs(CSeq_descr & descr);
518 
520  void x_RemoveRedundantComment( CGene_ref& gene, CSeq_feat& seq_feat );
521  void x_ExceptTextEC(string& except_text);
522 
523  void x_tRNAEC(CSeq_feat& seq_feat);
524  void x_tRNACodonEC(CSeq_feat& seq_feat);
525  static bool x_IsCodonCorrect(int codon_index, int gcode, unsigned char aa);
526 
527  void x_RemoveEmptyUserObject( CSeq_descr & seq_descr );
528  void x_SetMolInfoTechFromGenBankBlock(CSeq_descr& seq_descr, CGB_block& block);
530  static bool x_CleanGenbankKeywords(CGB_block& blk, CMolInfo::TTech tech);
531  void x_CleanupGenbankBlock(CBioseq& seq);
533  void x_CleanupGenbankBlock( CSeq_descr & seq_descr );
534  void x_CleanupGenbankBlock(CGB_block& block, bool is_patent, CConstRef<CBioSource> biosrc, CMolInfo::TTech tech);
535  static bool x_CanRemoveGenbankBlockSource(const string& src, const CBioSource& biosrc);
536  void x_RescueMolInfo(CBioseq& seq);
537  void x_RemoveOldDescriptors( CSeq_descr & seq_descr );
538  void x_RemoveEmptyDescriptors(CSeq_descr& seq_descr);
539  void x_RemoveEmptyFeatures( CSeq_annot & seq_annot );
540  void x_RemoveEmptyFeatureTables( CBioseq & bioseq );
541  void x_RemoveEmptyFeatureTables( CBioseq_set & bioseq_set );
542  void x_MergeAdjacentFeatureTables( CBioseq & bioseq );
543  void x_MergeAdjacentFeatureTables(list< CRef< CSeq_annot > > & annot_list);
544  void x_MergeAdjacentFeatureTables(CBioseq_set & bioseq_set);
545  bool x_CleanEmptyFeature(CSeq_feat& feat);
546  bool x_ShouldRemoveEmptyFeature(const CSeq_feat& feat );
547  bool x_CleanEmptyGene(CGene_ref& gene);
548  bool x_ShouldRemoveEmptyGene(const CGene_ref& gene, const CSeq_feat& feat);
550  bool x_ShouldRemoveEmptyProt(const CProt_ref& prot );
551  void x_BondEC(CSeq_feat& feat);
552  static bool x_IsPubContentBad(const CPubdesc& pub, bool strict);
553  static bool x_IsPubContentBad(const CPub& pub);
554  static bool x_IsPubContentBad(const CId_pat& pat);
555  bool x_ShouldRemoveEmptyPub(const CPubdesc& pubdesc );
556  static bool x_IsGenbankBlockEmpty(const CGB_block& gbk);
557  void x_RemoveOldFeatures(CBioseq & bioseq);
558 
559  void x_BioseqSetEC( CBioseq_set & bioseq_set );
560  void x_ChangePopToPhy(CBioseq_set& bioseq_set);
563  void x_RemovePopPhyBioSource(CBioseq_set& set, const COrg_ref& org);
564  void x_RemovePopPhyBioSource(CBioseq& seq, const COrg_ref& org);
566  void x_BioseqSetNucProtEC( CBioseq_set & bioseq_set );
567  void x_BioseqSetGenBankEC(CBioseq_set & bioseq_set);
568  void x_RemoveNestedGenBankSet(CBioseq_set & bioseq_set);
569  void x_RemoveNestedNucProtSet(CBioseq_set & bioseq_set);
570  void x_MoveNpDBlinks(CBioseq_set& bioseq_set);
571  void x_MoveNpSrc(CBioseq_set& bioseq_set);
572  void x_MoveNpSrc(CRef<CSeqdesc>& srcdesc, CSeq_descr& descr);
574 
575  void x_MoveNpPub(CBioseq_set& bioseq_set);
576  void x_MoveNpPub(CBioseq_set& np_set, CSeq_descr& descr);
577  void x_MovePopPhyMutPub(CBioseq_set& bioseq_set);
578  void x_RemovePub(CSeq_entry& se, const CPubdesc& pub);
579 
580  bool x_IsDBLinkUserObj( const CSeqdesc & desc );
581 
582  bool x_FixMiscRNA(CSeq_feat& feat);
583  void x_ModernizeRNAFeat(CSeq_feat& feat);
584 
586 
587 protected:
588 
589  // variables used for the whole cleaning process
590 
591  /// If set, holds all the cleanup changes that have occurred so far
593  /// See CCleanup::EValidOptions
595  /// For simplicity, the same object manager is used wherever possible
597  /// For simplicity, the same CScope is used wherever possible
599  /// Set via m_Options when we're in gpipe cleanup mode
600  bool m_IsGpipe { false };
601  /// Set via m_Options to synchronize Cdregion genetic codes with BioSource
602  bool m_SyncGenCodes { false };
603 
604  void SetGlobalFlags(const CSeq_entry& se, bool reset = true); // NOTE(review): `reset` (default true) presumably calls ResetGlobalFlags() before scanning -- confirm in the .cpp.
605  void SetGlobalFlags(const CSeq_submit& ss); // overload for a Seq-submit; takes no `reset` parameter
606  void SetGlobalFlags(const CBioseq& bs, bool reset = true); // overload for a single Bioseq
607  void SetGlobalFlags(const CBioseq_set& set, bool reset = true); // overload for a Bioseq-set
608  void ResetGlobalFlags() { m_StripSerial = true; m_IsEmblOrDdbj = false; } // puts both flags back to their in-class default values
609  /// set if references should NOT have serial numbers
610  /// under this entry. Defaults to true.
611  bool m_StripSerial { true };
612  /// tells if any Seq-id on any Bioseq in the blob being cleaned is embl or ddbj. Defaults to false.
613  bool m_IsEmblOrDdbj { false };
614 
615  bool m_KeepTopNestedSet { false }; // defaults to false. NOTE(review): purpose not documented here; presumably set from m_Options -- confirm.
616  bool m_KeepSingleSeqSet { false }; // defaults to false. NOTE(review): purpose not documented here; presumably set from m_Options -- confirm.
617 
618  friend class CAutogeneratedCleanup;
620 };
621 
622 
625 
626 #endif /* NEWCLEANUP__HPP */
User-defined methods of the data storage class.
@Affil.hpp User-defined methods of the data storage class.
Definition: Affil.hpp:56
@Auth_list.hpp User-defined methods of the data storage class.
Definition: Auth_list.hpp:57
CAuthor –.
Definition: Author.hpp:59
This file was generated by application DATATOOL.
This file was generated by application DATATOOL.
CBioseq_Handle –.
CBioseq_set_Handle –.
CCdregion –.
Definition: Cdregion.hpp:66
Definition: Date.hpp:53
Definition: Dbtag.hpp:53
CEMBL_block –.
Definition: EMBL_block.hpp:66
@Gb_qual.hpp User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
@Imp_feat.hpp User-defined methods of the data storage class.
Definition: Imp_feat.hpp:54
CImprint –.
Definition: Imprint.hpp:66
@Name_std.hpp User-defined methods of the data storage class.
Definition: Name_std.hpp:56
void x_ModernizeRNAFeat(CSeq_feat &feat)
void MoveDbxrefs(CSeq_feat &sf)
static void AddNcbiCleanupObject(CSeq_descr &descr)
static bool x_CleanGenbankKeywords(CGB_block &blk, CMolInfo::TTech tech)
void OrgmodBC(COrgMod &omd)
CRef< CCleanupChange > m_Changes
If set, holds all the cleanup changes that have occurred so far.
void AddMolInfo(CBioseq_set &set, const CMolInfo &mol)
void BasicCleanupBioseq(CBioseq &bs)
void x_RemoveOldDescriptors(CSeq_descr &seq_descr)
void ResynchProteinPartials(CSeq_feat &feat)
static bool x_IsBaseRange(const string &val)
void x_CopyGBBlockDivToOrgnameDiv(CSeq_entry &seq_entry)
void ProtRefEC(CProt_ref &pr)
std::vector< CRef< CPub > > TMuidPubContainer
Uint4 m_Options
See CCleanup::EValidOptions.
void x_RemoveOldFeatures(CBioseq &bioseq)
void x_ChangeTransposonToMobileElement(CGb_qual &gbq)
void x_BondEC(CSeq_feat &feat)
void RnaFeatBC(CRNA_ref &rr, CSeq_feat &sf)
void x_NotePubdescOrAnnotPubs_RecursionHelper(const CPub_equiv &pub_equiv, int &muid, int &pmid)
void BasicCleanupSeqAnnot(CSeq_annot &sa)
void ExtendedCleanupSeqEntry(CSeq_entry &se)
Extended Cleanup methods.
void x_PostSeqFeat(CSeq_feat &seq_feat)
void SubSourceListBC(CBioSource &biosrc)
bool x_FixMiscRNA(CSeq_feat &feat)
void x_RemoveNestedNucProtSet(CBioseq_set &bioseq_set)
void x_AddNonCopiedQual(vector< CRef< CGb_qual > > &out_quals, const char *qual, const char *val)
void x_AddPartialToProteinTitle(CBioseq &bioseq)
void x_CleanupStringJunkMarkChanged(std::string &str)
bool x_ShouldRemoveEmptyFeature(const CSeq_feat &feat)
void x_PCRPrimerSetBC(CPCRPrimerSet &primer_set)
void BioSourceEC(CBioSource &biosrc)
EAction x_SeqFeatRnaGBQualBC(CSeq_feat &feat, CRNA_ref &rna, CGb_qual &gb_qual)
EAction x_ProtGBQualBC(CProt_ref &prot, const CGb_qual &gb_qual, EGBQualOpt opt)
void x_RemovePub(CSeq_entry &se, const CPubdesc &pub)
void RnarefGenBC(CRNA_ref &rr)
void PCRReactionSetBC(CPCRReactionSet &pcr_reaction_set)
void x_OrgnameModBC(COrgName &orgname, const string &org_ref_common)
static bool x_IsPubContentBad(const CPubdesc &pub, bool strict)
void BasicCleanupSeqFeat(CSeq_feat &sf)
void SeqLocMixBC(CSeq_loc_mix &loc_mix)
void x_ExtendProteinFeatureOnProteinSeq(CBioseq &seq)
void ProtSeqBC(CBioseq &bs)
void ExtendedCleanupSeqAnnot(CSeq_annot &sa)
void x_RRNANameBC(string &name)
void ExtendedCleanupSeqSubmit(CSeq_submit &ss)
void x_PostProcessing(void)
void CreateMissingMolInfo(CBioseq &seq)
void ExtendedCleanup(CSeq_entry_Handle &seh)
void BasicCleanupSubmitblock(CSubmit_block &sb)
void x_RemovePopPhyBioSource(CBioseq_set &set)
void x_SeqFeatTRNABC(CSeq_feat &feat, CTrna_ext &tRNA)
TSeqFeatCitPubContainer m_SeqFeatCitPubContainer
void x_BothStrandBC(CSeq_loc &loc)
void x_RemoveDupBioSource(CBioseq_set &bioseq_set)
void x_TranslateITSNameAndFlag(string &in_out_name)
static bool x_IsGenbankBlockEmpty(const CGB_block &gbk)
std::map< int, int > TMuidToPmidMap
void ProtNameBC(std::string &str)
void x_RememberSeqFeatCitPubs(CPub &pub)
bool x_CleanupRptUnit(CGb_qual &gbq)
void x_PostOrgRef(COrg_ref &org)
void x_ClearEmptyDescr(CBioseq_set &bioseq_set)
void AddProteinTitles(CBioseq &seq)
bool x_CleanEmptyGene(CGene_ref &gene)
bool x_IsDBLinkUserObj(const CSeqdesc &desc)
void x_PostBiosource(CBioSource &biosrc)
void MoveCitationQuals(CBioseq &seq)
void x_FixUnsetMolFromBiomol(CMolInfo &molinfo, CBioseq &bioseq)
void BasicCleanupBioseqHandle(CBioseq_Handle &bsh)
void x_BioseqSetNucProtEC(CBioseq_set &bioseq_set)
void BiosourceFeatBC(CBioSource &biosrc, CSeq_feat &seqfeat)
void BasicCleanupBioSource(CBioSource &src)
void x_AddToComment(CSeq_feat &feat, const string &comment)
void x_CleanupGenbankBlock(CBioseq &seq)
bool x_ShouldRemoveEmptyProt(const CProt_ref &prot)
void x_ExceptTextEC(string &except_text)
EAction x_SeqFeatCDSGBQualBC(CSeq_feat &feat, CCdregion &cds, const CGb_qual &gb_qual)
void x_ExtendSingleGeneOnMrna(CBioseq &seq)
void x_FixStructuredCommentKeywords(CSeq_descr &descr)
void GenerefBC(CGene_ref &gr)
void x_BioseqSetEC(CBioseq_set &bioseq_set)
void x_RemoveEmptyFeatureTables(CBioseq &bioseq)
void x_RemoveSingleStrand(CBioseq &bioseq)
void x_MoveNpDBlinks(CBioseq_set &bioseq_set)
void CdregionFeatBC(CCdregion &cds, CSeq_feat &seqfeat)
static bool IsSyntheticConstruct(const CBioSource &src)
void x_MoveCDSFromNucAnnotToSetAnnot(CBioseq_set &set)
void x_OrgModBC(COrgMod &orgmod)
void x_CleanupAndRepairInference(string &inference)
void x_MoveNpPub(CBioseq_set &bioseq_set)
void ConvertSeqLocWholeToInt(CSeq_loc &loc)
EAction GBQualSeqFeatBC(CGb_qual &gbq, CSeq_feat &seqfeat)
void x_MoveCdregionXrefsToProt(CCdregion &cds, CSeq_feat &seqfeat)
void x_BioseqSetGenBankEC(CBioseq_set &bioseq_set)
void x_SetPartialsForProtein(CBioseq &prot, bool partial5, bool partial3, bool feat_partial)
static bool IsInternalTranscribedSpacer(const string &name)
EAction x_HandleTrnaProductGBQual(CSeq_feat &feat, CRNA_ref &rna, const string &product)
void x_MergeDupBioSources(CSeq_descr &seq_descr)
void x_ConvertGoQualifiers(CSeq_feat &sf)
TMuidPubContainer m_MuidPubContainer
void DeltaExtBC(CDelta_ext &delta_ext, CSeq_inst &seq_inst)
void x_RemoveFlankingQuotes(string &val)
void x_CleanupOrgModNoteEC(COrg_ref &org)
void BasicCleanupBioseqSet(CBioseq_set &bss)
void x_CollapseSet(CBioseq_set &set)
static bool x_IsDotBaseRange(const string &val)
bool x_ShouldRemoveEmptyPub(const CPubdesc &pubdesc)
bool x_CleanupRptUnitRange(string &val)
void x_SeqIntervalBC(CSeq_interval &seq_interval)
void x_ExpandCombinedQuals(CSeq_feat::TQual &quals)
void PubSetBC(CPub_set &pub_set)
void x_CleanupOrgModAndSubSourceOther(COrgName &orgname, CBioSource &biosrc)
void Except_textBC(string &except_text)
bool m_IsGpipe
Set via m_Options when we're in gpipe cleanup mode.
void x_RemoveDupPubs(CSeq_descr &descr)
void EnteringEntry(CSeq_entry &se)
void SeqsetBC(CBioseq_set &bss)
void BasicCleanupBioseqSetHandle(CBioseq_set_Handle &bssh)
void GeneFeatBC(CGene_ref &gr, CSeq_feat &sf)
bool x_ShouldRemoveEmptyGene(const CGene_ref &gene, const CSeq_feat &feat)
void SetScope(CScope &scope)
Main methods.
EAction x_GeneGBQualBC(CGene_ref &gene, const CGb_qual &gb_qual)
void ExtendedCleanupSeqEntryHandle(CSeq_entry_Handle &seh)
CNewCleanup_imp & operator=(const CNewCleanup_imp &)
bool x_FixParentPartials(const CSeq_feat &sf, CSeq_feat &parent)
void PostProtFeatfBC(CProt_ref &pr)
void x_CompressStringSpacesMarkChanged(std::string &str)
void SetGeneticCode(CBioseq &bs)
void BasicCleanupSeqEntryHandle(CSeq_entry_Handle &seh)
void x_RemovePopPhyMolInfo(CBioseq_set &set)
void ImpFeatBC(CSeq_feat &sf)
void BasicCleanupSeqSubmit(CSeq_submit &ss)
EAction x_HandleStandardNameRnaGBQual(CSeq_feat &feat, CRNA_ref &rna, const string &standard_name)
void SubmitblockBC(CSubmit_block &sb)
void x_CleanSeqFeatQuals(CSeq_feat &sf)
bool m_IsEmblOrDdbj
tells if any Seq-id on any Bioseq in the blob being cleaned is embl or ddbj.
static bool ShouldRemoveAnnot(const CSeq_annot &annot)
CNewCleanup_imp(CRef< CCleanupChange > changes, Uint4 options=0)
void x_RemoveUnseenTitles(CSeq_descr &seq_descr)
bool x_CleanEmptyFeature(CSeq_feat &feat)
void UserObjectBC(CUser_object &user_object)
void x_tRNACodonEC(CSeq_feat &seq_feat)
void x_SetMolInfoTechFromGenBankBlock(CSeq_descr &seq_descr, CGB_block &block)
void x_ChangeInsertionSeqToMobileElement(CGb_qual &gbq)
static bool x_IsHyphenBaseRange(const string &val)
virtual ~CNewCleanup_imp()
void x_DateStdBC(CDate_std &date)
void x_RemoveEmptyFeatures(CSeq_annot &seq_annot)
static bool TranslateITSName(string &in_out_name)
void x_MoveNpSrc(CBioseq_set &bioseq_set)
void x_ConvertDoubleQuotesMarkChanged(std::string &str)
void ProtActivityBC(std::string &str)
void SiteFeatBC(const CSeqFeatData::ESite &site, CSeq_feat &sf)
void x_CleanupECNumberListEC(CProt_ref::TEc &ec_num_list)
void x_MendSatelliteQualifier(string &val)
string x_ExtractSatelliteFromComment(string &comment)
void MoveStandardName(CSeq_feat &sf)
void RnarefBC(CRNA_ref &rr)
void BiosourceBC(CBioSource &bsc)
void CdRegionEC(CSeq_feat &sf)
void OrgnameBC(COrgName &onm, COrg_ref &org_ref)
void PubdescBC(CPubdesc &pub)
void x_CleanupOldName(COrg_ref &org)
void GBblockBC(CGB_block &gbk)
void GBQualBC(CGb_qual &gbq)
void x_SubSourceBC(CSubSource &subsrc)
void x_RemoveEmptyUserObject(CSeq_descr &seq_descr)
void ResynchPeptidePartials(CBioseq &seq)
bool x_InGpsGenomic(const CSeq_feat &seqfeat)
void x_RemoveProtDescThatDupsProtName(CProt_ref &prot)
bool x_CleanEmptyProt(CProt_ref &prot)
void BasicCleanup(CPubdesc &pd, bool strip_serial)
void x_MovePopPhyMutPub(CBioseq_set &bioseq_set)
std::vector< CRef< CPub > > TSeqFeatCitPubContainer
void x_ExtendedCleanupExtra(CSeq_entry_Handle seh)
void BasicCleanupSeqAnnotHandle(CSeq_annot_Handle &sah)
void DbtagBC(CDbtag &dbt)
void x_RemoveSpacesBetweenTildesMarkChanged(std::string &str)
void x_MovedNamedValuesInStrain(COrgName &orgname)
void x_SortSeqDescs(CSeq_entry &seq_entry)
void x_RemoveEmptyDescriptors(CSeq_descr &seq_descr)
void x_AddReplaceQual(CSeq_feat &feat, const string &str)
CNewCleanup_imp(const CNewCleanup_imp &)
void ResetGlobalFlags()
void x_NotePubdescOrAnnotPubs(const CPub_equiv &pub_equiv)
static bool x_IsCommentRedundantWithEC(const CSeq_feat &seqfeat, CScope &scope)
static bool x_CanRemoveGenbankBlockSource(const string &src, const CBioSource &biosrc)
void KeepLatestDateDesc(CSeq_descr &seq_descr)
void x_TruncateSpacesMarkChanged(std::string &str)
void SetGlobalFlags(const CSeq_entry &se, bool reset=true)
void x_RememberMuidThatMightBeConvertibleToPmid(CPub &pub)
void x_RemoveRedundantComment(CGene_ref &gene, CSeq_feat &seq_feat)
CRef< CObjectManager > m_Objmgr
For simplicity, the same object manager is used wherever possible.
void x_ExtendFeatureToCoverSequence(CSeq_feat_Handle fh, const CBioseq &seq)
void SeqfeatBC(CSeq_feat &sf)
TMuidToPmidMap m_MuidToPmidMap
void x_SplitDbtag(CDbtag &dbt, vector< CRef< CDbtag > > &out_new_dbtags)
TPubToNewPubLabelMap m_PubToNewPubLabelMap
std::map< CRef< CPub >, string > TPubToNewPubLabelMap
void ProtFeatfBC(CProt_ref &pr, CSeq_feat &sf)
TOldLabelToPubMap m_OldLabelToPubMap
void x_AuthListBCWithFixInitials(CAuth_list &al)
void x_RememberPubOldLabel(CPub &pub)
void BasicCleanupSeqFeatHandle(CSeq_feat_Handle &sfh)
void x_tRNAEC(CSeq_feat &seq_feat)
bool x_CompressSpaces(string &str)
void ChangeMade(CCleanupChange::EChanges e)
TPubdescCitGenLabelVec m_PubdescCitGenLabelVec
void x_MoveSeqfeatOrgToSourceOrg(CSeq_feat &seqfeat)
void x_CleanupECNumberList(CProt_ref::TEc &ec_num_list)
void x_CleanupStringMarkChanged(std::string &str)
static bool x_IsCodonCorrect(int codon_index, int gcode, unsigned char aa)
void ProtrefBC(CProt_ref &pr)
void GBblockOriginBC(string &str)
void SeqLocBC(CSeq_loc &loc)
bool m_SyncGenCodes
Set via m_Options to synchronize Cdregion genetic codes with BioSource.
void x_CleanupECNumber(string &ec_num)
void x_MoveNPTitle(CBioseq_set &set)
void OrgrefModBC(string &str)
void x_GBQualToOrgRef(COrg_ref &org, CSeq_feat &seqfeat)
std::vector< string > TPubdescCitGenLabelVec
void x_AddNcbiCleanupObject(CSeq_entry &seq_entry)
void x_MergeAdjacentFeatureTables(CBioseq &bioseq)
void FixUnsetMolFromBiomol(CMolInfo::TBiomol biomol, CBioseq &bioseq)
void x_RescueMolInfo(CBioseq &seq)
void BasicCleanupSeqEntry(CSeq_entry &se)
Basic Cleanup methods.
void OrgrefBC(COrg_ref &org)
void EMBLblockBC(CEMBL_block &emb)
void x_TrimInternalSemicolonsMarkChanged(std::string &str)
void x_DecodeXMLMarkChanged(std::string &str)
void x_SingleSeqSetToSeq(CBioseq_set &set)
void LeavingEntry(CSeq_entry &se)
std::multimap< string, CRef< CPub > > TOldLabelToPubMap
void MolInfoBC(CMolInfo &molinfo)
void SeqIdBC(CSeq_id &seq_id)
CRef< CScope > m_Scope
For simplicity, the same CScope is used wherever possible.
void RemoveBadProteinTitle(CBioseq &seq)
void x_ModernizePCRPrimers(CBioSource &biosrc)
void x_ChangePopToPhy(CBioseq_set &bioseq_set)
void x_FixUpEllipsis(string &str)
void x_StripSpacesMarkChanged(std::string &str)
void x_AddEnvSamplOrMetagenomic(CBioSource &biosrc)
void x_MoveSeqdescOrgToSourceOrg(CSeqdesc &seqdesc)
static const int NCBI_CLEANUP_VERSION
bool m_StripSerial
set if references should NOT have serial numbers under this entry.
void x_RemoveNestedGenBankSet(CBioseq_set &bioseq_set)
CObjectManager –.
@OrgMod.hpp User-defined methods of the data storage class.
Definition: OrgMod.hpp:54
CPCRPrimerSet –.
CPCRReactionSet –.
Definition: Pub.hpp:56
@Pubdesc.hpp User-defined methods of the data storage class.
Definition: Pubdesc.hpp:54
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
CScope –.
Definition: scope.hpp:92
CSeq_annot_Handle –.
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSubmit_block –.
Definition: set.hpp:45
string
Definition: cgiapp.hpp:687
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
list< string > TEc
Definition: Prot_ref_.hpp:110
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
ESERV_Site site
static const char * prefix[]
Definition: pcregrep.c:405
static const char * str(char *buf, int n)
Definition: stats.c:84
Modified on Thu Feb 29 12:20:40 2024 by modify_doxy.py rev. 669887