NCBI C++ ToolKit
autodef_feature_clause_base.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJMGR_UTIL___AUTODEF_FEATURE_CLAUSE_BASE__HPP
2 #define OBJMGR_UTIL___AUTODEF_FEATURE_CLAUSE_BASE__HPP
3 
4 /* $Id: autodef_feature_clause_base.hpp 87390 2019-08-26 14:51:57Z bollin $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Colleen Bollin
30 *
31 * File Description:
32 * Creates unique definition lines for sequences in a set using organism
33 * descriptions and feature clauses.
34 */
35 
36 #include <corelib/ncbistd.hpp>
38 
40 
43 
45 
46 
48 {
49 public:
// Container type used for lists of clauses: a vector of CRef handles to
// CAutoDefFeatureClause_Base objects.
50  typedef vector<CRef<CAutoDefFeatureClause_Base > > TClauseList;
51 
// Virtual destructor (the class declares virtual member functions).
// Declared only; the definition is not part of this header listing.
53  virtual ~CAutoDefFeatureClause_Base();
54 
// Adds `subclause` to this clause. Virtual; the CRef is taken by value.
// NOTE(review): presumably appends to the subclause list read by
// GetNumSubclauses() -- confirm against the implementation file.
55  virtual void AddSubclause (CRef<CAutoDefFeatureClause_Base> subclause);
56 
// Returns the text of this clause as a string.
// The flag names suggest: whether to print the typeword, whether the typeword
// is plural, and whether to leave out the allele. The exact formatting is
// decided in the implementation, which is not visible here.
57  string PrintClause(bool print_typeword, bool typeword_is_plural, bool suppress_allele);
58 
// Returns a CSeqFeatData::ESubtype for this clause. Virtual and const.
59  virtual CSeqFeatData::ESubtype GetMainFeatureSubtype() const;
// Number of entries in m_ClauseList.
// NOTE(review): not declared const although the body only calls size();
// callers holding a const reference cannot use it as written.
60  size_t GetNumSubclauses() { return m_ClauseList.size(); }
// Virtual labelling hook; `suppress_allele` is passed through by callers.
61  virtual void Label(bool suppress_allele);
// Take a raw (non-owning in this signature) pointer to another clause and
// return bool. NOTE(review): presumably true means the mRNA/gene was accepted
// by this clause -- confirm against the implementation.
62  virtual bool AddmRNA (CAutoDefFeatureClause_Base *mRNAClause);
63  virtual bool AddGene (CAutoDefFeatureClause_Base *gene_clause, bool suppress_allele);
64 
// Location-related virtual interface. All four are declared only.
// CompareLocation returns a sequence::ECompare value for `loc`.
65  virtual sequence::ECompare CompareLocation(const CSeq_loc& loc) const;
66  virtual void AddToOtherLocation(CRef<CSeq_loc> loc);
// `also_set_partials` defaults to true.
67  virtual void AddToLocation(CRef<CSeq_loc> loc, bool also_set_partials = true);
68  virtual bool SameStrand(const CSeq_loc& loc) const;
// Classification predicates. Every base-class version below returns false;
// they are virtual so that derived clause classes can report true.
69  virtual bool IsPartial() const { return false; }
70  virtual bool IsMobileElement() const { return false; }
71  virtual bool IsInsertionSequence() const { return false; }
72  virtual bool IsControlRegion() const { return false; }
73  virtual bool IsEndogenousVirusSourceFeature() const { return false; }
74  virtual bool IsGeneCluster() const { return false; }
75  virtual bool IsNoncodingProductFeat() const { return false; }
76  virtual bool IsSatelliteClause() const { return false; }
77  virtual bool IsExonList() const { return false; }
78 
// Static predicate on a product-name string; needs no clause instance.
// NOTE(review): by its name this tests for a "uORF" product -- the matching
// rule lives in the implementation; confirm there.
79  static bool IsuORF(const string& product);
80 
// Returns a raw pointer to a clause chosen as parent for `subclause`.
// NOTE(review): whether a null return means "no suitable parent" is not
// visible in this header -- confirm before dereferencing the result.
81  virtual CAutoDefFeatureClause_Base *FindBestParentClause(CAutoDefFeatureClause_Base * subclause, bool gene_cluster_opp_strand);
82 
// Non-virtual grouping operations on this clause; declared only.
// `gene_cluster_opp_strand` has the same name as the flag taken by
// FindBestParentClause and OkToGroupUnderByLocation.
83  void GroupClauses(bool gene_cluster_opp_strand);
// Takes the CBioseq_Handle by value.
84  void GroupAltSplicedExons(CBioseq_Handle bh);
85 
// Returns a CRef to a CSeq_loc for this clause. Virtual and const.
86  virtual CRef<CSeq_loc> GetLocation() const;
87 
// Returns a string built from this clause's subclauses.
// The flag names suggest control over semicolon separators, the final "and",
// and allele output -- exact rules are in the implementation.
88  string ListClauses(bool allow_semicolons, bool suppress_final_and, bool suppress_allele);
89 
// Const queries on this clause.
90  bool IsGeneMentioned(CAutoDefFeatureClause_Base *gene_clause) const;
91  bool IsUnattachedGene() const;
// Returns the m_ShowTypewordFirst flag.
92  bool IsTypewordFirst() const { return m_ShowTypewordFirst; }
93  bool DisplayAlleleName () const;
94 
// Read accessors: each returns a const reference to the named string member,
// so the reference is only valid while this clause object is alive.
95  const string& GetInterval() const { return m_Interval; }
96  const string& GetTypeword() const { return m_Typeword; }
97  const string& GetDescription() const { return m_Description; }
98  const string& GetProductName() const { return m_ProductName; }
99  const string& GetGeneName() const { return m_GeneName; }
100  const string& GetAlleleName() const { return m_AlleleName; }
// Virtual setter; takes the string by value.
101  virtual void SetProductName(string product_name);
// Flag getters: return the corresponding bool member unchanged.
102  bool GetGeneIsPseudo() const { return m_GeneIsPseudo; }
103  bool NeedPlural() const { return m_MakePlural; }
104  bool IsAltSpliced() const { return m_IsAltSpliced; }
// Declared only; takes the splice name by value.
105  void SetAltSpliced(string splice_name);
106  bool IsMarkedForDeletion() const { return m_DeleteMe; }
// One-way setters: these only ever set the flag to true; this header shows no
// way to clear m_DeleteMe or m_MakePlural again.
107  void MarkForDeletion() { m_DeleteMe = true; }
108  void SetMakePlural() { m_MakePlural = true; }
109  bool HasmRNA() const { return m_HasmRNA; }
// Stores `info_only` in m_ClauseInfoOnly.
110  void SetInfoOnly (bool info_only) { m_ClauseInfoOnly = info_only; }
// Declared only; by name they pluralize m_Interval / m_Description text.
111  void PluralizeInterval();
112  void PluralizeDescription();
113 
// Declared only; non-const and returns nothing.
// NOTE(review): looks like a debugging/inspection aid -- confirm its output
// target in the implementation before relying on it.
114  void ShowSubclauses();
115 
116  // Grouping functions
// Declared only. NOTE(review): presumably drops subclauses for which
// IsMarkedForDeletion() is true -- confirm in the implementation.
117  void RemoveDeletedSubclauses();
118 
// Grouping and clean-up passes over this clause's subclauses. All are
// declared only; ordering requirements between them are not visible in this
// header, so check the caller before reordering any of these calls.
119  void GroupmRNAs(bool suppress_allele);
120  void GroupGenes(bool suppress_allele);
121  void GroupConsecutiveExons(CBioseq_Handle bh);
122  void GroupSegmentedCDSs(bool suppress_allele);
123  void RemoveGenesMentionedElsewhere();
124  void RemoveuORFs();
125  void RemoveOptionalMobileElements();
126  void ConsolidateRepeatedClauses(bool suppress_allele);
127  void FindAltSplices(bool suppress_allele);
// Takes the destination list by non-const reference, so it can be modified.
128  void TransferSubclauses(TClauseList &other_clause_list);
129  void CountUnknownGenes();
130  void ExpandExonLists();
// The only virtual member in this group.
131  virtual void ReverseCDSClauseLists();
132 
// Grouping permission hooks. The base-class versions ignore their arguments
// (parameter names are commented out) and always return false; derived
// classes override them to allow grouping under a parent clause.
133  virtual bool OkToGroupUnderByType(const CAutoDefFeatureClause_Base * /*parent_clause*/) const { return false; }
134  virtual bool OkToGroupUnderByLocation(const CAutoDefFeatureClause_Base * /*parent_clause*/, bool /*gene_cluster_opp_strand*/) const { return false; }
135 
// Virtual; declared only. By name it suppresses subfeatures of clauses for
// which IsMobileElement() or IsInsertionSequence() is true -- confirm in the
// implementation.
136  virtual void SuppressMobileElementAndInsertionSequenceSubfeatures();
137 
// Sets m_SuppressSubfeatures to true. One-way: nothing in this header
// resets the flag.
138  void SuppressSubfeatures() { m_SuppressSubfeatures = true; }
139 
// Gene product-name helpers; declared only.
// FindGeneProductName returns a string by value for the given gene clause.
// NOTE(review): the value returned when no product is found is not visible
// here (an empty string would be the obvious candidate) -- confirm.
140  string FindGeneProductName(CAutoDefFeatureClause_Base *gene_clause);
141  void AssignGeneProductNames(CAutoDefFeatureClause_Base *main_clause, bool suppress_allele);
142 
// Feature-removal helpers; declared only.
// `feature_type` is a plain unsigned int in these signatures.
// NOTE(review): presumably it carries a CSeqFeatData::ESubtype value, as
// returned by GetMainFeatureSubtype() -- confirm at the call sites.
// `except_promoter` defaults to false in both overload-like variants.
143  void RemoveFeaturesByType(unsigned int feature_type, bool except_promoter = false);
144  bool IsFeatureTypeLonely(unsigned int feature_type) const;
145  void RemoveFeaturesInmRNAsByType(unsigned int feature_type, bool except_promoter = false);
146  void RemoveFeaturesUnderType(unsigned int feature_type);
147  void RemoveFeaturesInLocation(const CSeq_loc& loc);
148 
// Exon-related predicates. The base-class versions return false; virtual so
// derived classes can override.
149  virtual bool ShouldRemoveExons() const { return false; }
150  virtual bool IsExonWithNumber() const { return false; }
151 
// Declared only. NOTE(review): presumably driven by ShouldRemoveExons() /
// IsExonWithNumber() on the subclauses -- confirm in the implementation.
152  void RemoveUnwantedExons();
153 
// Precursor-RNA handling; both are declared only. The predicate is virtual
// and const; unlike the inline predicates in this class, its base-class
// result is not visible in this header.
154  virtual bool IsBioseqPrecursorRNA() const;
155  void RemoveBioseqPrecursorRNAs();
156 
// Base-class version returns false; virtual so derived classes can override.
157  virtual bool IsPromoter() const { return false; }
158 
// Declared only. Takes `other` by non-const reference, so the call may
// modify the other clause as well as this one.
159  void Consolidate(CAutoDefFeatureClause_Base& other, bool suppress_allele);
161 
// Static text helpers: none of them needs a clause instance. All are
// declared only.
// The first two split an input string into a vector of strings.
162  static vector<string> GetMiscRNAElements(const string& product);
163  static vector<string> GetTrnaIntergenicSpacerClausePhrases(const string& comment);
164  static bool IsValidFeatureClausePhrase(const string& phrase);
// Note: takes `comment` by value, unlike its siblings that take const string&.
165  static vector<string> GetFeatureClausePhrases(string comment);
// Factory-style helper: returns a CRef to a CAutoDefFeatureClause (a type
// declared outside this listing) built from the given phrase and context.
166  static CRef<CAutoDefFeatureClause> ClauseFromPhrase(const string& phrase, CBioseq_Handle bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc, bool first, bool last, const CAutoDefOptions& opts);
167 
168 
169 protected:
172 
// Protected data members (partial). The listing jumps over several numbered
// lines here, so other members used by the inline accessors in this class
// (for example m_ClauseList, m_Description, m_ProductName, m_MakePlural,
// m_DeleteMe) are declared on lines that are not shown.
// Exposed through GetGeneName() and GetAlleleName().
173  string m_GeneName;
174  string m_AlleleName;
// Exposed through GetInterval().
176  string m_Interval;
// m_HasmRNA is exposed through HasmRNA(); no accessor for m_HasGene is
// visible in this listing.
178  bool m_HasmRNA;
179  bool m_HasGene;
// Exposed through GetTypeword().
185  string m_Typeword;
191 
193 
195 
// Internal helpers (x_ prefix); all const and declared only.
196  size_t x_LastIntervalChangeBeforeEnd () const;
// Two overloads: one takes a pair of unsigned int values, the other takes
// another clause object.
// NOTE(review): clause1/clause2 look like indices into the subclause list;
// they are unsigned int here but size_t in x_MeetAltSpliceRules -- confirm
// whether the difference is intentional.
197  bool x_OkToConsolidate (unsigned int clause1, unsigned int clause2) const;
198  bool x_OkToConsolidate(const CAutoDefFeatureClause_Base& other) const;
// `splice_name` is a non-const reference, i.e. an output/in-out parameter.
199  bool x_MeetAltSpliceRules (size_t clause1, size_t clause2, string &splice_name) const;
200 
// Internal helper; declared only. NOTE(review): presumably erases empty
// (null) CRef entries from the subclause list -- confirm.
201  void x_RemoveNullClauses();
202 
203  // for miscRNA elements
// Word categories for misc RNA phrases. The listing skips the numbered lines
// between the first and last enumerator, so the intermediate enumerators are
// not shown here; only InternalSpacer (= 0) and the trailing Unrecognized
// value are visible.
204  typedef enum {
205  eMiscRnaWordType_InternalSpacer = 0,
211  eMiscRnaWordType_Unrecognized
212  } ERnaMiscWord;
// Static helpers working on ERnaMiscWord; declared only.
// x_AddOneMiscWordElement receives `elements` by non-const reference, so it
// can append to the caller's vector; it reports a bool result.
213  static bool x_AddOneMiscWordElement(const string& phrase, vector<string>& elements);
214  static ERnaMiscWord x_GetRnaMiscWordType(const string& phrase);
// Returns a const reference to a string for the given word type.
215  static const string& x_GetRnaMiscWord(ERnaMiscWord word_type);
216 
217  // for tRNA/intergenic spacer elements
// Phrase categories: Gene = 0, Spacer = 1, and Unrecognized, which takes the
// next value (2) because it has no explicit initializer.
218  typedef enum {
219  eTRNAIntergenicSpacerType_Gene = 0,
220  eTRNAIntergenicSpacerType_Spacer = 1,
221  eTRNAIntergenicSpacerType_Unrecognized
222  } ETRNAIntergenicSpacerType;
// Static helpers for this category; declared only.
223  static ETRNAIntergenicSpacerType x_GetTRNAIntergenicSpacerType(const string& phrase);
224  static string x_tRNAGeneFromProduct(const string& product);
// `elements` is a non-const reference, so the helper can append to the
// caller's vector; it reports a bool result.
225  static bool x_AddOnetRNAIntergenicSpacerElement(const string& phrase, vector<string>& elements);
226 };
227 
228 
230 {
231 public:
234 
// Members of a second class whose head and constructor lines are missing
// from this listing.
// Label has the same signature as CAutoDefFeatureClause_Base::Label declared
// earlier in this file. NOTE(review): presumably an override -- the base
// class list is not visible here, so confirm.
235  virtual void Label(bool suppress_allele);
// Always returns true for this class.
236  virtual bool IsRecognizedFeature() const { return true; }
237 };
238 
239 
241 {
242 public:
244 
// Members of a third class whose head, constructor and data-member lines are
// missing from this listing. Most signatures match virtuals declared on
// CAutoDefFeatureClause_Base earlier in this file.
// NOTE(review): presumably overrides -- the base class list is not visible
// here, so confirm.
245  virtual void AddSubclause (CRef<CAutoDefFeatureClause_Base> subclause);
246  virtual void Label(bool suppress_allele);
// Always returns true for this class.
247  virtual bool IsRecognizedFeature() const { return true; }
// Returns true here, whereas CAutoDefFeatureClause_Base::IsExonList()
// returns false.
248  virtual bool IsExonList() const { return true; }
// Declared only here, unlike the inline "return false" base versions.
249  virtual bool OkToGroupUnderByLocation(const CAutoDefFeatureClause_Base *parent_clause, bool gene_cluster_opp_strand) const;
250  virtual bool OkToGroupUnderByType(const CAutoDefFeatureClause_Base *parent_clause) const;
251  virtual CSeqFeatData::ESubtype GetMainFeatureSubtype() const;
// Stores `suppress` in m_SuppressFinalAnd (member declared on a line that is
// not shown).
252  void SetSuppressFinalAnd(bool suppress) { m_SuppressFinalAnd = suppress; }
253 private:
// Private helper; declared only. Takes two CRef<CSeq_loc> by value and
// returns a CRef<CSeq_loc>. NOTE(review): by name, the result is the
// intersection of the two locations -- confirm in the implementation.
254  CRef<CSeq_loc> SeqLocIntersect (CRef<CSeq_loc> loc1, CRef<CSeq_loc> loc2);
255 
259 };
260 
263 
264 #endif //OBJMGR_UTIL___AUTODEF_FEATURE_CLAUSE_BASE__HPP
virtual bool IsRecognizedFeature() const
virtual bool OkToGroupUnderByType(const CAutoDefFeatureClause_Base *) const
void TakeSubclauses(CAutoDefFeatureClause_Base &other)
vector< CRef< CAutoDefFeatureClause_Base > > TClauseList
virtual bool OkToGroupUnderByLocation(const CAutoDefFeatureClause_Base *, bool) const
virtual bool IsEndogenousVirusSourceFeature() const
CBioseq_Handle –.
CObject –.
Definition: ncbiobj.hpp:180
CRef –.
Definition: ncbiobj.hpp:618
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Include a standard set of the NCBI C++ Toolkit most basic headers.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
ECompare
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOBJEDIT_EXPORT
Definition: ncbi_export.h:1291
Modified on Wed Apr 17 13:10:23 2024 by modify_doxy.py rev. 669887