NCBI C++ ToolKit
create_defline.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJMGR_UTIL___CREATE_DEFLINE__HPP
2 #define OBJMGR_UTIL___CREATE_DEFLINE__HPP
3 
4 /*
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Jonathan Kans, Aaron Ucko
30 *
31 * ===========================================================================
32 */
33 
34 /// @file create_defline.hpp
35 /// API (CDeflineGenerator) for computing sequences' titles ("definitions").
36 
37 #include <util/strsearch.hpp>
38 #include <objects/seq/MolInfo.hpp>
40 #include <objmgr/mapped_feat.hpp>
41 #include <objmgr/util/feature.hpp>
42 #include <objmgr/util/indexer.hpp>
43 
44 /** @addtogroup ObjUtilSequence
45  *
46  * @{
47  */
48 
51 
52 // Forward declarations
53 class CScope;
54 class CBioseq_Handle;
55 
56 BEGIN_SCOPE(sequence)
57 
58 /// Class for computing sequences' titles ("definitions").
59 ///
60 /// PREFERRED USAGE:
61 ///
62 /// CDeflineGenerator gen(tseh);
63 ///
64 /// string title = gen.GenerateDefline(bsh, flags);
65 ///
66 /// The same CDeflineGenerator should be used for all titles within a nuc-prot
67 /// set blob, since it tracks the presence or absence of biosource features to
68 /// speed up protein title generation.
69 
71 {
72 public:
73  /// Constructor
74  CDeflineGenerator (void);
75 
76  /// Constructor
78 
79  /// Destructor
80  ~CDeflineGenerator (void);
81 
82  /// User-settable flags for tuning behavior; each named flag is a bit value, so flags can be OR-ed together into a TUserFlags mask.
83  enum EUserFlags {
84  fIgnoreExisting = 1 << 0, ///< Generate fresh titles unconditionally.
85  fAllProteinNames = 1 << 1, ///< List all relevant proteins, not just one.
86  fLocalAnnotsOnly = 1 << 2, ///< Never use related sequences' annotations.
87  /// Refrain from anything that could add substantial overhead (currently an alias for fLocalAnnotsOnly, i.e. the same bit).
88  fNoExpensiveOps = fLocalAnnotsOnly,
89  fGpipeMode = 1 << 3, ///< Use GPipe defaults.
90  fOmitTaxonomicName = 1 << 4, ///< Do not add organism suffix to proteins.
91  fDevMode = 1 << 5, ///< Development mode for testing new features.
92  fShowModifiers = 1 << 6, ///< Show key-value pair modifiers (e.g. "[organism=Homo sapiens]").
93  fUseAutoDef = 1 << 7, ///< Run auto-def for nucleotides if user object is present.
94  fFastaFormat = 1 << 8, ///< Generate FASTA defline.
95  fDoNotUseAutoDef = 1 << 9 ///< Disable internal call to auto-def.
96  };
97  typedef int TUserFlags; ///< Binary "OR" of EUserFlags values (0 means no flags set).
98 
99  /// Main method: returns the title for the sequence given by handle bsh; flags is a binary "OR" of EUserFlags (default 0, no flags).
100  string GenerateDefline (
101  const CBioseq_Handle& bsh,
102  TUserFlags flags = 0
103  );
104 
105  /// Main method, overload that additionally takes a caller-supplied CSeqEntryIndex (idx) alongside the Bioseq handle.
106  string GenerateDefline (
107  const CBioseq_Handle& bsh,
108  CSeqEntryIndex& idx,
109  TUserFlags flags = 0
110  );
111 
112  /// Main method, overload taking a CBioseq object and a CScope instead of a CBioseq_Handle, plus a caller-supplied CSeqEntryIndex (idx).
113  string GenerateDefline (
114  const CBioseq& bioseq,
115  CScope& scope,
116  CSeqEntryIndex& idx,
117  TUserFlags flags = 0
118  );
119 
120  /// Main method, overload that additionally takes a caller-supplied feature tree (ftree) alongside the Bioseq handle.
121  string GenerateDefline (
122  const CBioseq_Handle& bsh,
123  feature::CFeatTree& ftree,
124  TUserFlags flags = 0
125  );
126 
127  /// Main method, overload taking a CBioseq object and a CScope instead of a CBioseq_Handle.
128  string GenerateDefline (
129  const CBioseq& bioseq,
130  CScope& scope,
131  TUserFlags flags = 0
132  );
133 
134  /// Main method, overload taking a CBioseq object and a CScope instead of a CBioseq_Handle, plus a caller-supplied feature tree (ftree).
135  string GenerateDefline (
136  const CBioseq& bioseq,
137  CScope& scope,
138  feature::CFeatTree& ftree,
139  TUserFlags flags = 0
140  );
141 
142  string x_GetModifiers(const CBioseq_Handle & handle); // NOTE(review): the other x_-prefixed members are declared private, but this one sits in the public section - confirm that is intentional.
143 
144 public:
145  bool UsePDBCompoundForDefline (void) const { return m_UsePDBCompoundForDefline; } ///< Read-only accessor: returns the current value of m_UsePDBCompoundForDefline.
146 
147 private:
148  // Prohibit copy constructor & assignment operator
151 
152 private:
153  /// internal methods
154 
155  void x_Init (void);
156 
157  void x_SetFlags (
158  const CBioseq_Handle& bsh,
160  );
161  void x_SetFlagsIdx (
162  const CBioseq_Handle& bsh,
164  );
165  void x_SetBioSrc (
166  const CBioseq_Handle& bsh
167  );
168  void x_SetBioSrcIdx (
169  const CBioseq_Handle& bsh
170  );
171 
172  const char* x_OrganelleName(
173  CBioSource::TGenome genome
174  ) const;
175 
176  bool x_CDShasLowQualityException (
177  const CSeq_feat& sft
178  );
179 
180  void x_DescribeClones (
181  vector<CTempString>& desc,
182  string& buf
183  );
184  CConstRef<CSeq_feat> x_GetLongestProtein (
185  const CBioseq_Handle& bsh
186  );
187  CConstRef<CGene_ref> x_GetGeneRefViaCDS (
188  const CMappedFeat& mapped_cds
189  );
190 
191  void x_SetTitleFromBioSrc (void);
192  void x_SetTitleFromNC (void);
193  void x_SetTitleFromNM (
194  const CBioseq_Handle& bsh
195  );
196  void x_SetTitleFromNR (
197  const CBioseq_Handle& bsh
198  );
199  void x_SetTitleFromPatent (void);
200  void x_SetTitleFromPDB (void);
201  void x_SetTitleFromGPipe (void);
202  void x_SetTitleFromProtein (
203  const CBioseq_Handle& bsh
204  );
205  void x_SetTitleFromProteinIdx (
206  const CBioseq_Handle& bsh
207  );
208  void x_SetTitleFromSegSeq (
209  const CBioseq_Handle& bsh
210  );
211  void x_SetTitleFromWGS (void);
212  void x_SetTitleFromMap (void);
213 
214  void x_SetPrefix (
215  string& prefix,
216  const CBioseq_Handle& bsh
217  );
218  void x_SetSuffix (
219  string& suffix,
220  const CBioseq_Handle& bsh,
221  bool appendComplete
222  );
223 
224  void x_AdjustProteinTitleSuffix (
225  const CBioseq_Handle& bsh
226  );
227  void x_AdjustProteinTitleSuffixIdx (
228  const CBioseq_Handle& bsh
229  );
230 
231  bool x_IsComplete() const;
232 
233 private:
234  /// index with feature tree for each Bioseq
236 
237  /// internal feature tree for parent mapping
242 
243  /// ignoring the existing title is forced for certain types
249  bool m_DevMode;
250 
251  /// seq-inst fields
252  bool m_IsNA;
253  bool m_IsAA;
256 
257  bool m_IsSeg;
258  bool m_IsDelta;
260  bool m_IsMap;
261 
262  /// seq-id fields
263  bool m_IsNC;
264  bool m_IsNM;
265  bool m_IsNR;
266  bool m_IsNZ;
268  bool m_IsPDB;
269  bool m_IsWP;
274 
275  string m_MainTitle;
276  string m_GeneralStr;
280 
282 
284  string m_PDBChainID;
285 
286  /// molinfo fields
290 
291  bool m_HTGTech;
293  bool m_IsTLS;
294  bool m_IsTSA;
295  bool m_IsWGS;
297 
299 
300  /// genbank or embl block keyword fields
304  bool m_TPAExp;
305  bool m_TPAInf;
308 
309  /// pdb block fields
311 
312  /// biosource fields
321 
323 
327 
328  /// subsource fields
336 
337  /// orgmod fields
345 
346  /// user object fields
352 
353  /// comment fields
356 
357  /// map fields
358  string m_rEnzyme;
359 
361 
363  /// exception fields
364 
365  /// Subclass of CTextFsm<int> that declares its own default constructor. (Careful: CTextFsm has no virtual destructor, so never delete an instance through a CTextFsm<int> pointer.)
366  class CLowQualityTextFsm : public CTextFsm<int> {
367  public:
368  CLowQualityTextFsm(void);
369  };
370 
372 };
373 
374 
375 END_SCOPE(sequence)
378 
379 /* @} */
380 
381 #endif /* OBJMGR_UTIL___CREATE_DEFLINE__HPP */
CBioseq_Handle –.
Class for computing sequences' titles ("definitions").
CMappedFeat –.
Definition: mapped_feat.hpp:59
CSafeStatic<>::
CScope –.
Definition: scope.hpp:92
CSeq_entry_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
static uch flags
CRef< feature::CFeatTree > m_Feat_Tree
CTempString m_Chromosome
subsource fields
CTempString m_SpecimenVoucher
bool m_Reconstruct
ignore existing title is forced for certain types
int TUserFlags
Binary "OR" of EUserFlags.
CTempString m_PDBCompound
pdb block fields
bool m_HTGSCancelled
genbank or embl block keyword fields
CTempString m_UnreviewedPrefix
CTempString m_LinkageGroup
bool m_IsNA
seq-inst fields
CMolInfo::TTech m_MITech
CTempString m_Substrain
CSeq_inst::TLength m_Length
string m_rEnzyme
map fields
CTempString m_Organelle
CTempString m_Cultivar
CTempString m_UnverifiedPrefix
CBioSource::TGenome m_Genome
CTempString m_Comment
comment fields
bool m_IsNC
seq-id fields
CConstRef< CBioSource > m_Source
biosource fields
CTempString m_TargetedLocus
bool UsePDBCompoundForDefline(void) const
static CSafeStatic< CLowQualityTextFsm > ms_p_Low_Quality_Fsa
bool m_IsUnverified
user object fields
CSeq_entry_Handle m_TopSEH
internal feature tree for parent mapping
CMolInfo::TCompleteness m_MICompleteness
CDeflineGenerator(const CDeflineGenerator &)
CTempString m_Breed
orgmod fields
CRef< CSeqEntryIndex > m_Idx
index with feature tree for each Bioseq
CTempString m_MetaGenomeSource
CSeq_inst::TTopology m_Topology
EUserFlags
User-settable flags for tuning behavior.
CMolInfo::TBiomol m_MIBiomol
molinfo fields
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOBJUTIL_EXPORT
Definition: ncbi_export.h:1339
ETopology
topology of molecule
Definition: Seq_inst_.hpp:121
TSeqPos TLength
Definition: Seq_inst_.hpp:147
static const char * x_OrganelleName(TBIOSOURCE_GENOME genome, bool has_plasmid, bool virus_or_phage, bool wgs_suffix)
Definition: indexer.cpp:1176
char * buf
static const char * suffix[]
Definition: pcregrep.c:408
static const char * prefix[]
Definition: pcregrep.c:405
String search utilities.
Modified on Sat Dec 09 04:48:43 2023 by modify_doxy.py rev. 669887