NCBI C++ ToolKit
GC_Assembly.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: GC_Assembly.hpp 99604 2023-04-24 15:24:30Z mozese2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  */
27 
28 /// @file GC_Assembly.hpp
29 /// User-defined methods of the data storage class.
30 ///
31 /// This file was originally generated by application DATATOOL
32 /// using the following specifications:
33 /// 'genome_collection.asn'.
34 ///
35 /// New methods or data members can be added to it if needed.
36 /// See also: GC_Assembly_.hpp
37 
38 
39 #ifndef INTERNAL_GPIPE_OBJECTS_GENOMECOLL_GC_ASSEMBLY_HPP
40 #define INTERNAL_GPIPE_OBJECTS_GENOMECOLL_GC_ASSEMBLY_HPP
41 
42 #include <corelib/ncbimtx.hpp>
43 
44 // generated includes
46 
47 // generated classes
52 
54 
55 BEGIN_objects_SCOPE // namespace ncbi::objects::
56 
57 class CGC_AssemblyDesc;
58 class CGC_Sequence;
59 class CGC_Replicon;
60 
61 /////////////////////////////////////////////////////////////////////////////
63 {
65 
66 public:
67  typedef list< CConstRef<CGC_Sequence> > TSequenceList;
68  typedef list< CConstRef<CGC_AssemblyUnit> > TAssemblyUnits;
69  typedef list< CConstRef<CGC_Assembly> > TFullAssemblies;
70 
71 public:
72  // constructor
73  CGC_Assembly(void);
74  // destructor
75  ~CGC_Assembly(void);
76 
77  /// Retrieve the release id for this assembly
78  int GetReleaseId() const;
79 
80  /// Retrieve the accession for this assembly
81  string GetAccession() const;
82 
83  /// Either accession or submitter-provided id
84  string GetBestIdentifier() const;
85 
86  /// Retrieve the full set of assembly descriptors
87  const CGC_AssemblyDesc& GetDesc() const;
88 
89  /// Retrieve the tax-id for this assembly
90  TTaxId GetTaxId() const;
91 
92  /// Retrieve the name of this assembly
93  string GetName() const;
94 
95  /// Get full label for assembly; if this is a unit, full assembly name
96  /// followed by unit name
97  string GetDisplayName() const;
98 
99  /// Retrieve the file-safe version of assembly name, if available;
100  /// otherwise default to standard name
101  string GetFileSafeName() const;
102 
103  /// Get file-safe version of full label for assembly
104  string GetFileSafeDisplayName() const;
105 
106  /// Is this assembly a RefSeq assembly?
107  bool IsRefSeq() const;
108 
109  /// Is this assembly a GenBank assembly?
110  bool IsGenBank() const;
111 
112  /// Is this a non-nuclear assembly unit?
113  bool IsOrganelle() const;
114 
115  /// If this is an assembly unit, get unit class
116  CGC_AssemblyUnit::TClass GetUnitClass() const;
117 
118  /// Generate the internal up-pointers
119  void CreateHierarchy(CGC_Assembly *target_set = NULL);
120 
121  /// Generate the Seq-id index
122  void CreateIndex();
123 
124  enum ESubset {
130  eAll
131  };
132  /// Retrieve a subset of molecules
133  ///
134  void GetMolecules(TSequenceList& molecules,
135  ESubset subset) const;
136 
137  /// Retrieve a subset of molecules separately for each unit, in the same
138  /// order in which the units are returned by GetAssemblyUnits()
139  ///
140  void GetMoleculesByUnit(vector<TSequenceList>& molecules,
141  ESubset subset) const;
142 
143  /// Retrieve a list of all assembly units contained in this assembly
144  TAssemblyUnits GetAssemblyUnits() const;
145 
146  /// Retrieve a list of all full assemblies contained in this assembly
147  /// Note that, if the assembly is a full assembly, then it will be the only
148  /// assembly returned; also, if the assembly is not an assembly set, then
149  /// the base assembly will be returned.
150  TFullAssemblies GetFullAssemblies() const;
151 
152  /// Find all references to a given sequence within an assembly
153  void Find(const CSeq_id_Handle& id,
154  TSequenceList& sequences) const;
155 
159  eChooseBest
160  };
161 
162  /// Find a single sequence corresponding to the supplied id.
163  /// Flag find_option specifies what to do if more than one sequence is found with this id.
164  /// - eEnforceSingle: throw an exception. This is the default
165  /// - eChooseAny: arbitrarily choose one of the sequences
166  /// - eChooseBest: choose the best sequence available with this id. Criteria are:
167  /// -- Choose a sequence from the reference full assembly in preference to a sequence from another full assembly
168  /// -- Choose a sequence from the primary unit in preference to a sequence from another unit
169  /// -- Choose a top-level sequence in preference to a non-top-level sequence
170  /// -- Choose a scaffold in preference to a component
171  /// -- If there's more than one sequence that's "best" by the above criteria, choose one arbitrarily
173  EFindSeqOption find_option = eEnforceSingle) const;
174 
175  /// Returns replicon type, location and role
176  void GetRepliconTypeLocRole(const CSeq_id_Handle& id, string& type, string& location, set<int>& role) const;
177 
178  /// PreWrite() / PostRead() handle events for indexing of local structures
179  void PreWrite() const;
180  void PostRead();
181 
182  /// Access the top-level target set that this assembly belongs to
183  CConstRef<CGC_Assembly> GetTargetSet() const;
184 
185  /// Is this assembly the reference assembly of the target set, or part of it?
186  bool IsTargetSetReference() const;
187 
188 private:
189  // Prohibit copy constructor and assignment operator
192 
196 
198 
199  /// indexing infrastructure
200  void x_Index(CGC_Assembly& assm, CGC_Replicon& replicon);
201  void x_Index(CGC_Assembly& assm, CGC_Sequence& seq);
202  void x_Index(CGC_AssemblyUnit& unit, CGC_Replicon& replicon);
203  void x_Index(CGC_AssemblyUnit& unit, CGC_Sequence& seq);
204  void x_Index(CGC_Replicon& replicon, CGC_Sequence& seq);
205  void x_Index(CGC_Sequence& parent, CGC_Sequence& seq,
206  CGC_TaggedSequences::TState relation);
207  void x_Index(CGC_Sequence& seq,
208  CGC_TaggedSequences::TState relation);
209 
210  void x_Index(CGC_Assembly& root);
211 
212  const list< CRef< CDbtag > >& x_GetId() const;
213  string x_GetSubmitterId() const;
214 };
215 
216 /////////////////// CGC_Assembly inline methods
217 
218 
219 /////////////////// end of CGC_Assembly inline methods
220 
223 
224 
225 END_objects_SCOPE // namespace ncbi::objects::
226 
228 
229 
230 #endif // INTERNAL_GPIPE_OBJECTS_GENOMECOLL_GC_ASSEMBLY_HPP
231 /* Original file checksum: lines: 86, chars: 2546, CRC32: 1afb85e */
User-defined methods of the data storage class.
Data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CConstRef –.
Definition: ncbiobj.hpp:1266
CGC_AssemblyDesc –.
A GC-Assembly is either a single assembly unit or it is a set of assemblies.
list< CConstRef< CGC_Assembly > > TFullAssemblies
Definition: GC_Assembly.hpp:69
list< CConstRef< CGC_AssemblyUnit > > TAssemblyUnits
Definition: GC_Assembly.hpp:68
CGC_Assembly_Base Tparent
Definition: GC_Assembly.hpp:64
list< CConstRef< CGC_Sequence > > TSequenceList
Definition: GC_Assembly.hpp:67
map< CSeq_id_Handle, TSequenceList > TSequenceIndex
CGC_Assembly * m_TargetSet
TSequenceIndex m_SequenceMap
CGC_Assembly(const CGC_Assembly &value)
@ eSubmitterPseudoScaffold
CGC_Assembly & operator=(const CGC_Assembly &value)
CMutex –.
Definition: ncbimtx.hpp:749
static const char location[]
Definition: config.c:97
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define NULL
Definition: ncbistd.hpp:225
#define NCBISER_HAVE_POST_READ(Class)
Definition: serialbase.hpp:943
#define NCBISER_HAVE_PRE_WRITE(Class)
Definition: serialbase.hpp:952
TTaxId GetTaxId(const CBioseq_Handle &handle)
return the tax-id associated with a given sequence.
Definition: sequence.cpp:274
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define NCBI_GENOME_COLLECTION_EXPORT
Definition: ncbi_export.h:528
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
Multi-threading – mutexes; rw-locks; semaphore.
const CConstRef< CSeq_id > GetAccession(const CSeq_id_Handle &id_handle)
bool IsOrganelle(int genome)
Definition: utilities.cpp:2831
CSeq_id_Handle x_GetId(const CScope::TIds &ids, EGetIdType type)
Definition: sequence.cpp:462
Definition: type.c:6
Modified on Tue Apr 23 07:40:21 2024 by modify_doxy.py rev. 669887