NCBI C++ ToolKit
tse_chunk_info.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJECTS_OBJMGR_IMPL___TSE_CHUNK_INFO__HPP
2 #define OBJECTS_OBJMGR_IMPL___TSE_CHUNK_INFO__HPP
3 
4 /* $Id: tse_chunk_info.hpp 96264 2022-03-04 17:44:38Z vasilche $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Eugene Vasilchenko
30 *
31 * File Description:
32 * Split TSE chunk info
33 *
34 */
35 
36 
37 #include <corelib/ncbiobj.hpp>
38 
39 #include <objmgr/annot_name.hpp>
42 #include <util/mutex_pool.hpp>
43 #include <objmgr/blob_id.hpp>
44 
45 #include <vector>
46 #include <list>
47 #include <map>
48 
51 
52 class CTSE_Info;
53 class CTSE_Split_Info;
54 class CSeq_entry_Info;
55 class CSeq_annot_Info;
56 class CSeq_literal;
57 class CSeq_descr;
58 class CSeq_annot;
59 class CBioseq_Base_Info;
60 class CBioseq_Info;
61 class CBioseq_set_Info;
62 class CDataLoader;
63 class CTSE_SetObjectInfo;
64 class ITSE_Assigner;
66 
67 
69 public:
70  virtual void Loaded(CTSE_Chunk_Info& chunk) = 0;
71 };
72 
73 
75 {
76 public:
77  //////////////////////////////////////////////////////////////////
78  // types used
79  //////////////////////////////////////////////////////////////////
80 
81  // chunk identification
83  typedef int TBlobVersion;
84  typedef int TChunkId;
85 
86  enum {
87  kMain_ChunkId = -1, // not a chunk, but main Seq-entry
88  kMasterWGS_ChunkId = kMax_Int-1, // chunk with master WGS descr
89  kDelayedMain_ChunkId= kMax_Int // main Seq-entry with delayed ext annot
90  };
91 
92  // contents place identification
93  typedef int TBioseq_setId;
95  typedef pair<TBioseqId, TBioseq_setId> TPlace;
96  typedef unsigned TDescTypeMask;
97  typedef pair<TDescTypeMask, TPlace> TDescInfo;
98  typedef vector<TPlace> TPlaces;
99  typedef vector<TDescInfo> TDescInfos;
100  typedef vector<TBioseq_setId> TBioseqPlaces;
101  typedef vector<TBioseqId> TBioseqIds;
103  typedef vector<TAssemblyInfo> TAssemblyInfos;
104 
105  // annot contents identification
108  typedef pair<TLocationId, TLocationRange> TLocation;
109  typedef vector<TLocation> TLocationSet;
112 
113  // annot contents indexing
115  typedef list<TObjectIndex> TObjectIndexList;
116 
117  // attached data types
118  typedef list< CRef<CSeq_literal> > TSequence;
119  typedef list< CRef<CSeq_align> > TAssembly;
120 
121  //////////////////////////////////////////////////////////////////
122  // constructor & destructor
123  //////////////////////////////////////////////////////////////////
125  virtual ~CTSE_Chunk_Info(void);
126 
127  //////////////////////////////////////////////////////////////////
128  // chunk identification getters
129  //////////////////////////////////////////////////////////////////
130  TBlobId GetBlobId(void) const;
131  TBlobVersion GetBlobVersion(void) const;
132  TChunkId GetChunkId(void) const;
133  const CTSE_Split_Info& GetSplitInfo(void) const;
134 
135  //////////////////////////////////////////////////////////////////
136  // loading control
137  //////////////////////////////////////////////////////////////////
138  bool NotLoaded(void) const;
139  bool IsLoaded(void) const;
140  void Load(void) const;
141  CInitGuard* GetLoadInitGuard(void);
142 
143  //////////////////////////////////////////////////////////////////
144  // chunk content identification
145  // should be set before attaching to CTSE_Info
146  //////////////////////////////////////////////////////////////////
147  void x_AddDescInfo(TDescTypeMask type_mask, const TBioseqId& id);
148  void x_AddDescInfo(TDescTypeMask type_mask, TBioseq_setId id);
149  void x_AddDescInfo(const TDescInfo& info);
150 
151  void x_AddAssemblyInfo(const TBioseqId& id);
152 
153  void x_AddAnnotPlace(const TBioseqId& id);
154  void x_AddAnnotPlace(TBioseq_setId id);
155  void x_AddAnnotPlace(const TPlace& place);
156 
157  // The bioseq-set contains some bioseq(s)
158  void x_AddBioseqPlace(TBioseq_setId id);
159  // The chunk contains the whole bioseq and its annotations,
160  // the annotations cannot refer to other bioseqs.
161  void x_AddBioseqId(const TBioseqId& id);
162 
163  void x_AddAnnotType(const CAnnotName& annot_name,
164  const SAnnotTypeSelector& annot_type,
165  const TLocationId& location_id);
166  void x_AddAnnotType(const CAnnotName& annot_name,
167  const SAnnotTypeSelector& annot_type,
168  const TLocationId& location_id,
169  const TLocationRange& location_range);
170  void x_AddAnnotType(const CAnnotName& annot_name,
171  const SAnnotTypeSelector& annot_type,
172  const TLocationSet& location);
173 
174  // The chunk contains features with ids
175  void x_AddFeat_ids(void);
176  typedef int TFeatIdInt;
177  typedef string TFeatIdStr;
178  typedef vector<TFeatIdInt> TFeatIdIntList;
179  typedef list<TFeatIdStr> TFeatIdStrList;
180  struct SFeatIds {
183  };
185 
186  void x_AddFeat_ids(const SAnnotTypeSelector& type,
187  const TFeatIdIntList& ids);
188  void x_AddXref_ids(const SAnnotTypeSelector& type,
189  const TFeatIdIntList& ids);
190  void x_AddFeat_ids(const SAnnotTypeSelector& type,
191  const TFeatIdStrList& ids);
192  void x_AddXref_ids(const SAnnotTypeSelector& type,
193  const TFeatIdStrList& ids);
194 
195  // The chunk contains seq-data. The corresponding bioseq's
196  // data should either be unset or be set to a delta with empty literal(s)
197  void x_AddSeq_data(const TLocationSet& location);
198 
199  //////////////////////////////////////////////////////////////////
200  // chunk data loading interface
201  // is called from CDataLoader
202  //////////////////////////////////////////////////////////////////
203 
204  // synchronization
205  operator CInitMutex_Base&(void) // implicit conversion: the chunk can be used where a CInitMutex_Base& is expected
206  {
207  return m_LoadLock; // hands out this chunk's own load lock member
208  }
209  void SetLoaded(CObject* obj = 0);
210  void SetLoadListener(CRef<CTSEChunkLoadListener> listener);
211 
212  // data attachment
213  void x_LoadDescr(const TPlace& place, const CSeq_descr& descr);
214  void x_LoadAnnot(const TPlace& place, const CSeq_annot& annot);
216  void x_LoadBioseq(const TPlace& place, const CBioseq& bioseq);
217  void x_LoadBioseqs(const TPlace& place, const list< CRef<CBioseq> >& bioseqs);
218  void x_LoadSequence(const TPlace& place, TSeqPos pos,
219  const TSequence& seq);
220  void x_LoadAssembly(const TBioseqId& seq_id, const TAssembly& assembly);
221 
222  void x_LoadSeq_entry(CSeq_entry& entry, CTSE_SetObjectInfo* set_info = 0);
223 
224  // update in-memory size
225  void x_AddUsedMemory(size_t size);
226 
227  //////////////////////////////////////////////////////////////////
228  // methods to find out what information needs to be loaded
229  //////////////////////////////////////////////////////////////////
230  const TDescInfos& GetDescInfos(void) const // read-only access to the (descriptor type mask, place) entries held in m_DescInfos
231  {
232  return m_DescInfos; // returned by reference, no copy is made
233  }
234  const TPlaces& GetAnnotPlaces(void) const // read-only access to the annotation places (vector of TPlace) held in m_AnnotPlaces
235  {
236  return m_AnnotPlaces; // returned by reference, no copy is made
237  }
238  const TBioseqPlaces& GetBioseqPlaces(void) const // read-only access to the Bioseq-set ids (vector of TBioseq_setId) held in m_BioseqPlaces
239  {
240  return m_BioseqPlaces; // returned by reference, no copy is made
241  }
242  const TBioseqIds& GetBioseqIds(void) const // read-only access to the Bioseq ids (vector of TBioseqId) held in m_BioseqIds
243  {
244  return m_BioseqIds; // returned by reference, no copy is made
245  }
246  const TAnnotContents& GetAnnotContents(void) const // read-only access to m_AnnotContents; NOTE(review): TAnnotContents typedef is not visible in this listing - confirm its layout
247  {
248  return m_AnnotContents; // returned by reference, no copy is made
249  }
250  const TLocationSet& GetSeq_dataInfos(void) const // read-only access to the seq-data locations (vector of TLocation) held in m_Seq_data
251  {
252  return m_Seq_data; // returned by reference, no copy is made
253  }
254  const TAssemblyInfos& GetAssemblyInfos(void) const // read-only access to the assembly entries (vector of TAssemblyInfo) held in m_AssemblyInfos
255  {
256  return m_AssemblyInfos; // returned by reference, no copy is made
257  }
258 
259  Uint4 GetLoadBytes() const;
260  double GetLoadSeconds() const;
261  pair<Uint4, double> GetLoadCost() const;
262 
263  void x_SetLoadBytes(Uint4 bytes);
264  void x_SetLoadSeconds(double seconds);
265 
266 protected:
267  //////////////////////////////////////////////////////////////////
268  // interaction with CTSE_Info
269  //////////////////////////////////////////////////////////////////
270 
271  // attach to CTSE_Info
272  void x_SplitAttach(CTSE_Split_Info& split_info);
273  void x_TSEAttach(CTSE_Info& tse, ITSE_Assigner& tse_info);
274  bool x_Attached(void) const;
275 
276  // return true if chunk is loaded
277  bool x_GetRecords(const CSeq_id_Handle& id, bool bioseq) const;
278 
279  // append ids with all Bioseqs Seq-ids from this Split-Info
280  void GetBioseqsIds(TBioseqIds& ids) const;
281 
282  // bioseq lookup
283  bool ContainsBioseq(const CSeq_id_Handle& id) const;
284 
285  // annot index maintenance
286  bool x_AnnotIndexNeedsUpdate() const;
287  void x_UpdateAnnotIndex(CTSE_Info& tse);
288  void x_UpdateAnnotIndexContents(CTSE_Info& tse);
289  bool x_ContainsFeatType(CSeqFeatData::E_Choice type) const;
290  bool x_ContainsFeatType(CSeqFeatData::ESubtype subtype) const;
291  bool x_ContainsFeatIds(CSeqFeatData::E_Choice type,
292  EFeatIdType id_type) const;
293  bool x_ContainsFeatIds(CSeqFeatData::ESubtype subtype,
294  EFeatIdType id_type) const;
295 
296  //void x_UnmapAnnotObjects(CTSE_Info& tse);
297  //void x_DropAnnotObjects(CTSE_Info& tse);
298  void x_DropAnnotObjects(void);
299 
300  void x_InitObjectIndexList(void);
301 
302 private:
303  friend class CTSE_Info;
304  friend class CTSE_Split_Info;
305 
306  friend class CTSE_Default_Assigner;
307 
308 
311 
314 
317 
319 
327 
330 
335 };
336 
337 
338 inline
340 {
341  return m_ChunkId;
342 }
343 
344 
345 inline
347 {
348  return !m_LoadLock;
349 }
350 
351 
352 inline
354 {
355  return m_LoadLock;
356 }
357 
358 
359 inline
361 {
362  return false;
363 }
364 
365 
366 inline
368 {
370  return *m_SplitInfo;
371 }
372 
373 
374 inline
376 {
377  return m_LoadBytes;
378 }
379 
380 
381 inline
383 {
384  return m_LoadSeconds;
385 }
386 
387 
390 
391 #endif//OBJECTS_OBJMGR_IMPL___TSE_CHUNK_INFO__HPP
CMutex –.
Definition: ncbimtx.hpp:749
CObject –.
Definition: ncbiobj.hpp:180
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
Definition: Seq_entry.hpp:56
virtual void Loaded(CTSE_Chunk_Info &chunk)=0
TFeatIdsMap m_XrefIds
TBioseqId TAssemblyInfo
vector< TDescInfo > TDescInfos
pair< TBioseqId, TBioseq_setId > TPlace
map< SAnnotTypeSelector, TLocationSet > TAnnotTypes
CBlobIdKey TBlobId
vector< TAssemblyInfo > TAssemblyInfos
TFeatIdsMap m_FeatIds
CRef< CTSEChunkLoadListener > m_LoadListener
TObjectIndexList m_ObjectIndexList
CTSE_Chunk_Info & operator=(const CTSE_Chunk_Info &)
const TAnnotContents & GetAnnotContents(void) const
list< TFeatIdStr > TFeatIdStrList
Uint4 GetLoadBytes() const
list< CRef< CSeq_align > > TAssembly
CInitMutex< CObject > m_LoadLock
CTSE_Chunk_Info(const CTSE_Chunk_Info &)
unsigned TDescTypeMask
CTSE_Split_Info * m_SplitInfo
pair< TDescTypeMask, TPlace > TDescInfo
vector< TBioseqId > TBioseqIds
TDescInfos m_DescInfos
TBioseqPlaces m_BioseqPlaces
vector< TLocation > TLocationSet
TBioseqIds m_BioseqIds
const TLocationSet & GetSeq_dataInfos(void) const
map< CAnnotName, TAnnotTypes > TAnnotContents
list< TObjectIndex > TObjectIndexList
CSeq_id_Handle TBioseqId
CRange< TSeqPos > TLocationRange
TAnnotContents m_AnnotContents
CSeq_id_Handle TLocationId
TLocationSet m_Seq_data
TChunkId GetChunkId(void) const
double GetLoadSeconds() const
vector< TPlace > TPlaces
const TBioseqPlaces & GetBioseqPlaces(void) const
TAssemblyInfos m_AssemblyInfos
vector< TFeatIdInt > TFeatIdIntList
list< CRef< CSeq_literal > > TSequence
const TBioseqIds & GetBioseqIds(void) const
bool x_AnnotIndexNeedsUpdate() const
const TPlaces & GetAnnotPlaces(void) const
const CTSE_Split_Info & GetSplitInfo(void) const
pair< TLocationId, TLocationRange > TLocation
vector< TBioseq_setId > TBioseqPlaces
bool NotLoaded(void) const
SAnnotObjectsIndex TObjectIndex
map< SAnnotTypeSelector, SFeatIds > TFeatIdsMap
const TDescInfos & GetDescInfos(void) const
const TAssemblyInfos & GetAssemblyInfos(void) const
bool IsLoaded(void) const
Definition: map.hpp:338
static const char location[]
Definition: config.c:97
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define NCBI_DEPRECATED
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define kMax_Int
Definition: ncbi_limits.h:184
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOBJMGR_EXPORT
Definition: ncbi_export.h:1307
E_Choice
Choice variants.
static MDB_envinfo info
Definition: mdb_load.c:37
const struct ncbi::grid::netcache::search::fields::SIZE size
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
@ kMain_ChunkId
Definition: blob_id.hpp:147
@ kDelayedMain_ChunkId
Definition: blob_id.hpp:149
@ kMasterWGS_ChunkId
Definition: blob_id.hpp:148
Definition: type.c:6
#define _ASSERT
Modified on Fri Sep 20 14:57:51 2024 by modify_doxy.py rev. 669887