NCBI C++ ToolKit
huge_asn_loader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: huge_asn_loader.cpp 102385 2024-04-29 14:33:08Z foleyjp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Sergiy Gotvyanskyy
27 *
28 * File Description:
29 *
30 *
31 */
32 
33 #include <ncbi_pch.hpp>
34 
38 
42 
46 
47 namespace
48 {
49 
50 class CLoaderMakerWithReader : public CLoaderMaker_Base
51 {
52 public:
53  CLoaderMakerWithReader(const string& name, CHugeAsnReader* reader):
54  m_reader(reader)
55  {
56  m_Name = name;
57  }
58 
59  virtual CDataLoader* CreateLoader(void) const
60  {
61  return new CHugeAsnDataLoader(m_Name, m_reader);
62  }
63 
64  typedef CHugeAsnDataLoader::TRegisterLoaderInfo TRegisterInfo;
65  TRegisterInfo GetRegisterInfo(void)
66  {
67  TRegisterInfo info;
68  info.Set(m_RegisterInfo.GetLoader(), m_RegisterInfo.IsCreated());
69  return info;
70  }
71 private:
72  CHugeAsnReader* m_reader = nullptr;
73 };
74 
75 } // anonymous namespace
76 
// NOTE(review): the signature line is missing from this listing; presumably this is
// CHugeAsnDataLoader(const string& name, CHugeAsnReader* reader) -- confirm.
// Passes `name` to the CDataLoader base and stores `reader` in m_reader; the body is empty.
78  CDataLoader(name),
79  m_reader{reader}
80 {
81 }
82 
83 
// NOTE(review): the signature line is missing from this listing; presumably this is
// the CHugeAsnDataLoader destructor -- confirm.
// The body does nothing: the manual `delete m_reader` below is commented out.
85 {
86  /*
87  if (m_owning)
88  {
89  delete m_reader;
90  }
91  */
92 }
93 
// Diagnostic switch: DEBUG_HUGE_ASN_LOADER is left commented out, so the
// tracing code guarded by it is compiled out unless the define is enabled.
94 #ifdef _DEBUG
95 //#define DEBUG_HUGE_ASN_LOADER
96 #endif
97 
// Per-thread text of the Seq-id handle most recently assigned in the guarded
// tracing code; it is echoed by the "not found" / "Loaded" messages.
98 #ifdef DEBUG_HUGE_ASN_LOADER
99 static thread_local std::string loading_ids;
100 #endif
101 
// NOTE(review): the signature line is missing from this listing; presumably this is
// CHugeAsnDataLoader::GetBlobId(const CSeq_id_Handle& idh) -- confirm.
// Asks the reader for the top-level object matching the Seq-id and, if one is
// found, returns a blob id wrapping that pointer; otherwise returns an empty blob id.
103 {
104 #ifdef DEBUG_HUGE_ASN_LOADER
105  loading_ids = idh.AsString();
106 #endif
107  auto info = m_reader->FindTopObject(idh.GetSeqId());
108  if (info) {
// The blob id carries the raw pointer returned by FindTopObject.
109  TBlobId blob_id = new CBlobIdPtr(info);
110  return blob_id;
111  }
112 #ifdef DEBUG_HUGE_ASN_LOADER
113  cerr << MSerial_AsnText << "Seq id not found: " << loading_ids << "\n";
114 #endif
// Not found: value-initialized (empty) blob id.
115  return {};
116 }
117 
// NOTE(review): the signature line is missing from this listing; presumably this is
// CHugeAsnDataLoader::GetBlobById(const TBlobId& blob_id) -- confirm.
// Obtains the TSE load lock for the blob id; if the TSE is not loaded yet, loads
// the Seq-entry through the reader, attaches it to the TSE and marks it loaded.
119 {
120  // Load data, get the lock
121  CTSE_LoadLock lock = GetDataSource()->GetTSE_LoadLock(blob_id);
122  if ( !lock.IsLoaded() ) {
// Recover the pointer stored inside the blob id.
123  auto id = (const CBlobIdPtr*)&*blob_id;
124  auto info_ptr = id->GetValue();
// NOTE(review): `info` is not declared in the visible lines -- listing line 125
// is missing from this extract; presumably it derives `info` from info_ptr.
126  auto entry = m_reader->LoadSeqEntry(*info);
127 #ifdef DEBUG_HUGE_ASN_LOADER
128  cerr << MSerial_AsnText << "Loaded: " << loading_ids << "\n";
129 #endif
130  CTSE_Info& tse_info = *lock;
131  tse_info.SetSeq_entry(*entry);
132  lock.SetLoaded();
133  }
134  return lock;
135 }
136 
// NOTE(review): the signature lines are missing from this listing; presumably this is
// CHugeAsnDataLoader::GetRecords(const CSeq_id_Handle& idh, EChoice choice) -- confirm.
// Returns a lock set holding at most one TSE lock: the blob resolved from `idh`.
// The set is empty when no blob id is found or the blob lock is null.
139 {
140  TTSE_LockSet locks;
141  TBlobId blob_id = GetBlobId(idh);
142  if ( blob_id ) {
143  TTSE_Lock lock = GetBlobById(blob_id);
144  if ( lock ) {
145  locks.insert(lock);
146  }
147  }
148  return locks;
149 }
150 
// NOTE(review): the first signature lines are missing from this listing; presumably this is
// CHugeAsnDataLoader::RegisterInObjectManager(CObjectManager& om, ...) -- confirm.
// Builds a CLoaderMakerWithReader for the given name/reader, passes it to
// CDataLoader::RegisterInObjectManager, and returns the maker's register info.
153  const string& loader_name,
154  CHugeAsnReader* reader,
155  CObjectManager::EIsDefault is_default,
156  CObjectManager::TPriority priority)
157 {
158  CLoaderMakerWithReader maker(loader_name, reader);
159  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
160  return maker.GetRegisterInfo();
161 }
162 
// NOTE(review): the signature line is missing from this listing; presumably this is
// CHugeAsnDataLoader::GetSequenceLength(const CSeq_id_Handle& idh) -- confirm.
// Returns the m_length recorded for the bioseq, or kInvalidSeqPos when the
// reader has no bioseq for this Seq-id.
164 {
165  auto info = m_reader->FindBioseq(idh.GetSeqId());
166  return info? info->m_length : kInvalidSeqPos;
167 }
168 
// NOTE(review): the signature line is missing from this listing; the message strings
// below name the function as CHugeAsnDataLoader::GetSequenceType().
// Returns the m_mol recorded for the bioseq. Throws CLoaderException (eNoData)
// when m_mol is CSeq_inst::eMol_not_set.
170 {
171  auto info = m_reader->FindBioseq(idh.GetSeqId());
172  if (info == nullptr)
// NOTE(review): listing line 173 is missing from this extract; presumably it opens
// an NCBI_THROW for the "sequence not found" message below -- confirm.
174  "CHugeAsnDataLoader::GetSequenceType() sequence not found");
175 
176  if (info->m_mol == CSeq_inst::eMol_not_set)
177  NCBI_THROW(CLoaderException, eNoData,
178  "CHugeAsnDataLoader::GetSequenceType() type not set");
179 
180  return info->m_mol;
181 }
182 
// NOTE(review): the signature line is missing from this listing; presumably this is
// CHugeAsnDataLoader::GetSequenceTypeFound(const CSeq_id_Handle& idh) -- confirm.
// Non-throwing lookup: returns a default-constructed STypeFound when the bioseq
// is unknown; otherwise sets sequence_found and copies m_mol into type.
184 {
185  auto info = m_reader->FindBioseq(idh.GetSeqId());
186  STypeFound ret;
187  if (info) {
188  ret.sequence_found = true;
189  ret.type = info->m_mol;
190  }
191  return ret;
192 }
193 
// NOTE(review): the signature line is missing from this listing; presumably this is
// CHugeAsnDataLoader::GetIds(const CSeq_id_Handle& idh, TIds& ids) -- confirm.
// Appends a handle for every id in the bioseq's m_ids to `ids`, skipping handles
// already present. `ids` is left untouched when the bioseq is not found.
195 {
196  //cerr << "CHugeAsnDataLoader::GetIds invoked\n";
197  auto info = m_reader->FindBioseq(idh.GetSeqId());
198  if (info)
199  {
200  for (auto id: info->m_ids)
201  {
202  auto newidh = CSeq_id_Handle::GetHandle(*id);
// Linear scan of `ids` to avoid inserting a duplicate handle.
203  if (std::find(begin(ids), end(ids), newidh) == ids.end())
204  {
205  ids.push_back(newidh);
206  }
207  }
208  }
209 }
210 
211 namespace
212 {
213  TTaxId x_FindTaxId(const CHugeAsnReader::TBioseqSetList& cont, CHugeAsnReader::TBioseqSetList::const_iterator parent, CConstRef<CSeq_descr> descr)
214  {
215  if (descr)
216  {
217  for (auto d: descr->Get())
218  {
219  const COrg_ref* org_ref = nullptr;
220  switch(d->Which())
221  {
222  case CSeqdesc::e_Source:
223  if (d->GetSource().IsSetOrg())
224  org_ref = &d->GetSource().GetOrg();
225  break;
226  case CSeqdesc::e_Org:
227  org_ref = &d->GetOrg();
228  break;
229  default:
230  break;
231  }
232  if (org_ref)
233  return org_ref->GetTaxId();
234  }
235  }
236  if (parent != cont.end())
237  return x_FindTaxId(cont, parent->m_parent_set, parent->m_descr);
238 
239  return ZERO_TAX_ID;
240  }
241 }
242 
// NOTE(review): the signature line is missing from this listing; presumably this is
// CHugeAsnDataLoader::GetTaxId(const CSeq_id_Handle& idh) -- confirm.
// Returns INVALID_TAX_ID when the reader has no bioseq for the Seq-id. Otherwise
// returns the result of x_FindTaxId, which searches the bioseq's descriptors and
// then those of its enclosing sets (ZERO_TAX_ID if none carries organism data).
244 {
245  auto info = m_reader->FindBioseq(idh.GetSeqId());
246  if (info)
247  {
248  auto taxid = x_FindTaxId(m_reader->GetBiosets(), info->m_parent_set, info->m_descr);
249  return taxid;
250  }
251  return INVALID_TAX_ID;
252 }
253 
User-defined methods of the data storage class.
CTSE_LoadLock GetTSE_LoadLock(const TBlobId &blob_id)
void GetIds(const CSeq_id_Handle &idh, CDataLoader::TIds &ids) override
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &loader_name, CHugeAsnReader *reader, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_Default)
TBlobId GetBlobId(const CSeq_id_Handle &idh) override
CRef< CHugeAsnReader > m_reader
TTaxId GetTaxId(const CSeq_id_Handle &idh) override
Request for a taxonomy id of a sequence.
TTSE_LockSet GetRecords(const CSeq_id_Handle &idh, EChoice choice) override
Request from a datasource using handles and ranges instead of seq-loc The TSEs loaded in this call wi...
STypeFound GetSequenceTypeFound(const CSeq_id_Handle &idh) override
TSeqPos GetSequenceLength(const CSeq_id_Handle &idh) override
Request for a length of a sequence.
CSeq_inst::TMol GetSequenceType(const CSeq_id_Handle &idh) override
Request for a type of a sequence. Returns CSeq_inst::eMol_not_set if sequence is not known.
CHugeAsnDataLoader(const string &name, CHugeAsnReader *reader)
TTSE_Lock GetBlobById(const TBlobId &blob_id) override
const TBioseqSetInfo * FindTopObject(CConstRef< CSeq_id > seqid) const
auto & GetBiosets() const
virtual CRef< CSeq_entry > LoadSeqEntry(const TBioseqSetInfo &info, eAddTopEntry add_top_entry=eAddTopEntry::yes) const
const TBioseqInfo * FindBioseq(CConstRef< CSeq_id > seqid) const
std::list< TBioseqSetInfo > TBioseqSetList
Data loader exceptions, used by GenBank loader.
CObjectManager –.
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
void SetSeq_entry(CSeq_entry &entry, CTSE_SetObjectInfo *set_info=0)
Definition: tse_info.cpp:351
bool IsLoaded(void) const
void SetLoaded(void)
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define INVALID_TAX_ID
Definition: ncbimisc.hpp:1116
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
string
Definition: cgiapp.hpp:687
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
vector< CSeq_id_Handle > TIds
CSeq_inst::TMol type
CDataSource * GetDataSource(void) const
Definition: data_loader.cpp:92
EChoice
main blob is blob with sequence all other blobs are external and contain external annotations
EIsDefault
Flag defining if the data loader is included in the "default" group.
CBlobIdFor< const void * > CBlobIdPtr
Definition: blob_id.hpp:152
static void RegisterInObjectManager(CObjectManager &om, CLoaderMaker_Base &loader_maker, CObjectManager::EIsDefault is_default, CObjectManager::TPriority priority)
Register the loader only if the name is not yet registered in the object manager.
Definition: data_loader.cpp:53
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
@ e_Org
if all from one organism
Definition: Seqdesc_.hpp:116
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
@ eMol_not_set
> cdna = rna
Definition: Seq_inst_.hpp:109
static MDB_envinfo info
Definition: mdb_load.c:37
Definition: fix_pub.hpp:45
@ eNotFound
Not found.
CRef< objects::CObjectManager > om
Better replacement of GetSequenceType(), this method should be defined in data loaders,...
SRegisterLoaderInfo –.
Modified on Wed Jun 12 11:16:45 2024 by modify_doxy.py rev. 669887