NCBI C++ ToolKit
huge_asn_loader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: huge_asn_loader.cpp 101225 2023-11-16 19:11:06Z gotvyans $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Sergiy Gotvyanskyy
27 *
28 * File Description:
29 *
30 *
31 */
32 
33 #include <ncbi_pch.hpp>
34 
38 
42 
46 
47 namespace
48 {
49 
50 class CLoaderMakerWithReader : public CLoaderMaker_Base
51 {
52 public:
53  CLoaderMakerWithReader(const string& name, CHugeAsnReader* reader):
54  m_reader(reader)
55  {
56  m_Name = name;
57  }
58 
59  virtual CDataLoader* CreateLoader(void) const
60  {
61  return new CHugeAsnDataLoader(m_Name, m_reader);
62  }
63 
64  typedef CHugeAsnDataLoader::TRegisterLoaderInfo TRegisterInfo;
65  TRegisterInfo GetRegisterInfo(void)
66  {
67  TRegisterInfo info;
68  info.Set(m_RegisterInfo.GetLoader(), m_RegisterInfo.IsCreated());
69  return info;
70  }
71 private:
72  CHugeAsnReader* m_reader = nullptr;
73 };
74 
75 } // anonymous namespace
76 
78  CDataLoader(name),
79  m_reader{reader}
80 {
81 }
82 
83 
85 {
86  if (m_owning)
87  {
88  delete m_reader;
89  }
90 }
91 
// Debug tracing switch. The #define below is commented out, so tracing stays
// disabled even when _DEBUG is defined; uncomment it to enable the guarded
// cerr diagnostics in this file.
92 #ifdef _DEBUG
93 //#define DEBUG_HUGE_ASN_LOADER
94 #endif
95 
// Per-thread (thread_local) string used only by the tracing code: it is
// assigned idh.AsString() at the start of a blob-id lookup and echoed in the
// "Seq id not found" / "Loaded" messages.
96 #ifdef DEBUG_HUGE_ASN_LOADER
97 static thread_local std::string loading_ids;
98 #endif
99 
101 {
102 #ifdef DEBUG_HUGE_ASN_LOADER
103  loading_ids = idh.AsString();
104 #endif
105  auto info = m_reader->FindTopObject(idh.GetSeqId());
106  if (info) {
107  TBlobId blob_id = new CBlobIdPtr(info);
108  return blob_id;
109  }
110 #ifdef DEBUG_HUGE_ASN_LOADER
111  cerr << MSerial_AsnText << "Seq id not found: " << loading_ids << "\n";
112 #endif
113  return {};
114 }
115 
117 {
118  // Load data, get the lock
119  CTSE_LoadLock lock = GetDataSource()->GetTSE_LoadLock(blob_id);
120  if ( !lock.IsLoaded() ) {
121  auto id = (const CBlobIdPtr*)&*blob_id;
122  auto info_ptr = id->GetValue();
124  auto entry = m_reader->LoadSeqEntry(*info);
125 #ifdef DEBUG_HUGE_ASN_LOADER
126  cerr << MSerial_AsnText << "Loaded: " << loading_ids << "\n";
127 #endif
128  CTSE_Info& tse_info = *lock;
129  tse_info.SetSeq_entry(*entry);
130  lock.SetLoaded();
131  }
132  return lock;
133 }
134 
137 {
138  TTSE_LockSet locks;
139  TBlobId blob_id = GetBlobId(idh);
140  if ( blob_id ) {
141  TTSE_Lock lock = GetBlobById(blob_id);
142  if ( lock ) {
143  locks.insert(lock);
144  }
145  }
146  return locks;
147 }
148 
151  const string& loader_name,
152  CHugeAsnReader* reader,
153  CObjectManager::EIsDefault is_default,
154  CObjectManager::TPriority priority)
155 {
156  CLoaderMakerWithReader maker(loader_name, reader);
157  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
158  return maker.GetRegisterInfo();
159 }
160 
162 {
163  auto info = m_reader->FindBioseq(idh.GetSeqId());
164  return info? info->m_length : kInvalidSeqPos;
165 }
166 
168 {
169  auto info = m_reader->FindBioseq(idh.GetSeqId());
170  if (info == nullptr)
172  "CHugeAsnDataLoader::GetSequenceType() sequence not found");
173 
174  if (info->m_mol == CSeq_inst::eMol_not_set)
175  NCBI_THROW(CLoaderException, eNoData,
176  "CHugeAsnDataLoader::GetSequenceType() type not set");
177 
178  return info->m_mol;
179 }
180 
182 {
183  auto info = m_reader->FindBioseq(idh.GetSeqId());
184  STypeFound ret;
185  if (info) {
186  ret.sequence_found = true;
187  ret.type = info->m_mol;
188  }
189  return ret;
190 }
191 
193 {
194  //cerr << "CHugeAsnDataLoader::GetIds invoked\n";
195  auto info = m_reader->FindBioseq(idh.GetSeqId());
196  if (info)
197  {
198  for (auto id: info->m_ids)
199  {
200  auto newidh = CSeq_id_Handle::GetHandle(*id);
201  if (std::find(begin(ids), end(ids), newidh) == ids.end())
202  {
203  ids.push_back(newidh);
204  }
205  }
206  }
207 }
208 
209 namespace
210 {
211  TTaxId x_FindTaxId(const CHugeAsnReader::TBioseqSetList& cont, CHugeAsnReader::TBioseqSetList::const_iterator parent, CConstRef<CSeq_descr> descr)
212  {
213  if (descr)
214  {
215  for (auto d: descr->Get())
216  {
217  const COrg_ref* org_ref = nullptr;
218  switch(d->Which())
219  {
220  case CSeqdesc::e_Source:
221  if (d->GetSource().IsSetOrg())
222  org_ref = &d->GetSource().GetOrg();
223  break;
224  case CSeqdesc::e_Org:
225  org_ref = &d->GetOrg();
226  break;
227  default:
228  break;
229  }
230  if (org_ref)
231  return org_ref->GetTaxId();
232  }
233  }
234  if (parent != cont.end())
235  return x_FindTaxId(cont, parent->m_parent_set, parent->m_descr);
236 
237  return ZERO_TAX_ID;
238  }
239 }
240 
242 {
243  auto info = m_reader->FindBioseq(idh.GetSeqId());
244  if (info)
245  {
246  auto taxid = x_FindTaxId(m_reader->GetBiosets(), info->m_parent_set, info->m_descr);
247  return taxid;
248  }
249  return INVALID_TAX_ID;
250 }
251 
User-defined methods of the data storage class.
CTSE_LoadLock GetTSE_LoadLock(const TBlobId &blob_id)
void GetIds(const CSeq_id_Handle &idh, CDataLoader::TIds &ids) override
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &loader_name, CHugeAsnReader *reader, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_Default)
CHugeAsnReader * m_reader
TBlobId GetBlobId(const CSeq_id_Handle &idh) override
TTaxId GetTaxId(const CSeq_id_Handle &idh) override
Request for a taxonomy id of a sequence.
TTSE_LockSet GetRecords(const CSeq_id_Handle &idh, EChoice choice) override
Request from a datasource using handles and ranges instead of seq-loc. The TSEs loaded in this call wi...
STypeFound GetSequenceTypeFound(const CSeq_id_Handle &idh) override
TSeqPos GetSequenceLength(const CSeq_id_Handle &idh) override
Request for a length of a sequence.
CSeq_inst::TMol GetSequenceType(const CSeq_id_Handle &idh) override
Request for a type of a sequence. Returns CSeq_inst::eMol_not_set if the sequence is not known.
CHugeAsnDataLoader(const string &name, CHugeAsnReader *reader)
TTSE_Lock GetBlobById(const TBlobId &blob_id) override
const TBioseqSetInfo * FindTopObject(CConstRef< CSeq_id > seqid) const
auto & GetBiosets() const
virtual CRef< CSeq_entry > LoadSeqEntry(const TBioseqSetInfo &info, eAddTopEntry add_top_entry=eAddTopEntry::yes) const
const TBioseqInfo * FindBioseq(CConstRef< CSeq_id > seqid) const
std::list< TBioseqSetInfo > TBioseqSetList
Data loader exceptions, used by GenBank loader.
CObjectManager –.
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
void SetSeq_entry(CSeq_entry &entry, CTSE_SetObjectInfo *set_info=0)
Definition: tse_info.cpp:351
bool IsLoaded(void) const
void SetLoaded(void)
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define INVALID_TAX_ID
Definition: ncbimisc.hpp:1116
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
string
Definition: cgiapp.hpp:687
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
vector< CSeq_id_Handle > TIds
CSeq_inst::TMol type
CDataSource * GetDataSource(void) const
Definition: data_loader.cpp:92
EChoice
The main blob is the blob with the sequence; all other blobs are external and contain external annotations.
EIsDefault
Flag defining if the data loader is included in the "default" group.
CBlobIdFor< const void * > CBlobIdPtr
Definition: blob_id.hpp:152
static void RegisterInObjectManager(CObjectManager &om, CLoaderMaker_Base &loader_maker, CObjectManager::EIsDefault is_default, CObjectManager::TPriority priority)
Register the loader only if the name is not yet registered in the object manager.
Definition: data_loader.cpp:53
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
@ e_Org
if all from one organism
Definition: Seqdesc_.hpp:116
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
@ eMol_not_set
> cdna = rna
Definition: Seq_inst_.hpp:109
static MDB_envinfo info
Definition: mdb_load.c:37
Definition: fix_pub.hpp:45
@ eNotFound
Not found.
CRef< objects::CObjectManager > om
Better replacement of GetSequenceType(), this method should be defined in data loaders,...
SRegisterLoaderInfo –.
Modified on Sat Dec 02 09:22:21 2023 by modify_doxy.py rev. 669887