NCBI C++ ToolKit
taxon_cache.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: taxon_cache.cpp 45144 2020-06-08 16:24:37Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Roman Katargin, Anatoliy Kuznetsov
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <iostream>
34 #include <corelib/ncbimtx.hpp>
35 
37 #include <util/line_reader.hpp>
38 
40 
43 
45 
47 
48 void CTaxonCache::InitStorage(const char* cache_path)
49 {
50  m_CachePath.assign(cache_path);
51 }
52 
54 {
55  try {
57  } catch (std::exception& ex) {
58  LOG_POST(Error << "Exception in CTaxonCache::CTaxonCache() : " << ex.what());
59  }
60 }
61 
63 {
64  try {
65  LOG_POST(Info << "Saving taxon cache to" << m_CachePath);
67  } catch (std::exception& ex) {
68  LOG_POST(Error << "Exception in CTaxonCache::~CTaxonCache() : " << ex.what());
69  }
70 }
71 
73 {
74  if (m_CachePath.empty())
75  return;
76 
77  CNcbiOfstream ostr(m_CachePath.c_str());
78  for (auto &tax_it : m_Map) {
79  ostr << tax_it.first << '\t' << tax_it.second.Label << '\t' << tax_it.second.TaxName << '\t' << tax_it.second.CommonName << '\t' << tax_it.second.BlastName << '\n';
80  }
81 }
82 
84 {
85  if (m_CachePath.empty() || !CFile(m_CachePath).Exists())
86  return;
87 
89  string str_id, label;
90 
91  while ( !line_reader->AtEOF() ) {
92  line_reader->ReadLine();
93  vector<CTempString> strings;
94  NStr::Split(line_reader->GetCurrentLine(), "\t", strings, 0);
95  if (5 != strings.size())
96  continue;
97 
98  int tax_id = NStr::StringToInt(strings[0]);
99  if (0 == tax_id)
100  continue;
101 
102  STaxonomy tax_info;
103  tax_info.Label = strings[1];
104  tax_info.TaxName = strings[2];
105  tax_info.CommonName = strings[3];
106  tax_info.BlastName = strings[4];
107 
108  m_Map[tax_id] = tax_info;
109  }
110 }
111 
112 
114 {
115  static CRef<CTaxonCache> s_Cache;
116  if (!s_Cache) {
117  CMutexGuard LOCK(s_Mutex);
118  if (!s_Cache) {
119  s_Cache.Reset (new CTaxonCache());
120  }
121  }
122  return *s_Cache;
123 }
124 
125 void CTaxonCache::Initialize(const vector<int>& tax_ids)
126 {
127  CMutexGuard LOCK(s_Mutex);
128 
129  vector<int> to_initialize;
130  ITERATE(vector<int>, it, tax_ids) {
131  if (m_Map.find(*it) == m_Map.end())
132  to_initialize.push_back(*it);
133  }
134 
135  if (to_initialize.empty())
136  return;
137 
138  try {
139  CTaxon1 taxon;
140  taxon.Init();
141 
142  ITERATE(vector<int>, it, to_initialize) {
143  bool is_species = false;
144  bool is_uncultured = false;
145  string blast_name;
146  CConstRef<COrg_ref> org_ref = taxon.GetOrgRef(TAX_ID_FROM(int, *it), is_species, is_uncultured, blast_name);
147  if (0 == org_ref)
148  continue;
149 
150  STaxonomy tax_info;
151  org_ref->GetLabel(&tax_info.Label);
152  if (org_ref->CanGetTaxname())
153  tax_info.TaxName = org_ref->GetTaxname();
154  if (org_ref->CanGetCommon())
155  tax_info.CommonName = org_ref->GetCommon();
156  tax_info.BlastName = blast_name;
157 
158  m_Map[*it] = tax_info;
159 
160  }
161  }
162  catch (const exception& ex) {
163  LOG_POST(Error << "CTaxonCache::Initialize: " << ex.what());
164  }
165 }
166 
168 {
169  CMutexGuard LOCK(s_Mutex);
170  m_TaxonClient.reset(0);
171 }
172 
174 {
175  if (m_TaxonClient.get() == 0) {
176  m_TaxonClient.reset(new CTaxon1);
177  bool conn_res = m_TaxonClient->Init();
178  if (conn_res != true) {
179  m_TaxonClient.reset(0);
180  return;
181  }
182  }
183 
184  CTaxon1& taxon = *(m_TaxonClient.get());
185 
186  bool is_species = false;
187  bool is_uncultured = false;
188  string blast_name;
189  CConstRef<COrg_ref> org_ref = taxon.GetOrgRef(TAX_ID_FROM(int, tax_id), is_species, is_uncultured, blast_name);
190  if (!org_ref)
191  return;
192 
193  STaxonomy tax_info;
194  org_ref->GetLabel(&tax_info.Label);
195  if (org_ref->CanGetTaxname())
196  tax_info.TaxName = org_ref->GetTaxname();
197  if (org_ref->CanGetCommon())
198  tax_info.CommonName = org_ref->GetCommon();
199  tax_info.BlastName = blast_name;
200 
201  m_Map[tax_id] = tax_info;
202 }
203 
205 {
206  TMap::const_iterator it = m_Map.find(tax_id);
207  if (it != m_Map.end())
208  return it;
209 
210  try {
211  x_QueryTaxonService(tax_id);
212  }
213  catch (const exception& ex) {
214  LOG_POST(Error << "CTaxonCache::GetLabel: " << ex.what());
215  m_TaxonClient.reset(0);
216  // second attempt after re-initialization
217  try {
218  x_QueryTaxonService(tax_id);
219  }
220  catch (const exception& ex)
221  {
222  LOG_POST(Error << "Second try of CTaxonCache::GetLabel: " << ex.what());
223  m_TaxonClient.reset(0);
224  }
225  }
226  return m_Map.find(tax_id);
227 }
228 
229 string CTaxonCache::GetLabel(int tax_id)
230 {
231  CMutexGuard LOCK(s_Mutex);
232  TMap::const_iterator it = x_GetItem(tax_id);
233  if (it == m_Map.end())
234  return string();
235 
236  return it->second.Label;
237 }
238 
239 string CTaxonCache::GetTaxname(int tax_id)
240 {
241  CMutexGuard LOCK(s_Mutex);
242  TMap::const_iterator it = x_GetItem(tax_id);
243  if (it == m_Map.end())
244  return string();
245 
246  return it->second.TaxName;
247 }
248 
249 string CTaxonCache::GetCommon(int tax_id)
250 {
251  CMutexGuard LOCK(s_Mutex);
252  TMap::const_iterator it = x_GetItem(tax_id);
253  if (it == m_Map.end())
254  return string();
255 
256  return it->second.CommonName;
257 }
258 
259 string CTaxonCache::GetBlastName(int tax_id)
260 {
261  CMutexGuard LOCK(s_Mutex);
262  TMap::const_iterator it = x_GetItem(tax_id);
263  if (it == m_Map.end())
264  return string();
265 
266  return it->second.BlastName;
267 }
268 
CConstRef –.
Definition: ncbiobj.hpp:1266
CFile –.
Definition: ncbifile.hpp:1604
CConstRef< COrg_ref > GetOrgRef(TTaxId tax_id, bool &is_species, bool &is_uncultured, string &blast_name, bool *is_specified=NULL)
Definition: taxon1.cpp:704
bool Init(void)
Definition: taxon1.cpp:101
void Initialize(const vector< int > &tax_ids)
void ResetConnection()
Drop connection to NCBI taxon service.
static void InitStorage(const char *cache_path)
Definition: taxon_cache.cpp:48
TMap::const_iterator x_GetItem(int tax_id)
string GetTaxname(int tax_id)
static CTaxonCache & GetInstance()
string GetCommon(int tax_id)
string GetLabel(int tax_id)
void x_QueryTaxonService(int tax_id)
CTaxonCache()
forbidden
Definition: taxon_cache.cpp:53
string GetBlastName(int tax_id)
void x_SaveTaxonCache() const
Definition: taxon_cache.cpp:72
unique_ptr< objects::CTaxon1 > m_TaxonClient
Definition: taxon_cache.hpp:86
static string m_CachePath
Definition: taxon_cache.hpp:88
void x_LoadTaxonCache()
Definition: taxon_cache.cpp:83
container_type::const_iterator const_iterator
Definition: map.hpp:53
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define TAX_ID_FROM(T, value)
Definition: ncbimisc.hpp:1111
string
Definition: cgiapp.hpp:687
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
CTempString GetCurrentLine(void) const
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
Definition: line_reader.cpp:49
void ReadLine(void)
Definition: line_reader.hpp:88
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
static const char label[]
Lightweight interface for getting lines of data with minimal memory copying.
Multi-threading – mutexes; rw-locks; semaphore.
static const char *const strings[]
Definition: utf8.c:21
USING_SCOPE(objects)
DEFINE_STATIC_MUTEX(s_Mutex)
Modified on Thu Dec 07 10:09:59 2023 by modify_doxy.py rev. 669887