NCBI C++ ToolKit
assembly_cache.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: assembly_cache.cpp 41426 2018-07-25 16:26:46Z katargir $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Roman Katargin
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
35 
36 #include <serial/serial.hpp>
37 #include <serial/objistrasnb.hpp>
38 
41 
43 
46 
47 //#define USE_DEV_TMS
48 
49 static string s_GetAssmAccsParams(const string& seqAcc)
50 {
51  string params =
52  "&id=" + NStr::URLEncode(seqAcc) +
53  "&req=getassmaccs" +
54  "&ofmt=asnb";
55 
56  return params;
57 }
58 
59 static string s_GetAssmInfoParams(const string& assmaccs)
60 {
61  string params =
62  "&assmaccs=" + NStr::URLEncode(assmaccs) +
63  "&mode=AssemblyOnly" +
64  "&req=getassminfos" +
65  "&ofmt=asnb";
66 
67  return params;
68 }
69 
/// Host name of the server that answers the seqconfig requests.
///
/// @return
///   The development host when USE_DEV_TMS is defined at compile time,
///   otherwise the public www host.
static string s_GetHost()
{
#ifdef USE_DEV_TMS
    static const char* const kHost = "dev.ncbi.nlm.nih.gov";
#else
    static const char* const kHost = "www.ncbi.nlm.nih.gov";
#endif
    return string(kHost);
}
78 
/// Server-side path of the CGI that the requests in this file are sent to.
///
/// @return
///   The absolute path component of the request URL (starts with '/').
static string s_GetPath()
{
    static const char kCgiPath[] = "/projects/sviewer/seqconfig.cgi";
    return string(kCgiPath);
}
83 
85 {
86 }
87 
89 
91 {
92  static CAssemblyCache instance;
93  return instance;
94 }
95 
96 bool CAssemblyCache::GetAssemblies(const string& seqAcc, list<CRef<objects::CGC_Assembly> >& assAcc)
97 {
98  if (seqAcc.empty())
99  return true;
100 
101  {
102  std::unique_lock<std::mutex> lock(m_DataMutex);
103  x_StartThread();
104  auto it = m_MolMap.find(seqAcc);
105  if (it != m_MolMap.end()) {
106  for (const auto& a : it->second)
107  assAcc.push_back(m_AssMap[a]);
108  return true;
109  }
110  }
111  x_Load(seqAcc);
112  return false;
113 }
114 
115 bool CAssemblyCache::GetBestAssemblyAcc(const string& seqAcc, string& best_acc)
116 {
117  if (seqAcc.empty())
118  return true;
119 
120  {
121  std::unique_lock<std::mutex> lock(m_DataMutex);
122  x_StartThread();
123  auto it = m_BestAccMap.find(seqAcc);
124  if (it != m_BestAccMap.end()) {
125  best_acc = it->second;
126  return true;
127  }
128  }
129  x_Load(seqAcc);
130  return false;
131 }
132 
134 {
135  if (m_WorkerThread) {
136  {
137  std::unique_lock<std::mutex> locker(m_QueueMutex);
138  m_StopRequested = true;
139  }
140  m_QueueCheck.notify_one();
141  m_WorkerThread.Reset();
142  }
143 }
144 
145 static void s_GetAccessions(const string& seqAcc, list<string>& accessions, string& best_acc)
146 {
147  string host = s_GetHost();
148  string path = s_GetPath();
149  string params = s_GetAssmAccsParams(seqAcc);
150  string url = "https://" + host + path + "?" + params;
151  string errMsg = "s_GetAccessions: " + url;
152 
153  try {
154  CGuiHttpSessionRequest httpRequest(url);
155  unique_ptr<CObjectIStream> obj_strm(new CObjectIStreamAsnBinary(httpRequest.GetResponseStream()));
156 
157  CRef<CSV_AssmAccs> res(new CSV_AssmAccs());
158  *obj_strm >> *res;
159 
160  if (res) {
161  if (res->CanGetAssm_accs())
162  accessions = res->GetAssm_accs();
163  if (res->CanGetAssm_acc_best())
164  best_acc = res->GetAssm_acc_best();
165  }
166  } NCBI_CATCH(errMsg);
167 }
168 
169 static void s_GetAssemblies(const vector<string>& accessions,
170  list<CRef<objects::CGC_Assembly> >& assemblies)
171 {
172  string assmaccs;
173  for (const auto& acc : accessions) {
174  if (!assmaccs.empty()) assmaccs += ",";
175  assmaccs += acc;
176  }
177 
178  string host = s_GetHost();
179  string path = s_GetPath();
180  string params = s_GetAssmInfoParams(assmaccs);
181  string url = "https://" + host + path + "?" + params;
182  string errMsg = "s_GetAssemblies: " + url;
183 
184  try {
185  CGuiHttpSessionRequest httpRequest(url);
186  unique_ptr<CObjectIStream> obj_strm(new CObjectIStreamAsnBinary(httpRequest.GetResponseStream()));
188  *obj_strm >> *res;
189 
190  if (res && res->CanGetAssemblies())
191  assemblies = res->GetAssemblies();
192  } NCBI_CATCH(errMsg);
193 }
194 
196 {
197  for (;;) {
198  std::unique_lock<std::mutex> locker(m_QueueMutex);
199  auto dataReady = [this]() { return !m_ToLoad.empty() || m_StopRequested; };
200  if (!dataReady())
201  m_QueueCheck.wait(locker, dataReady);
202 
203  if (m_StopRequested)
204  break;
205 
206  list<string> toLoadCopy = std::move(m_ToLoad);
207  locker.unlock();
208 
209  for (const auto& seqAcc : toLoadCopy) {
210  {
211  std::unique_lock<std::mutex> lock(m_DataMutex);
212  if (m_BestAccMap.find(seqAcc) != m_BestAccMap.end())
213  continue;
214  }
215 
216  list<string> accessions;
217  string best_acc;
218  vector<string> toLoad;
219  s_GetAccessions(seqAcc, accessions, best_acc);
220 
221  {
222  std::unique_lock<std::mutex> lock(m_DataMutex);
223  if (!best_acc.empty()) {
224  m_BestAccMap[seqAcc] = best_acc;
225  } else if (!accessions.empty()) {
226  m_BestAccMap[seqAcc] = accessions.front();
227  }
228  for (const auto& acc : accessions) {
229  if (m_AssMap.find(acc) == m_AssMap.end())
230  toLoad.push_back(acc);
231  }
232  }
233 
234  list<CRef<objects::CGC_Assembly> > assemblies;
235  if (!toLoad.empty())
236  s_GetAssemblies(toLoad, assemblies);
237 
238  {
239  std::unique_lock<std::mutex> lock(m_DataMutex);
240  for (auto a : assemblies)
241  m_AssMap[a->GetAccession()] = a;
242 
243  auto& mol = m_MolMap[seqAcc];
244  for (const auto& acc : accessions) {
245  if (m_AssMap.find(acc) != m_AssMap.end())
246  mol.push_back(acc);
247  }
248  }
249 
250  {
251  std::unique_lock<std::mutex> locker(m_QueueMutex);
252  if (m_StopRequested)
253  break;
254  }
255  }
256  }
257 }
258 
260 {
261  if (!m_WorkerThread) {
262  m_WorkerThread.Reset(new CWorkerThread(*this));
263  m_WorkerThread->Run();
264  }
265 }
266 
267 void CAssemblyCache::x_Load(const string& seqAcc)
268 {
269  std::unique_lock<std::mutex> locker(m_QueueMutex);
270  list<string>::const_iterator it2 = find(m_ToLoad.begin(), m_ToLoad.end(), seqAcc);
271  if (it2 == m_ToLoad.end()) {
272  m_ToLoad.push_back(seqAcc);
273  m_QueueCheck.notify_one();
274  }
275 }
276 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
USING_SCOPE(objects)
static string s_GetHost()
static string s_GetPath()
static string s_GetAssmInfoParams(const string &assmaccs)
static void s_GetAccessions(const string &seqAcc, list< string > &accessions, string &best_acc)
static string s_GetAssmAccsParams(const string &seqAcc)
static void s_GetAssemblies(const vector< string > &accessions, list< CRef< objects::CGC_Assembly > > &assemblies)
#define false
Definition: bool.h:36
bool GetBestAssemblyAcc(const string &seqAcc, string &best_acc)
std::mutex m_QueueMutex
map< string, vector< string > > m_MolMap
void x_Load(const string &seqAcc)
std::condition_variable m_QueueCheck
map< string, string > m_BestAccMap
list< string > m_ToLoad
bool GetAssemblies(const string &seqAcc, list< CRef< objects::CGC_Assembly > > &assAcc)
map< string, CRef< objects::CGC_Assembly > > m_AssMap
static CAssemblyCache & GetInstance()
CRef< CWorkerThread > m_WorkerThread
std::mutex m_DataMutex
CGC_Assemblies –.
CObjectIStreamAsnBinary –.
Definition: objistrasnb.hpp:59
CSV_AssmAccs –.
Definition: SV_AssmAccs.hpp:66
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
#define NCBI_CATCH(message)
Catch CExceptions as well This macro is deprecated - use *_X or *_XX variant instead of it.
Definition: ncbiexpt.hpp:580
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string URLEncode(const CTempString str, EUrlEncode flag=eUrlEnc_SkipMarkChars)
URL-encode string.
Definition: ncbistr.cpp:6058
unsigned int a
Definition: ncbi_localip.c:102
Modified on Thu Nov 30 04:56:26 2023 by modify_doxy.py rev. 669887