NCBI C++ ToolKit
cn3d_cache.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cn3d_cache.cpp 92728 2021-02-09 16:34:43Z hurwitz $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Paul Thiessen
27 *
28 * File Description:
29 * implements a basic cache for structures
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 
43 
45 
46 // for file/directory manipulation stuff
47 #ifdef __WXMSW__
48 #include <windows.h>
49 #include <wx/msw/winundef.h>
50 #endif
51 #include <wx/wx.h>
52 #include <wx/datetime.h>
53 #include <wx/file.h>
54 #include <wx/filename.h>
55 
56 #include "cn3d_cache.hpp"
57 #include "cn3d_tools.hpp"
58 #include "asn_reader.hpp"
59 
62 
63 
64 BEGIN_SCOPE(Cn3D)
65 
// Builds the path of the cache file for one structure: the cache folder, the
// platform path separator (wxFILE_SEP_PATH), then "<mmdbID>.<modelType>" as laid
// out by the Printf format string below. On the failure branch an error is
// logged and cachePath is returned as it stands at that point.
// NOTE(review): this listing jumps from line 68 to line 70, so the `if` that
// opens the block closed at line 73 is not visible here. Presumably it is the
// registry lookup of the cache folder into cachePath (compare CreateCacheFolder);
// confirm against the repository before editing this function.
66 static string GetCacheFilePath(int mmdbID, EModel_type modelType)
67 {
68  string cachePath;
70  wxString cacheFile;
 // modelType is passed to the %i conversion, so the file "extension" is the enum's integer value
71  cacheFile.Printf("%s%c%i.%i", cachePath.c_str(), wxFILE_SEP_PATH, mmdbID, modelType);
72  cachePath = cacheFile.c_str();
73  } else
74  ERRORMSG("Can't get cache folder from registry");
75  return cachePath;
76 }
77 
// Makes sure the cache folder named in the registry exists.
// Returns false when the folder name cannot be read from the registry or when
// wxMkdir fails; returns true when the folder already exists or was just created.
78 static bool CreateCacheFolder(void)
79 {
80  string cacheFolder;
81  if (!RegistryGetString(REG_CACHE_SECTION, REG_CACHE_FOLDER, &cacheFolder)) return false;
 // nothing to do if the folder is already there
82  if (wxDirExists(cacheFolder.c_str())) return true;
 // NOTE(review): wxMkdir presumably creates only the last path component; confirm
 // whether a registry value with missing parent directories needs to be supported
83  bool okay = wxMkdir(cacheFolder.c_str());
84  TRACEMSG((okay ? "created" : "failed to create") << " folder " << cacheFolder);
85  return okay;
86 }
87 
// Walks seqEntries recursively and appends a CRef for every Bioseq found to
// *sequences. An entry for which IsSeq() is true contributes its Bioseq directly;
// any other entry is treated as a set and its Seq_set list is walked in turn.
// sequences is dereferenced without a null check, so callers must pass a valid list.
88 static void ExtractBioseqs(list < CRef < CSeq_entry > >& seqEntries, BioseqRefList *sequences)
89 {
90  list < CRef < CSeq_entry > >::iterator e, ee = seqEntries.end();
91  for (e=seqEntries.begin(); e!=ee; ++e) {
92  if ((*e)->IsSeq())
 // the new CRef refers to the Bioseq object held by the entry itself
93  sequences->push_back(CRef<CBioseq>(&((*e)->SetSeq())));
94  else
 // not a single sequence: descend into the nested list of entries
95  ExtractBioseqs((*e)->SetSet().SetSeq_set(), sequences);
96  }
97 }
98 
// NOTE(review): the first line of this definition (listing line 99) is missing
// from this extract. The cross-reference list at the end of the page gives the
// full signature as:
//   bool ExtractBiostrucAndBioseqs(CNcbi_mime_asn1 &mime, CRef< CBiostruc > &biostruc, BioseqRefList *sequences)
//
// Splits a "strucseq" mime object into its structure and its sequences.
//   mime      - must have the Strucseq variant selected, otherwise an error is
//               logged and false is returned
//   biostruc  - re-seated to refer to the structure held inside mime
//   sequences - optional; when non-null it is cleared and then filled with the
//               Bioseqs found in mime's sequence entries
// Returns true on success.
100  CRef < CBiostruc >& biostruc, BioseqRefList *sequences)
101 {
102  if (!mime.IsStrucseq()) {
103  ERRORMSG("ExtractBiostrucAndBioseqs() - expecting strucseq mime");
104  return false;
105  }
106 
107  // point the caller's CRef at the Biostruc held inside mime (no deep copy is made here)
108  biostruc.Reset(&(mime.SetStrucseq().SetStructure()));
109 
110  // extract Bioseqs (skipped entirely when the caller passes a null list)
111  if (sequences) {
112  sequences->clear();
113  ExtractBioseqs(mime.SetStrucseq().SetSequences(), sequences);
114  }
115 
116  return true;
117 }
118 
// NOTE(review): the header of this function (listing line 119) is missing from
// this extract. The cross-reference list at the end of the page gives it as:
//   static CNcbi_mime_asn1 * GetStructureFromCacheFolder(int mmdbID, EModel_type modelType)
//
// Tries to read the cache file for (mmdbID, modelType) as binary ASN.1.
// On success the file is touched to mark it as recently used and the object is
// handed back through mime.Release(); on failure a warning (including the reader's
// error text) is logged and NULL is returned.
120 {
121  // try to load from cache
122  INFOMSG("looking for " << mmdbID << " (model type " << (int) modelType << ") in cache:");
123  string err, cacheFile = GetCacheFilePath(mmdbID, modelType);
 // NOTE(review): listing line 124 is missing here; `mime` is used below without a
 // visible declaration, so presumably it is created on that line -- confirm.
125  SetDiagPostLevel(eDiag_Fatal); // ignore all but Fatal errors while reading data
126  bool gotFile = ReadASNFromFile(cacheFile.c_str(), mime.GetPointer(), true, &err);
 // NOTE(review): listing line 127 is missing here; presumably it restores the
 // diagnostic post level lowered at line 125 -- confirm.
128  if (!gotFile) {
129  WARNINGMSG("failed to load " << mmdbID
130  << " (model type " << (int) modelType << ") from cache: " << err);
131  return NULL;
132  }
133 
134  // if successful, 'touch' the file to mark it as recently used
135  INFOMSG("loaded " << cacheFile);
136  wxFileName fn(cacheFile.c_str());
 // a failed touch is only warned about; the loaded data is still returned
137  if (!fn.Touch())
138  WARNINGMSG("error touching " << cacheFile);
139 
140  return mime.Release();
141 }
142 
// Fetches a structure over the network and, when the cache is enabled in the
// registry, writes it into the cache folder.
//   uid        - identifier as given by the caller; sent to the server when mmdbID is not > 0
//   mmdbID     - numeric MMDB ID, or a value <= 0 when not yet known
//   modelType  - selects the complexity value sent to the server
//   assemblyId - 0 selects the mmdbsrv.cgi request; any other value selects
//                mmdb_strview.cgi and is sent as the "buidx" argument
// Returns the pointer obtained from mime.Release(), or NULL when the fetch fails
// or the reply is not a strucseq.
143 // If assemblyId = -1, use the predefined 'default' assembly.
144 // Otherwise, get the specific assembly requested, where
145 // assemblyId = 0 means the ASU, and PDB-defined assemblies
146 // are indexed sequentially from 1.
// NOTE(review): the first line of the definition (listing line 147) is missing
// from this extract. The cross-reference list at the end of the page gives the
// signature as:
//   static CNcbi_mime_asn1 * GetStructureViaHTTPAndAddToCache(const string &uid, int mmdbID, EModel_type modelType, int assemblyId=0)
148  const string& uid, int mmdbID, EModel_type modelType, int assemblyId = 0)
149 {
150  string host, path, args;
151 
152  if (assemblyId == 0) {
153  // construct URL [mmdbsrv.cgi]
154 
155 #ifdef _USE_TEST_MMDBSRV_
156  // this is for a test release for Gabi for testing long pdb chain ids. Dave 10/19/20.
157  // this is from Dachuan, showing what the test URL looks like, and an example.
158  // https://dev.ncbi.nlm.nih.gov/Structure/pdbtest/[mmdb|cdd|vast|vastplus|wrbsp]/[*].cgi
159  // https://dev.ncbi.nlm.nih.gov/Structure/pdbtest/mmdb/mmdbsrv.cgi
160  host = "dev.ncbi.nlm.nih.gov";
161  path = "/Structure/pdbtest/mmdb/mmdbsrv.cgi";
162 #else
163  // this is the original, prior to making the test release for Gabi.
164  host = "www.ncbi.nlm.nih.gov";
165  path = "/Structure/mmdb/mmdbsrv.cgi";
166 #endif
167 
168  args = "save=Save&dopt=j&uid=";
169  if (mmdbID > 0)
170  args += NStr::IntToString(mmdbID);
171  else // assume PDB id
172  args += uid;
173  args += "&Complexity=";
174  switch (modelType) {
175  case eModel_type_ncbi_all_atom: args += "3"; break;
176  case eModel_type_pdb_model: args += "4"; break;
 // NOTE(review): listing line 177 is missing here; presumably a case label that
 // falls through to the default below -- confirm.
178  default:
179  args += "2"; break;
180  }
181 
182  // this is new 2/8/21. DIH.
183  // we now want Cn3D to request "unsanitized" data from mmdbsrv.
184  args += "&sanitize=0";
185 
186 #ifdef _USE_TEST_MMDBSRV_
187  // This is for long chain-id testing. This is for Gabi's test release.
188  // This is no longer used.
189  // args += "&readfile=1";
190 #endif
191  }
192 
193  else {
194  // construct URL [mmdb_strview.cgi]
 // unlike the branch above, this request always goes to the production host
 // and maps eModel_type_ncbi_vector to complexity "1"
195  host = "www.ncbi.nlm.nih.gov";
196  path = "/Structure/mmdb/mmdb_strview.cgi";
197  args = "program=cn3d&display=1&uid=";
198  if (mmdbID > 0)
199  args += NStr::IntToString(mmdbID);
200  else // assume PDB id
201  args += uid;
202  args += "&complexity=";
203  switch (modelType) {
204  case eModel_type_ncbi_vector: args += "1"; break;
205  case eModel_type_ncbi_all_atom: args += "3"; break;
206  case eModel_type_pdb_model: args += "4"; break;
 // NOTE(review): listing line 207 is missing here; presumably a case label that
 // falls through to the default below -- confirm.
208  default:
209  args += "2"; break;
210  }
211  args += "&buidx=" + NStr::IntToString(assemblyId);
212  }
213 
214  // load from network
215  INFOMSG("Trying to load structure data from " << host << path << '?' << args);
216  string err;
 // NOTE(review): listing line 217 is missing here; `mime` is used below without a
 // visible declaration, so presumably it is created on that line -- confirm.
218 
 // err is filled only by GetAsnDataViaHTTPS, so it may be empty when the fetch
 // succeeded but the reply is not a strucseq
219  if (!GetAsnDataViaHTTPS(host, path, args, mime.GetPointer(), &err) ||
220  !mime->IsStrucseq()) {
221  ERRORMSG("Failed to read structure " << uid << " from network\nreason: " << err);
222  return NULL;
223 
224  } else {
225  // get MMDB ID from biostruc if not already known
226  if (mmdbID == 0) {
 // NOTE(review): front() is called on the id list without an emptiness check;
 // confirm the server never returns a Biostruc with no ids
227  if (mime->GetStrucseq().GetStructure().GetId().front()->IsMmdb_id())
228  mmdbID = mime->GetStrucseq().GetStructure().GetId().front()->GetMmdb_id().Get();
229  else {
230  ERRORMSG("Can't get MMDB ID from Biostruc!");
 // the data is still returned to the caller, but it is not cached
231  return mime.Release();
232  }
233  }
234 
235  bool cacheEnabled;
236  if (RegistryGetBoolean(REG_CACHE_SECTION, REG_CACHE_ENABLED, &cacheEnabled) && cacheEnabled) {
237  // add to cache
238  if (CreateCacheFolder() &&
239  WriteASNToFile(GetCacheFilePath(mmdbID, modelType).c_str(), *mime, true, &err)) {
240  INFOMSG("stored " << mmdbID << " (model type " << (int) modelType << ") in cache");
241  // trim cache to appropriate size if we've added a new file
242  int size;
 // NOTE(review): listing lines 243-244 are missing here; presumably they read the
 // maximum cache size from the registry into `size` and call TruncateCache -- confirm.
245  } else {
 // a cache write failure is not fatal: the fetched data is still returned below
246  WARNINGMSG("Failed to write structure to cache folder");
247  if (err.size() > 0) WARNINGMSG("reason: " << err);
248  }
249  }
250  }
251 
252  return mime.Release();
253 }
254 
// NOTE(review): the header of this function (listing line 255) is missing from
// this extract. The cross-reference list at the end of the page gives it as:
//   CNcbi_mime_asn1 * LoadStructureViaCache(const std::string &uid, ncbi::objects::EModel_type modelType, int assemblyId)
//
// Loads a structure by identifier, looking in the local cache first and falling
// back to the network. Returns NULL when uid is neither a PDB-style id nor a
// number, or when the network fetch fails.
256 {
257  // determine whether this is an integer MMDB ID or alphanumeric PDB ID
258  int mmdbID = 0;
 // treated as a PDB id only when it is exactly 4 characters long and at least one of
 // characters 2-4 is a letter; mmdbID then stays 0 ("not yet known")
259  if (uid.size() == 4 && (isalpha((unsigned char) uid[1]) || isalpha((unsigned char) uid[2]) || isalpha((unsigned char) uid[3]))) {
260  TRACEMSG("Fetching PDB " << uid);
261  } else { // mmdb id
262  unsigned long tmp;
263  if (wxString(uid.c_str()).ToULong(&tmp)) {
 // NOTE(review): unchecked narrowing from unsigned long to int; confirm that ids
 // larger than INT_MAX cannot occur or should be rejected
264  mmdbID = (int) tmp;
265  } else {
266  ERRORMSG("LoadStructureViaCache() - invalid uid " << uid);
267  return NULL;
268  }
269  TRACEMSG("Fetching MMDB " << mmdbID);
270  }
271 
272  // try loading from local cache folder first, if cache enabled in registry (but only with known mmdb id)
273  bool cacheEnabled;
274  CNcbi_mime_asn1 *mime = NULL;
275  if (mmdbID > 0 &&
 // NOTE(review): listing line 276 is missing here; presumably it reads the
 // cache-enabled flag from the registry into cacheEnabled -- confirm.
277  cacheEnabled)
278  mime = GetStructureFromCacheFolder(mmdbID, modelType);
279 
280  // otherwise, load via HTTP (and save in cache folder)
281  if (!mime)
282  mime = GetStructureViaHTTPAndAddToCache(uid, mmdbID, modelType, assemblyId);
283 
284  return mime;
285 }
286 
// Convenience overload: loads the structure through the pointer-returning
// LoadStructureViaCache, holds the result in a CRef, and on success splits it
// into biostruc and (when sequences is non-null) the list of Bioseqs.
// Returns false when nothing could be loaded or when ExtractBiostrucAndBioseqs
// rejects the loaded data.
287 bool LoadStructureViaCache(const std::string& uid, ncbi::objects::EModel_type modelType, int assemblyId,
288  CRef < CBiostruc >& biostruc, BioseqRefList *sequences)
289 {
 // the CRef takes over the raw pointer returned by the other overload (may be NULL)
290  CRef < CNcbi_mime_asn1 > mime(LoadStructureViaCache(uid, modelType, assemblyId));
291 
292  // debugging
293  // string errStr;
294  // WriteASNToFile("mime_data.txt", mime.GetObject(), false, &errStr);
295 
 // NotEmpty() is tested first, so *mime is only dereferenced when a structure was loaded
296  return (mime.NotEmpty() && ExtractBiostrucAndBioseqs(*mime, biostruc, sequences));
297 }
298 
// Shrinks the cache folder until the combined size of its files is no more than
// maxSize megabytes, deleting the file with the oldest modification time first.
// A maxSize of 0 empties the folder. Logs a warning and returns without doing
// anything when the cache folder cannot be found.
299 void TruncateCache(unsigned int maxSize)
300 {
301  string cacheFolder;
 // NOTE(review): listing line 302 is missing here; presumably it opens the `if`
 // with the registry lookup of the cache folder into cacheFolder -- confirm.
303  !wxDirExists(cacheFolder.c_str())) {
304  WARNINGMSG("can't find cache folder");
305  return;
306  }
307  INFOMSG("truncating cache to " << maxSize << " MB");
308 
 // wildcard pattern matching every entry directly inside the cache folder
309  wxString cacheFolderFiles;
310  cacheFolderFiles.Printf("%s%c*", cacheFolder.c_str(), wxFILE_SEP_PATH);
311 
312  // empty directory if maxSize <= 0
 // (maxSize is unsigned, so this test is effectively maxSize == 0)
313  if (maxSize <= 0) {
314  wxString f;
 // NOTE(review): the search is restarted on every pass; if wxRemoveFile keeps failing
 // for the first match this loop presumably never terminates -- confirm
315  while ((f=wxFindFirstFile(cacheFolderFiles, wxFILE)).size() > 0) {
316  if (!wxRemoveFile(f))
317  WARNINGMSG("can't remove file " << f);
318  }
319  return;
320  }
321 
322  // otherwise, add up file sizes and keep deleting oldest until total size <= max
323  unsigned long totalSize = 0;
324  wxString oldestFileName;
325  do {
326 
327  // if totalSize > 0, then we've already scanned the folder and know it's too big,
328  // so delete oldest file
 // NOTE(review): a failed removal is only warned about, so the same file would
 // presumably be picked again on the next scan -- confirm the loop can still end
329  if (totalSize > 0 && !wxRemoveFile(oldestFileName))
330  WARNINGMSG("can't remove file " << oldestFileName);
331 
332  // loop through files, finding oldest and calculating total size
333  totalSize = 0;
 // seeded with the current time, so only files modified before "now" can be chosen
334  time_t oldestFileDate = wxDateTime::GetTimeNow(), date;
335  wxString file = wxFindFirstFile(cacheFolderFiles, wxFILE);
336  for (; file.size() > 0; file = wxFindNextFile()) {
337  date = wxFileModificationTime(file);
338  if (date < oldestFileDate) {
339  oldestFileDate = date;
340  oldestFileName = file;
341  }
 // each file is opened only to query its length
342  wxFile wx_file(file, wxFile::read);
343  if (wx_file.IsOpened()) {
 // NOTE(review): Length() is added without checking for an error return -- confirm
344  totalSize += wx_file.Length();
345  wx_file.Close();
346  } else
347  WARNINGMSG("wxFile failed to open " << file);
348  }
349  INFOMSG("total size: " << totalSize << " oldest file: " << oldestFileName.c_str());
350 
 // NOTE(review): maxSize * 1024 * 1024 is evaluated in unsigned int and would wrap
 // for maxSize >= 4096 where unsigned int is 32 bits -- confirm the expected range
351  } while (totalSize > maxSize * 1024 * 1024);
352 }
353 
354 END_SCOPE(Cn3D)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define static
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool GetAsnDataViaHTTPS(const std::string &host, const std::string &path, const std::string &args, ASNClass *asnObject, std::string *err, bool binaryData=true)
Definition: asn_reader.hpp:182
void TruncateCache(unsigned int maxSize)
Definition: cn3d_cache.cpp:299
USING_SCOPE(objects)
static CNcbi_mime_asn1 * GetStructureFromCacheFolder(int mmdbID, EModel_type modelType)
Definition: cn3d_cache.cpp:119
bool ExtractBiostrucAndBioseqs(CNcbi_mime_asn1 &mime, CRef< CBiostruc > &biostruc, BioseqRefList *sequences)
Definition: cn3d_cache.cpp:99
static CNcbi_mime_asn1 * GetStructureViaHTTPAndAddToCache(const string &uid, int mmdbID, EModel_type modelType, int assemblyId=0)
Definition: cn3d_cache.cpp:147
static string GetCacheFilePath(int mmdbID, EModel_type modelType)
Definition: cn3d_cache.cpp:66
static void ExtractBioseqs(list< CRef< CSeq_entry > > &seqEntries, BioseqRefList *sequences)
Definition: cn3d_cache.cpp:88
static bool CreateCacheFolder(void)
Definition: cn3d_cache.cpp:78
CNcbi_mime_asn1 * LoadStructureViaCache(const std::string &uid, ncbi::objects::EModel_type modelType, int assemblyId)
Definition: cn3d_cache.cpp:255
USING_NCBI_SCOPE
Definition: cn3d_cache.cpp:60
std::list< ncbi::CRef< ncbi::objects::CBioseq > > BioseqRefList
Definition: cn3d_cache.hpp:61
bool RegistryGetInteger(const string &section, const string &name, int *value)
Definition: cn3d_tools.cpp:228
bool RegistryGetBoolean(const string &section, const string &name, bool *value)
Definition: cn3d_tools.cpp:250
bool RegistryGetString(const string &section, const string &name, string *value)
Definition: cn3d_tools.cpp:263
#define TRACEMSG(stream)
Definition: cn3d_tools.hpp:83
#define INFOMSG(stream)
Definition: cn3d_tools.hpp:84
static const std::string REG_CACHE_MAX_SIZE
Definition: cn3d_tools.hpp:189
static const std::string REG_CACHE_FOLDER
Definition: cn3d_tools.hpp:188
static const std::string REG_CACHE_ENABLED
Definition: cn3d_tools.hpp:187
#define WARNINGMSG(stream)
Definition: cn3d_tools.hpp:85
#define ERRORMSG(stream)
Definition: cn3d_tools.hpp:86
static const std::string REG_CACHE_SECTION
Definition: cn3d_tools.hpp:186
Include a standard set of the NCBI C++ Toolkit most basic headers.
static bool ReadASNFromFile(const char *filename, ASNClass *ASNobject, bool isBinary, std::string *err)
static bool WriteASNToFile(const char *filename, const ASNClass &ASNobject, bool isBinary, std::string *err, ncbi::EFixNonPrint fixNonPrint=ncbi::eFNP_Default)
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
Definition: ncbidiag.cpp:6129
@ eDiag_Info
Informational message.
Definition: ncbidiag.hpp:651
@ eDiag_Fatal
Fatal error – guarantees exit (or abort)
Definition: ncbidiag.hpp:655
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
TObjectType * Release(void)
Release a reference to the object and return a pointer to the object.
Definition: ncbiobj.hpp:846
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5083
EModel_type
Access to EModel_type's attributes (values, names) as defined in spec.
Definition: Model_type_.hpp:63
@ eModel_type_ncbi_all_atom
Definition: Model_type_.hpp:66
@ eModel_type_ncbi_backbone
Definition: Model_type_.hpp:65
@ eModel_type_ncbi_vector
Definition: Model_type_.hpp:64
@ eModel_type_pdb_model
Definition: Model_type_.hpp:67
void SetStructure(TStructure &value)
Assign a value to Structure data member.
TStrucseq & SetStrucseq(void)
Select the variant.
bool IsStrucseq(void) const
Check if variant Strucseq is selected.
TSequences & SetSequences(void)
Assign a value to Sequences data member.
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
FILE * file
const struct ncbi::grid::netcache::search::fields::SIZE size
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static char tmp[2048]
Definition: utf8.c:42
Modified on Sat Dec 09 04:48:12 2023 by modify_doxy.py rev. 669887