NCBI C++ ToolKit
snp_client.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: snp_client.cpp 101025 2023-10-17 16:37:22Z vasilche $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Aleksey Grichenko, Eugene Vasilchenko
27  *
28  * File Description: client for reading SNP data
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include "snp_client.hpp"
35 #include "pubseq_gateway.hpp"
43 #include <objects/seq/seq__.hpp>
45 
46 
50 
52 
53 
55 
56 // Blob id:
57 // sat = 2001-2999 : SNP NA version 1 - 999
58 // or, for primary SNP track:
59 // sat = 3001-3999 : SNP NA version 1 - 999
60 // subsat : NA accession number
61 // or, for primary SNP graph track:
62 // NA accession number + kSNPSubSatGraph(=1000000000)
63 // satkey : SequenceIndex + 1000000*FilterIndex;
64 // satkey bits 24-30:
65 
66 
// Base added to the NA version to form "sat" for regular SNP tracks.
67 const int kSNPSatBase = 2000;
// Base added to the NA version to form "sat" for the primary SNP track;
// FromSatString() treats any sat >= this value as a primary track.
68 const int kSNPSatPrimary = 3000;
// Offset added to the NA index to form "subsat" for the primary SNP graph track.
69 const int kSNPSubSatGraph = 1000000000;
// Inclusive range of NA versions accepted by CSNPBlobId::IsValidNAVersion().
70 const int kNAVersionMin = 1;
71 const int kNAVersionMax = 999;
// Exclusive upper bound for a sequence index; also the multiplier/modulus that
// separates SequenceIndex from FilterIndex inside "satkey".
72 const int kSeqIndexCount = 1000000;
// Exclusive upper bound for a filter index (see CSNPBlobId::IsValidFilterIndex()).
73 const int kFilterIndexCount = 2000;
// Maximum number of decimal digits accepted for a filter index suffix
// by s_ExtractFilterIndex().
74 const int kFilterIndexMaxLength = 4;
75 
76 
77 static string s_GetAccVer(const CSeq_id_Handle& id)
78 {
79  string ret;
80  CConstRef<CSeq_id> seq_id(id.GetSeqId());
81  if (!seq_id) return ret;
82  CConstRef<CTextseq_id> text_id(id.GetSeqId()->GetTextseq_Id());
83  if (!text_id) return ret;
84  if (text_id->CanGetAccession() && text_id->CanGetVersion() &&
85  !text_id->GetAccession().empty() && text_id->GetVersion() > 0) {
86  ret = text_id->GetAccession() + '.' + NStr::NumericToString(text_id->GetVersion());
87  }
88  return ret;
89 }
90 
91 
// Separator written by CSNPBlobId::ToString() between the accession/filter
// part and the Seq-id of a non-Sat blob id; FromString() splits on it.
92 static const char kFileEnd[] = "|||";
// Character that introduces the one-based filter index suffix
// (see s_ExtractFilterIndex() and s_AddFilterIndex()).
93 static const char kFilterPrefixChar = '#';
94 
95 
96 static size_t s_ExtractFilterIndex(string& s)
97 {
98  size_t size = s.size();
99  size_t pos = size;
100  while (pos && isdigit(s[pos - 1])) {
101  --pos;
102  }
103  size_t num_len = size - pos;
104  if (!num_len || num_len > kFilterIndexMaxLength ||
105  !pos || s[pos] == '0' || s[pos - 1] != kFilterPrefixChar) {
106  return 0;
107  }
108  size_t index = NStr::StringToNumeric<size_t>(s.substr(pos));
109  if (!CSNPBlobId::IsValidFilterIndex(index)) {
110  return 0;
111  }
112  // internally filter index is zero-based, but in accession it's one-based
113  --index;
114  // remove filter index from accession
115  s.resize(pos - 1);
116  return index;
117 }
118 
119 
120 static string s_AddFilterIndex(const string& s, size_t filter_index)
121 {
123  str << s << kFilterPrefixChar << (filter_index + 1);
125 }
126 
127 
// Allocate a new object, append a holder for it to 'container', and return
// a reference to the new object. The container's value_type must expose a
// TObjectType typedef, be constructible from a raw TObjectType pointer, and
// be dereferenceable.
template<class Container>
typename Container::value_type::TObjectType& s_AddObject(Container& container)
{
    using TRef = typename Container::value_type;
    using TObject = typename TRef::TObjectType;

    TRef new_ref(new TObject);
    container.push_back(new_ref);
    return *new_ref;
}
135 
136 
138 {
139  unique_ptr<CSNPBlobId> blob_id;
140  int split_version = 0;
141 
142  SParsedId2Info(const string& str) {
143  try {
144  string id2info = str;
145  size_t pos = id2info.find_last_of(".");
146  if (pos == NPOS || pos + 1 >= id2info.size()) return;
147  split_version = NStr::StringToNumeric<int>(id2info.substr(pos + 1));
148  id2info.resize(pos);
149  pos = id2info.find_last_of(".");
150  if (pos == NPOS) return;
151  NStr::StringToNumeric<int>(id2info.substr(pos + 1)); // tse-version is always 0
152  id2info.resize(pos);
153  blob_id.reset(new CSNPBlobId(id2info));
154  }
155  catch (...) {
156  split_version = 0;
157  }
158  }
159 };
160 
161 
163 
164 
165 /////////////////////////////////////////////////////////////////////////////
166 // CSNPBlobId
167 /////////////////////////////////////////////////////////////////////////////
168 
169 
171 {
172  FromString(str);
173 }
174 
175 
// Construct a non-Sat blob id from a file's accession and a Seq-id handle.
// NA index and version are zero, so IsSatId() (m_NAIndex != 0) is false and
// the id is carried by m_Accession and m_SeqId instead.
// Note: filter_index is stored as given (converted to Uint4); unlike the
// constructors that call SetSeqAndFilterIndex(), it is not asserted valid here.
176 CSNPBlobId::CSNPBlobId(const CSNPFileInfo& file, const CSeq_id_Handle& seq_id, size_t filter_index)
177  : m_NAIndex(0),
178  m_NAVersion(0),
179  m_IsPrimaryTrack(false),
180  m_IsPrimaryTrackGraph(false),
181  m_SeqIndex(0),
182  m_FilterIndex(Uint4(filter_index)),
183  m_Accession(file.GetAccession()),
184  m_SeqId(seq_id)
185 {
186  // non-SatId
187 }
188 
189 
// Construct a blob id from a file and a sequence index within it.
// When the file reports a valid NA accession the id becomes a Sat id: the NA
// index and version are taken from the accession by SetSatNA(). Otherwise the
// accession is stored verbatim and the id stays non-Sat.
// SetSeqAndFilterIndex() then asserts both indexes are in range and stores
// them again (the initializer list has already set the same values).
190 CSNPBlobId::CSNPBlobId(const CSNPFileInfo& file, size_t seq_index, size_t filter_index)
191  : m_NAIndex(0),
192  m_NAVersion(0),
193  m_IsPrimaryTrack(false),
194  m_IsPrimaryTrackGraph(false),
195  m_SeqIndex(Uint4(seq_index)),
196  m_FilterIndex(Uint4(filter_index))
197 {
198  if (file.IsValidNA()) {
199  SetSatNA(file.GetAccession());
200  }
201  else {
202  // non-SatId
203  m_Accession = file.GetAccession();
204  }
205  SetSeqAndFilterIndex(seq_index, filter_index);
206 }
207 
208 
209 CSNPBlobId::CSNPBlobId(const CSNPDbSeqIterator& seq, size_t filter_index)
210 {
211  SetSatNA(seq.GetDb().GetDbPath());
212  SetSeqAndFilterIndex(seq.GetVDBSeqIndex(), filter_index);
213 }
214 
215 
217 {
218 }
219 
220 
221 bool CSNPBlobId::IsValidNAIndex(size_t na_index)
222 {
223  return na_index > 0 && na_index < 1000000000;
224 }
225 
226 
227 bool CSNPBlobId::IsValidNAVersion(size_t na_version)
228 {
229  return na_version >= kNAVersionMin && na_version <= kNAVersionMax;
230 }
231 
232 
233 bool CSNPBlobId::IsValidSeqIndex(size_t seq_index)
234 {
235  return seq_index < kSeqIndexCount;
236 }
237 
238 
239 bool CSNPBlobId::IsValidFilterIndex(size_t filter_index)
240 {
241  return filter_index < kFilterIndexCount;
242 }
243 
244 
245 void CSNPBlobId::SetNAIndex(size_t na_index)
246 {
247  _ASSERT(IsValidNAIndex(na_index));
248  m_NAIndex = Uint4(na_index);
249 }
250 
251 
253 {
254  return IsValidNAIndex(GetNAIndex());
255 }
256 
257 
258 int CSNPBlobId::GetSatBase(void) const
259 {
261 }
262 
263 
265 {
266  return IsPrimaryTrackGraph() ? kSNPSubSatGraph : 0;
267 }
268 
269 
270 void CSNPBlobId::SetNAVersion(size_t na_version)
271 {
272  _ASSERT(IsValidNAVersion(na_version));
273  m_NAVersion = Uint2(na_version);
274 }
275 
276 
277 bool CSNPBlobId::IsSatId(void) const
278 {
279  return m_NAIndex != 0;
280 }
281 
282 
284 {
286  return Int4(GetSatBase() + GetNAVersion());
287 }
288 
289 
291 {
293  return Int4(GetSubSatBase() + GetNAIndex());
294 }
295 
296 
298 {
302 }
303 
304 
305 bool CSNPBlobId::IsValidSat(void) const
306 {
307  return IsValidNAVersion(GetNAVersion());
308 }
309 
310 
311 pair<size_t, size_t> CSNPBlobId::ParseNA(CTempString acc)
312 {
313  pair<size_t, size_t> ret(0, 0);
314  // NA123456789.1
315  if (acc.size() < 13 || acc.size() > 15 ||
316  acc[0] != 'N' || acc[1] != 'A' || acc[11] != '.') {
317  return ret;
318  }
319  size_t na_index = NStr::StringToNumeric<size_t>(acc.substr(2, 9),
321  if (!IsValidNAIndex(na_index)) {
322  return ret;
323  }
324  size_t na_version = NStr::StringToNumeric<size_t>(acc.substr(12),
326  if (!IsValidNAVersion(na_version)) {
327  return ret;
328  }
329  ret.first = na_index;
330  ret.second = na_version;
331  return ret;
332 }
333 
334 
335 string CSNPBlobId::GetSatNA(void) const
336 {
338  str << "NA" << setw(9) << setfill('0') << GetNAIndex()
339  << '.' << GetNAVersion();
341 }
342 
343 
345 {
346  pair<size_t, size_t> na = ParseNA(acc);
347  SetNAIndex(na.first);
348  SetNAVersion(na.second);
349 }
350 
351 
352 void CSNPBlobId::SetSeqAndFilterIndex(size_t seq_index,
353  size_t filter_index)
354 {
355  _ASSERT(IsValidSeqIndex(seq_index));
356  _ASSERT(IsValidFilterIndex(filter_index));
357  m_SeqIndex = Uint4(seq_index);
358  m_FilterIndex = Uint4(filter_index);
359 }
360 
361 
363 {
364  return IsValidSeqIndex(GetSeqIndex()) &&
366 }
367 
368 
370 {
371  _ASSERT(!IsSatId());
372  return m_SeqId;
373 }
374 
375 
376 string CSNPBlobId::GetAccession(void) const
377 {
378  if (m_Accession.empty()) {
379  return GetSatNA();
380  }
381  else {
382  return m_Accession;
383  }
384 }
385 
386 
388 {
390  m_IsPrimaryTrack = true;
391  m_IsPrimaryTrackGraph = false;
392 }
393 
394 
396 {
398  m_IsPrimaryTrack = true;
399  m_IsPrimaryTrackGraph = true;
400 }
401 
402 
403 string CSNPBlobId::ToString(void) const
404 {
406  if (IsSatId()) {
407  out << GetSat() << '/' << GetSubSat() << '.' << GetSatKey();
408  }
409  else {
410  out << m_Accession;
411  out << kFilterPrefixChar << (GetFilterIndex() + 1);
412  out << kFileEnd << m_SeqId;
413  }
415 }
416 
417 
419 {
420  if (str.empty() || !isdigit(Uchar(str[0]))) {
421  return false;
422  }
423 
424  size_t dot1 = str.find('/');
425  if (dot1 == NPOS) {
426  return false;
427  }
428  size_t dot2 = str.find('.', dot1 + 1);
429  if (dot2 == NPOS) {
430  return false;
431  }
432  size_t sat = NStr::StringToNumeric<size_t>(str.substr(0, dot1),
434  bool is_primary_track = sat >= kSNPSatPrimary;
435  size_t na_version = sat - (is_primary_track ? kSNPSatPrimary : kSNPSatBase);
436  if (!IsValidNAVersion(na_version)) {
437  return false;
438  }
439  size_t subsat = NStr::StringToNumeric<size_t>(str.substr(dot1 + 1, dot2 - dot1 - 1),
441  bool is_primary_track_graph = is_primary_track && subsat >= kSNPSubSatGraph;
442  size_t na_index = subsat - (is_primary_track_graph ? kSNPSubSatGraph : 0);
443  if (!IsValidNAIndex(na_index)) {
444  return false;
445  }
446 
447  size_t satkey = NStr::StringToNumeric<size_t>(str.substr(dot2 + 1),
449  size_t seq_index = satkey % kSeqIndexCount;
450  size_t filter_index = satkey / kSeqIndexCount;
451  if (!IsValidSeqIndex(seq_index) || !IsValidFilterIndex(filter_index)) {
452  return false;
453  }
454 
455  m_NAIndex = Uint4(na_index);
456  m_NAVersion = Uint2(na_version);
457  m_SeqIndex = Uint4(seq_index);
458  m_FilterIndex = Uint4(filter_index);
459  m_IsPrimaryTrack = is_primary_track;
460  m_IsPrimaryTrackGraph = is_primary_track_graph;
461  m_Accession.clear();
462  m_SeqId.Reset();
463 
464  _ASSERT(IsSatId());
465  return true;
466 }
467 
468 
470 {
471  if (FromSatString(str)) {
472  return;
473  }
474  m_NAIndex = 0;
475  m_NAVersion = 0;
476  m_SeqIndex = 0;
477  m_FilterIndex = 0;
478  m_IsPrimaryTrack = false;
479  m_IsPrimaryTrackGraph = false;
480  m_Accession.clear();
481  m_SeqId.Reset();
482  _ASSERT(!IsSatId());
483 
484  SIZE_TYPE div = str.rfind(kFileEnd);
485  if (div == NPOS) {
486  NCBI_THROW_FMT(CSraException, eOtherError,
487  "Bad CSNPBlobId: " << str);
488  }
489  m_Accession = str.substr(0, div);
490  m_SeqId = CSeq_id_Handle::GetHandle(str.substr(div + strlen(kFileEnd)));
492 }
493 
494 
495 /////////////////////////////////////////////////////////////////////////////
496 // CSNPSeqInfo
497 /////////////////////////////////////////////////////////////////////////////
498 
499 
501  : m_File(file),
502  m_SeqIndex(it.GetVDBSeqIndex()),
503  m_FilterIndex(0),
504  m_IsPrimaryTrack(false),
505  m_IsPrimaryTrackGraph(false)
506 {
507  if (!file->IsValidNA()) {
508  m_SeqId = it.GetSeqIdHandle();
509  }
510 }
511 
512 
514 {
515  _ASSERT(m_File);
516  if (!m_SeqId) {
518  }
520 }
521 
522 
523 void CSNPSeqInfo::SetFilterIndex(size_t filter_index)
524 {
525  if (!CSNPBlobId::IsValidFilterIndex(filter_index)) {
526  filter_index = 0;
527  }
528  m_FilterIndex = filter_index;
529 }
530 
531 
533 {
534  SetFilterIndex(blob_id.GetFilterIndex());
535  m_IsPrimaryTrack = blob_id.IsPrimaryTrack();
537 }
538 
539 
541 {
543  if (!m_SeqId) {
545  }
546  else {
548  }
549  if (m_FilterIndex) {
551  }
552  return it;
553 }
554 
555 
556 string CSNPSeqInfo::GetAnnotName(void) const
557 {
558  // primary SNP track features have hard-coded name from EADB
559  if (m_IsPrimaryTrack) {
560  return "SNP";
561  }
562  else {
564  }
565 }
566 
567 
568 void CSNPSeqInfo::LoadBlob(SSNPData& data, bool split_enabled)
569 {
570  CRef<CSeq_entry> tse;
571  CRef<CID2S_Split_Info> split_info;
572 
574  string base_name = GetAnnotName();
575  CSNPDbSeqIterator::TFlags flags = CSNPDbSeqIterator::fDefaultFlags;
576  if ( m_IsPrimaryTrack ) {
577  // primary track has graphs in a separate TSE
578  if ( m_IsPrimaryTrackGraph ) {
580  }
581  else {
583  }
584  }
585  if (split_enabled) {
586  auto split = it.GetSplitInfoAndVersion(base_name, flags);
587  data.m_SplitInfo = split.first;
588  data.m_SplitVersion = split.second;
589  }
590  else {
591  data.m_TSE = it.GetEntry(base_name, flags);
592  }
593 }
594 
596 {
597  string base_name = GetAnnotName();
599  data.m_Chunk = it.GetChunkForVersion(base_name, chunk_id, data.m_SplitVersion);
600 }
601 
602 
603 /////////////////////////////////////////////////////////////////////////////
604 // CSNPFileInfo
605 /////////////////////////////////////////////////////////////////////////////
606 
607 
609  : m_IsValidNA(false),
610  m_RemainingOpenRetries(client.m_Config.m_FileOpenRetry)
611 {
612  x_Initialize(client, acc);
613 }
614 
615 
616 void CSNPFileInfo::x_Initialize(CSNPClient& client, const string& csra)
617 {
618  m_FileName = csra;
622  if (!m_IsValidNA) {
623  // remove directory part, if any
624  SIZE_TYPE sep = m_Accession.find_last_of("/\\");
625  if (sep != NPOS) {
626  m_Accession.erase(0, sep + 1);
627  }
628  }
629  m_AnnotName = client.m_Config.m_AnnotName;
630  if (m_AnnotName.empty()) {
632  }
633 }
634 
635 
636 string CSNPFileInfo::GetSNPAnnotName(size_t filter_index) const
637 {
638  return s_AddFilterIndex(GetBaseAnnotName(), filter_index);
639 }
640 
641 
643 {
644  CRef<CSNPSeqInfo> ret;
645  CSNPDbSeqIterator seq_it(m_SNPDb, seq_id);
646  if (seq_it) {
647  ret = new CSNPSeqInfo(this, seq_it);
648  }
649  return ret;
650 }
651 
652 
654 {
655  CSNPDbSeqIterator seq_it(m_SNPDb, seq_index);
656  _ASSERT(seq_it);
657  CRef<CSNPSeqInfo> ret(new CSNPSeqInfo(this, seq_it));
658  return ret;
659 }
660 
661 
663 {
664  CRef<CSNPSeqInfo> ret;
665  if (blob_id.IsSatId()) {
666  ret = GetSeqInfo(blob_id.GetSeqIndex());
667  }
668  else {
669  ret = GetSeqInfo(blob_id.GetSeqId());
670  }
671  if (ret) {
672  ret->SetFromBlobId(blob_id);
673  }
674  return ret;
675 }
676 
677 
678 /////////////////////////////////////////////////////////////////////////////
679 // CSNPClient
680 /////////////////////////////////////////////////////////////////////////////
681 
682 
684 {
685  // If resolving is enabled, do not check ids now.
686  if (request.m_SeqIdResolve &&
687  (!request.m_SeqId.empty() || !request.m_SeqIds.empty())) return true;
688  if (!request.m_SeqId.empty()) {
689  try {
690  if (IsValidSeqId(CSeq_id_Handle::GetHandle(request.m_SeqId))) return true;
691  }
692  catch (exception& e) {}
693  }
694  for (auto& id : request.m_SeqIds) {
695  try {
696  if (IsValidSeqId(CSeq_id_Handle::GetHandle(id))) return true;
697  }
698  catch (exception& e) {}
699  }
700  return false;
701 }
702 
703 
705 
706 bool CSNPClient::IsValidSeqId(const CSeq_id_Handle& idh) const
707 {
708  if (!idh) return false;
709  try {
710  // check type
712  // check version
713  if ( idh.IsAccVer() ) {
714  // fully qualified refseq seq-id (Seq-id.other)
715  return true;
716  }
717  }
718  }
719  catch (...) {
720  }
721  if (m_Config.m_AllowNonRefSeq) return true;
722  return false;
723 }
724 
725 
726 bool CSNPClient::IsValidSeqId(const string& id, int id_type, int version) const
727 {
728  if ( id.empty() ) return false;
729  // preliminary check type
730  if ( id_type == CSeq_id::e_Other) {
731  try {
732  CSeq_id seq_id(id);
733  // check type
734  if ( (seq_id.IdentifyAccession() & kRefSeqAccFlags) == kRefSeqAccFlags ) {
735  // check version
736  if ( auto text_id = seq_id.GetTextseq_Id() ) {
737  if ( text_id->IsSetVersion() || version > 0 ) {
738  // fully qualified refseq seq-id (Seq-id.other)
739  return true;
740  }
741  }
742  }
743  }
744  catch (...) {
745  }
746  }
747  if (m_Config.m_AllowNonRefSeq) return true;
748  return false;
749 }
750 
751 
753  : m_Config(config),
754  m_Mgr(new CVDBMgr),
755  m_SNPDbCache(config.m_GCSize, config.m_FileReopenTime, config.m_FileRecheckTime)
756 {
757  if (m_Config.m_AddPTIS) {
759  }
760 }
761 
762 
764 {
765 }
766 
767 
769 {
771  {{
772  CRef<CSNPFileInfo> delete_info; // delete stale file info after releasing mutex
773  auto slot = m_SNPDbCache.GetSlot(acc);
774  TSNPDbCache::CSlot::TSlotMutex::TWriteLockGuard guard(slot->GetSlotMutex());
775  info = slot->GetObject<CSNPFileInfo>();
776  if ( info && slot->IsExpired(m_SNPDbCache, acc) ) {
777  PSG_INFO("PSGS_SNP: GetFileInfo: opened " << acc << " has expired");
778  slot->ResetObject();
779  delete_info.Swap(info);
780  }
781  if ( !info ) {
782  info = new CSNPFileInfo(*this, acc);
783  slot->UpdateExpiration(m_SNPDbCache, acc);
784  slot->SetObject(info);
785  }
786  if ( !info->m_SNPDb && info->m_RemainingOpenRetries > 0 ) {
787  try {
788  --info->m_RemainingOpenRetries;
789  psg_time_point_t start = psg_clock_t::now();
790  info->m_SNPDb = CSNPDb(*m_Mgr, info->m_FileName);
792  }
793  catch ( CSraException& exc ) {
794  if ( exc.GetErrCode() == exc.eNotFoundDb ||
795  exc.GetErrCode() == exc.eProtectedDb ) {
796  // no such SNP table
797  info->m_RemainingOpenRetries = 0; // no more opening retries
798  }
799  else {
800  // problem in VDB or WGS reader
801  PSG_ERROR("PSGS_SNP: Exception while opening SNP DB " << acc << ": " << exc);
802  if ( info->m_RemainingOpenRetries > 0 ) {
803  throw;
804  }
805  else {
806  // assume the file is not SNP file
807  PSG_ERROR("PSGS_SNP: assume DB " << acc << " is not SNP");
808  }
809  }
810  }
811  catch ( CException& exc ) {
812  // problem in VDB or WGS reader
813  PSG_ERROR("PSGS_SNP: Exception while opening SNP DB " << acc << ": " << exc);
814  if ( info->m_RemainingOpenRetries > 0 ) {
815  throw;
816  }
817  else {
818  // assume the file is not SNP file
819  PSG_ERROR("PSGS_SNP: assume DB " << acc << " is not SNP");
820  }
821  }
822  catch ( exception& exc ) {
823  // problem in VDB or WGS reader
824  PSG_ERROR("PSGS_SNP: Exception while opening SNP DB " << acc << ": " << exc.what());
825  if ( info->m_RemainingOpenRetries > 0 ) {
826  throw;
827  }
828  else {
829  // assume the file is not SNP file
830  PSG_ERROR("PSGS_SNP: assume DB " << acc << " is not SNP");
831  }
832  }
833  }
834  }}
835  if ( !info->m_SNPDb ) {
836  return null;
837  }
838  return info;
839 }
840 
841 
843 {
844  return GetFileInfo(blob_id.GetAccession())->GetSeqInfo(blob_id);
845 }
846 
847 
848 vector<string> CSNPClient::WhatNACanProcess(SPSGS_AnnotRequest& annot_request,
849  TProcessorPriority priority) const
850 {
851  vector<string> can_process;
852  if (HaveValidSeq_id(annot_request)) {
853  for (const auto& name : (priority==-1?
854  annot_request.m_Names:
855  annot_request.GetNotProcessedName(priority)) ) {
856  if (m_Config.m_AddPTIS && name == "SNP") {
857  can_process.push_back(name);
858  continue;
859  }
860  string acc = name;
861  size_t filter_index = s_ExtractFilterIndex(acc);
862  if (filter_index == 0 && acc.size() == name.size()) {
863  // filter specification is required
864  continue;
865  }
866  if (CSNPBlobId::IsValidNA(acc)) {
867  can_process.push_back(name);
868  }
869  }
870  }
871  return can_process;
872 }
873 
874 
876 {
877  switch (request.GetRequestType()) {
879  return !WhatNACanProcess(request.GetRequest<SPSGS_AnnotRequest>(), priority).empty();
880  }
883  try {
884  CSNPBlobId blob_id(blob_request.m_BlobId.GetId());
885  }
886  catch (CSraException&) {
887  // Not a valid SNP blob id
888  return false;
889  }
890  return true;
891  }
893  SPSGS_TSEChunkRequest& chunk_request = request.GetRequest<SPSGS_TSEChunkRequest>();
894  if (chunk_request.m_Id2Chunk < 0) return false;
895  SParsedId2Info parsed_info(chunk_request.m_Id2Info);
896  return parsed_info.blob_id.get();
897  }
898  default:
899  return false;
900  }
901  return false;
902 }
903 
904 
905 vector<SSNPData> CSNPClient::GetAnnotInfo(const CSeq_id_Handle& id,
906  const string& name, CSeq_id::ESNPScaleLimit scale_limit)
907 {
908  vector<SSNPData> ret;
909  try {
910  if (scale_limit == CSeq_id::eSNPScaleLimit_Default) {
911  scale_limit = m_Config.m_SNPScaleLimit;
912  }
913  if (!id.IsAllowedSNPScaleLimit(scale_limit)) return ret;
914  if (m_Config.m_AddPTIS && name == "SNP") {
915  // default SNP track
916  string acc_ver = s_GetAccVer(id);
917  if (acc_ver.empty()) {
918  return ret;
919  }
920  // find default SNP track
921  psg_time_point_t start = psg_clock_t::now();
922  string na_acc = m_PTISClient->GetPrimarySnpTrackForAccVer(acc_ver);
924  na_acc.empty()? eOpStatusNotFound: eOpStatusFound);
925  if (na_acc.empty()) {
926  // no default SNP track
927  return ret;
928  }
929  size_t filter_index = s_ExtractFilterIndex(na_acc);
931  if ( !info ) {
932  return ret; // should it be an error since PTIS says SNPs should exist
933  }
934  CRef<CSNPSeqInfo> seq = info->GetSeqInfo(id);
935  if ( !seq ) {
936  return ret; // should it be an error since PTIS says SNPs should exist
937  }
938  seq->SetFilterIndex(filter_index);
939  {
940  CSNPBlobId blob_id = seq->GetBlobId();
941  blob_id.SetPrimaryTrackFeat();
942  SSNPData data;
943  data.m_BlobId = blob_id.ToString();
944  data.m_Name = name;
945  data.m_AnnotInfo.push_back(x_GetFeatInfo(name, id));
946  ret.push_back(data);
947  }
948  {
949  CSNPBlobId blob_id = seq->GetBlobId();
950  blob_id.SetPrimaryTrackGraph();
951  SSNPData data;
952  data.m_BlobId = blob_id.ToString();
953  data.m_Name = name;
954  // add SNP overview graph type info
955  data.m_AnnotInfo.push_back(x_GetGraphInfo(name, id));
956  // add SNP graph type info
957  string graph_name = CSeq_annot::CombineWithZoomLevel(name, info->GetDb().GetCoverageZoom());
958  data.m_AnnotInfo.push_back(x_GetGraphInfo(graph_name, id));
959  ret.push_back(data);
960  }
961  }
962  else {
963  string acc = name;
964  size_t filter_index = s_ExtractFilterIndex(acc);
965  if (filter_index == 0 && acc.size() == name.size()) {
966  // filter specification is required
967  return ret;
968  }
970  if ( !info ) {
971  return ret;
972  }
973  CRef<CSNPSeqInfo> seq = info->GetSeqInfo(id);
974  if ( !seq ) {
975  return ret;
976  }
977  seq->SetFilterIndex(filter_index);
978  auto blob_id = seq->GetBlobId();
979  SSNPData data;
980  data.m_BlobId = blob_id.ToString();
981  data.m_Name = name;
982  data.m_AnnotInfo.push_back(x_GetFeatInfo(name, id));
983  string overview_name =
984  CSeq_annot::CombineWithZoomLevel(name, info->GetDb().GetOverviewZoom());
985  data.m_AnnotInfo.push_back(x_GetGraphInfo(overview_name, id));
986  string coverage_name =
987  CSeq_annot::CombineWithZoomLevel(name, info->GetDb().GetCoverageZoom());
988  data.m_AnnotInfo.push_back(x_GetGraphInfo(coverage_name, id));
989  ret.push_back(data);
990  }
991  }
992  catch ( exception& exc ) {
993  SSNPData data;
994  data.m_Name = name;
995  data.m_Error = "Exception when handling get_na request: " + string(exc.what());
996  ret.push_back(data);
997  }
998  return ret;
999 }
1000 
1001 
1003 {
1004  SSNPData ret;
1005  CSNPBlobId snp_blob_id(blob_id);
1006  GetSeqInfo(snp_blob_id)->LoadBlob(ret, m_Config.m_Split);
1007  return ret;
1008 }
1009 
1010 
1011 SSNPData CSNPClient::GetChunk(const string& id2info, int chunk_id)
1012 {
1013  SSNPData ret;
1014  SParsedId2Info parsed_info(id2info);
1015  if (!parsed_info.blob_id) return ret;
1016  ret.m_SplitVersion = parsed_info.split_version;
1017  GetSeqInfo(*parsed_info.blob_id)->LoadChunk(ret, chunk_id);
1018  return ret;
1019 }
1020 
1021 
1022 CRef<CID2S_Seq_annot_Info> CSNPClient::x_GetFeatInfo(const string& name, const objects::CSeq_id_Handle& id)
1023 {
1025  annot_info->SetName(name);
1026  auto& feat_type = s_AddObject(annot_info->SetFeat());
1027  feat_type.SetType(CSeqFeatData::e_Imp);
1028  feat_type.SetSubtypes().push_back(CSeqFeatData::eSubtype_variation);
1029  annot_info->SetSeq_loc().SetWhole_seq_id().Assign(*id.GetSeqId());
1030  return annot_info;
1031 }
1032 
1033 
1034 CRef<CID2S_Seq_annot_Info> CSNPClient::x_GetGraphInfo(const string& name, const objects::CSeq_id_Handle& id)
1035 {
1037  annot_info->SetName(name);
1038  annot_info->SetGraph();
1039  annot_info->SetSeq_loc().SetWhole_seq_id().Assign(*id.GetSeqId());
1040  return annot_info;
1041 }
1042 
1043 
1046  EPSGOperationStatus status)
1047 {
1049  GetTiming().Register(nullptr, operation, status, start, 0);
1050 }
1051 
1052 
User-defined methods of the data storage class.
CID2S_Seq_annot_Info –.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
EPSGS_Type GetRequestType(void) const
@ ePSGS_BlobBySatSatKeyRequest
TRequest & GetRequest(void)
static CPubseqGatewayApp * GetInstance(void)
Int4 GetSubSat(void) const
Definition: snp_client.cpp:290
void SetPrimaryTrackFeat()
Definition: snp_client.cpp:387
bool IsValidSatKey(void) const
Definition: snp_client.cpp:362
static bool IsValidSeqIndex(size_t seq_index)
Definition: snp_client.cpp:233
bool m_IsPrimaryTrackGraph
int GetSubSatBase(void) const
Definition: snp_client.cpp:264
bool IsValidSat(void) const
Definition: snp_client.cpp:305
size_t GetFilterIndex(void) const
string ToString(void) const
Get string representation of blob id.
Definition: snp_client.cpp:403
CSNPBlobId(const CTempString &str)
Definition: snp_client.cpp:170
static bool IsValidNAIndex(size_t index)
Definition: snp_client.cpp:221
static bool IsValidNAVersion(size_t version)
Definition: snp_client.cpp:227
void FromString(CTempString str)
Definition: snp_client.cpp:469
Int4 GetSat(void) const
Definition: snp_client.cpp:283
void SetSatNA(CTempString acc)
Definition: snp_client.cpp:344
void SetSeqAndFilterIndex(size_t seq_index, size_t filter_index)
Definition: snp_client.cpp:352
size_t GetNAVersion(void) const
static pair< size_t, size_t > ParseNA(CTempString acc)
Definition: snp_client.cpp:311
void SetNAIndex(size_t na_index)
Definition: snp_client.cpp:245
static bool IsValidNA(pair< size_t, size_t > na)
string m_Accession
Int4 GetSatKey(void) const
Definition: snp_client.cpp:297
void SetNAVersion(size_t na_version)
Definition: snp_client.cpp:270
size_t GetNAIndex(void) const
bool FromSatString(CTempString str)
Definition: snp_client.cpp:418
bool IsPrimaryTrackGraph() const
int GetSatBase(void) const
Definition: snp_client.cpp:258
bool IsValidSubSat(void) const
Definition: snp_client.cpp:252
~CSNPBlobId(void)
Definition: snp_client.cpp:216
bool IsPrimaryTrack() const
CSeq_id_Handle GetSeqId(void) const
Definition: snp_client.cpp:369
static bool IsValidFilterIndex(size_t filter_index)
Definition: snp_client.cpp:239
bool IsSatId(void) const
Definition: snp_client.cpp:277
string GetAccession(void) const
Definition: snp_client.cpp:376
string GetSatNA(void) const
Definition: snp_client.cpp:335
size_t GetSeqIndex(void) const
void SetPrimaryTrackGraph()
Definition: snp_client.cpp:395
CSeq_id_Handle m_SeqId
SSNPProcessor_Config m_Config
Definition: snp_client.hpp:337
bool CanProcessRequest(CPSGS_Request &request, TProcessorPriority priority) const
Definition: snp_client.cpp:875
SSNPData GetChunk(const string &id2info, int chunk_id)
void x_RegisterTiming(psg_time_point_t start, EPSGOperation operation, EPSGOperationStatus status)
~CSNPClient(void)
Definition: snp_client.cpp:763
bool IsValidSeqId(const objects::CSeq_id_Handle &idh) const
friend class CSNPFileInfo
Definition: snp_client.hpp:326
vector< SSNPData > GetAnnotInfo(const objects::CSeq_id_Handle &id, const string &name, objects::CSeq_id::ESNPScaleLimit scale_limit)
Definition: snp_client.cpp:905
CRef< objects::CSnpPtisClient > m_PTISClient
Definition: snp_client.hpp:339
CRef< objects::CID2S_Seq_annot_Info > x_GetGraphInfo(const string &name, const objects::CSeq_id_Handle &id)
CSNPClient(const SSNPProcessor_Config &config)
Definition: snp_client.cpp:752
CRef< CSNPSeqInfo > GetSeqInfo(const CSNPBlobId &blob_id)
Definition: snp_client.cpp:842
CRef< CSNPFileInfo > GetFileInfo(const string &acc)
Definition: snp_client.cpp:768
vector< string > WhatNACanProcess(SPSGS_AnnotRequest &annot_request, TProcessorPriority priority=0) const
Definition: snp_client.cpp:848
TSNPDbCache m_SNPDbCache
Definition: snp_client.hpp:340
shared_ptr< objects::CVDBMgr > m_Mgr
Definition: snp_client.hpp:338
bool HaveValidSeq_id(const SPSGS_AnnotRequest &request) const
Definition: snp_client.cpp:683
SSNPData GetBlobByBlobId(const string &blob_id)
CRef< objects::CID2S_Seq_annot_Info > x_GetFeatInfo(const string &name, const objects::CSeq_id_Handle &id)
pair< CRef< CID2S_Split_Info >, TSplitVersion > GetSplitInfoAndVersion(const string &base_name, TFlags flags=fDefaultFlags) const
Definition: snpread.cpp:2921
void SetTrack(const CSNPDbTrackIterator &track)
Definition: snpread.cpp:773
CSNPDb_Impl & GetDb(void) const
Definition: snpread.hpp:606
const CSeq_id_Handle & GetSeqIdHandle(void) const
Definition: snpread.hpp:503
CRef< CSeq_entry > GetEntry(const string &base_name, TFlags flags=fDefaultFlags) const
Definition: snpread.cpp:2866
CRef< CID2S_Chunk > GetChunkForVersion(const string &base_name, TChunkId chunk_id, TSplitVersion split_version) const
Definition: snpread.cpp:2994
size_t GetVDBSeqIndex(void) const
Definition: snpread.hpp:533
const string & GetDbPath(void) const
Definition: snpread.hpp:233
CRef< CSNPSeqInfo > GetSeqInfo(const CSeq_id_Handle &seq_id)
Definition: snp_client.cpp:642
const string & GetBaseAnnotName(void) const
void x_Initialize(CSNPDataLoader_Impl &impl, const string &file_name)
CSNPFileInfo(CSNPDataLoader_Impl &impl, const string &file_name)
string GetSNPAnnotName(size_t filter_index) const
Definition: snp_client.cpp:636
CSNPSeqInfo(CSNPFileInfo *file, const CSNPDbSeqIterator &it)
Definition: snp_client.cpp:500
bool m_IsPrimaryTrackGraph
CSNPDbSeqIterator GetSeqIterator(void) const
Definition: snp_client.cpp:540
size_t m_FilterIndex
void SetFromBlobId(const CSNPBlobId &blob_id)
Definition: snp_client.cpp:532
CSNPFileInfo * m_File
CSeq_id_Handle m_SeqId
void SetFilterIndex(size_t filter_index)
Definition: snp_client.cpp:523
void LoadBlob(SSNPData &data, bool split_enabled)
Definition: snp_client.cpp:568
string GetAnnotName(void) const
Definition: snp_client.cpp:556
CRef< CSNPBlobId > GetBlobId(void) const
Definition: snp_client.cpp:513
void LoadChunk(SSNPData &data, int chunk_id)
Definition: snp_client.cpp:595
static string CombineWithZoomLevel(const string &acc, int zoom_level)
Combine accession string and zoom level into a string with separator.
Definition: Seq_annot.cpp:254
static CRef< CSnpPtisClient > CreateClient()
Definition: snpptis.cpp:114
@ eProtectedDb
DB is protected.
Definition: exception.hpp:98
@ eNotFoundDb
DB main file not found.
Definition: exception.hpp:92
virtual TErrCode GetErrCode(void) const
Definition: sraread.cpp:163
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CRef< CSlot > GetSlot(const string &acc_or_path)
Definition: vdbcache.cpp:187
static uch flags
std::ofstream out("events_result.xml")
main entry point for tests
#define false
Definition: bool.h:36
static const char * str(char *buf, int n)
Definition: stats.c:84
char data[12]
Definition: iconv.c:80
string
Definition: cgiapp.hpp:687
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
Definition: ncbiexpt.hpp:719
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634
void Reset(void)
Reset the handle (remove seq-id reference)
CSeq_id::EAccessionInfo IdentifyAccession(void) const
bool IsAccVer(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
ESNPScaleLimit
SNP annotation scale limits.
Definition: Seq_id.hpp:847
@ fAcc_nuc
Definition: Seq_id.hpp:251
@ eSNPScaleLimit_Default
Definition: Seq_id.hpp:848
void Swap(TThisType &ref)
Swaps the pointer with another reference.
Definition: ncbiobj.hpp:754
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
unsigned char Uchar
Alias for unsigned char.
Definition: ncbitype.h:95
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define NPOS
Definition: ncbistr.hpp:133
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
Definition: tempstr.hpp:776
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
size_type size(void) const
Return the length of the represented array.
Definition: tempstr.hpp:327
@ fConvErr_NoThrow
Do not throw an exception on error.
Definition: ncbistr.hpp:285
CGuard< CRWLock, SSimpleWriteLock< CRWLock > > TWriteLockGuard
Definition: ncbimtx.hpp:934
operation
Bit operations.
Definition: bmconst.h:191
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
FILE * file
static MDB_envinfo info
Definition: mdb_load.c:37
static int version
Definition: mdb_load.c:29
constexpr bool empty(list< Ts... >) noexcept
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
const struct ncbi::grid::netcache::search::fields::SIZE size
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
void split(std::vector< std::string > *strVec, const std::string &str_, const std::string &split_)
const CConstRef< CSeq_id > GetAccession(const CSeq_id_Handle &id_handle)
#define PSG_ERROR(message)
#define PSG_INFO(message)
int TProcessorPriority
psg_clock_t::time_point psg_time_point_t
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
static CNamedPipeClient * client
USING_SCOPE(objects)
static const char kFileEnd[]
Definition: snp_client.cpp:92
END_NAMESPACE(wgs)
BEGIN_LOCAL_NAMESPACE
Definition: snp_client.cpp:54
static size_t s_ExtractFilterIndex(string &s)
Definition: snp_client.cpp:96
const int kFilterIndexCount
Definition: snp_client.cpp:73
Container::value_type::TObjectType & s_AddObject(Container &container)
Definition: snp_client.cpp:129
const int kSNPSatPrimary
Definition: snp_client.cpp:68
END_LOCAL_NAMESPACE
Definition: snp_client.cpp:162
const int kSNPSubSatGraph
Definition: snp_client.cpp:69
const int kFilterIndexMaxLength
Definition: snp_client.cpp:74
const int kNAVersionMin
Definition: snp_client.cpp:70
const int kSeqIndexCount
Definition: snp_client.cpp:72
END_NCBI_NAMESPACE
const int kSNPSatBase
Definition: snp_client.cpp:67
static const char kFilterPrefixChar
Definition: snp_client.cpp:93
BEGIN_NCBI_NAMESPACE
Definition: snp_client.cpp:47
const int kNAVersionMax
Definition: snp_client.cpp:71
const unsigned int kRefSeqAccFlags
Definition: snp_client.cpp:704
BEGIN_NAMESPACE(psg)
static string s_GetAccVer(const CSeq_id_Handle &id)
Definition: snp_client.cpp:77
static string s_AddFilterIndex(const string &s, size_t filter_index)
Definition: snp_client.cpp:120
vector< string > m_Names
vector< string > GetNotProcessedName(TProcessorPriority priority)
vector< string > m_SeqIds
string GetId(void) const
SParsedId2Info(const string &str)
Definition: snp_client.cpp:142
unique_ptr< CSNPBlobId > blob_id
Definition: snp_client.cpp:139
int m_SplitVersion
Definition: snp_client.hpp:89
objects::CSeq_id::ESNPScaleLimit m_SNPScaleLimit
Definition: snp_client.hpp:73
#define _ASSERT
EPSGOperationStatus
Definition: timing.hpp:60
@ eOpStatusFound
Definition: timing.hpp:61
@ eOpStatusNotFound
Definition: timing.hpp:62
EPSGOperation
Definition: timing.hpp:65
@ eSNP_PTISLookup
Definition: timing.hpp:89
@ eVDBOpen
Definition: timing.hpp:87
Modified on Thu Apr 25 08:17:16 2024 by modify_doxy.py rev. 669887