NCBI C++ ToolKit
bioseq_info.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: bioseq_info.cpp 84935 2018-12-28 17:02:51Z satskyse $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Sergey Satskiy
27  *
28  * File Description:
29  *
30  * Synchronous retrieval of data from the bioseq. tables
31  *
32  */
33 
34 #include <ncbi_pch.hpp>
36 
// Consistency levels passed to CCassQuery::Query(...):
//  - CANONICAL_SEQ_ID_CONSISTENCY is used by FetchCanonicalSeqId(...)
//    for the SI2CSI lookup;
//  - BIOSEQ_INFO_CONSISTENCY is used by FetchBioseqInfo(...)
//    for the BIOSEQ_INFO lookup.
37 #define CANONICAL_SEQ_ID_CONSISTENCY CassConsistency::CASS_CONSISTENCY_LOCAL_QUORUM
38 #define BIOSEQ_INFO_CONSISTENCY CassConsistency::CASS_CONSISTENCY_LOCAL_QUORUM
39 
41 
42 
43 // NB: the field numbers must be in sync with the SQL statement, see
44 // FetchCanonicalSeqId(...)
45 static void
46 s_GetCSIValues(shared_ptr<CCassQuery> & query,
47  string & accession,
48  int16_t & version,
49  int16_t & seq_id_type)
50 {
51  accession = query->FieldGetStrValue(0);
52  version = query->FieldGetInt16Value(1);
53  seq_id_type = query->FieldGetInt16Value(2);
54 }
55 
56 
58 FetchCanonicalSeqId(shared_ptr<CCassConnection> conn,
59  const string & keyspace,
60  const string & sec_seq_id,
61  int16_t sec_seq_id_type,
62  bool sec_seq_id_type_provided,
63  string & accession,
64  int16_t & version,
65  int16_t & seq_id_type)
66 {
67  static const string s_Select = "SELECT accession, version, seq_id_type FROM ";
68  static const string s_Where_2 = ".SI2CSI WHERE sec_seq_id = ? AND sec_seq_id_type = ?";
69  static const string s_Where_1 = ".SI2CSI WHERE sec_seq_id = ?";
70  shared_ptr<CCassQuery> query = conn->NewQuery();
71 
72  // NB: the sequence of the retrieved fields must be in sync with
73  // s_GetCSIValues(...) function.
74  string sql = s_Select;
75  sql.append(keyspace);
76  if (sec_seq_id_type_provided) {
77  sql.append(s_Where_2);
78  query->SetSQL(sql, 2);
79  query->BindStr(0, sec_seq_id);
80  query->BindInt16(1, sec_seq_id_type);
81  } else {
82  sql.append(s_Where_1);
83  query->SetSQL(sql, 1);
84  query->BindStr(0, sec_seq_id);
85  }
86 
87  query->Query(CANONICAL_SEQ_ID_CONSISTENCY, false, false);
88 
89  if (sec_seq_id_type_provided) {
90  if (query->NextRow() == ar_dataready) {
91  s_GetCSIValues(query, accession, version, seq_id_type);
93  }
94  } else {
95  bool found = false;
96  while (query->NextRow() == ar_dataready) {
97  if (found)
99  s_GetCSIValues(query, accession, version, seq_id_type);
100  found = true;
101  }
102  if (found)
104  }
106 }
107 
// Positions of the fields in the BIOSEQ_INFO select list; they must be kept
// in sync with the select statements in FetchBioseqInfo(...).
// NB: seq_id_type and version are selected only by the statements which do
//     not have them in the WHERE clause, so fnSeqIdType and fnVersion are
//     valid only for those statements.
static constexpr int    fnDateChanged = 0;
static constexpr int    fnHash = 1;
static constexpr int    fnIdSync = 2;
static constexpr int    fnLength = 3;
static constexpr int    fnMol = 4;
static constexpr int    fnSat = 5;
static constexpr int    fnSatKey = 6;
static constexpr int    fnSeqIds = 7;
static constexpr int    fnSeqState = 8;
static constexpr int    fnState = 9;
static constexpr int    fnTaxId = 10;
static constexpr int    fnSeqIdType = 11;
static constexpr int    fnVersion = 12;
122 
123 
124 // NB: the field numbers must be in sync with the SQL statement, see
125 // FetchBioseqInfo(...)
126 static void
127 s_GetBioseqValues(shared_ptr<CCassQuery> & query,
128  SBioseqInfo & bioseq_info)
129 {
130  bioseq_info.m_DateChanged = query->FieldGetInt64Value(fnDateChanged);
131  bioseq_info.m_Hash = query->FieldGetInt32Value(fnHash);
132  bioseq_info.m_IdSync = query->FieldGetInt64Value(fnIdSync);
133  bioseq_info.m_Length = query->FieldGetInt32Value(fnLength);
134  bioseq_info.m_Mol = query->FieldGetInt8Value(fnMol);
135  bioseq_info.m_Sat = query->FieldGetInt16Value(fnSat);
136  bioseq_info.m_SatKey = query->FieldGetInt32Value(fnSatKey);
137  query->FieldGetContainerValue(fnSeqIds,
138  inserter(bioseq_info.m_SeqIds,
139  bioseq_info.m_SeqIds.end()));
140  bioseq_info.m_SeqState = query->FieldGetInt8Value(fnSeqState);
141  bioseq_info.m_State = query->FieldGetInt8Value(fnState);
142  bioseq_info.m_TaxId =query->FieldGetInt32Value(fnTaxId);
143 }
144 
145 
147 FetchBioseqInfo(shared_ptr<CCassConnection> conn,
148  const string & keyspace,
149  bool version_provided,
150  bool seq_id_type_provided,
151  SBioseqInfo & bioseq_info)
152 {
153  // if version is not provided then the latest version has to be taken.
154  // if id_type is not provided then the number of suitable records need to
155  // be checked. It must be exactly 1.
156 
157  shared_ptr<CCassQuery> query = conn->NewQuery();
158 
159  if (version_provided && seq_id_type_provided) {
160  query->SetSQL("SELECT "
161  "date_changed, "
162  "hash, "
163  "id_sync, "
164  "length, "
165  "mol, "
166  "sat, "
167  "sat_key, "
168  "seq_ids, "
169  "seq_state, "
170  "state, "
171  "tax_id "
172  "FROM " +
173  keyspace + ".BIOSEQ_INFO WHERE "
174  "accession = ? AND version = ? AND seq_id_type = ?", 3);
175  query->BindStr(0, bioseq_info.m_Accession);
176  query->BindInt16(1, bioseq_info.m_Version);
177  query->BindInt16(2, bioseq_info.m_SeqIdType);
178  } else if (version_provided) {
179  query->SetSQL("SELECT "
180  "date_changed, "
181  "hash, "
182  "id_sync, "
183  "length, "
184  "mol, "
185  "sat, "
186  "sat_key, "
187  "seq_ids, "
188  "seq_state, "
189  "state, "
190  "tax_id, "
191  "seq_id_type "
192  "FROM " +
193  keyspace + ".BIOSEQ_INFO WHERE "
194  "accession = ? AND version = ?", 2);
195  query->BindStr(0, bioseq_info.m_Accession);
196  query->BindInt16(1, bioseq_info.m_Version);
197  } else {
198  query->SetSQL("SELECT "
199  "date_changed, "
200  "hash, "
201  "id_sync, "
202  "length, "
203  "mol, "
204  "sat, "
205  "sat_key, "
206  "seq_ids, "
207  "seq_state, "
208  "state, "
209  "tax_id, "
210  "seq_id_type, "
211  "version "
212  "FROM " +
213  keyspace + ".BIOSEQ_INFO WHERE "
214  "accession = ?", 1);
215  query->BindStr(0, bioseq_info.m_Accession);
216  }
217 
218 
219  query->Query(BIOSEQ_INFO_CONSISTENCY, false, false);
220 
221  // Case 1: all three fields are provided
222  if (version_provided && seq_id_type_provided) {
223  if (query->NextRow() == ar_dataready) {
224  s_GetBioseqValues(query, bioseq_info);
226  }
228  }
229 
230  // Case 2: accession and version are provided;
231  // check that there is exactly one record
232  if (version_provided) {
233  bool found = false;
234  int selected_seq_id_type = INT_MIN;
235  while (query->NextRow() == ar_dataready) {
236  if (!found) {
237  s_GetBioseqValues(query, bioseq_info);
238  selected_seq_id_type = query->FieldGetInt16Value(fnSeqIdType);
239  found = true;
240  continue;
241  }
243  }
244 
245  if (found) {
246  bioseq_info.m_SeqIdType = selected_seq_id_type;
248  }
250  }
251 
252 
253  // Case 3: accession and seq_id_type are provided.
254  // So the latest version has to be retrieved
255  if (seq_id_type_provided) {
256  bool found = false;
257  int selected_version = INT_MIN;
258  while (query->NextRow() == ar_dataready) {
259  int seq_id_type = query->FieldGetInt16Value(fnSeqIdType);
260  if (seq_id_type != bioseq_info.m_SeqIdType)
261  continue;
262  int version = query->FieldGetInt16Value(fnVersion);
263  if (!found || version > selected_version) {
264  s_GetBioseqValues(query, bioseq_info);
265 
266  found = true;
267  selected_version = version;
268  }
269  }
270 
271  if (found) {
272  bioseq_info.m_Version = selected_version;
274  }
276  }
277 
278  // Case 4: only accession is provided;
279  // select the latest version;
280  // check that there is exactly one seq_it_type
281  bool found = false;
282  int selected_version = INT_MIN;
283  int selected_seq_id_type = INT_MIN;
284  while (query->NextRow() == ar_dataready) {
285  if (!found) {
286  s_GetBioseqValues(query, bioseq_info);
287  selected_version = query->FieldGetInt16Value(fnVersion);
288  selected_seq_id_type = query->FieldGetInt16Value(fnSeqIdType);
289  found = true;
290  continue;
291  }
292 
293  int seq_id_type = query->FieldGetInt16Value(fnSeqIdType);
294  if (selected_seq_id_type == seq_id_type) {
295  // Take the latest version
296  int version = query->FieldGetInt16Value(fnVersion);
297  if (version > selected_version) {
298  s_GetBioseqValues(query, bioseq_info);
299  selected_version = version;
300  }
301  continue;
302  }
304  }
305 
306  if (found) {
307  bioseq_info.m_Version = selected_version;
308  bioseq_info.m_SeqIdType = selected_seq_id_type;
310  }
312 }
313 
314 
315 
#define END_IDBLOB_SCOPE
Definition: IdCassScope.hpp:40
#define BEGIN_IDBLOB_SCOPE
Definition: IdCassScope.hpp:39
@ ar_dataready
Definition: cass_driver.hpp:70
const_iterator end() const
Definition: set.hpp:136
static CS_CONNECTION * conn
Definition: ct_dynamic.c:25
static char sql[1024]
Definition: putdata.c:19
Int2 int16_t
const string version
version string
Definition: variables.hpp:66
static EIO_Status s_Select(size_t n, SSOCK_Poll polls[], const struct timeval *tv, int asis)
Definition: ncbi_socket.c:2419
#define CANONICAL_SEQ_ID_CONSISTENCY
Definition: bioseq_info.cpp:37
static const int fnSeqIdType
static const int fnSatKey
static const int fnMol
static const int fnDateChanged
#define BIOSEQ_INFO_CONSISTENCY
Definition: bioseq_info.cpp:38
static void s_GetBioseqValues(shared_ptr< CCassQuery > &query, SBioseqInfo &bioseq_info)
static const int fnHash
static const int fnIdSync
static const int fnState
CRequestStatus::ECode FetchBioseqInfo(shared_ptr< CCassConnection > conn, const string &keyspace, bool version_provided, bool seq_id_type_provided, SBioseqInfo &bioseq_info)
static BEGIN_IDBLOB_SCOPE void s_GetCSIValues(shared_ptr< CCassQuery > &query, string &accession, int16_t &version, int16_t &seq_id_type)
Definition: bioseq_info.cpp:46
static const int fnLength
static const int fnSeqState
CRequestStatus::ECode FetchCanonicalSeqId(shared_ptr< CCassConnection > conn, const string &keyspace, const string &sec_seq_id, int16_t sec_seq_id_type, bool sec_seq_id_type_provided, string &accession, int16_t &version, int16_t &seq_id_type)
Definition: bioseq_info.cpp:58
static const int fnTaxId
static const int fnVersion
static const int fnSeqIds
static const int fnSat
int8_t m_SeqState
Definition: bioseq_info.hpp:67
int32_t m_TaxId
Definition: bioseq_info.hpp:69
int16_t m_Sat
Definition: bioseq_info.hpp:64
int8_t m_Mol
Definition: bioseq_info.hpp:63
int16_t m_SeqIdType
Definition: bioseq_info.hpp:57
int16_t m_Version
Definition: bioseq_info.hpp:56
int32_t m_SatKey
Definition: bioseq_info.hpp:65
int64_t m_IdSync
Definition: bioseq_info.hpp:61
string m_Accession
Definition: bioseq_info.hpp:55
set< tuple< int16_t, string > > m_SeqIds
Definition: bioseq_info.hpp:66
int8_t m_State
Definition: bioseq_info.hpp:68
int32_t m_Hash
Definition: bioseq_info.hpp:60
int64_t m_DateChanged
Definition: bioseq_info.hpp:59
int32_t m_Length
Definition: bioseq_info.hpp:62
static string query
Modified on Wed Sep 04 15:04:27 2024 by modify_doxy.py rev. 669887