NCBI C++ ToolKit
seqdbgimask.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJTOOLS_READERS_SEQDB__SEQDBGIMASK_HPP
2 #define OBJTOOLS_READERS_SEQDB__SEQDBGIMASK_HPP
3 
4 /* $Id: seqdbgimask.hpp 94741 2021-09-07 12:51:47Z fongah2 $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Ning Ma
30  *
31  */
32 
33 /// @file seqdbgimask.hpp
34 /// Defines gi-based mask data files
35 ///
36 /// Defines classes:
37 /// CSeqDBGiMask
38 ///
39 /// Implemented for: UNIX, MS-Windows
40 
44 
46 
47 /// Import definitions from the objects namespace.
49 
50 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
51  (!defined(NCBI_COMPILER_MIPSPRO)) )
52 
53 /// CSeqDBGiMask class.
54 ///
55 /// This code supports Gi-based database masks
56 
57 class CSeqDBGiMask : public CObject {
58 public:
59  /// Constructor.
60  ///
61  /// @param atlas
62  /// The atlas layer managing memory lease [in]
63  /// @param mask_name
64  /// The names of the mask files. [in]
65  CSeqDBGiMask(CSeqDBAtlas & atlas,
66  const vector <string> & mask_name);
67 
68  /// Destructor.
72  for (unsigned int i=0; i<m_DataFile.size(); i++) {
73  m_DataLease[i]->Clear();
74  delete m_DataFile[i];
75  delete m_DataLease[i];
76  }
77  };
78 
79  /// Get the mask description for algo id
80  /// @param algo_id The chosen algo id [in]
81  /// @param locked
82  /// The lock holder object for this thread (or NULL). [in]
83  /// @return The description of the masking algo.
84  const string & GetDesc(int algo_id, CSeqDBLockHold & locked);
85 
86  /// Get the mask data for GI
87  /// @param algo_id The chosen algo id [in]
88  /// @param gi The chosen gi [in]
89  /// @param ranges The masks for sequence with gi [out]
90  /// @param locked
91  /// The lock holder object for this thread (or NULL). [in]
92  void GetMaskData(int algo_id,
93  TGi gi,
95  CSeqDBLockHold &locked);
96 
97  /// Get the available mask algorithsm ids
98  /// @param algo The avaiable algo ids [out]
99  void GetAvailableMaskAlgorithms(vector <int> & algo) const {
100  algo.clear();
101  for (unsigned int i=0; i<m_MaskNames.size(); ++i) {
102  algo.push_back(i);
103  }
104  return;
105  }
106 
107  /// Get the mask algorithm id for a string id
108  /// @param algo_name The algorithm string [in]
109  /// @return the algorithm id corresponding to the string
110  int GetAlgorithmId(const string & algo_name) const {
111  for (unsigned int i=0; i<m_MaskNames.size(); ++i) {
112  if (m_MaskNames[i] == algo_name) return i;
113  }
114  CNcbiOstrstream oss;
115  oss << "Filtering algorithm " << algo_name
116  << " does not exist." << endl;
118  NCBI_THROW(CSeqDBException, eArgErr,
120  }
121 
122  /// Get the mask algorithsm name for a numeric id
123  /// @param algo_id The algorithm id [in]
124  /// @return the algorithm name
125  const string & GetAlgorithmName(int algo_id) const {
126  x_VerifyAlgorithmId(algo_id);
127  return m_MaskNames[algo_id];
128  }
129 
130  /// Get the names of available mask algorithms as string
131  string GetAvailableAlgorithmNames() const {
132  CNcbiOstrstream retval;
133  retval << endl
134  << "Available filtering algorithm(s):"
135  << endl << endl;
136  retval << setw(14) << left << "Algorithm ID"
137  << setw(40) << left << "Algorithm name" << endl;
138  for (unsigned int id=0; id < m_MaskNames.size(); ++id) {
139  retval << " " << setw(10) << left << id
140  << setw(40) << left << m_MaskNames[id] << endl;
141  }
142  return CNcbiOstrstreamToString(retval);
143  }
144 
145 private:
146  /// String format used by gi mask files
147  static const CBlastDbBlob::EStringFormat
149 
150  /// File offset type.
152 
153  /// Prevent copy construction.
155 
156  /// Prevent copy assignment.
158 
159  /// Open file for a chosen algo_id
160  /// @param algo_id The chosen algo_id [in]
161  /// @param locked The lock holder object for this thread. [in]
162  void x_Open(Int4 algo_id);
163 
164  /// Open files and read field data from the atlas.
165  /// @param locked The lock holder object for this thread. [in]
166  void x_ReadFields(void);
167 
168  /// Verify the algorithm exists. If not, raise an exception
169  void x_VerifyAlgorithmId(int algo_id) const {
170  if (algo_id < 0 || algo_id >= (int)m_MaskNames.size()) {
171  CNcbiOstrstream oss;
172  oss << "Filtering algorithm ID " << algo_id
173  << " does not exist." << endl;
175  NCBI_THROW(CSeqDBException, eArgErr,
177  }
178  }
179 
180  /// Get a range of the index or data file.
181  ///
182  /// A range of file is acquired and returned in the provided blob.
183  ///
184  /// @param begin The start offset for this range of data. [in]
185  /// @param end The end (post) offset for this range of data. [in]
186  /// @param select_file Whether to use the index or data file. [in]
187  /// @param lifetime Should the blob maintain the memory mapping? [in]
188  /// @param blob The data will be returned here. [out]
189  /// @param locked The lock holder object for this thread. [in]
190  static void s_GetFileRange(TIndx begin,
191  TIndx end,
193  CSeqDBFileMemMap & lease,
194  CBlastDbBlob & blob);
195 
196 
/// Binary search for a key in a sorted key array.
///
/// @param keys The key array, sorted in ascending order [in]
/// @param n Number of keys [in]
/// @param key The key to search for [in]
/// @param idx Receives the search position [out]:
///   - the index of a matching element when key is found;
///   - -1 when n <= 0 or key lies outside [keys[0], keys[n-1]];
///   - otherwise the index of the last element smaller than key.
/// @return TRUE if the key is found
template<class T> static bool s_BinarySearch(const T *keys,
                                             const int n,
                                             const T key,
                                             int &idx)
{
    if (n <= 0) {
        // Nothing to search.  Without this guard `upper` would be -1 and
        // keys[upper] below would read before the start of the array.
        idx = -1;
        return false;
    }

    int lower(0), upper(n - 1);

    if (key > keys[upper] || key < keys[lower]) {
        // out of range
        idx = -1;
        return false;
    }

    if (key == keys[upper]) {
        idx = upper;
        return true;
    }

    if (key == keys[lower]) {
        idx = lower;
        return true;
    }

    // From here on keys[lower] < key < keys[upper] holds.
    // The midpoint is written as lower + (upper - lower) / 2 so that the
    // intermediate value cannot overflow int when n is very large.
    idx = lower + (upper - lower) / 2;

    while (idx != lower) {
        if (key > keys[idx]) {
            lower = idx;
        }
        else if (key < keys[idx]) {
            upper = idx;
        }
        else {
            // value found
            return true;
        }
        idx = lower + (upper - lower) / 2;
    }
    // value not found; idx == lower, the last element smaller than key
    return false;
}
246 
247  /// Reference to the atlas.
249 
250  /// The set of gi masks found in alias description
251  const vector<string> m_MaskNames;
252 
253  /// Index file lease.
255 
256  /// Offset file lease.
258 
259  /// The current used mask id
261 
262  /// Index file.
264 
265  /// Offset file.
267 
268  /// Number of data volumes
270 
271  /// Data file.
272  vector<CSeqDBRawFile *> m_DataFile;
273 
274  /// Data file lease.
275  vector<CSeqDBFileMemMap *> m_DataLease;
276 
277  /// GI size
279 
280  /// Offset size
282 
283  /// Page size
285 
286  /// Number of Gi indices
288 
289  /// Number of Gis
291 
292  /// Mapped Gi index
293  const Uint4 *m_GiIndex;
294 
295  /// Start offset (in the index file) of the offset array.
297 
298  /// The description about the masking algo
299  string m_Desc;
300 
301  /// The create date of the GI mask
302  string m_Date;
303 
304 };
305 
306 #endif
307 
309 
310 #endif // OBJTOOLS_READERS_SEQDB__SEQDBGIMASK_HPP
`Blob' Class for SeqDB (and WriteDB).
Definition: seqdbblob.hpp:56
EStringFormat
String termination style.
Definition: seqdbblob.hpp:233
@ eSizeVar
Write string length as VarInt, then string data.
Definition: seqdbblob.hpp:237
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CObject –.
Definition: ncbiobj.hpp:180
CSeqDBAtlas class.
Definition: seqdbatlas.hpp:298
CNcbiStreamoff TIndx
The type used for file offsets.
Definition: seqdbatlas.hpp:302
CSeqDBException.
Definition: seqdbcommon.hpp:73
void Clear()
Clears the memory map object.
Definition: seqdbatlas.hpp:734
CSeqDBGiMask class.
Definition: seqdbgimask.hpp:57
void GetMaskData(int algo_id, TGi gi, CSeqDB::TSequenceRanges &ranges, CSeqDBLockHold &locked)
Get the mask data for GI.
Definition: seqdbgimask.cpp:66
CSeqDBGiMask & operator=(CSeqDBGiMask &)
Prevent copy assignment.
CSeqDBRawFile m_OffsetFile
Offset file.
int GetAlgorithmId(const string &algo_name) const
Get the mask algorithm id for a string id.
const vector< string > m_MaskNames
The set of gi masks found in alias description.
Int4 m_AlgoId
The current used mask id.
void x_ReadFields(void)
Open files and read field data from the atlas.
string m_Desc
The description about the masking algo.
const string & GetAlgorithmName(int algo_id) const
Get the mask algorithm name for a numeric id.
Int4 m_NumIndex
Number of Gi indices.
Int4 m_OffsetSize
Offset size.
static void s_GetFileRange(TIndx begin, TIndx end, CSeqDBRawFile &file, CSeqDBFileMemMap &lease, CBlastDbBlob &blob)
Get a range of the index or data file.
CSeqDBAtlas::TIndx TIndx
File offset type.
CSeqDBAtlas & m_Atlas
Reference to the atlas.
Int4 m_GiSize
GI size.
vector< CSeqDBRawFile * > m_DataFile
Data file.
Int4 m_PageSize
Page size.
const Uint4 * m_GiIndex
Mapped Gi index.
void x_VerifyAlgorithmId(int algo_id) const
Verify the algorithm exists. If not, raise an exception.
static const CBlastDbBlob::EStringFormat kStringFmt
String format used by gi mask files.
void x_Open(Int4 algo_id)
Open file for a chosen algo_id.
CSeqDBRawFile m_IndexFile
Index file.
~CSeqDBGiMask()
Destructor.
Definition: seqdbgimask.hpp:69
string m_Date
The create date of the GI mask.
CSeqDBGiMask(CSeqDBAtlas &atlas, const vector< string > &mask_name)
Constructor.
Definition: seqdbgimask.cpp:44
CSeqDBFileMemMap m_IndexLease
Index file lease.
const string & GetDesc(int algo_id, CSeqDBLockHold &locked)
Get the mask description for algo id.
Definition: seqdbgimask.cpp:57
vector< CSeqDBFileMemMap * > m_DataLease
Data file lease.
string GetAvailableAlgorithmNames() const
Get the names of available mask algorithms as string.
Int4 m_NumGi
Number of Gis.
void GetAvailableMaskAlgorithms(vector< int > &algo) const
Get the available mask algorithm ids.
Definition: seqdbgimask.hpp:99
CSeqDBGiMask(const CSeqDBGiMask &)
Prevent copy construction.
static bool s_BinarySearch(const T *keys, const int n, const T key, int &idx)
Binary search for value associated with a key.
Int4 m_IndexStart
Start offset (in the index file) of the offset array.
CSeqDBFileMemMap m_OffsetLease
Offset file lease.
Int4 m_NumVols
Number of data volumes.
CSeqDBLockHold.
Definition: seqdbatlas.hpp:167
Raw file.
Definition: seqdbfile.hpp:64
#define T(s)
Definition: common.h:230
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
FILE * file
int i
yy_size_t n
const struct ncbi::grid::netcache::search::fields::KEY key
ESERV_Algo algo
Defines BLAST database access classes.
The SeqDB memory management layer.
CSeqDBAtlas::TIndx TIndx
Index file.
Definition: seqdbfile.cpp:69
File access objects for CSeqDB.
USING_SCOPE(objects)
Import definitions from the objects namespace.
List of sequence offset ranges.
Definition: seqdb.hpp:236
Modified on Wed Nov 29 02:24:45 2023 by modify_doxy.py rev. 669887