NCBI C++ ToolKit
seqidlist_reader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Amelia Fong
27 *
28 */
29 
30 #include <ncbi_pch.hpp>
#include <cstring>
33 
34 
37 
38 
40 {
41 public:
43 
/// Decode the id records at the current read position into @a idlist.
/// The list is cleared and resized to m_info.num_ids first; the return value
/// is the number of ids decoded. Throws CSeqDBException (eArgErr) when that
/// number differs from m_info.num_ids.
45  int GetIds(vector<CSeqDBGiList::SSiOid> & idlist);
46 
47 private:
48  inline Uint8 x_GetUint8() { Uint8 rv= *((Uint8 *) m_Ptr); m_Ptr +=8; return rv;}
49  inline Uint4 x_GetUint4() { Uint4 rv= *((Uint4 *) m_Ptr); m_Ptr +=4; return rv;}
50  inline char x_GetChar() {char rv = *m_Ptr; m_Ptr++; return rv;}
51  inline void x_GetString(string & rv, Uint4 len) {rv.assign (m_Ptr, len); m_Ptr+= len;}
52 
/// Read cursor into the mapped file: every x_Get*() helper reads at this
/// address and then advances it.
53  char * m_Ptr;
/// Set to the start of the mapping by the constructor and moved forward by
/// the file size once the header size check has passed; GetIds() stops
/// decoding when m_Ptr reaches it.
54  char * m_EndPtr;
56 };
57 
/// Attach the reader to a memory-mapped seqidlist file and parse its header.
///
/// Both m_Ptr and m_EndPtr start at file.GetPtr(). The first byte is consumed
/// unconditionally; only when it is zero are the header fields decoded into
/// m_info and m_EndPtr moved forward by the file size. When the first byte is
/// non-zero nothing else is read and m_EndPtr stays at the start of the
/// mapping.
///
/// @param file  Memory-mapped seqidlist file.
/// @throws CSeqDBException (eArgErr) if file.GetPtr() is NULL, or if
///         m_info.file_size does not equal file.GetFileSize().
///
/// NOTE(review): listing lines 67, 72 and 77 are absent from this extract.
/// Presumably they load m_info.file_size, m_info.num_ids and
/// m_info.db_vol_length from the stream -- confirm against the repository.
/// NOTE(review): none of the reads below are checked against the end of the
/// mapping; a truncated or corrupt header can read past the mapped region.
58 CSeqidlistRead::CSeqidlistRead (CMemoryFile & file) : m_Ptr((char*) file.GetPtr()), m_EndPtr((char*) file.GetPtr()) {
59  if(m_Ptr == NULL) {
60  NCBI_THROW(CSeqDBException, eArgErr, "Failed to map seqidlist file ");
61  }
62 
 // A leading zero byte selects the branch that parses the header.
63  char null_byte = x_GetChar();
64  if (null_byte == 0) {
65  m_info.is_v4 = false;
66  Uint8 file_size = file.GetFileSize();
 // Reject the file when the recorded size disagrees with the mapped size.
68  if (m_info.file_size != file_size) {
69  NCBI_THROW(CSeqDBException, eArgErr, "Invalid seqidlist file");
70  }
 // m_EndPtr still points at the start of the mapping here, so this makes
 // it one-past-the-end of the file.
71  m_EndPtr += file_size;
 // Title: 4-byte length followed by the text.
73  Uint4 title_length = x_GetUint4();
74  x_GetString(m_info.title, title_length);
 // Creation date: 1-byte length followed by the text.
 // NOTE(review): the length is held in a plain char; where char is signed
 // a byte >= 0x80 becomes negative and converts to a huge Uint4 in the
 // x_GetString() call -- unsigned char looks intended.
75  char file_create_date_length = x_GetChar();
76  x_GetString(m_info.create_date, file_create_date_length);
 // The database section is only read when db_vol_length is non-zero.
78  if(m_info.db_vol_length != 0) {
 // NOTE(review): same plain-char length concern as for create_date.
79  char file_db_create_date_length = x_GetChar();
80  x_GetString(m_info.db_create_date, file_db_create_date_length);
81  Uint4 file_vol_names_length = x_GetUint4();
82  x_GetString(m_info.db_vol_names, file_vol_names_length);
83  }
84  }
85 }
86 
87 int CSeqidlistRead::GetIds(vector<CSeqDBGiList::SSiOid> & idlist)
88 {
89  const unsigned char byte_max = 0xFF;
90  unsigned int i = 0;
91  idlist.clear();
92  idlist.resize(m_info.num_ids);
93  for(; (m_Ptr < m_EndPtr) && (i < m_info.num_ids); i++) {
94  unsigned char id_len = (unsigned char) x_GetChar();
95  if(id_len == byte_max) {
96  Uint4 long_id_len = x_GetUint4();
97  x_GetString(idlist[i].si, long_id_len);
98  }
99  else {
100  x_GetString(idlist[i].si, id_len);
101  }
102  }
103  if(i != m_info.num_ids) {
104  NCBI_THROW(CSeqDBException, eArgErr, "Invalid total num of ids in seqidlist file");
105  }
106 
107  return i;
108 }
109 
110 
111 int CBlastSeqidlistFile::GetSeqidlist(CMemoryFile & file, vector<CSeqDBGiList::SSiOid> & idlist,
112  SBlastSeqIdListInfo & list_info)
113 {
114 
115  CSeqidlistRead list(file);
116  list.GetListInfo(list_info);
117  list.GetIds(idlist);
118 
119  return list_info.num_ids;
120 }
121 
/// Get seqidlist info only from a dbv5 seqidlist file.
///
/// @param filename   Name of the seqidlist file; resolved with
///                   SeqDB_ResolveDbPath() before use.
/// @param list_info  Passed to CSeqidlistRead::GetListInfo().
/// @return list_info.num_ids, converted to int.
///
/// NOTE(review): listing line 125 is absent from this extract. Presumably it
/// declares `in` (the object handed to CSeqidlistRead) from the resolved
/// path -- confirm against the repository.
122 int CBlastSeqidlistFile::GetSeqidlistInfo(const string & filename, SBlastSeqIdListInfo & list_info)
123 {
124  string file = SeqDB_ResolveDbPath(filename);
126  CSeqidlistRead list(in);
127  list.GetListInfo(list_info);
128  return static_cast<int>(list_info.num_ids);
129 
130 }
131 
132 void CBlastSeqidlistFile::PrintSeqidlistInfo(const string & filename, CNcbiOstream & os)
133 {
134  SBlastSeqIdListInfo list_info;
135  if (CBlastSeqidlistFile::GetSeqidlistInfo(filename, list_info) > 0) {
136  os <<"Num of Ids: " << list_info.num_ids << "\n";
137  os <<"Title: " << list_info.title << "\n";
138  os <<"Create Date: " << list_info.create_date << "\n";
139  if(list_info.db_vol_length > 0) {
140  os << "DB Info: \n";
141  os << "\t" << "Total Vol Length: " << list_info.db_vol_length << "\n";
142  os << "\t" << "DB Create Date: " << list_info.db_create_date << "\n";
143  os << "\t" << "DB Vols: ";
144  vector<string> vols;
145  NStr::Split(list_info.db_vol_names, " ", vols);
146  for(unsigned int i=0; i < vols.size(); i ++ ) {
147  os << "\n\t\t" << vols[i];
148  }
149  }
150  }
151  else {
152  os << "Seqidlist file is not in blast db version 5 format";
153  }
154  os << endl;
155 }
156 
static void PrintSeqidlistInfo(const string &filename, CNcbiOstream &os)
static int GetSeqidlist(CMemoryFile &file, vector< CSeqDBGiList::SSiOid > &idlist, SBlastSeqIdListInfo &list_info)
Get seqidlist from dbv5 seqidlist file.
static int GetSeqidlistInfo(const string &filename, SBlastSeqIdListInfo &list_info)
Get seqidlist info only from dbv5 seqidlist file.
CMemoryFile – class for memory-mapped file access.
Definition: ncbifile.hpp:2861
CSeqDBException.
Definition: seqdbcommon.hpp:73
SBlastSeqIdListInfo m_info
void GetListInfo(SBlastSeqIdListInfo &info)
void x_GetString(string &rv, Uint4 len)
CSeqidlistRead(CMemoryFile &file)
int GetIds(vector< CSeqDBGiList::SSiOid > &idlist)
static const char si[8][64]
Definition: des.c:146
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
FILE * file
int i
int len
static MDB_envinfo info
Definition: mdb_load.c:37
std::istream & in(std::istream &in_, double &x_)
string SeqDB_ResolveDbPath(const string &filename)
Resolve a file path using SeqDB's path algorithms.
USING_SCOPE(objects)
Blast DB v5 seqid list info.
Modified on Fri Sep 20 14:58:06 2024 by modify_doxy.py rev. 669887