NCBI C++ ToolKit
multisource_util.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: multisource_util.hpp 84663 2018-11-27 18:22:00Z ucko $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Kevin Bealer
27 *
28 * File Description:
29 * Utility functions and classes for multisource app.
30 */
31 
32 #ifndef OBJTOOLS_BLAST_SEQDB_WRITER___MULTISOURCE_UTIL__HPP
33 #define OBJTOOLS_BLAST_SEQDB_WRITER___MULTISOURCE_UTIL__HPP
34 
35 #include <corelib/ncbistd.hpp>
36 
37 // Blast databases
39 
40 // SeqDB interface to blast databases
42 
44 
45 /// Get all keys for a defline.
47 void GetDeflineKeys(const objects::CBlast_def_line & defline,
48  vector<string> & keys);
49 
50 /// CMultisourceException
51 ///
52 /// This exception class is thrown for errors occurring during
53 /// traceback.
54 
56 public:
57  /// Errors are classified into several types.
58  enum EErrCode {
59  /// Argument validation failed.
61  /// Failed to create the output file(s)/directory
62  eOutputFileError
63  };
64 
65  /// Get a message describing the exception.
66  virtual const char* GetErrCodeString() const override
67  {
68  switch ( GetErrCode() ) {
69  case eArg: return "eArgErr";
70  default: return CException::GetErrCodeString();
71  }
72  }
73 
74  /// Include standard NCBI exception behavior.
76 };
77 
78 /// Gi List for database construction.
79 ///
80 /// This GI list is built from the set of identifiers the user has
81 /// specified for inclusion in the resulting database. By using a
82 /// SeqDB GI list, database filtering can be done using SeqDB's
83 /// internal processing machinery.
84 
86 public:
87  /// Construct an empty GI list.
88  CInputGiList(int capacity = 1024)
89  : m_Last(ZERO_GI)
90  {
91  if (capacity > 0) {
92  m_GisOids.reserve(capacity);
93  }
94 
95  // An empty vector is always sorted, right?
96  m_CurrentOrder = eGi;
97  }
98 
99  /// Append a GI.
100  ///
101  /// This method adds a GI to the list.
102  ///
103  /// @param gi A sequence identifier.
104  void AppendGi(TGi gi, int oid = -1)
105  {
106  if (m_CurrentOrder == eGi) {
107  if (m_Last > gi) {
108  m_CurrentOrder = eNone;
109  } else if (m_Last == gi) {
110  return;
111  }
112  }
113 
114  m_GisOids.push_back(SGiOid(gi, oid));
115  m_Last = gi;
116  }
117 
118  /// Append a Seq-id
119  ///
120  /// This method adds a Seq-id to the list.
121  ///
122  /// @param seqid A sequence identifier.
123  void AppendSi(const string &si, int oid = -1)
124  {
125  // This could verify ordering, but since ordering for GIs is
126  // common, and ordering for Seq-ids is rare, for now I'll just
127  // assume that Seq-ids are out-of order. This also fits the
128  // basic practice of not making tiny optimizations in code
129  // paths that are slow.
130 
131  m_CurrentOrder = eNone;
132  string str_id = SeqDB_SimplifyAccession(si);
133  if (str_id != "") m_SisOids.push_back(SSiOid(str_id, oid));
134  }
135 
136 private:
138 };
139 
140 
143  vector<string> & lines);
144 
146 public:
147  CSequenceReturn(CSeqDB & seqdb, const char * buffer)
148  : m_SeqDB(seqdb), m_Buffer(buffer)
149  {
150  }
151 
153  {
154  m_SeqDB.RetSequence(& m_Buffer);
155  }
156 
157 private:
159 
161  const char * m_Buffer;
162 };
163 
164 /// Maps Seq-id key to bitset.
166 
167 /// Map from Seq-id string to set of leaf taxids.
169 
170 /// Map from linkout bit number to list of ids.
172 
174 void MapToLMBits(const TLinkoutMap & gilist, TIdToBits & gi2links);
175 
177 bool CheckAccession(const string & acc,
178  TGi & gi,
179  CRef<objects::CSeq_id> & seqid,
180  bool & specific);
181 
183 void GetSeqIdKey(const objects::CSeq_id & id, string & key);
184 
186 string AccessionToKey(const string & acc);
187 
189 
190 #endif // OBJTOOLS_BLAST_SEQDB_WRITER___MULTISOURCE_UTIL__HPP
191 
User-defined methods of the data storage class.
Gi List for database construction.
void AppendSi(const string &si, int oid=-1)
Append a Seq-id.
CInputGiList(int capacity=1024)
Construct an empty GI list.
void AppendGi(TGi gi, int oid=-1)
Append a GI.
CMultisourceException.
@ eArg
Argument validation failed.
virtual const char * GetErrCodeString() const override
Get a message describing the exception.
NCBI_EXCEPTION_DEFAULT(CMultisourceException, CException)
Include standard NCBI exception behavior.
CSeqDBGiList.
CSeqDB.
Definition: seqdb.hpp:161
CSequenceReturn(CSeqDB &seqdb, const char *buffer)
CSequenceReturn & operator=(CSequenceReturn &)
const char * m_Buffer
Include a standard set of the NCBI C++ Toolkit most basic headers.
static const char si[8][64]
Definition: des.c:146
static FILE * f
Definition: readconf.c:23
#define ZERO_GI
Definition: ncbimisc.hpp:1088
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbiexpt.cpp:453
EErrCode
Error types that an application can generate.
Definition: ncbiexpt.hpp:884
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define NCBI_XOBJWRITE_EXPORT
Definition: ncbi_export.h:1347
string AccessionToKey(const string &acc)
bool CheckAccession(const string &acc, TGi &gi, CRef< objects::CSeq_id > &seqid, bool &specific)
void GetDeflineKeys(const objects::CBlast_def_line &defline, vector< string > &keys)
Get all keys for a defline.
void MapToLMBits(const TLinkoutMap &gilist, TIdToBits &gi2links)
Read a set of GI lists, each a vector of strings, and combine the bits into the resulting linkbits ma...
map< string, CBlast_def_line::TTaxIds > TIdToLeafs
Map from Seq-id string to set of leaf taxids.
map< string, int > TIdToBits
Maps Seq-id key to bitset.
void ReadTextFile(CNcbiIstream &f, vector< string > &lines)
map< int, vector< string > > TLinkoutMap
Map from linkout bit number to list of ids.
void GetSeqIdKey(const objects::CSeq_id &id, string &key)
const struct ncbi::grid::netcache::search::fields::KEY key
static uint8_t * buffer
Definition: pcre2test.c:1016
Defines BLAST database access classes.
ESeqDBIdType SeqDB_SimplifyAccession(const string &acc, Int8 &num_id, string &str_id, bool &simpler)
String id simplification.
Structure that holds GI,OID pairs.
Structure that holds Seq-id,OID pairs.
@ eGi
GI Index.
Modified on Fri Sep 20 14:57:54 2024 by modify_doxy.py rev. 669887