NCBI C++ ToolKit
blast_test_util.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_test_util.hpp 89002 2020-02-11 15:01:11Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /** @file blast_test_util.hpp
31  * Utilities to develop and debug unit tests for BLAST
32  */
33 
34 #ifndef _BLAST_TEST_UTIL_HPP
35 #define _BLAST_TEST_UTIL_HPP
36 
37 #include <string>
38 #include <exception>
39 #include <assert.h>
40 
41 #include <corelib/ncbistd.hpp>
42 #include <serial/serial.hpp>
43 #include <serial/objostr.hpp>
44 #include <util/random_gen.hpp>
45 #include <util/format_guess.hpp>
46 
47 #include <serial/serial.hpp>
48 
50 
51 #ifndef ASSERT
52 #define ASSERT assert
53 #endif
54 
// Forward declarations: these toolkit types are only named (by pointer,
// reference or template argument) in the declarations that follow.
namespace ncbi {

namespace blast {
    struct SSeqLoc;
} // namespace blast

namespace objects {
    class CObjectManager;
    class CScope;
    class CSeqVector;
    class CSeq_align_set;
    class CSeq_id;
} // namespace objects

} // namespace ncbi
68 
69 namespace TestUtil {
70 
71 // Random integer generator for use with std::generate
72 #if defined(NCBI_COMPILER_ICC) || defined(NCBI_OS_IRIX) \
73  || defined(NCBI_COMPILER_ANY_CLANG)
74 template <int lowest_value = 0, int highest_value = INT_MAX>
75 #else
76 template <int lowest_value = 0, int highest_value = ncbi::CRandom::GetMax()>
77 #endif
78 struct CRandomIntGen {
79  CRandomIntGen() : m_Gen(::time(0)) {}
80  int operator()() {
81  return m_Gen.GetRand(lowest_value, highest_value);
82  }
83 private:
84  ncbi::CRandom m_Gen;
85 };
86 
/// Declaration only; the definition is not part of this header.
/// NOTE(review): judging by the name this presumably builds a CSeq_id of the
/// GI kind with a random value - confirm against the implementation.
/// NOTE(review): the result is a raw pointer; who owns the returned object is
/// not stated here - confirm before storing it outside a CRef.
87 ncbi::objects::CSeq_id* GenerateRandomSeqid_Gi();
88 
89 template <class T>
90 ncbi::CRef<T> ReadObject(const std::string& filename) {
91  ncbi::CNcbiIfstream in(filename.c_str());
92  if ( !in ) {
93  throw std::runtime_error("Failed to open " + filename);
94  }
95  ncbi::CRef<T> retval(new T);
96 
97  switch (ncbi::CFormatGuess().Format(in)) {
98  case ncbi::CFormatGuess::eTextASN:
99  in >> ncbi::MSerial_AsnText >> *retval;
100  break;
101  case ncbi::CFormatGuess::eBinaryASN:
102  in >> ncbi::MSerial_AsnBinary >> *retval;
103  break;
104  case ncbi::CFormatGuess::eXml:
105  in >> ncbi::MSerial_Xml >> *retval;
106  break;
107  default:
108  throw std::runtime_error("Unsupported format");
109  }
110  return retval;
111 }
112 
113 /// Convenience template function to print ASN.1 objects to a new file
114 template <class T>
115 void PrintTextAsn1Object(std::string filename, T* obj) {
116  std::ofstream out(filename.c_str());
117  if ( !out )
118  throw std::runtime_error("Could not open " + filename);
119  out << ncbi::MSerial_AsnText << *obj;
120 }
121 
122 /** Converts bl2seq and blast style seq-align-sets to the seq-align-set format
123  * that the new formatter understands (same flat format as C toolkit
124  * seq-aligns)
 * @param sset
 *   The seq-align-set to convert; taken by const reference.
 * @return
 *   CRef to a CSeq_align_set with the converted alignments.
 */
125 ncbi::CRef<ncbi::objects::CSeq_align_set>
126 FlattenSeqAlignSet(const ncbi::objects::CSeq_align_set& sset);
127 
/// Declaration only; the definition is not part of this header.
/// NOTE(review): presumably writes a formatted rendering of the alignments to
/// the given stream - confirm against the implementation.
128 /// Assumes that the sas argument is a bl2seq and blast style seq-align-set
/// @param out
///   Output stream passed by reference.
/// @param sas
///   Pointer to the seq-align-set (see the assumption above).
/// @param scope
///   Scope passed by non-const reference.
129 void PrintFormattedSeqAlign(std::ostream& out,
130  const ncbi::objects::CSeq_align_set* sas,
131  ncbi::objects::CScope& scope);
132 
133 /// Endianness independent hash function.
134 ///
135 /// This function computes a hash value for an array of any primitive
136 /// type. The hash assumes the data in the array is in "host" order
137 /// with respect to endianness and should produce the same value on
138 /// any platform for the same numerical values of the array
139 /// elements.<P>
140 ///
141 /// The algorithm attempts to be robust against changes in values in
142 /// the array, the length of the array, and zeroes appended to the array,
143 /// and will not normally be fooled by naturally occurring patterns in
144 /// the buffer. (However, it is not intended to be secure against
145 /// deliberate attempts to produce a collision).<P>
146 ///
147 /// The size of an element of the array must be uniform and is
148 /// specified as an argument to the function. It must exactly divide
149 /// the byte length of the array. If the element size is specified as
150 /// 1, no swapping will be done. This can be used to hash a string.
151 ///
152 /// @param buffer
153 /// Points to the beginning of the array.
154 /// @param byte_length
155 /// The length of the array in bytes.
156 /// @param swap_size
157 /// The size of one array element (specify 1 to disable swapping).
158 /// @param hash_seed
159 /// The starting value of the hash.
161  Uint4 byte_length,
162  Uint4 swap_size = 1,
163  Uint4 hash_seed = 1);
164 
165 /** Class which registers the BLAST database and Genbank data loaders as
166  * non-default data loaders with the object manager upon construction.
167  * Designed so that the scopes created by this object are configured properly
168  * to obtain the sequences in the expected priorities in the BLAST code.
169  */
170 class CBlastOM
171 {
172 public:
 // NOTE(review): the embedded listing numbers jump from 173 to 175, so an
 // enumerator may be missing from this view - confirm against the header.
173  enum ELocation {
175  eLocal
176  };
177 
 /// Shorthand for the EDbType type declared by ncbi::CBlastDbDataLoader
178  typedef ncbi::CBlastDbDataLoader::EDbType EDbType;
179 
181 
182  /// Create a new scope with the default set to the BLAST database data
183  /// loader for the BLAST database specified in the constructor (if found),
184  /// then set to the Genbank data loader
185  ncbi::CRef<ncbi::objects::CScope> NewScope();
186 
187  /// Removes the BLAST database data loader from the object manager.
189 
190 private:
 /// Reference-counted handle to a CObjectManager
191  ncbi::CRef<ncbi::objects::CObjectManager> m_ObjMgr;
194 
 // NOTE(review): the line below is a fragment of a declaration whose first
 // and last lines are absent from this view - confirm against the header.
196  EDbType dbtype,
198 
200 };
201 
202 }
203 
204 #endif // _BLAST_TEST_UTIL_HPP
Data loader implementation that uses the blast databases.
CObjectManager –.
CScope –.
Definition: scope.hpp:92
CSeqVector –.
Definition: seq_vector.hpp:65
Class which registers the BLAST database and Genbank data loaders as a non-default data loaders with ...
void x_InitBlastDatabaseDataLoader(const std::string &dbname, EDbType dbtype, ELocation location)
CBlastOM(const std::string &dbname, EDbType db_type, ELocation location=eLocal)
ncbi::CRef< ncbi::objects::CObjectManager > m_ObjMgr
ncbi::CBlastDbDataLoader::EDbType EDbType
std::string m_GbLoaderName
std::string m_BlastDbLoaderName
void RevokeBlastDbDataLoader()
Removes the BLAST database data loader from the object manager.
ncbi::CRef< ncbi::objects::CScope > NewScope()
Create a new scope with the default set to the BLAST database data loader for the BLAST database spec...
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define T(s)
Definition: common.h:230
std::ofstream out("events_result.xml")
main entry point for tests
static const char location[]
Definition: config.c:97
string
Definition: cgiapp.hpp:687
#define MSerial_AsnBinary
Definition: serialbase.hpp:697
#define MSerial_Xml
Definition: serialbase.hpp:698
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
void PrintFormattedSeqAlign(ostream &out, const CSeq_align_set *sas, CScope &scope)
objects::CSeq_id * GenerateRandomSeqid_Gi()
void PrintTextAsn1Object(std::string filename, T *obj)
Convenience template function to print ASN.1 objects to a new file.
CRef< CSeq_align_set > FlattenSeqAlignSet(const CSeq_align_set &sset)
ncbi::CRef< T > ReadObject(const std::string &filename)
Uint4 EndianIndependentBufferHash(const char *buffer, Uint4 byte_length, Uint4 swap_size, Uint4 hash_seed)
Endianness independent hash function.
Magic spell ;-) needed for some weird compilers... very empiric.
std::istream & in(std::istream &in_, double &x_)
Format
Definition: njn_ioutil.hpp:52
static pcre_uint8 * buffer
Definition: pcretest.c:1051
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
Modified on Sat Apr 13 11:48:23 2024 by modify_doxy.py rev. 669887