NCBI C++ ToolKit
blast_test_util.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_test_util.hpp 100942 2023-10-03 17:36:50Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /** @file blast_test_util.hpp
31  * Utilities to develop and debug unit tests for BLAST
32  */
33 
34 #ifndef _BLAST_TEST_UTIL_HPP
35 #define _BLAST_TEST_UTIL_HPP
36 
37 #include <string>
38 #include <exception>
39 #include <ctime>
40 
41 #include <corelib/ncbistd.hpp>
43 #include <serial/serial.hpp>
44 #include <serial/objostr.hpp>
45 #include <util/random_gen.hpp>
46 #include <util/format_guess.hpp>
47 
48 #include <serial/serial.hpp>
49 
50 // NewBlast includes
55 
57 
59 
60 // forward declarations
61 namespace ncbi {
62  namespace objects {
63  class CSeq_id;
64  class CSeq_align_set;
65  class CSeqVector;
66  class CScope;
67  class CObjectManager;
68  }
69  namespace blast {
70  struct SSeqLoc;
71  }
72 }
73 
74 namespace TestUtil {
75 
76 std::vector<EBlastProgramType> GetAllBlastProgramTypes();
77 
78 ncbi::objects::CSeq_id* GenerateRandomSeqid_Gi();
79 
/// Reads a serialized object of type T from a file.
///
/// The format of the stream is guessed with ncbi::CFormatGuess; text ASN.1,
/// binary ASN.1 and XML are the formats handled here.
///
/// @tparam T
/// Type of the object to read. An instance is created with `new T` and
/// filled through the serial stream extraction operator.
/// @param filename
/// Name of the file to read from.
/// @return
/// Reference to the newly created object populated from the file.
/// @throws std::runtime_error if the file cannot be opened, or if the guessed
/// format is not one of the three formats handled above.
///
/// NOTE(review): std::runtime_error is declared in <stdexcept>, which is not
/// among the includes visible in this listing -- confirm it is pulled in
/// transitively.
80 template <class T>
81 ncbi::CRef<T> ReadObject(const std::string& filename) {
82  ncbi::CNcbiIfstream in(filename.c_str());
83  if ( !in ) {
84  throw std::runtime_error("Failed to open " + filename);
85  }
86  ncbi::CRef<T> retval(new T); // object that receives the deserialized data
87 
88  switch (ncbi::CFormatGuess().Format(in)) { // pick the reader matching the guessed format
89  case ncbi::CFormatGuess::eTextASN:
90  in >> ncbi::MSerial_AsnText >> *retval;
91  break;
92  case ncbi::CFormatGuess::eBinaryASN:
93  in >> ncbi::MSerial_AsnBinary >> *retval;
94  break;
95  case ncbi::CFormatGuess::eXml:
96  in >> ncbi::MSerial_Xml >> *retval;
97  break;
98  default: // any other guessed format is rejected
99  throw std::runtime_error("Unsupported format");
100  }
101  return retval;
102 }
103 
104 void CheckForBlastSeqSrcErrors(const BlastSeqSrc* seqsrc)
105  THROWS((ncbi::blast::CBlastException));
106 
107 /// Convenience template function to print ASN.1 objects to a new file
///
/// The object is written in text ASN.1 form (ncbi::MSerial_AsnText). The file
/// is opened with std::ofstream's default mode, so an existing file of the
/// same name is overwritten.
///
/// @param filename
/// Name of the file to write to.
/// @param obj
/// Object to print. It is dereferenced unconditionally, so it must not be
/// NULL.
/// @throws std::runtime_error if the output file cannot be opened.
108 template <class T>
109 void PrintTextAsn1Object(std::string filename, T* obj) {
110  std::ofstream out(filename.c_str());
111  if ( !out )
112  throw std::runtime_error("Could not open " + filename);
113  out << ncbi::MSerial_AsnText << *obj; // no null check: caller must pass a valid pointer
114 }
115 
116 /** Converts bl2seq and blast style seq-align-sets to the seq-align-set format
117  * that the new formatter understands (same flat format as C toolkit
118  * seq-aligns) */
119 ncbi::CRef<ncbi::objects::CSeq_align_set>
120 FlattenSeqAlignSet(const ncbi::objects::CSeq_align_set& sset);
121 
122 #if 0
123 /// Assumes that the sas argument is a bl2seq and blast style seq-align-set
124 void PrintFormattedSeqAlign(std::ostream& out,
125  const ncbi::objects::CSeq_align_set* sas,
126  ncbi::objects::CScope& scope);
127 #endif
128 
129 void PrintSequence(const Uint1* seq, ncbi::TSeqPos len, std::ostream& out,
130  bool show_markers = true,
131  ncbi::TSeqPos chars_per_line = 80);
132 void PrintSequence(const ncbi::objects::CSeqVector& svector,
133  std::ostream& out, bool show_markers = true,
134  ncbi::TSeqPos chars_per_line = 80);
135 
136 /// Returns character representation of a residue from ncbistdaa
137 char GetResidue(unsigned int res);
138 
139 /// Creates and initializes a BlastQueryInfo structure for a single protein
140 /// sequence
142 CreateProtQueryInfo(unsigned int query_size);
143 
144 /// Endianness independent hash function.
145 ///
146 /// This function computes a hash value for an array of any primitive
147 /// type. The hash assumes the data in the array is in "host" order
148 /// with respect to endianness and should produce the same value on
149 /// any platform for the same numerical values of the array
150 /// elements.<P>
151 ///
152 /// The algorithm attempts to be robust against changes (in values in
153 /// the array, the length of the array, zeroes appended to the array),
154 /// and will not normally be fooled by naturally occurring patterns in
155 /// the buffer. (However, it is not intended to be secure against
156 /// deliberate attempts to produce a collision).<P>
157 ///
158 /// The size of an element of the array must be uniform and is
159 /// specified as an argument to the function. It must exactly divide
160 /// the byte length of the array. If the element size is specified as
161 /// 1, no swapping will be done. This can be used to hash a string.
162 ///
163 /// @param buffer
164 /// Points to the beginning of the array.
165 /// @param byte_length
166 /// The length of the array in bytes.
167 /// @param swap_size
168 /// The size of one array element (specify 1 to disable swapping).
169 /// @param hash_seed
170 /// The starting value of the hash.
172  Uint4 byte_length,
173  Uint4 swap_size = 1,
174  Uint4 hash_seed = 1);
175 
176 }
177 
178 #endif // _BLAST_TEST_UTIL_HPP
Data loader implementation that uses the blast databases.
Definitions used throughout BLAST.
Declares the BLAST exception class.
Definitions and functions associated with the BlastQueryInfo structure.
Declaration of ADT to retrieve sequences for the BLAST engine.
Definitions of special type used in BLAST.
CObjectManager –.
CScope –.
Definition: scope.hpp:92
CSeqVector –.
Definition: seq_vector.hpp:65
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define T(s)
Definition: common.h:230
Ensure direct dependencies on enough of the core xncbi library to satisfy shared libraries that depen...
std::ofstream out("events_result.xml")
main entry point for tests
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
string
Definition: cgiapp.hpp:687
#define THROWS(x)
Definition: ncbiexpt.hpp:75
#define MSerial_AsnBinary
Definition: serialbase.hpp:697
#define MSerial_Xml
Definition: serialbase.hpp:698
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
int len
void PrintFormattedSeqAlign(ostream &out, const CSeq_align_set *sas, CScope &scope)
objects::CSeq_id * GenerateRandomSeqid_Gi()
vector< EBlastProgramType > GetAllBlastProgramTypes()
BlastQueryInfo * CreateProtQueryInfo(unsigned int query_size)
Creates and initializes a BlastQueryInfo structure for a single protein sequence.
char GetResidue(unsigned int res)
Returns character representation of a residue from ncbistdaa.
void PrintTextAsn1Object(std::string filename, T *obj)
Convenience template function to print ASN.1 objects to a new file.
CRef< CSeq_align_set > FlattenSeqAlignSet(const CSeq_align_set &sset)
ncbi::CRef< T > ReadObject(const std::string &filename)
void CheckForBlastSeqSrcErrors(const BlastSeqSrc *seqsrc)
Uint4 EndianIndependentBufferHash(const char *buffer, Uint4 byte_length, Uint4 swap_size, Uint4 hash_seed)
Endianness independent hash function.
void PrintSequence(const Uint1 *seq, TSeqPos len, ostream &out, bool show_markers, TSeqPos chars_per_line)
Magic spell ;-) needed for some weird compilers... very empiric.
std::istream & in(std::istream &in_, double &x_)
Format
Definition: njn_ioutil.hpp:52
static pcre_uint8 * buffer
Definition: pcretest.c:1051
The query related information.
Complete type definition of Blast Sequence Source ADT.
Definition: blast_seqsrc.c:43
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
Modified on Wed Apr 17 13:08:29 2024 by modify_doxy.py rev. 669887