NCBI C++ ToolKit
blast_test_util.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_test_util.cpp 88781 2020-01-15 18:13:22Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /** @file blast_test_util.cpp
31  * Utilities to develop and debug unit tests for BLAST
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include "blast_test_util.hpp"
36 #include <corelib/ncbimisc.hpp>
37 #include <corelib/ncbitype.h>
38 #include <util/random_gen.hpp>
39 
40 // Serialization includes
41 #include <serial/serial.hpp>
42 #include <serial/objistr.hpp>
43 
44 // Object manager includes
45 #include <objmgr/bioseq_handle.hpp>
46 #include <objmgr/seq_vector.hpp>
50 
51 // Object includes
53 
54 // Formatter includes
56 
57 #include <sstream>
58 
59 #define NCBI_BOOST_NO_AUTO_TEST_MAIN
60 #include <corelib/test_boost.hpp>
61 
62 using namespace std;
63 using namespace ncbi;
64 using namespace ncbi::objects;
65 using namespace ncbi::align_format;
66 
67 namespace TestUtil {
68 
/// Create a new CSeq_id of choice CSeq_id::e_Gi carrying a pseudo-random
/// integer value.
///
/// The random generator is a function-local static, so it is constructed only
/// once (on the first call) from the current time, presumably used as the
/// seed; later calls keep drawing from the same generator.
///
/// @return Raw pointer to an object allocated with `new`. Nothing in this
///         function keeps or releases it, so the caller has to take care of
///         its lifetime (e.g. by storing it in a CRef).
69 objects::CSeq_id* GenerateRandomSeqid_Gi()
70 {
71  static CRandom random_gen(static_cast<CRandom::TValue>(time(0)));
 // Ask the generator for a value bounded by 1 and 20000000 and use it as the
 // GI number of the new Seq-id.
72  return new CSeq_id(CSeq_id::e_Gi, random_gen.GetRand(1, 20000000));
73 }
74 
77 {
79 
81  ASSERT((*i)->GetSegs().IsDisc());
82 
83  ITERATE(CSeq_align::C_Segs::TDisc::Tdata, hsp_itr,
84  (*i)->GetSegs().GetDisc().Get()) {
85  retval->Set().push_back((*hsp_itr));
86  }
87  }
88 
89  return retval;
90 }
91 
93  const CSeq_align_set* sas,
94  CScope& scope)
95 {
96  ASSERT(sas);
97 
98  int align_opt = CDisplaySeqalign::eShowMiddleLine |
101 
103 
104  CDisplaySeqalign formatter(*saset, scope);
105  formatter.SetAlignOption(align_opt);
106  formatter.DisplaySeqalign(out);
107 }
108 
// File-local helper type: a union that exposes the same storage as an array
// of four chars (end_bytes). The hash routine later in this file fills
// end_bytes and then reads the union's `end_value` member as a Uint4 to find
// out the byte order of the host.
// NOTE(review): the declaration of the `end_value` member (source line 112)
// is not present in this listing; the index at the end of the page lists it
// as `Uint4 end_value`.
109 namespace {
110  union SUnion14 {
111  char end_bytes[4];
113  };
114 };
115 
/// Endianness independent hash function.
///
/// NOTE(review): the line carrying the function name and the first parameter
/// (source line 117) is missing from this listing; the index at the end of
/// the page gives the full signature as
/// `Uint4 EndianIndependentBufferHash(const char *buffer, Uint4 byte_length,
///  Uint4 swap_size, Uint4 hash_seed)`.
///
/// Folds every byte of `buffer` into a 32-bit value. The bytes are visited in
/// "logical" order; unless the host is detected as little endian, each group
/// of `swap_size` bytes is read in reversed order.
///
/// @param byte_length Number of bytes to hash; asserted to be a multiple of
///                    swap_size.
/// @param swap_size   Size in bytes of the units whose byte order may need
///                    reversing; asserted to be a non-zero power of two.
/// @param hash_seed   Initial value of the hash.
/// @return The accumulated 32-bit hash (equal to hash_seed when byte_length
///         is 0, because the loop body never runs).
116 Uint4
118  Uint4 byte_length,
119  Uint4 swap_size,
120  Uint4 hash_seed)
121 {
122  Uint4 hash = hash_seed;
123  Uint4 swap_mask = swap_size - 1;
124 
125  // Check that swapsize is a power of two.
126  _ASSERT((swap_size) && (0 == (swap_mask & swap_size)));
127 
128  // Ensure that the byte_length is a multiple of swap_size
129  _ASSERT((byte_length & swap_mask) == 0);
130 
 // Byte-order probe: store the bytes 0x44, 0x33, 0x22, 0x11 in increasing
 // address order and read them back as one Uint4. The value is 0x11223344
 // only when the lowest address holds the least significant byte.
131  SUnion14 swap_test;
132  swap_test.end_bytes[0] = 0x44;
133  swap_test.end_bytes[1] = 0x33;
134  swap_test.end_bytes[2] = 0x22;
135  swap_test.end_bytes[3] = 0x11;
136  Uint4 end_value = swap_test.end_value;
137 
138  if (end_value == 0x11223344) {
139  // Prevent actual swapping on little endian machinery.
 // With swap_mask == 0 the physical index computed below always equals
 // the logical index.
140  swap_size = 1;
141  swap_mask = 0;
142  }
143 
 // keep_mask selects the index bits that identify the swap_size-sized group;
 // swap_mask selects the offset inside that group.
144  Uint4 keep_mask = ~ swap_mask;
145 
146  // Logical address is the address the byte would have if the data were
 // stored little endian.
147 
148  for(Uint4 logical = 0; logical < byte_length; logical++) {
 // Same group as the logical index, but with the offset inside the group
 // mirrored (offset k becomes swap_mask - k).
149  Uint4 physical =
150  (logical & keep_mask) | (swap_mask - (logical & swap_mask));
151 
152  // Alternate addition and XOR. This technique destroys most
153  // of the possible mathematical relationships between similar
154  // input strings.
 // The "& 0xFF" keeps only the low 8 bits, so a char that converts to a
 // negative int still contributes a value in 0..255.
155 
156  if (logical & 1) {
157  hash += int(buffer[physical]) & 0xFF;
158  } else {
159  hash ^= int(buffer[physical]) & 0xFF;
160  }
161 
162  // 1. "Rotate" by a value relatively prime to 32 (any odd
163  // value), to ensure that each input bit will eventually
164  // affect each position.
165  // 2. Add a per-iteration constant to detect changes in length.
166 
 // Left rotation of the 32-bit hash by 13 bits, followed by the constant.
167  hash = ((hash << 13) | (hash >> 19)) + 1234;
168  }
169 
170  return hash;
171 }
172 
173 CBlastOM::CBlastOM(const string& dbname, EDbType dbtype, ELocation location)
174 : m_ObjMgr(CObjectManager::GetInstance())
175 {
178 }
179 
180 void
182 {
183  try {
184  CRef<CReader> reader(new CId2Reader);
185  reader->SetPreopenConnection(false);
188  .GetLoader()->GetName();
189  } catch (const CException& e) {
190  m_GbLoaderName.erase();
191  ERR_POST(Warning << e.GetMsg());
192  }
193 }
194 
195 void
197  EDbType dbtype,
199 {
200  try {
201  if (location == eLocal) {
203  (*m_ObjMgr, dbname, dbtype, true,
206  } else {
208  (*m_ObjMgr, dbname, dbtype, true,
211  }
212  } catch (const CSeqDBException& e) {
213 
214  // if the database isn't found, ignore the exception as the Genbank
215  // data loader will be the fallback (just issue a warning)
216 
217  if (e.GetMsg().find("No alias or index file found ") != NPOS) {
218  ERR_POST(Warning << e.GetMsg());
219  }
220 
221  }
222 }
223 
225 {
226  CRef<CScope> retval(new CScope(*m_ObjMgr));
227 
228  if (!m_BlastDbLoaderName.empty()) {
230  }
231  if (!m_GbLoaderName.empty()) {
232  retval->AddDataLoader(m_GbLoaderName, 2);
233  }
234  return retval;
235 }
236 
238 {
239  if (!m_BlastDbLoaderName.empty()) {
241  }
242 }
243 
244 }
245 
Sequence alignment display tool.
Data loader implementation that uses the blast databases remotely.
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: bdbloader.cpp:52
void SetAlignOption(int option)
Set functions.
Definition: showalign.hpp:284
void DisplaySeqalign(CNcbiOstream &out)
call this to display seqalign
Definition: showalign.cpp:1915
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
CObjectManager –.
CRandom::
Definition: random_gen.hpp:66
void SetPreopenConnection(bool preopen=true)
Definition: reader.cpp:207
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
CScope –.
Definition: scope.hpp:92
CSeqDBException.
Definition: seqdbcommon.hpp:73
void x_InitBlastDatabaseDataLoader(const std::string &dbname, EDbType dbtype, ELocation location)
ncbi::CRef< ncbi::objects::CObjectManager > m_ObjMgr
ncbi::CBlastDbDataLoader::EDbType EDbType
std::string m_GbLoaderName
std::string m_BlastDbLoaderName
void RevokeBlastDbDataLoader()
Removes the BLAST database data loader from the object manager.
ncbi::CRef< ncbi::objects::CScope > NewScope()
Create a new scope with the default set to the BLAST database data loader for the BLAST database spec...
std::ofstream out("events_result.xml")
main entry point for tests
static const char location[]
Definition: config.c:97
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
TLoader * GetLoader(void) const
Get pointer to the loader.
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
Definition: scope.cpp:510
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
bool RevokeDataLoader(CDataLoader &loader)
Revoke previously registered data loader.
@ kPriority_NotSet
Deprecated: use kPriority_Default instead.
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
Uint4 TValue
Type of the generated integer value and/or the seed value.
Definition: random_gen.hpp:69
TValue GetRand(void)
Get the next random number in the interval [0..GetMax()] (inclusive)
Definition: random_gen.hpp:238
#define NPOS
Definition: ncbistr.hpp:133
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
void PrintFormattedSeqAlign(ostream &out, const CSeq_align_set *sas, CScope &scope)
objects::CSeq_id * GenerateRandomSeqid_Gi()
CRef< CSeq_align_set > FlattenSeqAlignSet(const CSeq_align_set &sset)
Uint4 EndianIndependentBufferHash(const char *buffer, Uint4 byte_length, Uint4 swap_size, Uint4 hash_seed)
Endianness independent hash function.
Magic spell ;-) needed for some weird compilers... very empiric.
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
Miscellaneous common-use basic types and functionality.
Defines Limits for the types used in NCBI C/C++ toolkit.
Uint4 end_value
char end_bytes[4]
Utilities to develop and debug unit tests for BLAST.
static pcre_uint8 * buffer
Definition: pcretest.c:1051
Definition: _hash_fun.h:40
#define _ASSERT
Utility stuff for more convenient using of Boost.Test library.
Modified on Wed May 29 18:39:47 2024 by modify_doxy.py rev. 669887