NCBI C++ ToolKit
blast_mt_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Amelia Fong
27 *
28 * File Description:
29 * Unit tests for multi-threaded (MT) access to BLAST databases.
30 *
31 * ===========================================================================
32 */
33 
34 
35 #include <ncbi_pch.hpp>
36 #include <corelib/ncbiexpt.hpp>
37 #include <corelib/ncbiutil.hpp>
38 #include <corelib/ncbistre.hpp>
39 
40 
42 #include <objmgr/scope.hpp>
43 #include <objmgr/util/sequence.hpp>
44 #include <objmgr/bioseq_handle.hpp>
45 #include <objmgr/util/sequence.hpp>
46 
50 
57 
58 
60 
61 #define NCBI_BOOST_NO_AUTO_TEST_MAIN
62 #include <corelib/test_boost.hpp>
63 
64 #include <common/test_assert.h> /* This header must go last */
65 
66 
67 using namespace ncbi;
68 using namespace ncbi::blast;
69 using namespace ncbi::objects;
70 
71 
72 #ifdef NCBI_THREADS
73 BOOST_AUTO_TEST_SUITE(blast_mt)
74 
75 void s_GenerateGiList(CRef<CSeqDB> & seqdb, vector<TGi> & gis)
76 {
77  gis.clear();
78  int num_seqs = seqdb->GetNumSeqs();
79  int kNumTestOids = MIN(5000, num_seqs/4);
80  while(gis.size() <= kNumTestOids) {
81  int oid = rand() % num_seqs;
82  TGi gi;
83  seqdb->OidToGi(oid, gi);
84  if(gi > 0){
85  gis.push_back(gi);
86  }
87  else {
88  continue;
89  }
90  }
91 }
92 
93 void s_GenerateAccsList(CRef<CSeqDB> & seqdb, vector<string> & ids)
94 {
95  ids.clear();
96  int num_seqs = seqdb->GetNumSeqs();
97  int kNumTestOids = MIN(5000, num_seqs/4);
98  while(ids.size() <= kNumTestOids) {
99  int oid = rand() % num_seqs;
100  list<CRef<CSeq_id> > t = seqdb->GetSeqIDs(oid);
101  CRef<CSeq_id> t_id = t.back();
102  ids.push_back(t_id->GetSeqIdString());
103  if(!t.empty()){
104  CRef<CSeq_id> t_id = t.back();
105  ids.push_back(t_id->GetSeqIdString());
106  }
107  else {
108  continue;
109  }
110  }
111 }
112 
113 
// Worker thread for the "data loader for each thread" tests.
// Each instance holds its own copy of the GI list and of the database name
// (both members are stored by value) and opens its own CSeqDB in Main().
// NOTE(review): the declarations of `obj_mgr`, `ld` and `m_isProtein` are not
// present in the lines visible here (listing lines 122, 124-126 and 143 are
// absent) -- confirm against the complete file.
114 class CDLTestThread : public CThread
115 {
116 public:
 // Copies the GI list and database name; remembers the molecule type.
117  CDLTestThread(vector<TGi> & gis, string & db, bool isProtein):
118  m_gis(gis), m_db(db), m_isProtein(isProtein){ }
119 
 // Thread body: builds a scope, adds the data loader `ld` to it, then
 // resolves every GI to a bioseq handle and walks the ids of that handle.
 // Always returns NULL.
120  virtual void* Main(void) {
121  CRef<CSeqDB> seqdb(new CSeqDB(m_db, m_isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
123  CRef<CScope> scope(new CScope(*obj_mgr));
127  scope->AddDataLoader(ld);
 // NOTE(review): `int i` is compared against the unsigned m_gis.size().
128  for (int i=0; i < m_gis.size(); i++) {
129  CSeq_id id (CSeq_id::e_Gi, m_gis[i]);
130  CBioseq_Handle bioseq_handle = scope->GetBioseqHandle(id);
 // `tmp` is a copy of the id list that is not used afterwards.
131  CBioseq_Handle::TId tmp = bioseq_handle.GetId();
 // Touch every id of the handle; the result is discarded.
132  ITERATE(CBioseq_Handle::TId, itr, bioseq_handle.GetId()) {
133  CConstRef<CSeq_id> next_id = itr->GetSeqId();
134  }
 // NOTE(review): presumably meant as a random pause between lookups;
 // confirm which `wait` overload this call resolves to.
135  wait(random());
136  }
137  return NULL;
138  }
140 private:
 // Private copy of the GIs to look up.
141  vector<TGi> m_gis;
 // Name of the database opened in Main().
142  string m_db;
144 };
145 
146 void s_MTDataLoaderTest(string & db, bool isProtein)
147 {
148  CRef<CSeqDB> seqdb(new CSeqDB(db, isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
149  vector<TGi> gis;
150  s_GenerateGiList(seqdb, gis);
151 
152  const int kNumThreads=64;
153  vector<CDLTestThread*> threads;
154  for (int i=0; i < kNumThreads; i++) {
155  threads.push_back(new CDLTestThread(gis, db, isProtein));
156  }
157  for (int i=0; i < kNumThreads; i++) {
158  threads[i]->Run();
159  }
160  for (int i=0; i < kNumThreads; i++) {
161  threads[i]->Join();
162  }
163 }
164 
165 BOOST_AUTO_TEST_CASE(MT_DataLoaderForEachThread_16S)
166 {
167  string db = "rRNA_typestrains/16S_ribosomal_RNA";
168  bool isProtein = false;
169  BOOST_REQUIRE_NO_THROW(s_MTDataLoaderTest(db, isProtein));
170 }
171 
172 BOOST_AUTO_TEST_CASE(MT_DataLoaderForEachThread_RefseqProt)
173 {
174  string db = "refseq_select_prot";
175  bool isProtein = true;
176  BOOST_REQUIRE_NO_THROW(s_MTDataLoaderTest(db, isProtein));
177 }
178 
179 /****************************************************************************************/
// Worker thread for the "single data loader for all threads" tests.
// Instead of a database name it receives the name of a data loader, which it
// adds to its own scope in Main().  The GI list and the loader name are
// stored by value.
// NOTE(review): the declaration of `obj_mgr` is not present in the lines
// visible here (listing line 187 is absent) -- confirm against the full file.
180 class CDLTest2Thread : public CThread
181 {
182 public:
 // Copies the GI list and the data loader name.
183  CDLTest2Thread(vector<TGi> & gis, string & ld):
184  m_gis(gis), m_dataloader(ld){ }
185 
 // Thread body: resolves every GI through the scope and fetches the
 // complete bioseq and the id list of each handle.  Always returns NULL.
186  virtual void* Main(void) {
188  CRef<CScope> scope(new CScope(*obj_mgr));
189  scope->AddDataLoader(m_dataloader);
 // NOTE(review): `int i` is compared against the unsigned m_gis.size().
190  for (int i=0; i < m_gis.size(); i++) {
191  CSeq_id id (CSeq_id::e_Gi, m_gis[i]);
192  CBioseq_Handle bioseq_handle = scope->GetBioseqHandle(id);
 // Both results below are fetched only to exercise the calls; neither
 // `t` nor `tmp` is used afterwards.
193  CConstRef<CBioseq> t = bioseq_handle.GetCompleteBioseq();
194  CBioseq_Handle::TId tmp = bioseq_handle.GetId();
 // NOTE(review): presumably meant as a random pause between lookups;
 // confirm which `wait` overload this call resolves to.
195  wait(random());
196  }
197  return NULL;
198  }
200 private:
 // Private copy of the GIs to look up.
201  vector<TGi> m_gis;
 // Name of the data loader added to the scope in Main().
202  string m_dataloader;
203 };
204 
// Stress test: 64 CDLTest2Thread workers all receive the same data loader
// name `ld` and the same random GI list generated from `db`.
// db        - name of the database to open
// isProtein - true for a protein database, false for nucleotide
// NOTE(review): the declaration of `ld` is not present in the lines visible
// here (listing lines 208-211 are absent) -- confirm against the full file.
205 void s_MTDataLoaderTest2(string & db, bool isProtein)
206 {
207  CRef<CSeqDB> seqdb(new CSeqDB(db, isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
212 
213  vector<TGi> gis;
214  s_GenerateGiList(seqdb, gis);
215 
216  const int kNumThreads=64;
217  vector<CDLTest2Thread*> threads;
 // Create all workers first, then start them, then wait for all of them.
218  for (int i=0; i < kNumThreads; i++) {
219  threads.push_back(new CDLTest2Thread(gis, ld));
220  }
221  for (int i=0; i < kNumThreads; i++) {
222  threads[i]->Run();
223  }
224  for (int i=0; i < kNumThreads; i++) {
225  threads[i]->Join();
226  }
227 
228 }
229 
230 BOOST_AUTO_TEST_CASE(MT_SingleDataLoaderForAllThreads_16s)
231 {
232  {
233  string db = "rRNA_typestrains/16S_ribosomal_RNA";
234  bool isProtein = false;
235  s_MTDataLoaderTest2(db, isProtein);
236  }
237 }
238 
239 BOOST_AUTO_TEST_CASE(MT_SingleDataLoaderForAllThreads_ProtSelect)
240 {
241  {
242  string db = "refseq_select_prot";
243  bool isProtein = true;
244  s_MTDataLoaderTest2(db, isProtein);
245  }
246 }
247 
248 /***********************************************************************/
// Worker thread for the accession-to-OID tests.
// Each instance opens its own CSeqDB on m_db and, for every id string in its
// private copy of the list, translates it to OIDs and fetches the sequence
// of the first OID as a string.
// NOTE(review): the declaration of `m_isProtein` is not present in the lines
// visible here (listing line 268 is absent) -- confirm against the full file.
249 class CSeqDBTestThread : public CThread
250 {
251 public:
 // Copies the database name and the id list; remembers the molecule type.
252  CSeqDBTestThread(string & db, bool isProtein, vector<string> & ids):
253  m_db(db), m_isProtein(isProtein), m_Ids(ids){}
254 
 // Thread body; always returns NULL.
255  virtual void* Main(void) {
256  CRef<CSeqDB> seqdb(new CSeqDB(m_db, m_isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
 // NOTE(review): `int i` is compared against the unsigned m_Ids.size().
257  for (int i=0; i < m_Ids.size(); i++) {
258  vector<int> oids;
259  string output;
260  seqdb->AccessionToOids(m_Ids[i], oids);
 // NOTE(review): oids[0] is read without checking that the lookup above
 // produced at least one OID.
261  seqdb->GetSequenceAsString(oids[0], output);
262  }
263  return NULL;
264  }
266 private:
 // Name of the database opened in Main().
267  string m_db;
 // Private copy of the id strings to look up.
269  vector<string> m_Ids;
270 };
271 
272 void s_MTSeqDBTest(string & db, bool isProtein)
273 {
274  CRef<CSeqDB> seqdb(new CSeqDB(db, isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
275  int num_seqs = seqdb->GetNumSeqs();
276  int kNumTestOids = MIN(5000, num_seqs/4);
277  vector<string> ids;
278  s_GenerateAccsList(seqdb, ids);
279 
280  const int kNumThreads=64;
281  vector<CSeqDBTestThread*> threads;
282  for (int i=0; i < kNumThreads; i++) {
283  threads.push_back(new CSeqDBTestThread(db, isProtein, ids));
284  }
285  for (int i=0; i < kNumThreads; i++) {
286  threads[i]->Run();
287  }
288  for (int i=0; i < kNumThreads; i++) {
289  threads[i]->Join();
290  }
291 
292 }
293 
294 BOOST_AUTO_TEST_CASE(SeqDBTest_AccsToOid_nr)
295 {
296  {
297  string db = "nr";
298  bool isProtein = true;
299  BOOST_REQUIRE_NO_THROW(s_MTSeqDBTest(db, isProtein));
300  }
301 }
302 
303 BOOST_AUTO_TEST_CASE(SeqDBTest_AccsToOid_nt)
304 {
305  {
306  string db = "nt";
307  bool isProtein = false;
308  BOOST_REQUIRE_NO_THROW(s_MTSeqDBTest(db, isProtein));
309  }
310 }
311 
312 /*********************************************************************/
313 
314 
316 {
 // Body of CSeqDBTest2Thread (name taken from the constructor below).
 // NOTE(review): the class header line and the declaration of `m_isProtein`
 // are not present in the lines visible here (listing lines 315 and 332 are
 // absent) -- confirm against the complete file.
 // Each instance opens its own CSeqDB on m_db and requests the raw sequence
 // of every OID in the list it was given.
317 public:
 // Copies the database name; keeps a reference to the caller's OID list,
 // which therefore has to outlive this object.
318  CSeqDBTest2Thread(string & db, bool isProtein, vector<int> & oids):
319  m_db(db), m_isProtein(isProtein), m_Oids(oids){ }
320 
 // Thread body; always returns NULL.
321  virtual void* Main(void) {
322  CRef<CSeqDB> seqdb(new CSeqDB(m_db, m_isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
 // NOTE(review): `int i` is compared against the unsigned m_Oids.size().
323  for(int i =0; i < m_Oids.size(); i++) {
324  const char* buf;
 // NOTE(review): unlike CSeqDBTest3Thread, the buffer obtained here is
 // not handed back with RetSequence() -- confirm this is intended.
325  seqdb->GetSequence(m_Oids[i], &buf);
326  }
327  return NULL;
328  }
330 private:
 // Name of the database opened in Main().
331  string m_db;
 // Reference to the OID list owned by the caller.
333  vector<int> & m_Oids;
334 };
335 
336 void s_MTSeqDBTest2(string & db, bool isProtein)
337 {
338  const int kNumThreads=64;
339  const int kNumTestOids = 5000;
340  vector<CSeqDBTest2Thread*> threads;
341  vector<int> oids;
342  CRef<CSeqDB> seqdb(new CSeqDB(db, isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
343  seqdb->SetNumberOfThreads(1, true);
344  int num_seqs = seqdb->GetNumSeqs();
345  for(int i=0; i < kNumTestOids; i++) {
346  int oid = rand() % num_seqs;
347  oids.push_back(oid);
348  }
349 
350  for (int i=0; i < kNumThreads; i++) {
351  threads.push_back(new CSeqDBTest2Thread(db, isProtein, oids));
352  }
353  for (int i=0; i < kNumThreads; i++) {
354  threads[i]->Run();
355  }
356  for (int i=0; i < kNumThreads; i++) {
357  threads[i]->Join();
358  }
359 
360 }
361 
363 {
 // Test-case body: runs s_MTSeqDBTest2 on the "nr" protein database and
 // requires that it does not throw.
 // NOTE(review): the BOOST_AUTO_TEST_CASE header line for this body is not
 // present in the lines visible here (listing line 362 is absent).
364  {
365  string db = "nr";
366  bool isProtein = true;
367  BOOST_REQUIRE_NO_THROW(s_MTSeqDBTest2(db, isProtein));
368  }
369 }
370 
372 {
 // Body of CSeqDBTest3Thread (name taken from the constructor below).
 // NOTE(review): the class header line and the declaration of `m_SeqDB` are
 // not present in the lines visible here (listing lines 371 and 388 are
 // absent) -- confirm against the complete file.
 // All instances work on the CSeqDB handed to the constructor instead of
 // opening their own.
373 public:
 // Stores the CSeqDB handle; keeps a reference to the caller's OID list,
 // which therefore has to outlive this object.
374  CSeqDBTest3Thread(CRef<CSeqDB> seqdb, vector<int> & oids):
375  m_SeqDB(seqdb), m_Oids(oids){ }
376 
 // Thread body: for every OID, get the raw sequence buffer and hand it
 // back with RetSequence().  Always returns NULL.
377  virtual void* Main(void) {
 // NOTE(review): `int i` is compared against the unsigned m_Oids.size().
378  for(int i =0; i < m_Oids.size(); i++) {
379  const char* buf;
380  m_SeqDB->GetSequence(m_Oids[i], &buf);
 // NOTE(review): presumably meant as a random pause while the buffer is
 // held; confirm which `wait` overload this call resolves to.
381  wait (rand());
382  m_SeqDB->RetSequence(&buf);
383  }
384  return NULL;
385  }
387 private:
 // Reference to the OID list owned by the caller.
389  vector<int> & m_Oids;
390 };
391 
392 void s_MTSeqDBTest3(string & db, bool isProtein)
393 {
394  const int kNumThreads=64;
395  const int kNumTestOids = 5000;
396  vector<CSeqDBTest3Thread*> threads;
397  vector<int> oids;
398  CRef<CSeqDB> seqdb(new CSeqDB(db, isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
399  seqdb->SetNumberOfThreads(kNumThreads);
400 
401  int num_seqs = seqdb->GetNumSeqs();
402  for(int i=0; i < kNumTestOids; i++) {
403  int oid = rand() % num_seqs;
404  oids.push_back(oid);
405  }
406 
407  for (int i=0; i < kNumThreads; i++) {
408  threads.push_back(new CSeqDBTest3Thread(seqdb, oids));
409  }
410  for (int i=0; i < kNumThreads; i++) {
411  threads[i]->Run();
412  }
413  for (int i=0; i < kNumThreads; i++) {
414  threads[i]->Join();
415  }
416 
417 }
418 
420 {
 // Test-case body: runs s_MTSeqDBTest3 on the "nt" nucleotide database and
 // requires that it does not throw.
 // NOTE(review): the BOOST_AUTO_TEST_CASE header line for this body is not
 // present in the lines visible here (listing line 419 is absent).
421  {
422  string db = "nt";
423  bool isProtein = false;
424  BOOST_REQUIRE_NO_THROW(s_MTSeqDBTest3(db, isProtein));
425  }
426 }
428 #endif
Produce formatted blast output for command line applications.
void s_GenerateGiList(CRef< CSeqDB > &seqdb, vector< TGi > &gis)
void s_MTSeqDBTest2(string &db, bool isProtein)
void s_MTDataLoaderTest(string &db, bool isProtein)
void s_MTSeqDBTest(string &db, bool isProtein)
BOOST_AUTO_TEST_CASE(MT_DataLoaderForEachThread_16S)
void s_MTSeqDBTest3(string &db, bool isProtein)
void s_GenerateAccsList(CRef< CSeqDB > &seqdb, vector< string > &ids)
void s_MTDataLoaderTest2(string &db, bool isProtein)
Declares the CBlastNucleotideOptionsHandle class.
Declares CBlastScopeSource class to create properly configured CScope objects to invoke the BLAST dat...
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
build_archive declarations
CBioseq_Handle –.
static const int kSubjectsDataLoaderPriority
The default priority for subjects, should be used for subjects/databases.
Definition: blast_args.hpp:886
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: bdbloader.cpp:52
static string GetLoaderNameFromArgs(CConstRef< CSeqDB > db_handle)
Definition: bdbloader.cpp:164
CDLTest2Thread(vector< TGi > &gis, string &ld)
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
vector< TGi > m_gis
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
CDLTestThread(vector< TGi > &gis, string &db, bool isProtein)
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
CSeqDBTest2Thread(string &db, bool isProtein, vector< int > &oids)
vector< int > & m_Oids
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
vector< int > & m_Oids
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
CSeqDBTest3Thread(CRef< CSeqDB > seqdb, vector< int > &oids)
CSeqDBTestThread(string &db, bool isProtein, vector< string > &ids)
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
vector< string > m_Ids
CSeqDB.
Definition: seqdb.hpp:161
void GetSequenceAsString(int oid, CSeqUtil::ECoding coding, string &output, TSeqRange range=TSeqRange()) const
Get a sequence in a given encoding.
Definition: seqdb.cpp:1141
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Gets a list of sequence identifiers.
Definition: seqdb.cpp:765
@ eNucleotide
Definition: seqdb.hpp:175
@ eProtein
Definition: seqdb.hpp:174
int GetNumSeqs() const
Returns the number of sequences available.
Definition: seqdb.cpp:670
int GetSequence(int oid, const char **buffer) const
Get a pointer to raw sequence data.
Definition: seqdb.cpp:530
void AccessionToOids(const string &acc, vector< int > &oids) const
Translate an Accession to a list of OIDs.
Definition: seqdb.cpp:870
void SetNumberOfThreads(int num_threads, bool force_mt=false)
Setting the number of threads.
Definition: seqdb.cpp:1321
static SQLCHAR output[256]
Definition: print.c:5
static char tmp[3200]
Definition: utf8.c:42
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2145
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
Definition: scope.cpp:510
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
vector< CSeq_id_Handle > TId
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
const TId & GetId(void) const
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
char * buf
int i
Declares class which provides internal BLAST database representations to the internal BLAST APIs.
Magic spell ;-) needed for some weird compilers... very empiric.
EIPRangeType t
Definition: ncbi_localip.c:101
#define MIN(a, b)
returns smaller of a and b.
Definition: ncbi_std.h:112
Defines NCBI C++ exception handling.
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
Useful/utility classes and methods.
The Object manager core.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
Utility stuff for more convenient using of Boost.Test library.
Modified on Fri Sep 20 14:57:44 2024 by modify_doxy.py rev. 669887