NCBI C++ ToolKit
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * ===========================================================================
3 *
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Amelia Fong
27 *
28 * File Description:
29 * Unit test module for multi-threaded (MT) use of CSeqDB and the BLAST database data loader.
30 *
31 * ===========================================================================
32 */
35 #include <ncbi_pch.hpp>
36 #include <corelib/ncbiexpt.hpp>
37 #include <corelib/ncbiutil.hpp>
38 #include <corelib/ncbistre.hpp>
42 #include <objmgr/scope.hpp>
43 #include <objmgr/util/sequence.hpp>
44 #include <objmgr/bioseq_handle.hpp>
45 #include <objmgr/util/sequence.hpp>
62 #include <corelib/test_boost.hpp>
64 #include <common/test_assert.h> /* This header must go last */
67 using namespace ncbi;
68 using namespace ncbi::blast;
69 using namespace ncbi::objects;
72 #ifdef NCBI_THREADS
75 void s_GenerateGiList(CRef<CSeqDB> & seqdb, vector<TGi> & gis)
76 {
77  gis.clear();
78  int num_seqs = seqdb->GetNumSeqs();
79  int kNumTestOids = MIN(5000, num_seqs/4);
80  while(gis.size() <= kNumTestOids) {
81  int oid = rand() % num_seqs;
82  TGi gi;
83  seqdb->OidToGi(oid, gi);
84  if(gi > 0){
85  gis.push_back(gi);
86  }
87  else {
88  continue;
89  }
90  }
91 }
93 void s_GenerateAccsList(CRef<CSeqDB> & seqdb, vector<string> & ids)
94 {
95  ids.clear();
96  int num_seqs = seqdb->GetNumSeqs();
97  int kNumTestOids = MIN(5000, num_seqs/4);
98  while(ids.size() <= kNumTestOids) {
99  int oid = rand() % num_seqs;
100  list<CRef<CSeq_id> > t = seqdb->GetSeqIDs(oid);
101  CRef<CSeq_id> t_id = t.back();
102  ids.push_back(t_id->GetSeqIdString());
103  if(!t.empty()){
104  CRef<CSeq_id> t_id = t.back();
105  ids.push_back(t_id->GetSeqIdString());
106  }
107  else {
108  continue;
109  }
110  }
111 }
/// Worker thread for the "data loader per thread" tests.
///
/// Each instance keeps its own copy of the GI list and of the database
/// name (both members are stored by value).
///
/// NOTE(review): this listing is missing the lines that declare obj_mgr,
/// ld and the m_isProtein member; they are used below but not shown.
114 class CDLTestThread : public CThread
115 {
116 public:
  /// @param gis        GIs to look up; copied into the thread object.
  /// @param db         Database name; copied into the thread object.
  /// @param isProtein  Selects CSeqDB::eProtein over CSeqDB::eNucleotide.
117  CDLTestThread(vector<TGi> & gis, string & db, bool isProtein):
118  m_gis(gis), m_db(db), m_isProtein(isProtein){ }
  /// Thread body: opens a CSeqDB on m_db, creates a scope, adds the data
  /// loader named by ld, then resolves every GI to a bioseq handle and
  /// walks the ids of that handle.
  /// @return Always NULL.
120  virtual void* Main(void) {
121  CRef<CSeqDB> seqdb(new CSeqDB(m_db, m_isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
123  CRef<CScope> scope(new CScope(*obj_mgr));
127  scope->AddDataLoader(ld);
128  for (int i=0; i < m_gis.size(); i++) {
129  CSeq_id id (CSeq_id::e_Gi, m_gis[i]);
130  CBioseq_Handle bioseq_handle = scope->GetBioseqHandle(id);
  // The id list is copied into tmp and also iterated; neither result is
  // used afterwards, so the calls themselves are what is being exercised.
131  CBioseq_Handle::TId tmp = bioseq_handle.GetId();
132  ITERATE(CBioseq_Handle::TId, itr, bioseq_handle.GetId()) {
133  CConstRef<CSeq_id> next_id = itr->GetSeqId();
134  }
  // NOTE(review): wait() and random() are not defined in the visible
  // listing; presumably a randomized pause between lookups -- confirm.
135  wait(random());
136  }
137  return NULL;
138  }
140 private:
141  vector<TGi> m_gis;
142  string m_db;
144 };
146 void s_MTDataLoaderTest(string & db, bool isProtein)
147 {
148  CRef<CSeqDB> seqdb(new CSeqDB(db, isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
149  vector<TGi> gis;
150  s_GenerateGiList(seqdb, gis);
152  const int kNumThreads=64;
153  vector<CDLTestThread*> threads;
154  for (int i=0; i < kNumThreads; i++) {
155  threads.push_back(new CDLTestThread(gis, db, isProtein));
156  }
157  for (int i=0; i < kNumThreads; i++) {
158  threads[i]->Run();
159  }
160  for (int i=0; i < kNumThreads; i++) {
161  threads[i]->Join();
162  }
163 }
165 BOOST_AUTO_TEST_CASE(MT_DataLoaderForEachThread_16S)
166 {
167  string db = "rRNA_typestrains/16S_ribosomal_RNA";
168  bool isProtein = false;
169  BOOST_REQUIRE_NO_THROW(s_MTDataLoaderTest(db, isProtein));
170 }
172 BOOST_AUTO_TEST_CASE(MT_DataLoaderForEachThread_RefseqProt)
173 {
174  string db = "refseq_select_prot";
175  bool isProtein = true;
176  BOOST_REQUIRE_NO_THROW(s_MTDataLoaderTest(db, isProtein));
177 }
179 /****************************************************************************************/
/// Worker thread that is handed a data loader name instead of a database.
///
/// Each instance keeps its own copy of the GI list and of the loader name
/// (both members are stored by value).
///
/// NOTE(review): this listing is missing the line that declares obj_mgr;
/// it is used in Main() but not shown.
180 class CDLTest2Thread : public CThread
181 {
182 public:
  /// @param gis  GIs to look up; copied into the thread object.
  /// @param ld   Data loader name; copied into the thread object.
183  CDLTest2Thread(vector<TGi> & gis, string & ld):
184  m_gis(gis), m_dataloader(ld){ }
  /// Thread body: creates a scope, adds the data loader named by
  /// m_dataloader, then for every GI fetches the bioseq handle, the
  /// complete bioseq and the id list.
  /// @return Always NULL.
186  virtual void* Main(void) {
188  CRef<CScope> scope(new CScope(*obj_mgr));
189  scope->AddDataLoader(m_dataloader);
190  for (int i=0; i < m_gis.size(); i++) {
191  CSeq_id id (CSeq_id::e_Gi, m_gis[i]);
192  CBioseq_Handle bioseq_handle = scope->GetBioseqHandle(id);
  // t and tmp are never read; the calls themselves are what is exercised.
193  CConstRef<CBioseq> t = bioseq_handle.GetCompleteBioseq();
194  CBioseq_Handle::TId tmp = bioseq_handle.GetId();
  // NOTE(review): wait() and random() are not defined in the visible
  // listing; presumably a randomized pause between lookups -- confirm.
195  wait(random());
196  }
197  return NULL;
198  }
200 private:
201  vector<TGi> m_gis;
202  string m_dataloader;
203 };
/// Start 64 CDLTest2Thread workers that all receive the same loader name
/// and wait for all of them.
///
/// NOTE(review): the lines that define ld are missing from this listing;
/// presumably it names a data loader registered for db -- confirm.
///
/// @param db         Database name used to open the CSeqDB handle.
/// @param isProtein  true selects CSeqDB::eProtein, false CSeqDB::eNucleotide.
205 void s_MTDataLoaderTest2(string & db, bool isProtein)
206 {
207  CRef<CSeqDB> seqdb(new CSeqDB(db, isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
  // Sample the GI list once; every worker copies it in its constructor.
213  vector<TGi> gis;
214  s_GenerateGiList(seqdb, gis);
216  const int kNumThreads=64;
217  vector<CDLTest2Thread*> threads;
  // Create all workers first, then start them all, then join them all.
218  for (int i=0; i < kNumThreads; i++) {
219  threads.push_back(new CDLTest2Thread(gis, ld));
220  }
221  for (int i=0; i < kNumThreads; i++) {
222  threads[i]->Run();
223  }
224  for (int i=0; i < kNumThreads; i++) {
225  threads[i]->Join();
226  }
228 }
230 BOOST_AUTO_TEST_CASE(MT_SingleDataLoaderForAllThreads_16s)
231 {
232  {
233  string db = "rRNA_typestrains/16S_ribosomal_RNA";
234  bool isProtein = false;
235  s_MTDataLoaderTest2(db, isProtein);
236  }
237 }
239 BOOST_AUTO_TEST_CASE(MT_SingleDataLoaderForAllThreads_ProtSelect)
240 {
241  {
242  string db = "refseq_select_prot";
243  bool isProtein = true;
244  s_MTDataLoaderTest2(db, isProtein);
245  }
246 }
248 /***********************************************************************/
249 class CSeqDBTestThread : public CThread
250 {
251 public:
252  CSeqDBTestThread(string & db, bool isProtein, vector<string> & ids):
253  m_db(db), m_isProtein(isProtein), m_Ids(ids){}
255  virtual void* Main(void) {
256  CRef<CSeqDB> seqdb(new CSeqDB(m_db, m_isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
257  for (int i=0; i < m_Ids.size(); i++) {
258  vector<int> oids;
259  string output;
260  seqdb->AccessionToOids(m_Ids[i], oids);
261  seqdb->GetSequenceAsString(oids[0], output);
262  }
263  return NULL;
264  }
266 private:
267  string m_db;
269  vector<string> m_Ids;
270 };
272 void s_MTSeqDBTest(string & db, bool isProtein)
273 {
274  CRef<CSeqDB> seqdb(new CSeqDB(db, isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
275  int num_seqs = seqdb->GetNumSeqs();
276  int kNumTestOids = MIN(5000, num_seqs/4);
277  vector<string> ids;
278  s_GenerateAccsList(seqdb, ids);
280  const int kNumThreads=64;
281  vector<CSeqDBTestThread*> threads;
282  for (int i=0; i < kNumThreads; i++) {
283  threads.push_back(new CSeqDBTestThread(db, isProtein, ids));
284  }
285  for (int i=0; i < kNumThreads; i++) {
286  threads[i]->Run();
287  }
288  for (int i=0; i < kNumThreads; i++) {
289  threads[i]->Join();
290  }
292 }
294 BOOST_AUTO_TEST_CASE(SeqDBTest_AccsToOid_nr)
295 {
296  {
297  string db = "nr";
298  bool isProtein = true;
299  BOOST_REQUIRE_NO_THROW(s_MTSeqDBTest(db, isProtein));
300  }
301 }
303 BOOST_AUTO_TEST_CASE(SeqDBTest_AccsToOid_nt)
304 {
305  {
306  string db = "nt";
307  bool isProtein = false;
308  BOOST_REQUIRE_NO_THROW(s_MTSeqDBTest(db, isProtein));
309  }
310 }
312 /*********************************************************************/
316 {
317 public:
318  CSeqDBTest2Thread(string & db, bool isProtein, vector<int> & oids):
319  m_db(db), m_isProtein(isProtein), m_Oids(oids){ }
321  virtual void* Main(void) {
322  CRef<CSeqDB> seqdb(new CSeqDB(m_db, m_isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
323  for(int i =0; i < m_Oids.size(); i++) {
324  const char* buf;
325  seqdb->GetSequence(m_Oids[i], &buf);
326  }
327  return NULL;
328  }
330 private:
331  string m_db;
333  vector<int> & m_Oids;
334 };
336 void s_MTSeqDBTest2(string & db, bool isProtein)
337 {
338  const int kNumThreads=64;
339  const int kNumTestOids = 5000;
340  vector<CSeqDBTest2Thread*> threads;
341  vector<int> oids;
342  CRef<CSeqDB> seqdb(new CSeqDB(db, isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
343  seqdb->SetNumberOfThreads(1, true);
344  int num_seqs = seqdb->GetNumSeqs();
345  for(int i=0; i < kNumTestOids; i++) {
346  int oid = rand() % num_seqs;
347  oids.push_back(oid);
348  }
350  for (int i=0; i < kNumThreads; i++) {
351  threads.push_back(new CSeqDBTest2Thread(db, isProtein, oids));
352  }
353  for (int i=0; i < kNumThreads; i++) {
354  threads[i]->Run();
355  }
356  for (int i=0; i < kNumThreads; i++) {
357  threads[i]->Join();
358  }
360 }
363 {
364  {
365  string db = "nr";
366  bool isProtein = true;
367  BOOST_REQUIRE_NO_THROW(s_MTSeqDBTest2(db, isProtein));
368  }
369 }
372 {
373 public:
374  CSeqDBTest3Thread(CRef<CSeqDB> seqdb, vector<int> & oids):
375  m_SeqDB(seqdb), m_Oids(oids){ }
377  virtual void* Main(void) {
378  for(int i =0; i < m_Oids.size(); i++) {
379  const char* buf;
380  m_SeqDB->GetSequence(m_Oids[i], &buf);
381  wait (rand());
382  m_SeqDB->RetSequence(&buf);
383  }
384  return NULL;
385  }
387 private:
389  vector<int> & m_Oids;
390 };
392 void s_MTSeqDBTest3(string & db, bool isProtein)
393 {
394  const int kNumThreads=64;
395  const int kNumTestOids = 5000;
396  vector<CSeqDBTest3Thread*> threads;
397  vector<int> oids;
398  CRef<CSeqDB> seqdb(new CSeqDB(db, isProtein?CSeqDB::eProtein:CSeqDB::eNucleotide));
399  seqdb->SetNumberOfThreads(kNumThreads);
401  int num_seqs = seqdb->GetNumSeqs();
402  for(int i=0; i < kNumTestOids; i++) {
403  int oid = rand() % num_seqs;
404  oids.push_back(oid);
405  }
407  for (int i=0; i < kNumThreads; i++) {
408  threads.push_back(new CSeqDBTest3Thread(seqdb, oids));
409  }
410  for (int i=0; i < kNumThreads; i++) {
411  threads[i]->Run();
412  }
413  for (int i=0; i < kNumThreads; i++) {
414  threads[i]->Join();
415  }
417 }
420 {
421  {
422  string db = "nt";
423  bool isProtein = false;
424  BOOST_REQUIRE_NO_THROW(s_MTSeqDBTest3(db, isProtein));
425  }
426 }
428 #endif
Produce formatted blast output for command line applications.
void s_GenerateGiList(CRef< CSeqDB > &seqdb, vector< TGi > &gis)
void s_MTSeqDBTest2(string &db, bool isProtein)
void s_MTDataLoaderTest(string &db, bool isProtein)
void s_MTSeqDBTest(string &db, bool isProtein)
void s_MTSeqDBTest3(string &db, bool isProtein)
void s_GenerateAccsList(CRef< CSeqDB > &seqdb, vector< string > &ids)
void s_MTDataLoaderTest2(string &db, bool isProtein)
Declares the CBlastNucleotideOptionsHandle class.
Declares CBlastScopeSource class to create properly configured CScope objects to invoke the BLAST dat...
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
build_archive declarations
CBioseq_Handle –.
static const int kSubjectsDataLoaderPriority
The default priority for subjects, should be used for subjects/databases.
Definition: blast_args.hpp:886
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: bdbloader.cpp:52
static string GetLoaderNameFromArgs(CConstRef< CSeqDB > db_handle)
Definition: bdbloader.cpp:164
CDLTest2Thread(vector< TGi > &gis, string &ld)
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
vector< TGi > m_gis
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
CDLTestThread(vector< TGi > &gis, string &db, bool isProtein)
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
CSeqDBTest2Thread(string &db, bool isProtein, vector< int > &oids)
vector< int > & m_Oids
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
vector< int > & m_Oids
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
CSeqDBTest3Thread(CRef< CSeqDB > seqdb, vector< int > &oids)
CSeqDBTestThread(string &db, bool isProtein, vector< string > &ids)
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
vector< string > m_Ids
Definition: seqdb.hpp:161
void GetSequenceAsString(int oid, CSeqUtil::ECoding coding, string &output, TSeqRange range=TSeqRange()) const
Get a sequence in a given encoding.
Definition: seqdb.cpp:1141
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Gets a list of sequence identifiers.
Definition: seqdb.cpp:765
@ eNucleotide
Definition: seqdb.hpp:175
@ eProtein
Definition: seqdb.hpp:174
int GetNumSeqs() const
Returns the number of sequences available.
Definition: seqdb.cpp:670
int GetSequence(int oid, const char **buffer) const
Get a pointer to raw sequence data.
Definition: seqdb.cpp:530
void AccessionToOids(const string &acc, vector< int > &oids) const
Translate an Accession to a list of OIDs.
Definition: seqdb.cpp:870
void SetNumberOfThreads(int num_threads, bool force_mt=false)
Setting the number of threads.
Definition: seqdb.cpp:1321
static SQLCHAR output[256]
Definition: print.c:5
static char tmp[3200]
Definition: utf8.c:42
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2145
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
Definition: scope.cpp:510
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
vector< CSeq_id_Handle > TId
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
const TId & GetId(void) const
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
char * buf
int i
Declares class which provides internal BLAST database representations to the internal BLAST APIs.
Magic spell ;-) needed for some weird compilers... very empiric.
EIPRangeType t
Definition: ncbi_localip.c:101
#define MIN(a, b)
returns smaller of a and b.
Definition: ncbi_std.h:112
Defines NCBI C++ exception handling.
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
Useful/utility classes and methods.
The Object manager core.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Utility stuff for more convenient using of Boost.Test library.
Modified on Fri Sep 20 14:57:44 2024 by rev. 669887