58 virtual void Run() = 0;
73 cout <<
" GetSequence() provides a basic interface to fetch\n"
74 <<
" a sequence from a SeqDB object given an OID.\n";
85 if (
nr.GiToOid(gi, oid)) {
87 unsigned length =
nr.GetSequence(oid, &
buffer);
92 for(
unsigned i = 0;
i<length;
i++) {
98 cout <<
" Found " << count_a
99 <<
" alanines in sequence with GI " << gi
104 cout <<
" Failed: could not find GI " << gi << endl;
121 cout <<
" CheckOrFindOID() provides a simple OID based iteration\n"
122 <<
" over the database. The method works well as the test\n"
123 <<
" clause of a for loop. This example counts the number\n"
124 <<
" of sequences in the \"swissprot\" database, displaying\n"
125 <<
" the count and the combined length of the first 1000.\n";
137 if (oid_count++ < 1000) {
142 int measured = (oid_count > 1000) ? 1000 : oid_count;
144 cout <<
" Number of swissprot sequences in (from iteration): "
145 << oid_count << endl;
147 cout <<
" Number of sequences in swissprot (from index file): "
150 cout <<
" Combined length of the first " << measured
151 <<
" sequences: " << length_1000 << endl;
167 cout <<
" GetNextOIDChunk() provides versatile iteration meant\n"
168 <<
" for multithreaded applications. Each thread fetches\n"
169 <<
" a set of OIDs to work with, only returning for more\n"
170 <<
" when done with that set. SeqDB guarantees that all\n"
171 <<
" OIDs will be assigned, and no OID will be returned\n"
172 <<
" more than once.\n\n"
173 <<
" The data will be returned in one of two forms, either\n"
174 <<
" as a pair of numbers representing a range of OIDs, or\n"
175 <<
" in a vector. The number of OIDs desired is indicated\n"
176 <<
" by setting the size of the vector on input.\n";
191 int at_a_time = 1000;
197 int begin(0), end(0);
236 switch(sp.
GetNextOIDChunk(begin, end, at_a_time, oids, & local_state)) {
238 for(
int index = 0; index < (
int)oids.size(); index++) {
239 x_UseOID(sp, oids[index], oid_count, length_1000);
245 for(
int oid = begin; oid < end; oid++) {
246 x_UseOID(sp, oid, oid_count, length_1000);
248 done = (begin == end);
255 unsigned measured = (oid_count > 1000) ? 1000 : oid_count;
257 cout <<
" Sequences in swissprot (counted during iteration): "
258 << oid_count << endl;
259 cout <<
" Sequences in swissprot (from database index file): "
261 cout <<
" Combined length of the first " << measured
262 <<
" sequences: " << length_1000 << endl;
274 if (oid_count++ < 1000) {
290 virtual void*
Main(
void);
291 virtual void OnExit(
void);
294 static void Init(
CSeqDB& db,
bool oid_shuffle,
bool use_ambigs)
329 static const int kLoops = 1;
332 static char* lets =
NULL;
337 for (
int i = 0;
i < 4; ++
i) {
338 mask[
i] =
static_cast<char>(0x03 << (
i << 1));
339 lets[
i] =
static_cast<char>((
letter & 0x03) << (
i << 1));
354 for (
int loop = 0; loop < kLoops; ++loop) {
357 for (
int i = 0;
i < length; ++
i) {
375 for (
int loop = 0; loop < kLoops; ++loop) {
379 for (
int i = 0;
i < length; ++
i) {
385 int nbytes = (length + 3) / 4;
386 for (
int i = 0;
i < nbytes; ++
i) {
388 for (
int j = 0; j < 4; ++j) {
389 if ((c &
mask[j]) == lets[j]) {
416 ) : m_Index(index), m_AtATime(at_a_time), m_MaxLength(max_length),
423 long* retval =
new long;
436 int oid_begin(0), oid_end(0);
463 for (
int oid = oid_begin; oid < oid_end; ++oid) {
478 for (
int oid = oid_begin; oid < oid_end; ++oid) {
485 shuffle(oids.begin(), oids.end(), default_random_engine());
487 ITERATE(vector<int>, oid, oids) {
494 std::ostringstream oss;
495 oss <<
"Thread " <<
m_Index <<
" says: "
496 <<
count <<
" occurrences of A" << endl;
499 cout << oss.str() << flush;
584 cout <<
" GetNextOIDChunk() provides versatile iteration meant\n"
585 <<
" for multithreaded applications. Each thread fetches\n"
586 <<
" a set of OIDs to work with, only returning for more\n"
587 <<
" when done with that set. SeqDB guarantees that all\n"
588 <<
" OIDs will be assigned, and no OID will be returned\n"
589 <<
" more than once.\n\n"
590 <<
" The data will be returned in one of two forms, either\n"
591 <<
" as a pair of numbers representing a range of OIDs, or\n"
592 <<
" in a vector. The number of OIDs desired is indicated\n"
593 <<
" by setting the size of the vector on input.\n";
603 cout <<
"Sequence type is PROTEIN" << endl;
605 cout <<
"Sequence type is NUCLEOTIDE" << endl;
613 vector<string> paths;
614 bool recursive =
true;
616 cout <<
"Volume paths:" << endl;
617 ITERATE(vector<string>, path, paths) {
618 cout <<
"\t" << *path << endl;
640 vector<CSeqDBDemo_Thread*> threads;
650 threads.push_back(thread);
658 vector<CSeqDBDemo_Thread*>::iterator
thr;
659 while (!threads.empty()) {
662 vector<CSeqDBDemo_Thread*>,
667 if (!
th->IsRunning()) {
675 (*thr)->Join(
reinterpret_cast<void**
>(&retval));
677 cout <<
"Thread " << (*thr)->GetIndex() <<
" returned "
686 cout <<
"Threads combined returned " << sumval << endl;
710 cout <<
" SeqidToBioseq() provides a basic interface to fetch\n"
711 <<
" sequences from a SeqDB object. Given a Seq-id, the\n"
712 <<
" method returns the first matching CBioseq found in\n"
713 <<
" the database.\n";
721 string str(
"gi|129295");
729 cout <<
" Length of sequence \"" <<
str
732 cout <<
" Failed: could not get length from CSeq_inst."
736 cout <<
" Failed: could not get CSeq_inst from CBioseq."
740 cout <<
" Failed: could not get CBioseq from SeqDB." << endl;
748 int main(
int argc,
char ** argv)
772 list< CRef<ISeqDBDemoCase> > demo_list;
776 bool display_help =
false;
780 for (
int arg = 1; arg < argc; arg++) {
781 char* args = argv[arg];
782 if (
string(args) ==
"-db") {
784 cout <<
"** No database name specified. **\n" << endl;
789 }
else if (
string(args) ==
"-num_threads") {
791 cout <<
"** Number of threads not given. **\n" << endl;
796 }
else if (
string(args) ==
"-oid_batch") {
798 cout <<
"** Number of oids not given. **\n" << endl;
803 }
else if (
string(args) ==
"-list_vols") {
805 }
else if (
string(args) ==
"-no_mmap") {
807 }
else if (
string(args) ==
"-shuffle_oids") {
809 }
else if (
string(args) ==
"-use_ambigs") {
812 TDemoSet::iterator it = demo_set.find(
string(args));
813 if (it == demo_set.end()) {
814 cout <<
"** Sorry, option [" << argv[arg]
815 <<
"] was not found. **\n" << endl;
818 cout <<
"Queueing test [" << argv[arg] <<
"]:" << endl;
819 demo_list.push_back(it->second);
825 cout <<
" This application is meant to be read (as source code),\n"
826 <<
" stepped through in a debugger, or as a minimal test app.\n"
827 <<
" It demonstrates use of the CSeqDB library API to perform\n"
828 <<
" simple and/or common blast database operations.\n";
836 cout <<
"\nAvailable options:\n\n";
839 cout << demo->first <<
":\n";
840 demo->second->DisplayHelp();
Demo for chunk iteration methods (single-threaded).
void x_UseOID(CSeqDB &sp, int oid, int &oid_count, int &length_1000)
Use this OID as part of the set.
virtual void DisplayHelp()
Show description for this test case.
virtual void Run()
Run this test case.
virtual ~CSeqDBDemo_ChunkIteration()
Destructor.
Demo for GetSequence() methods.
virtual ~CSeqDBDemo_GetSequence()
Destructor.
virtual void Run()
Run this test case.
virtual void DisplayHelp()
Show description for this test case.
Demo for methods that fetch a Bioseq from a Seq-id.
virtual void DisplayHelp()
Show description for this test case.
virtual ~CSeqDBDemo_SeqidToBioseq()
Destructor.
virtual void Run()
Run this test case.
Demo for simple (single-threaded) iteration methods.
virtual void Run()
Run this test case.
virtual void DisplayHelp()
Show description for this test case.
virtual ~CSeqDBDemo_SimpleIteration()
Destructor.
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
const int GetIndex(void) const
long x_UseOID(int letter, int oid)
Use this OID as part of the set.
static bool sm_oid_shuffle
static bool sm_is_protein
bool IsRunning(void) const
static void Init(CSeqDB &db, bool oid_shuffle, bool use_ambigs)
static bool sm_use_ambigs
CSeqDBDemo_Thread(int index, int at_a_time, int max_length)
virtual void OnExit(void)
Override this to execute finalization code.
Demo for chunk iteration methods (multi-threaded).
static void SetListVols(void)
static int sm_OidBatchSize
static void SetUseAmbigs(void)
static void SetNoMmap(void)
virtual ~CSeqDBDemo_Threaded()
Destructor.
virtual void Run()
Run this test case.
static bool sm_OidShuffle
static void SetNumThreads(const int nthreads)
virtual void DisplayHelp()
Show description for this test case.
static void SetDbName(const string &dbname)
static void SetOidShuffle(void)
static void SetOidBatchSize(const int num)
static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)
Find volume paths.
int GetMaxLength() const
Returns the length of the largest sequence in the database.
int GetSeqLength(int oid) const
Returns the sequence length in base pairs or residues.
ESeqType GetSequenceType() const
Returns the type of database opened - protein or nucleotide.
void RetAmbigSeq(const char **buffer) const
Returns any resources associated with the sequence.
void RetSequence(const char **buffer) const
Returns any resources associated with the sequence.
int GetNumSeqs() const
Returns the number of sequences available.
EOidListType GetNextOIDChunk(int &begin_chunk, int &end_chunk, int oid_size, vector< int > &oid_list, int *oid_state=NULL)
Return a chunk of OIDs, and update the OID bookmark.
int GetSequence(int oid, const char **buffer) const
Get a pointer to raw sequence data.
bool CheckOrFindOID(int &next_oid) const
Find an included OID, incrementing next_oid if necessary.
void SetNumberOfThreads(int num_threads, bool force_mt=false)
Setting the number of threads.
CRef< CBioseq > SeqidToBioseq(const CSeq_id &seqid) const
Get a CBioseq for a given Seq-id.
int GetAmbigSeq(int oid, const char **buffer, int nucl_code) const
Get a pointer to sequence data with ambiguities.
virtual void DisplayHelp()=0
Show description for this test case.
virtual ~ISeqDBDemoCase()
Destructor.
virtual void Run()=0
Run this test case.
static const char * str(char *buf, int n)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
const TInst & GetInst(void) const
Get the Inst member data.
bool CanGetLength(void) const
Check if it is safe to call GetLength method.
TLength GetLength(void) const
Get the Length member data.
bool CanGetInst(void) const
Check if it is safe to call GetInst method.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
unsigned int
A callback function used to compare two keys in a database.
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n th
void SleepMilliSec(unsigned long ml_sec, EInterruptOnSignal onsignal=eRestartOnSignal)
Multi-threading – mutexes; rw-locks; semaphore.
Defines BLAST database access classes.
int main(int argc, char **argv)
Run one or more test cases.
DEFINE_STATIC_MUTEX(s_mutex)
const int kSeqDBNuclNcbiNA8
Used to request ambiguities in Ncbi/NA8 format.
static SLJIT_INLINE sljit_ins nr(sljit_gpr dst, sljit_gpr src)
CRef< CTestThread > thr[k_NumThreadsMax]
static Uint4 letter(char c)