32 #define NCBI_TEST_APPLICATION
52 #ifndef SKIP_DOXYGEN_PROCESSING
55 # define DEV_NULL "nul:"
57 # define DEV_NULL "/dev/null"
65 template<
class A,
class B,
class C,
class D,
class E>
69 oss <<
a <<
b << c << d << e;
79 const char * slice (0);
81 const char *
whole (0);
89 s_ToString(
"Checking NcbiNA8 subsequence range [", begin,
",", end,
"].");
94 BOOST_REQUIRE_MESSAGE(0 == memcmp(slice,
whole + begin, sliceL), op);
103 bool success = db.
GiToOid(nt_gi, oid);
106 oss <<
"GI " << nt_gi <<
" was not found in nt";
108 BOOST_REQUIRE_MESSAGE(success,
msg);
116 for(
int i = 1;
i<length;
i *= 2) {
117 for(
int j = 0; j<length; j +=
i) {
134 return (oid & 1) != 0;
137 return (oid & 1) == 0;
150 for(
unsigned d = 2; d < oid; d++) {
197 unsigned exp_count(0);
205 BOOST_REQUIRE_EQUAL(
first, lowest);
206 BOOST_REQUIRE_EQUAL(
last, highest);
207 BOOST_REQUIRE_EQUAL(
count, exp_count);
210 template<
class NUM,
class DIF>
218 cout <<
"\nMismatch: line " <<
lineno
230 const signed char *
buf = (
const signed char *) buf_in;
246 template<
class ASNOBJ>
264 Int4 num1(0), num2(0);
268 BOOST_REQUIRE(num1 >= 1);
269 BOOST_REQUIRE_EQUAL(num1, num2);
279 Int4 num1(0), num2(0);
283 BOOST_REQUIRE(num1 >= 1);
284 BOOST_REQUIRE_EQUAL(num1, num2);
289 bool caught_exception =
false;
295 Int4 num1(0), num2(0);
299 BOOST_REQUIRE(num1 >= 1);
300 BOOST_REQUIRE_EQUAL(num1, num2);
302 caught_exception =
true;
305 if (! caught_exception) {
306 BOOST_ERROR(
"ConstructMissing() did not throw an exception of type CSeqDBException.");
312 bool caught_exception =
false;
317 caught_exception =
true;
320 if (! caught_exception) {
321 BOOST_ERROR(
"InvalidSeqType() did not throw an exception of type CSeqDBException.");
332 BOOST_REQUIRE(num1 >= 1);
337 bool caught_exception =
false;
345 BOOST_REQUIRE(num1 >= 1);
347 caught_exception =
true;
350 if (! caught_exception) {
351 BOOST_ERROR(
"InvalidPath() did not throw an exception of type CSeqDBException.");
365 BOOST_REQUIRE_EQUAL(
int(100), nseqs);
366 BOOST_REQUIRE_EQUAL(
Uint8(51718), vlength);
370 BOOST_REQUIRE_EQUAL(
string(
"Another test DB for CPPUNIT, SeqDB."),
377 int oid_values[] = { 0, 100000 };
378 for (
auto end_oid : oid_values) {
386 BOOST_REQUIRE(vol2 < vol1);
387 BOOST_REQUIRE_EQUAL(seq2, seq1 - 1);
395 BOOST_REQUIRE(vol3 < vol2);
396 BOOST_REQUIRE_EQUAL(seq3, seq2 - 1);
405 BOOST_CHECK_EQUAL(0
U, vol4);
406 BOOST_CHECK_EQUAL(0, seq4);
410 BOOST_CHECK_EQUAL(10, seq4);
411 BOOST_CHECK(vol4 > 0);
420 Int4 nseqs(0), noids(0);
421 Uint8 vlength(0), tlength(0);
427 BOOST_REQUIRE_EQUAL(
int(100), nseqs);
428 BOOST_REQUIRE_EQUAL(
int(100), noids);
429 BOOST_REQUIRE_EQUAL(
Uint8(26945), tlength);
430 BOOST_REQUIRE_EQUAL(
Uint8(26945), vlength);
434 BOOST_REQUIRE_EQUAL(
string(
"Test database for BLAST unit tests"),
442 char * bufp_blst = 0;
443 char * bufp_ncbi = 0;
463 BOOST_REQUIRE_EQUAL(
Uint4(30118382ul), hashval_blst);
464 BOOST_REQUIRE_EQUAL(
Uint4(3084382219ul), hashval_ncbi);
471 char * bufp_blst = 0;
472 char * bufp_ncbi = 0;
492 BOOST_REQUIRE_EQUAL(
Uint4(3219499033ul), hashval_blst);
493 BOOST_REQUIRE_EQUAL(
Uint4(3219499033ul), hashval_ncbi);
500 const char * bufp1 = 0;
501 const char * bufp2 = 0;
511 BOOST_REQUIRE_EQUAL(
Uint4(30118382ul), hashval1);
512 BOOST_REQUIRE_EQUAL(
Uint4(3084382219ul), hashval2);
519 const char * bufp1 = 0;
520 const char * bufp2 = 0;
530 BOOST_REQUIRE_EQUAL(
Uint4(3219499033ul), hashval1);
531 BOOST_REQUIRE_EQUAL(
Uint4(3219499033ul), hashval2);
542 " accession \"BP722514\",\n"
547 " title \"Xenopus laevis NBRP cDNA clone:XL452f07ex, 3' end\",\n"
549 " type str \"ASN1_BlastDefLine\",\n"
552 " label str \"ASN1_BlastDefLine\",\n"
555 " '30803080A0801A3158656E6F707573206C6165766973204E4252502063444E4120\n"
556 "636C6F6E653A584C34353266303765782C20332720656E640000A1803080AB80020402BEFD4300\n"
557 "00AC803080A1801A0842503732323531340000A38002010100000000000000000000A280020100\n"
568 " seq-data ncbi4na '11428288218841844814141422818811214421121482118428221114\n"
569 "82211121141881228484211141128842148481121112222F882124422141148188842112118488\n"
570 "41114822882844214144144148281181'H\n"
586 " accession \"EAI08555\",\n"
591 " title \"unknown [environmental sequence]\",\n"
593 " type str \"ASN1_BlastDefLine\",\n"
596 " label str \"ASN1_BlastDefLine\",\n"
599 " '30803080A0801A20756E6B6E6F776E205B656E7669726F6E6D656E74616C207365\n"
600 "7175656E63655D0000A1803080AB80020402A37A630000A4803080A1801A084541493038353535\n"
601 "0000A38002010100000000000000000000A280020100000000000000'H\n"
611 " seq-data ncbistdaa '0C0A0A0606090B0909060909060B09060909131004160F090A0A0A\n"
612 "0A0B0A0B0D0D010B0D110D0606090F12090D0A0B0904050D0A160D0B0B05051009100B1005'H\n"
624 string expected = (
"Blast-def-line-set ::= {\n"
626 " title \"Xenopus laevis NBRP cDNA clone:XL452f05ex, 3' end\",\n"
630 " accession \"BP722512\",\n"
646 string expected = (
"Blast-def-line-set ::= {\n"
648 " title \"similar to KIAA0960 protein [Mus musculus]\",\n"
652 " accession \"XP_357594\",\n"
668 list< CRef< CSeq_id > > seqids =
685 BOOST_REQUIRE_EQUAL(
Uint4(136774894ul), h);
693 list< CRef< CSeq_id > > seqids =
710 BOOST_REQUIRE_EQUAL(
Uint4(2942938647ul), h);
744 int plen(0), nlen(0);
754 for(
i = 0;
i < plen;
i++) {
764 for(
i = 0;
i < nlen;
i++) {
778 BOOST_REQUIRE_EQUAL(
int(100), nlen);
779 BOOST_REQUIRE_EQUAL(
int(100), plen);
780 BOOST_REQUIRE_EQUAL(
Uint8(26945), ptot);
781 BOOST_REQUIRE_EQUAL(
Uint8(51718), ex_tot);
782 BOOST_REQUIRE_EQUAL(
Uint8(51726), ap_tot);
790 const char * bufp = 0;
796 BOOST_REQUIRE_EQUAL(
Uint4(1128126064ul), hashval);
804 const char * bufp = 0;
809 BOOST_REQUIRE_EQUAL(
Uint4(3219499033ul), hashval);
818 int nr_seqs(0), nr_oids(0), sp_seqs(0), sp_oids(0);
819 Uint8 nr_tlen(0), nr_vlen(0), sp_tlen(0), sp_vlen(0);
827 BOOST_REQUIRE_EQUAL(nr_seqs, nr_oids);
828 BOOST_REQUIRE_EQUAL(nr_tlen, nr_vlen);
830 BOOST_REQUIRE_GT(nr_seqs, sp_seqs);
831 BOOST_REQUIRE_NE(nr_oids, sp_oids);
832 BOOST_REQUIRE_GT(nr_tlen, sp_tlen);
833 BOOST_REQUIRE_NE(nr_vlen, sp_vlen);
843 const char * seqid_list[] = {
844 "AAA03612.1",
"prf||1922246A",
"P51728.1",
"AAB84238.1",
"BAA25256.1",
"AAC15878.1",
845 "1A8U_A",
"AAC82254.1",
"AAD31141.1",
"1R24_A",
"AAF63214.1",
"AAF95963.1",
846 "WP_003095644.1",
"AAC59341.1"
850 1153908, 507276, 851580, 200775, 1028308, 939134,
851 199107, 511756, 27645, 429124, 575812, 648744,
856 199, 233, 186, 441, 96, 206,
857 277, 205, 110, 206, 510, 293,
866 BOOST_REQUIRE((L_seqid == L_len) && (L_len == L_pig));
868 for(
size_t i = 0;
i<L_seqid;
i++) {
869 string arr_seqid(seqid_list[
i]);
870 int arr_pig(pig_list[
i]), arr_len(len_list[
i]);
871 vector<int> seqid2oid;
872 int pig2oid = 0, oid2pig=0, oid2len =0;
873 nr.AccessionToOids(arr_seqid, seqid2oid);
875 BOOST_REQUIRE(
nr.PigToOid(arr_pig, pig2oid));
877 BOOST_CHECK_EQUAL(pig2oid, seqid2oid[0]);
878 BOOST_REQUIRE(pig2oid !=
int(-1));
880 oid2len =
nr.GetSeqLength(pig2oid);
881 BOOST_REQUIRE(
nr.OidToPig (pig2oid, oid2pig));
883 BOOST_REQUIRE_EQUAL(arr_len, oid2len);
884 BOOST_REQUIRE_EQUAL(arr_pig, oid2pig);
890 const string kDb(
"nr");
895 const Uint4 NUM_ITEMS = 6;
897 const char ** str_list[NUM_ITEMS];
900 {
"AAP90615.1",
"AAP90628.1",
"AAP90641.1",
"AAP90654.1",
"AAP90667.1", 0 };
904 {
"1NPQ",
"1NPQ_A",
"1NPQ_B", 0 };
908 {
"1LCT_A",
"1LCT", 0 };
910 {
"1GWB_A",
"1GWB_B",
"1GWB", 0 };
919 Uint4 * len_list[NUM_ITEMS];
921 Uint4 l0[] = { 261, 0 };
922 Uint4 l1[] = { 232, 0 };
923 Uint4 l2[] = { 17, 90, 0 };
924 Uint4 l3[] = { 17, 90, 0 };
925 Uint4 l4[] = { 333, 0 };
926 Uint4 l5[] = { 281, 0 };
940 BOOST_REQUIRE_EQUAL(NUM_ITEMS, L_str);
941 BOOST_REQUIRE_EQUAL(NUM_ITEMS, L_len);
943 for(
Uint4 i = 0;
i< NUM_ITEMS;
i++) {
949 for(
const char ** strp = str_list[
i]; *strp; strp++) {
951 nr.AccessionToOids(*strp, oids);
953 BOOST_REQUIRE_MESSAGE(! oids.empty(),
"Failed to find accession "
954 << *strp <<
" in " <<
kDb);
956 ITERATE(vector<int>, iter, oids) {
965 str_iter = str_oids.
begin();
969 Uint4 * llp = len_list[
i];
976 oid_len.
insert(
nr.GetSeqLength(*iter));
981 oid_iter = oid_len.
begin();
982 exp_iter = exp_len.
begin();
984 while(oid_iter != oid_len.
end()) {
985 BOOST_REQUIRE(exp_iter != exp_len.
end());
986 BOOST_REQUIRE_EQUAL(*oid_iter, *exp_iter);
1001 const char *
dbname =
"nt";
1003 bool is_prot =
false;
1007 oss_fn <<
"." <<
dbname <<
"." << gi;
1009 vector<char> seqdb_data;
1010 vector<char> expected_data;
1019 bool gi_trans = db.
GiToOid(gi, oid);
1021 BOOST_REQUIRE(gi_trans);
1030 BOOST_REQUIRE(! bs.
Empty());
1034 BOOST_REQUIRE_EQUAL(
int(seqdb_data.size()), 872);
1037 string expected_bs =
1042 " accession \"AJ389663\",\n"
1050 " seq-data ncbi4na '42184822114812141288821418148411122424118442821881118214\n"
1051 "824144882288141824882211822512824418112848442118828141428118121842111211428224\n"
1052 "122228888112244444411141424288881881418211112211842444888848282442118222428211\n"
1053 "288884484128284418112888484284182421244222824142244241248182888211184828422281\n"
1054 "821128881482488124841818422811241448848812444811244441182144488241882244141444\n"
1055 "142184141112442812212182211441144214214424242111881222128222442124444144814841\n"
1056 "241111181124184244412828182414422224811824411841481212888111822888112414418211\n"
1057 "884414442114828448422142142242448118822142822118142481818811148848842148811111\n"
1058 "428248148844182824444411442814244864242248844424822812842824122841228122442244\n"
1059 "814888484222414484282884128414848282444841224424148881288841111118814148428211\n"
1060 "142144228848422422241181484484218441181184411414412282448828188884884488882441\n"
1061 "124841448118418811414441214124444421688248188424424281414484111882884412242242\n"
1062 "11412441281284241114218884221142184888821881FFF1141124111482141448824114124182\n"
1063 "141812248244814882841221811124FFF241284424182243241148812812818412824424442142\n"
1064 "228214441112211148288844488224444411481844884F11142841112881114411884124411444\n"
1065 "212212214414844142284244288118884128211212444111128212224422244121224841441884\n"
1066 "121418841414282888282418824484448448448421844224882881488448441424188848284488\n"
1067 "11882241811241124141282814228428111814822A224188242228182482442144412882881414\n"
1068 "441241484424818142212424141884142118112144828484184222881418488244442242124242\n"
1069 "428121284114411821421248284228222844222411144488444811222428411228228824842814\n"
1070 "441884444288481188488222218411241441188222148114242414821811428242488418812482\n"
1071 "228422288848121212242224824281281221188414244888128414441211441884422224124144\n"
1072 "24282244248282842448A88842241411284222211148421284'H\n"
1077 "GCATGTCCAAGTACAGACTTTCAGATAGTGAAACCGCGAATGGCTCATTAAATCAGTCGA"
1078 "GGTTCCTTAGATCGTTCCAATCCRACTCGGATAACTGTGGCAATTCTAGAGCTAATACAT"
1079 "GCAAACAAGCTCCGACCCCTTTTAACCGGGGGGAAAGAGCGCTTTTATTAGATCAAAACC"
1080 "AATGCGGGTTTTGTCTCGGCAATCCCGCTCAACTTTTGGTGACTCTGGATAACTTTGTGC"
1081 "TGATCGCACGGCCCTCGAGCCGGCGACGTATCTTTCAAATGTCTGCCCTATCAACTTTAG"
1082 "TCGTTACGTGATATGCCTAACGAGGTTGTTACGGGTAACGGGGAATCAGGGTTCGATTCC"
1083 "GGAGAGGGAGCATGAGAAACGGCTACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTAC"
1084 "CCACTCCCGGCACGGGGAGGTAGTGACGAAAAATAACGATGCGGGACTCTATCGAGGCCC"
1085 "CGTAATCGGAATGAGTACACTTTAAATCCTTTAACGAGGATCAATTGGAGGGCAAGTCTG"
1086 "GTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAA"
1087 "AAGCTCGTAGTTGGATCTCGGGGGAAGGCTAGCGGTSGCGCCGTTGGGCGTCCTACTGCT"
1088 "CGACCTGACCTACCGGCCGGTAGTTTGTGCCCGAGGTGCTCTTGACTGAGTGTCTCGGGT"
1089 "GACCGGCGAGTTTACTTTGAAAAAATTAGAGTGCTCAAAGCAGGCCTTGTGCCGCCCGAA"
1090 "TAGTGGTGCATGGAATAATGGAAGAGGACCTCGGTTCTATTTTGTTGGTTTTCGGAACGT"
1091 "GAGGTAATGATTAAGAGGGACAGACGGGGGCA";
1093 expected_data.assign(data_str.data(),
1094 data_str.data() + data_str.size());
1096 vector<char> seqdb_tmp;
1105 seqdb_tmp.swap(seqdb_data);
1107 BOOST_REQUIRE_EQUAL(expected_bs, seqdb_bs);
1108 BOOST_REQUIRE_EQUAL(expected_data.size(), seqdb_data.size());
1110 Uint4 num_diffs = 0;
1112 for(
Uint4 i = 0;
i < expected_data.size();
i++) {
1113 unsigned R = unsigned(expected_data[
i]) & 0xFF;
1114 unsigned S = unsigned(seqdb_data[
i]) & 0xFF;
1120 cout <<
"At location " << dec <<
i <<
", Readdb has: " <<
hex <<
int(
R) <<
" whereas SeqDB has: " <<
hex <<
int(
S);
1123 cout <<
" (R += " << (
R -
S) <<
")\n";
1125 cout <<
" (S += " << (
S -
R) <<
")\n";
1133 cout <<
"Num diffs: " << dec << num_diffs << endl;
1136 BOOST_REQUIRE_EQUAL((
int) 0, (
int)num_diffs);
1142 bool caught_exception =
false;
1151 BOOST_REQUIRE_EQUAL((
int) 11112222,
len);
1153 caught_exception =
true;
1156 if (! caught_exception) {
1157 BOOST_ERROR(
"GetLenHighOID() did not throw an exception of type CSeqDBException.");
1164 bool caught_exception =
false;
1170 BOOST_REQUIRE_EQUAL((
Uint4) 11112222,
len);
1172 caught_exception =
true;
1175 if (! caught_exception) {
1176 BOOST_ERROR(
"GetLenNegOID() did not throw an exception of type CSeqDBException.");
1183 bool caught_exception =
false;
1194 BOOST_REQUIRE_EQUAL((
Uint4) 11112222,
len);
1196 caught_exception =
true;
1199 if (! caught_exception) {
1200 BOOST_ERROR(
"GetSeqHighOID() did not throw an exception of type CSeqDBException.");
1207 bool caught_exception =
false;
1215 BOOST_REQUIRE_EQUAL((
Uint4) 11112222,
len);
1217 caught_exception =
true;
1220 if (! caught_exception) {
1221 BOOST_ERROR(
"GetSeqNegOID() did not throw an exception of type CSeqDBException.");
1228 bool caught_exception =
false;
1237 nr.GetOidAtOffset(0, vlength + 1);
1239 caught_exception =
true;
1242 if (! caught_exception) {
1243 BOOST_ERROR(
"Offset2OidBadOffset() did not throw an exception of type CSeqDBException.");
1250 bool caught_exception =
false;
1258 nr.GetOidAtOffset(noids + 1, 0);
1260 caught_exception =
true;
1263 if (! caught_exception) {
1264 BOOST_ERROR(
"Offset2OidBadOid() did not throw an exception of type CSeqDBException.");
1271 Uint4 segments = 1000;
1274 string dbname((
i == 0) ?
"nr" :
"nt");
1283 Uint8 vol_length(0);
1287 for(
Uint4 j = 0; j < segments; j++) {
1288 Uint8 range_target = (vol_length * j) / segments;
1292 double range_ratio = double(range_target) / vol_length;
1293 double oid_ratio = double(oid_here) / num_oids;
1294 double percent_diff = 100.0 *
fabs(oid_ratio - range_ratio);
1303 BOOST_REQUIRE(prev_oid <= oid_here);
1304 BOOST_REQUIRE(percent_diff <= 30.0);
1306 prev_oid = oid_here;
1349 vector<string>
names;
1351 names.push_back(
"p,nr");
1352 names.push_back(
"n,nt");
1353 names.push_back(
"n,pdbnt");
1354 names.push_back(
"p,pdb");
1355 names.push_back(
"p,CDSEARCH/oasis_pfam");
1358 BOOST_REQUIRE(s->length() > 2);
1360 char prot_nucl = (*s)[0];
1361 string dbname(*s, 2, s->length()-2);
1372 const char * mask_name[] = {
1373 "range",
"odd",
"even",
"prime",
"ERROR"
1407 for(
int i = 0; ranges[
i];
i += 2) {
1408 unsigned first = ranges[
i];
1409 unsigned second = ranges[
i+1];
1418 int obegin(0), oend(0);
1421 int lowest(INT_MAX);
1437 unsigned num_found(0);
1440 num_found = (
int) oids.size();
1442 ITERATE(vector<int>, iter, oids) {
1443 if ((*iter) > highest) {
1447 if ((*iter) < lowest) {
1454 num_found = oend-obegin;
1457 if (oend > highest) {
1461 if (obegin < lowest) {
1466 for(
int v = obegin; v < oend; v++) {
1471 if (obegin == oend) {
1486 TGi low_gi = 20*1000*1000;
1487 TGi high_gi = 30*1000*1000;
1493 dbs.push_back(
"data/seqp");
1494 dbs.push_back(
"data/ranges/seqp15");
1495 dbs.push_back(
"data/ranges/twenty");
1496 dbs.push_back(
"data/ranges/twenty15");
1498 for(
Uint4 dbnum = 0; dbnum < dbs.size(); dbnum++) {
1501 bool all_gis_in_range =
true;
1502 bool all_oids_in_range =
true;
1506 if (! (all_oids_in_range || all_gis_in_range)) {
1510 if (all_oids_in_range) {
1511 if ((oid < (low_oid-1)) || ((high_oid-1)) < oid) {
1512 all_oids_in_range =
false;
1516 if (all_gis_in_range) {
1517 list< CRef<CSeq_id> > ids = db.
GetSeqIDs(oid);
1519 bool gi_in_range =
false;
1522 if ((**seqid).IsGi()) {
1523 TGi gi = (**seqid).GetGi();
1525 if ((gi > low_gi) && (gi < high_gi)) {
1532 if (! gi_in_range) {
1533 all_gis_in_range =
false;
1538 bool gis_confined (
false);
1539 bool oids_confined(
false);
1543 gis_confined =
false;
1544 oids_confined =
false;
1548 gis_confined =
false;
1549 oids_confined =
true;
1553 gis_confined =
true;
1554 oids_confined =
false;
1558 gis_confined =
true;
1559 oids_confined =
true;
1563 BOOST_REQUIRE_EQUAL(oids_confined, all_oids_in_range);
1564 BOOST_REQUIRE_EQUAL(gis_confined, all_gis_in_range);
1571 bool caught_exception =
false;
1576 caught_exception =
true;
1579 if (! caught_exception) {
1580 BOOST_ERROR(
"EmptyDBList() did not throw an exception of type CSeqDBException.");
1622 BOOST_REQUIRE_EQUAL(29, found);
1639 BOOST_REQUIRE_EQUAL(29, found);
1654 BOOST_REQUIRE_EQUAL(58, found);
1660 const string kFileName(
"data/prot345t.gil");
1666 BOOST_REQUIRE_EQUAL((
size_t) seqdbgifile.
GetNumGis(), gis.size());
1667 sort(gis.begin(), gis.end());
1671 ifstream gifile(fn.c_str());
1672 BOOST_REQUIRE(gifile);
1674 vector<TGi> reference;
1675 reference.reserve(gis.size());
1676 while ( !gifile.eof() ) {
1679 if (tgi == -1)
break;
1682 sort(reference.begin(), reference.end());
1683 BOOST_REQUIRE_EQUAL(reference.size(), gis.size());
1686 for (
size_t i = 0;
i < reference.size();
i++) {
1688 BOOST_REQUIRE_MESSAGE(reference[
i] == gis[
i],
msg);
1696 dbs.push_back(
"Test/Giardia.01");
1697 dbs.push_back(
"Test/baylor_wgs_contigs.01");
1698 dbs.push_back(dbs[0] +
" " + dbs[1]);
1700 vector< vector<TGi> > gis(dbs.size());
1701 vector< vector<string> > volumes(dbs.size());
1703 for(
int i = 0;
i < (
int)dbs.size();
i++) {
1713 db->
GetGis(oid, gis[
i],
true);
1719 BOOST_REQUIRE(volumes[0] == volumes[1]);
1720 BOOST_REQUIRE(volumes[0] == volumes[2]);
1721 BOOST_REQUIRE_EQUAL(gis[0].
size() + gis[1].
size(), gis[2].
size());
1723 vector<TGi> zero_one(gis[0]);
1724 zero_one.insert(zero_one.end(), gis[1].begin(), gis[1].end());
1726 sort(zero_one.begin(), zero_one.end());
1727 sort(gis[2].begin(), gis[2].end());
1729 BOOST_REQUIRE(zero_one == gis[2]);
1735 TGi gi1a = 446106212;
1738 TGi gi2a = 494110381;
1739 TGi gi2b = 30172867;
1748 bool success = db.
GiToOid(gi1a, oid1);
1749 BOOST_REQUIRE(success);
1751 success = db.
GiToOid(gi2a, oid2);
1752 BOOST_REQUIRE(success);
1754 BOOST_REQUIRE(oid1 != oid2);
1759 BOOST_REQUIRE_EQUAL((
int)gi2taxid.
size(), 44);
1760 BOOST_REQUIRE_EQUAL(gi2taxid[gi1a],
tax1);
1763 BOOST_REQUIRE_EQUAL((
int)gi2taxid.
size(), 23);
1764 BOOST_REQUIRE_EQUAL(gi2taxid[gi2a], tax2a);
1765 BOOST_REQUIRE_EQUAL(gi2taxid[gi2b], tax2b);
1768 BOOST_REQUIRE_EQUAL((
int)gi2taxid.
size(), 67);
1769 BOOST_REQUIRE_EQUAL(gi2taxid[gi1a],
tax1);
1770 BOOST_REQUIRE_EQUAL(gi2taxid[gi2a], tax2a);
1771 BOOST_REQUIRE_EQUAL(gi2taxid[gi2b], tax2b);
1774 #define BEGIN(X) (X)
1775 #define END(X) ((X) + (sizeof (X) / sizeof *(X)))
1780 TGi gi1a = 446106212;
1789 TGi gi2a = 494110381;
1802 bool success = db.
GiToOid(gi1a, oid1);
1803 BOOST_REQUIRE(success);
1805 success = db.
GiToOid(gi2a, oid2);
1806 BOOST_REQUIRE(success);
1808 BOOST_REQUIRE(oid1 != oid2);
1819 BOOST_REQUIRE(gi2taxids.
empty());
1821 BOOST_REQUIRE_EQUAL((
int) gi2taxids.
size(), 44);
1822 BOOST_REQUIRE_EQUAL((
int) gi2taxids[gi1a].
size(), 5);
1823 BOOST_REQUIRE_EQUAL_COLLECTIONS(
1824 gi2taxids[gi1a].begin(), gi2taxids[gi1a].end(),
1829 BOOST_REQUIRE(!gi2taxids.
empty());
1831 BOOST_REQUIRE_EQUAL((
int) gi2taxids.
size(), 23);
1832 BOOST_REQUIRE_EQUAL((
int) gi2taxids[gi2a].
size(), 4);
1833 BOOST_REQUIRE_EQUAL_COLLECTIONS(
1834 gi2taxids[gi2a].begin(), gi2taxids[gi2a].end(),
1835 expected2a.
begin(), expected2a.
end()
1839 BOOST_REQUIRE(!gi2taxids.
empty());
1841 BOOST_REQUIRE_EQUAL((
int) gi2taxids.
size(), 67);
1842 BOOST_REQUIRE_EQUAL((
int) gi2taxids[gi1a].
size(), 5);
1843 BOOST_REQUIRE_EQUAL((
int) gi2taxids[gi2a].
size(), 4);
1844 BOOST_REQUIRE_EQUAL_COLLECTIONS(
1845 gi2taxids[gi1a].begin(), gi2taxids[gi1a].end(),
1848 BOOST_REQUIRE_EQUAL_COLLECTIONS(
1849 gi2taxids[gi2a].begin(), gi2taxids[gi2a].end(),
1850 expected2a.
begin(), expected2a.
end()
1857 TGi gi1a = 446106212;
1905 TGi gi2a = 494110381;
1935 vector<int> expected1;
1937 sort(expected1.begin(), expected1.end());
1939 vector<int> expected2a;
1940 expected2a.assign(
BEGIN(tax2a),
END(tax2a));
1941 sort(expected2a.begin(), expected2a.end());
1945 bool success = db.
GiToOid(gi1a, oid1);
1946 BOOST_REQUIRE(success);
1948 success = db.
GiToOid(gi2a, oid2);
1949 BOOST_REQUIRE(success);
1951 BOOST_REQUIRE(oid1 != oid2);
1953 vector<TTaxId> taxids;
1957 sort(taxids.begin(), taxids.end());
1958 BOOST_REQUIRE_EQUAL((
int) taxids.size(), (
int) expected1.size());
1959 BOOST_REQUIRE_EQUAL_COLLECTIONS(
1960 taxids.begin(), taxids.end(),
1961 expected1.begin(), expected1.end()
1966 sort(taxids.begin(), taxids.end());
1967 BOOST_REQUIRE_EQUAL((
int) taxids.size(), (
int) expected2a.size());
1968 BOOST_REQUIRE_EQUAL_COLLECTIONS(
1969 taxids.begin(), taxids.end(),
1970 expected2a.begin(), expected2a.end()
1978 sort(expected2a.begin(), expected2a.end());
1982 sort(taxids.begin(), taxids.end());
1983 BOOST_REQUIRE_EQUAL((
int) taxids.size(), (
int) expected2a.size());
1984 BOOST_REQUIRE_EQUAL_COLLECTIONS(
1985 taxids.begin(), taxids.end(),
1986 expected2a.begin(), expected2a.end()
1993 TGi gi1a = 446106212;
1995 1386, 1392, 1396, 1428, 1234146
1998 TGi gi2a = 494110381;
2000 1678, 216816, 469594, 1263059
2006 vector<int> expected1;
2008 sort(expected1.begin(), expected1.end());
2010 vector<int> expected2a;
2011 expected2a.assign(
BEGIN(tax2a),
END(tax2a));
2012 sort(expected2a.begin(), expected2a.end());
2016 bool success = db.
GiToOid(gi1a, oid1);
2017 BOOST_REQUIRE(success);
2019 success = db.
GiToOid(gi2a, oid2);
2020 BOOST_REQUIRE(success);
2022 BOOST_REQUIRE(oid1 != oid2);
2024 vector<TTaxId> taxids;
2028 sort(taxids.begin(), taxids.end());
2029 BOOST_REQUIRE_EQUAL((
int) taxids.size(), (
int) expected1.size());
2030 BOOST_REQUIRE_EQUAL_COLLECTIONS(
2031 taxids.begin(), taxids.end(),
2032 expected1.begin(), expected1.end()
2037 sort(taxids.begin(), taxids.end());
2038 BOOST_REQUIRE_EQUAL((
int) taxids.size(), (
int) expected2a.size());
2039 BOOST_REQUIRE_EQUAL_COLLECTIONS(
2040 taxids.begin(), taxids.end(),
2041 expected2a.begin(), expected2a.end()
2049 sort(expected2a.begin(), expected2a.end());
2053 sort(taxids.begin(), taxids.end());
2054 BOOST_REQUIRE_EQUAL((
int) taxids.size(), (
int) expected2a.size());
2055 BOOST_REQUIRE_EQUAL_COLLECTIONS(
2056 taxids.begin(), taxids.end(),
2057 expected2a.begin(), expected2a.end()
2078 const int kNumTestGis = 3;
2079 const int kGiOids[kNumTestGis] = { 15, 51, 84 };
2085 vector<int> oid_list;
2094 BOOST_REQUIRE_EQUAL(1, (
int)oid_list.size());
2102 BOOST_REQUIRE_EQUAL(1, (
int)oid_list.size());
2110 BOOST_REQUIRE_EQUAL(1, (
int)oid_list.size());
2126 oids1.push_back(oid);
2129 BOOST_REQUIRE(! oids2.empty());
2131 ITERATE(vector<int>, iter, oids1) {
2132 BOOST_REQUIRE(*iter == oids2[0]);
2141 const int kFirstOid(0);
2142 const int kLastOid(100);
2146 vector<int> oid_list;
2151 BOOST_REQUIRE_EQUAL(kFirstOid, start);
2152 BOOST_REQUIRE_EQUAL(kLastOid, end);
2156 BOOST_REQUIRE_EQUAL(kFirstOid, start);
2157 BOOST_REQUIRE_EQUAL(kFirstOid, end);
2162 BOOST_REQUIRE_EQUAL(kFirstOid, start);
2163 BOOST_REQUIRE_EQUAL(kLastOid, end);
2180 BOOST_REQUIRE_EQUAL(
info.taxid, 57176);
2181 BOOST_REQUIRE_EQUAL((
string)
info.scientific_name,
string(
"Aotus vociferans"));
2182 BOOST_REQUIRE_EQUAL((
string)
info.common_name,
string(
"noisy night monkey"));
2183 BOOST_REQUIRE_EQUAL((
string)
info.blast_name,
string(
"primates"));
2184 BOOST_REQUIRE_EQUAL((
string)
info.s_kingdom,
string(
"Eukaryota"));
2187 BOOST_REQUIRE_EQUAL(
info.taxid, 562);
2202 int slen(0),alen(0);
2208 unsigned exp_hash = 705445389u;
2210 BOOST_REQUIRE_EQUAL((290/4) + 1, slen);
2211 BOOST_REQUIRE_EQUAL(20, alen);
2212 BOOST_REQUIRE_EQUAL(exp_hash, h);
2229 ITERATE(vector<int>, oid, oids) {
2230 int slen(0),alen(0);
2236 string A(
buffer + slen, alen);
2240 BOOST_REQUIRE_EQUAL((
int)
A.size(), 0);
2241 BOOST_REQUIRE_EQUAL((
int)
S.size(),
len);
2242 BOOST_REQUIRE_EQUAL((
int) *(
buffer-1), 0);
2243 BOOST_REQUIRE_EQUAL((
int) *(
buffer+slen), 0);
2259 int slen(0),alen(0);
2263 BOOST_REQUIRE_EQUAL((290/4) + 1, slen);
2264 BOOST_REQUIRE_EQUAL(20, alen);
2274 int low(0), high(0),
count(0);
2276 nr.GetPigBounds(& low, & high, &
count);
2278 BOOST_REQUIRE(low < high);
2279 BOOST_REQUIRE(
count);
2286 bool caught_exception =
false;
2297 BOOST_REQUIRE(low < high);
2298 BOOST_REQUIRE(
count);
2303 int low(0), high(0),
count(0);
2307 BOOST_REQUIRE(low < high);
2308 BOOST_REQUIRE(
count);
2310 caught_exception =
true;
2313 if (! caught_exception) {
2314 BOOST_ERROR(
"ExpertIdBoundsNoPig() did not throw an exception of type CSeqDBException.");
2321 typedef pair<bool, string> TStringBool;
2322 typedef vector< TStringBool > TStringBoolVec;
2324 TStringBoolVec paths;
2325 paths.push_back(TStringBool(
true,
"nt.000.nin"));
2326 paths.push_back(TStringBool(
true,
"Test/ITS_RefSeq_Fungi.nal"));
2327 paths.push_back(TStringBool(
true,
"taxdb.bti"));
2328 paths.push_back(TStringBool(
true,
"data/seqp.pin"));
2329 paths.push_back(TStringBool(
false,
"nr.00"));
2333 ITERATE(TStringBoolVec, iter, paths) {
2334 string filename = iter->second;
2336 bool found = ! resolved.empty();
2339 int position = resolved.find(filename);
2341 BOOST_REQUIRE(found);
2344 BOOST_REQUIRE(resolved.size() > filename.size());
2347 BOOST_REQUIRE_EQUAL(position + filename.size(), resolved.size());
2349 BOOST_REQUIRE(! found);
2359 for(
size_t i = 0;
i < gis.size();
i++) {
2378 a3.push_back(special);
2380 for(
Uint4 i = 0; (
i*3) < 500;
i++) {
2389 a5.push_back(special);
2398 if (((
i % 15) == 0) || (gi == special)) {
2399 BOOST_REQUIRE(
true == both.
FindGi(gi));
2401 BOOST_REQUIRE(
false == both.
FindGi(gi));
2419 a3.push_back(special);
2421 for(
Uint4 i = 0; (
i*3) < 500;
i++) {
2430 a5.push_back(special);
2441 for(
int i = 0;
i < (
int)a5.size();
i++) {
2443 BOOST_REQUIRE(
false == both.
FindGi(a5[
i]));
2445 BOOST_REQUIRE(
true == both.
FindGi(a5[
i]));
2452 BOOST_REQUIRE(std::find(a5.begin(), a5.end(), gi) != a5.end());
2469 a3.push_back(special);
2471 for(
int i = 0; (
i*3) < 500;
i++) {
2480 a5.push_back(special);
2489 BOOST_REQUIRE(calc->IsPositive());
2492 for(
int i = 0;
i < 500;
i++) {
2493 bool is_3 = ((
i % 3) == 0) || (
i == special);
2494 bool is_5 = ((
i % 5) == 0) || (
i == special);
2496 if (is_3 && (! is_5)) {
2497 BOOST_REQUIRE(
true == and_not->
FindGi(
i));
2499 BOOST_REQUIRE(
false == and_not->
FindGi(
i));
2517 m2.push_back(special);
2518 m3.push_back(special);
2519 m5.push_back(special);
2520 m7.push_back(special);
2522 for(
int i = 0;
i < 1000;
i++) {
2537 m2.push_back(special);
2538 m3.push_back(special);
2539 m5.push_back(special);
2540 m7.push_back(special);
2575 BOOST_REQUIRE(! not_m5_ornot_m7.
IsPositive());
2593 for(
int i = 0;
i < 1000;
i++) {
2594 bool d2(!(
i%2)), d3(!(
i%3)), d5(!(
i%5)), d7(!(
i%7));
2597 d2 = d3 = d5 = d7 =
true;
2604 bool in_c1 = ( d2 && !d3) || ( d5 && !d7);
2605 bool in_c2 = (!d2 || d3) && ( d5 != d7);
2606 bool in_c3 = ( d2 || !d3) && (!d5 || !d7);
2608 BOOST_REQUIRE_EQUAL(in_c1, c1p->
FindGi(
i));
2609 BOOST_REQUIRE_EQUAL(in_c2, c2p->
FindGi(
i));
2610 BOOST_REQUIRE_EQUAL(in_c3, ! c3n->
FindGi(
i));
2630 46071115, 46071116, 46071117, 46071118, 46071119,
2631 46071120, 46071121, 46071122, 46071123, 46071124,
2632 46071125, 46071126, 46071127, 46071128, 46071129,
2633 46071130, 46071131, 46071132, 46071133, 46071134 };
2635 BOOST_REQUIRE((
sizeof(v1)/
sizeof(
int)) == 20);
2637 vector<int>
all(v1, v1 + 20);
2638 vector<int> mid(v1 + 5, v1 + 15);
2653 string nm =
"data/seqn";
2658 CSeqDB db_A(nm, ty, All);
2659 CSeqDB db_M(nm, ty, Mid);
2660 CSeqDB db_N(nm, ty, Neg);
2661 CSeqDB db_TB(nm, ty, TopBot);
2662 CSeqDB db_NTB(nm, ty, NotTopBot);
2671 bool A_have =
s_DbHasOID(db_A, A_count, oid);
2672 bool M_have =
s_DbHasOID(db_M, M_count, oid);
2673 bool N_have =
s_DbHasOID(db_N, N_count, oid);
2674 bool TB_have =
s_DbHasOID(db_TB, TB_count, oid);
2675 bool NTB_have =
s_DbHasOID(db_NTB, NTB_count, oid);
2677 BOOST_REQUIRE((! M_have) || A_have);
2678 BOOST_REQUIRE(A_have != N_have);
2679 BOOST_REQUIRE((! TB_have) || A_have);
2681 BOOST_REQUIRE((!M_have) || (!N_have));
2682 BOOST_REQUIRE((!M_have) || (!TB_have));
2683 BOOST_REQUIRE((!M_have) || NTB_have);
2685 BOOST_REQUIRE((!N_have) || (!TB_have));
2686 BOOST_REQUIRE((!N_have) || NTB_have);
2688 BOOST_REQUIRE(TB_have != NTB_have);
2693 BOOST_REQUIRE_EQUAL(
NSEQ, 100);
2695 BOOST_REQUIRE_EQUAL(A_count, 20);
2696 BOOST_REQUIRE_EQUAL(M_count, 10);
2697 BOOST_REQUIRE_EQUAL(N_count,
NSEQ-A_count);
2698 BOOST_REQUIRE_EQUAL(TB_count, A_count - M_count);
2699 BOOST_REQUIRE_EQUAL(NTB_count + TB_count, 100);
2703 BOOST_REQUIRE(! idset_TB.
Blank());
2712 const char *s1 = 0, *s2 = 0;
2718 BOOST_REQUIRE(
string(s1) ==
string(s2));
2741 for(
const char ** p =
str; *p; p++) {
2742 if ((*p)[0] ==
'#') {
2765 const char *
str[] =
2779 BOOST_REQUIRE_EQUAL((
int)ids->GetNumSis(), 9);
2783 for(
int i = 0;
i < ids->GetNumSis();
i++) {
2784 BOOST_REQUIRE(ids->GetSiOid(
i).oid == -1);
2791 for(
int i = 0;
i < ids->GetNumSis();
i++) {
2792 BOOST_CHECK_MESSAGE(ids->GetSiOid(
i).oid != -1,
2793 "Seqid " << ids->GetSiOid(
i).si <<
" is unresolved");
2805 BOOST_REQUIRE_EQUAL(k, ids->GetNumSis());
2814 BOOST_REQUIRE( dbp.
GiToOid(gi, the_oid));
2815 BOOST_REQUIRE_EQUAL(oid, the_oid);
2822 BOOST_REQUIRE( dbp.
GiToOid(gi, the_oid));
2823 BOOST_REQUIRE_EQUAL(oid, the_oid);
2831 const char *
str[] = {
2849 "sp|Q63931|CCKR_CAVPO",
2872 BOOST_REQUIRE_EQUAL((
int)ids->GetNumSis(), 12);
2873 BOOST_REQUIRE_EQUAL((
int)ids->GetNumGis(), 13);
2878 for(
i = 0;
i < ids->GetNumSis();
i++) {
2879 BOOST_REQUIRE(ids->GetSiOid(
i).oid == -1);
2881 for(
i = 0;
i < ids->GetNumGis();
i++) {
2882 BOOST_REQUIRE(ids->GetGiOid(
i).oid == -1);
2890 for(
i = 0;
str[
i];
i++) {
2894 if (
str[
i][0] ==
'#') {
2895 int gi = atoi(
str[
i] + 1);
2896 found = ids->GiToOid(gi, oid);
2899 found = ids->SiToOid(str_id, oid);
2902 BOOST_REQUIRE_EQUAL(found,
true);
2904 if (
i >= 0 &&
i < 4) {
2905 BOOST_REQUIRE_EQUAL(oid, -1);
2906 }
else if (
i >= 15 &&
i < 25) {
2908 cout <<
"oid = -1, id=" <<
str[
i] << endl;
2911 BOOST_REQUIRE(oid != -1);
2919 const char * inter[] = {
2925 "gi|28378617",
"ref|NP_785509.1|",
2926 "gi|23474175",
"ref|ZP_00129469.1|",
2927 "gi|27364740",
"ref|NP_760268.1|",
2928 "gi|23113886",
"ref|ZP_00099225.1|",
2929 "gi|28563952",
"ref|NP_788261.1|",
2930 "gi|29788717",
"gb|AAP03339.1|",
2931 "gi|29566344",
"ref|NP_817911.1|",
2932 "gi|28950006",
"emb|CAD70761.1|",
2933 "gi|21305377",
"gb|AAM45611.1|",
2939 for(
const char ** p = inter; *p; p++)
2946 typedef list< CRef<CSeq_id> > TIds;
2950 ITERATE(TIds, iter, the_ids) {
2954 BOOST_REQUIRE(itr != need.
end());
2961 BOOST_REQUIRE(need.
empty());
2972 BOOST_REQUIRE_EQUAL((
string)db.
GetTitle(),
string(
"empty test database"));
2979 vector<TTaxId> taxids;
2989 char * ncbuffer = 0;
3007 BOOST_REQUIRE_EQUAL((
string)db.
GetTitle(),
string(
"empty test database"));
3008 BOOST_REQUIRE_EQUAL((
string)db.
GetDate(),
string(
"Mar 19, 2007 11:38 AM"));
3015 Uint8 seq_total = 0;
3022 BOOST_REQUIRE_EQUAL(oid_count, 0);
3023 BOOST_REQUIRE_EQUAL(seq_total,
Uint8(0));
3026 BOOST_REQUIRE_NO_THROW(db.
Begin());
3032 int begin(0), end(0);
3039 BOOST_REQUIRE_EQUAL(
size_t(0), oids.size());
3041 BOOST_REQUIRE_EQUAL(begin, end);
3045 BOOST_REQUIRE_EQUAL((
string)db.
GetDBNameList(),
string(
"data/empty"));
3050 string acc(
"P01013");
3051 CSeq_id seqid(
"sp|P01013|OVALX_CHICK");
3062 BOOST_REQUIRE_EQUAL(
false, db.
PigToOid(pig, oid));
3063 BOOST_REQUIRE_EQUAL(
false, db.
GiToOid(gi, oid));
3064 BOOST_REQUIRE_EQUAL(
false, db.
GiToPig(gi, pig));
3065 BOOST_REQUIRE_EQUAL(
false, db.
PigToGi(pig, gi));
3067 BOOST_REQUIRE(oids.size() == 0);
3068 BOOST_REQUIRE_NO_THROW(db.
SeqidToOids(seqid, oids));
3069 BOOST_REQUIRE(oids.size() == 0);
3070 BOOST_REQUIRE_EQUAL(
false, db.
SeqidToOid(seqid, oid));
3072 Uint8 residue(12345);
3082 vector<string> paths1;
3083 vector<string> paths2;
3091 BOOST_REQUIRE_EQUAL(paths1.size(),
size_t(1));
3092 BOOST_REQUIRE_EQUAL(paths2.size(),
size_t(1));
3093 BOOST_REQUIRE_EQUAL((
string)paths1[0], (
string)paths2[0]);
3119 BOOST_REQUIRE(!sd.
Empty());
3126 BOOST_REQUIRE(!sd.
Empty());
3135 for(
int di = 0; di < 2; di++) {
3136 CSeqDB & db = di ? db65 : db56;
3138 for(
int oi = 0; oi < 2; oi++) {
3139 list< CRef<CSeq_id> > ids = db.
GetSeqIDs(oi);
3144 while(! ids.empty()) {
3145 const CSeq_id &
id = *ids.front();
3148 id.GetGeneral().GetDb() ==
"BL_ORD_ID") {
3157 BOOST_REQUIRE(
count == 1);
3158 BOOST_REQUIRE(oid == oi);
3171 TGi nucl_gi = 46071107;
3172 string nucl_str = (
"AAGCTCTTCATTGATGGTAGAGAGCCTATTAACAGGCAAC"
3173 "AGTCAATGCTCCAAAGTCCAAACAAGATTACCTGTGCAAA"
3174 "GAACTTGCAGTGTAACAAACCCCNTTCACGGCCAGAAGTA"
3175 "TTTGCAACAATGTTGAAAGTCCTTCTGGCAGAGGAGGAGT"
3178 TGi prot_gi = 43914529;
3179 string prot_str =
"MINKSGYEAKYKKSIKNNEEFWRKEGKRITWIKPYKKIKNVRYS";
3181 int nucl_oid(-1), prot_oid(-1);
3183 N.GiToOid(nucl_gi, nucl_oid);
3184 P.GiToOid(prot_gi, prot_oid);
3187 N.GetSequenceAsString(nucl_oid, nstr);
3188 P.GetSequenceAsString(prot_oid, pstr);
3190 BOOST_REQUIRE_EQUAL((
string)nstr, (
string)nucl_str);
3191 BOOST_REQUIRE_EQUAL((
string)pstr, (
string)prot_str);
3202 BOOST_REQUIRE_EQUAL((
int)
local.GetTotalLength(), 12345);
3203 BOOST_REQUIRE_EQUAL((
int)
local.GetTotalLengthStats(), 23456);
3204 BOOST_REQUIRE_EQUAL((
int)
local.GetNumSeqs(), 123);
3205 BOOST_REQUIRE_EQUAL((
int)
local.GetNumSeqsStats(), 234);
3216 m_Tis.push_back(*ids);
3218 m_Gis.push_back(*ids);
3251 db.
GetGis(oid, gis,
false);
3253 ITERATE(vector<TGi>, iter, gis) {
3292 BOOST_REQUIRE_EQUAL((
int)have_got.
GetNumSeqs(), 100);
3298 BOOST_REQUIRE_EQUAL((
int)have_not.
GetNumSeqs(), 89);
3307 for(
int * idp = gis; *idp; ++idp) {
3311 BOOST_REQUIRE_EQUAL((
int) id_pop.
size(), nlist_gis);
3312 BOOST_REQUIRE_EQUAL(total, nlist_gis);
3319 BOOST_REQUIRE_EQUAL((
int) id_pop.
size(), seqp_gis);
3320 BOOST_REQUIRE_EQUAL(total, seqp_gis-nlist_gis);
3330 BOOST_REQUIRE_EQUAL((
int) id_pop.
size(), 0);
3331 BOOST_REQUIRE_EQUAL(total, -seqp_gis);
3343 list< CRef<CSeq_id> > got_ids = have_got.
GetSeqIDs(oid1);
3344 list< CRef<CSeq_id> > not_ids = have_not.
GetSeqIDs(oid1);
3354 BOOST_REQUIRE_EQUAL(diff, 2);
3374 bool found = have_got.
GiToOid(gis[0], oid);
3375 BOOST_REQUIRE(found);
3377 vector<TGi> gis_w, gis_wo;
3378 have_got.
GetGis(oid, gis_w);
3379 have_not.
GetGis(oid, gis_wo);
3383 int count_w = (
int) gis_w.size();
3384 int count_wo = (
int) gis_wo.size();
3385 BOOST_REQUIRE_EQUAL(count_w, (count_wo+1));
3392 vector<unsigned int> pigs;
3393 pigs.push_back(281224);
3398 string db =
"swissprot";
3399 const int len = 134;
3408 bool found = have_got.
PigToOid(pigs[0], oid);
3409 BOOST_REQUIRE(found);
3413 BOOST_REQUIRE_EQUAL((
unsigned int)pig_w, pigs[0]);
3432 ITERATE(vector<int>, iter, oids) {
3439 BOOST_REQUIRE(found);
3450 ITERATE(vector<int>, iter, oids) {
3457 BOOST_REQUIRE(found);
3466 NStr::Tokenize(
"1234 2468 4936 9872 19744 1234000 "
3467 "1234000000 1234000000000 1234000000000000",
3470 string sides(
"B44448888");
3475 BOOST_REQUIRE_EQUAL(sides.size(), ids.size());
3477 for(
size_t i = 0;
i < ids.size();
i++) {
3478 bool is4(
false), is8(
false);
3495 string idstr = ids[
i];
3500 bool have = db4.TiToOid(idnum, oid);
3501 BOOST_REQUIRE_EQUAL(is4, have);
3502 BOOST_REQUIRE_EQUAL(is4, (oid >= 0));
3504 have = db8.TiToOid(idnum, oid);
3505 BOOST_REQUIRE_EQUAL(is8, have);
3506 BOOST_REQUIRE_EQUAL(is8, (oid >= 0));
3508 CSeq_id seqid(
string(
"gnl|ti|") + idstr);
3511 db4.SeqidToOids(seqid, oids);
3512 BOOST_REQUIRE_EQUAL(is4, (oids.size() == 1));
3514 db8.SeqidToOids(seqid, oids);
3515 BOOST_REQUIRE_EQUAL(is8, (oids.size() == 1));
3530 int oid1(-1), oid2(-1);
3531 bool okay1 = p1.
PigToOid(pig, oid1);
3532 bool okay2 = p2.
PigToOid(pig, oid2);
3534 BOOST_REQUIRE(okay1);
3535 BOOST_REQUIRE(okay2);
3536 BOOST_REQUIRE(oid1 > 0);
3537 BOOST_REQUIRE(oid2 > 0);
3538 BOOST_REQUIRE(oid1 == oid2);
3540 int size1 = p1.
GetHdr(oid1)->
Get().size();
3541 int size2 = p2.
GetHdr(oid2)->
Get().size();
3550 BOOST_CHECK_NE(0, size1);
3551 BOOST_CHECK_NE(0, size2);
3552 BOOST_CHECK_GE(size1, 14);
3553 BOOST_CHECK_GT(size1, (size2 + 5));
3563 ostringstream fasta;
3567 ITERATE(list<string>, iter, ids) {
3571 vector<int> tmp_oids;
3574 BOOST_REQUIRE_MESSAGE(tmp_oids.size(),
3575 string(
"No OIDs found for ")+(*iter));
3577 oids.insert(oids.end(), tmp_oids.begin(), tmp_oids.end());
3582 sort(oids.begin(), oids.end());
3583 oids.erase(unique(oids.begin(), oids.end()), oids.end());
3585 ITERATE(vector<int>, iter, oids) {
3589 string all_fasta = fasta.str();
3590 string msg =
string(
"Error for accession: ") + acc;
3592 BOOST_REQUIRE_MESSAGE(all_fasta.size() == exp_size,
msg);
3593 BOOST_REQUIRE_MESSAGE(exp_oids == oids.size(),
msg);
3610 s_CheckIdLookup(db,
"NP_268346, XP_642837.1, 30262378, ABD21303.1", 4, 5411);
3634 s_CheckIdLookup(db,
"NP_268346, XP_642837.1, 30262378, ABD21303.1", 4, 5411);
3688 string acc(
"1QCF_A");
3691 nr.AccessionToOids(acc, oids);
3693 BOOST_REQUIRE(oids.size());
3696 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
3697 (!defined(NCBI_COMPILER_MIPSPRO)) )
3701 string fname(
"data/user-column");
3702 string vname(
"data/user-column-db");
3703 const string title(
"comedy");
3708 BOOST_REQUIRE_EQUAL(
CR.GetTitle(), title);
3715 BOOST_REQUIRE_EQUAL((
int)
columns.size(), 1);
3716 BOOST_REQUIRE_EQUAL(title,
columns[0]);
3719 BOOST_REQUIRE(comedy_column >= 0);
3727 BOOST_REQUIRE_EQUAL((
int)metadata_db.
size(), 3);
3728 BOOST_REQUIRE_EQUAL(metadata_db.
find(
"created-by")->second,
string(
"unit test"));
3729 BOOST_REQUIRE_EQUAL(metadata_db.
find(
"purpose")->second,
string(
"none"));
3730 BOOST_REQUIRE_EQUAL(metadata_db.
find(
"format")->second,
string(
"text"));
3733 BOOST_REQUIRE(metadata_db == metadata_user);
3741 BOOST_REQUIRE(db.
GetColumnValue(comedy_column,
"format") ==
"text");
3742 BOOST_REQUIRE(db.
GetColumnValue(comedy_column,
"duck soup") ==
"");
3743 BOOST_REQUIRE(
CR.GetValue(
"format") ==
"text");
3744 BOOST_REQUIRE(
CR.GetValue(
"who's on first") ==
"");
3749 vector<string> volumes;
3755 BOOST_REQUIRE(meta_vol0.
find(
"format") != meta_vol0.
end());
3756 BOOST_REQUIRE(meta_vol0.
find(
"format")->second ==
"text");
3760 vector<string> column_data;
3761 column_data.push_back(
"Groucho Marx");
3762 column_data.push_back(
"Charlie Chaplain");
3763 column_data.push_back(
"");
3764 column_data.push_back(
"Abbott and Costello");
3765 column_data.push_back(
"Jackie Gleason");
3766 column_data.push_back(
"Jerry Seinfeld");
3767 column_data.back()[5] = (char) 0;
3771 BOOST_REQUIRE_EQUAL((
int) column_data.size(), db.
GetNumOIDs());
3772 BOOST_REQUIRE_EQUAL((
int) column_data.size(),
CR.GetNumOIDs());
3776 for(
int oid = 0; oid <
count; oid++) {
3778 CR.GetBlob(oid, cr_blob);
3780 BOOST_REQUIRE(db_blob.
Str() == column_data[oid]);
3781 BOOST_REQUIRE(cr_blob.
Str() == column_data[oid]);
3791 string good(
"Z12841.1");
3792 string bad (
"Z12842.1");
3793 string both(
"Z12843.1");
3795 vector<int> o1, o2, o3;
3800 BOOST_REQUIRE(o1.size() == 1);
3801 BOOST_REQUIRE(o2.size() == 0);
3802 BOOST_REQUIRE(o3.size() == 1);
3805 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
3806 (!defined(NCBI_COMPILER_MIPSPRO)) )
3817 BOOST_REQUIRE_EQUAL((
int)algos.size(), 2);
3821 string algo_opts, algo_name;
3825 filtering_algo, algo_name, algo_opts);
3829 BOOST_REQUIRE_EQUAL(algo_opts,
kEmptyStr);
3832 filtering_algo, algo_name, algo_opts);
3834 BOOST_REQUIRE_EQUAL(algo_opts,
string(
"-species Desmodus_rotundus"));
3857 BOOST_REQUIRE_EQUAL(algos.size(), 1U);
3858 BOOST_REQUIRE_EQUAL(11, algos.front());
3862 BOOST_REQUIRE(ranges.
empty());
3969 BOOST_CHECK_EQUAL((
string) nr_sum.
CompareSelf(),
"=A=B=C=a=b=c");
3970 BOOST_CHECK_EQUAL((
string) sp_sum.
CompareSelf(),
"=A=B=C=a=b=c");
3971 BOOST_CHECK_EQUAL((
string) ac_sum.
CompareSelf(),
"+A+B=C+a+b=c");
3972 BOOST_CHECK_EQUAL((
string) sc_sum.
CompareSelf(),
"+A+B=C+a+b=c");
3974 BOOST_CHECK_EQUAL((
string) nr_sum.
Compare(sp_sum),
"+T+F+M+t+f+m");
3975 BOOST_CHECK_EQUAL((
string) nr_sum.
Compare(ac_sum),
"=T+F+M=t+f+m");
3976 BOOST_CHECK_EQUAL((
string) nr_sum.
Compare(sc_sum),
"+T+F+M+t+f+m");
3978 BOOST_CHECK_EQUAL((
string) sp_sum.
Compare(sc_sum),
"=T+F+M=t+f+m");
3979 BOOST_CHECK_EQUAL((
string) ac_sum.
Compare(sc_sum),
"+T+F+M+t+f+m");
3995 string path =
string(
"data") + ch +
"deltaseq";
3996 ifstream
f(path.c_str());
4009 BOOST_REQUIRE_EQUAL(h1, h2);
4021 BOOST_REQUIRE(db.
GiToOid(555, oid));
4024 BOOST_REQUIRE(deflines.
NotEmpty());
4029 BOOST_REQUIRE(deflines.
Empty());
4035 const Int8 kExpectedSize = 1420;
4041 vector<string> gnomon_ids;
4042 gnomon_ids.push_back(
"gnl|GNOMON|334.p");
4043 gnomon_ids.push_back(
"gnl|GNOMON|2334.p");
4044 gnomon_ids.push_back(
"gnl|GNOMON|4334.p");
4045 gnomon_ids.push_back(
"gnl|GNOMON|6334.p");
4046 gnomon_ids.push_back(
"gnl|GNOMON|8334.p");
4049 for (
size_t i = 0;
i < gnomon_ids.size();
i++) {
4053 BOOST_REQUIRE( !oids.empty() );
4054 BOOST_REQUIRE_EQUAL(
i, (
size_t)oids.front());
4060 BOOST_REQUIRE( !oids.empty() );
4061 BOOST_REQUIRE_EQUAL(
i, (
size_t)oids.front());
4067 BOOST_REQUIRE(found);
4068 BOOST_REQUIRE_EQUAL(
i, (
size_t)oid);
4077 string seqidlist_name = seqidlist_tmpfile.
GetFileName();
4078 string blastdb_name = alias_file_tmpfile.
GetFileName() +
".pal";
4081 const string kSeqIdIncluded =
"P01013.1";
4084 ofstream stream(seqidlist_name.c_str());
4085 stream << kSeqIdIncluded << endl;
4089 ofstream stream(blastdb_name.c_str());
4090 stream <<
"TITLE test for 129295 JIRA SB-646" << endl;
4091 stream <<
"DBLIST nr" << endl;
4092 stream <<
"SEQIDLIST " << seqidlist_name << endl;
4100 BOOST_REQUIRE_EQUAL(1U, oids.size());
4102 const string seqid2search =
"WP_138200753.1";
4105 BOOST_CHECK_EQUAL(0
U, oids.size());
4111 int rv = system(
"cp data/swiss_cheese.pal 'data/test space.pal'");
4112 BOOST_REQUIRE_EQUAL(0, rv);
4113 string db_name =
"\"data/test space\"";
4118 BOOST_REQUIRE_EQUAL((
string) dbs_sum.
CompareSelf(),
"+A+B=C+a+b=c");
4138 BOOST_REQUIRE_EQUAL_COLLECTIONS(taxids.
begin(), taxids.
end(),
4176 const int oids[] = {
4177 0x7acee466, 0x4cbc1ab0,
4178 0x7d219922, 0x7e096431,
4179 0x276283ea, 0x13cee382,
4180 0x51f8b267, 0x37183674,
4181 0x03559cd6, 0x6bdcfbb7
4183 const Uint4 nrecs = (
Uint4) (
sizeof oids /
sizeof oids[0]);
4197 #ifndef NCBI_INT8_GI
4199 const Uint4 uint4_gi = 0xFFFFFFFF;
4205 const Int8 big_gi = 0xC0000000;
4206 for (
Uint4 i = 0;
i < nrecs; ++
i) {
4215 BOOST_REQUIRE(oid == oids[
i]);
4217 BOOST_FAIL(
"CSeq_id constructor threw exception");
4232 BOOST_REQUIRE_EQUAL(55, found);
4240 list< CRef<CSeq_id> > ids = db.
GetSeqIDs(1);
4244 if((*itr)->IsGi()) {
4249 fasta_id = (*itr)->AsFastaString();
4253 BOOST_REQUIRE_EQUAL(1 , num_acc);
4254 BOOST_REQUIRE_EQUAL(fasta_id ,
"prf||2209341B");
4265 BOOST_REQUIRE_EQUAL(63, found);
4278 BOOST_REQUIRE_EQUAL(1, found);
4279 BOOST_REQUIRE_EQUAL(3, oid);
4291 BOOST_REQUIRE_EQUAL(2, found);
4297 const unsigned int num_pigs = 5;
4298 const int pigs[num_pigs] = {4377482, 1287445, 2, 6066974, 5303747};
4299 const unsigned int num_valid_pig = 4;
4304 for (
unsigned int i =0;
i < num_pigs;
i++) {
4312 string db_name =
"swissprot";
4320 BOOST_REQUIRE_EQUAL(negative_pig_db.
GetNumSeqs(), (
int) (total_num_seqs - num_valid_pig));
4322 vector<string> seq_ids;
4325 list< CRef<CSeq_id> > ids = pig_db.
GetSeqIDs(oid);
4327 seq_ids.push_back(ids.front()->GetSeqIdString());
4328 BOOST_REQUIRE_EQUAL(oid_found, oid);
4330 BOOST_REQUIRE_EQUAL(seq_ids.size(), num_valid_pig);
4332 for(
unsigned int i=0;
i < seq_ids.size();
i ++){
4335 BOOST_REQUIRE_EQUAL(
not_found.size(), (
unsigned int) 0);
4343 const unsigned int num_pigs = 5;
4344 const int pigs[num_pigs] = {2, 355704, 863725, 1727116, 24036443};
4345 string db_name =
"data/ipg_test";
4351 for (
unsigned int i =0;
i < num_pigs;
i++) {
4363 BOOST_REQUIRE_EQUAL(total_num_seqs, 1);
4365 const int check_oids[1] = {12};
4367 BOOST_REQUIRE_EQUAL(oid, check_oids[c]);
4372 for (
unsigned int i =0;
i < num_pigs;
i++) {
4384 BOOST_REQUIRE_EQUAL(total_num_seqs, 3);
4386 const int check_oids[3] = {2, 6, 8};
4388 BOOST_REQUIRE_EQUAL(oid, check_oids[c]);
4396 for (
unsigned int i =0;
i < num_pigs;
i++) {
4397 p.push_back(pigs[
i]);
4409 BOOST_REQUIRE_EQUAL(total_num_seqs, 5);
4411 const int check_oids[5] = {0, 1, 3, 5, 7};
4413 BOOST_REQUIRE_EQUAL(oid, check_oids[c]);
4422 for (
unsigned int i =0;
i < num_pigs;
i++) {
4423 p.push_back(pigs[
i]);
4434 const int check_oids[2] = {1, 3 };
4436 BOOST_REQUIRE_EQUAL(oid, check_oids[c]);
4445 string db_name =
"refseq_mrna";
4452 int total_num_seqs = 0;
4453 Uint8 total_length = 0;
4455 BOOST_REQUIRE(total_num_seqs > 0);
4456 BOOST_REQUIRE(total_length > 0);
4463 for(
int i=0;
i < MAX_FD_COUNT;
i++) {
4483 for (
Int8 i=0;
i < 10000;
i++) {
4498 const int kNumThreads=64;
4499 vector<CTestThread*> threads;
4501 for (
int i=0;
i < kNumThreads;
i++) {
4504 for (
int i=0;
i < kNumThreads;
i++) {
4507 for (
int i=0;
i < kNumThreads;
i++) {
4516 string db_name =
"data/wp_nr_v5";
4519 string acc =
"WP_007051162.1";
4520 vector<TTaxId> tax_ids;
4522 BOOST_REQUIRE_EQUAL(tax_ids.size(), 4);
4523 BOOST_REQUIRE_EQUAL(tax_ids[0], 1678);
4524 BOOST_REQUIRE_EQUAL(tax_ids[3], 1263059);
4528 vector<TTaxId> tax_ids;
4530 BOOST_REQUIRE_EQUAL(tax_ids.size(), 1);
4531 BOOST_REQUIRE_EQUAL(tax_ids[0], 1205679);
4534 string acc =
"junk";
4535 vector<TTaxId> tax_ids;
4537 BOOST_REQUIRE_EQUAL(tax_ids.size(), 0);
4543 string db_name =
"data/test_v4";
4546 string acc =
"pir||T49736";
4547 vector<TTaxId> tax_ids;
4549 BOOST_REQUIRE_EQUAL(tax_ids.size(), 1);
4550 BOOST_REQUIRE_EQUAL(tax_ids[0], 0);
4553 string acc =
"junk";
4554 vector<TTaxId> tax_ids;
4556 BOOST_REQUIRE_EQUAL(tax_ids.size(), 0);
static const char * kFileName
vector< CRef< CSeq_id > > SeqIdList
`Blob' Class for SeqDB (and WriteDB).
CTempString Str() const
Get blob contents as a CTempString.
TTaxIds GetLeafTaxIds() const
void SetLeafTaxIds(const TTaxIds &t)
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
GI list containing the intersection of two other lists of GIs.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
CNegativeIdList(const int *ids, bool use_tis)
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
int ChangeOpenedFilseCount(EFilesCount fc)
int GetOpenedFilseCount(void)
void GetRawSeqAndAmbig(int oid, const char **buffer, int *seq_length, int *ambig_length) const
Raw Sequence and Ambiguity Data.
void GetPigBounds(int *low_id, int *high_id, int *count)
Get PIG Bounds.
unsigned GetSequenceHash(int oid)
Get the sequence hash for a given OID.
void HashToOids(unsigned hash, vector< int > &oids)
Get the OIDs for a given sequence hash.
void GetGiBounds(TGi *low_id, TGi *high_id, int *count)
Get GI Bounds.
vector< SGiOid > m_GisOids
Pairs of GIs and OIDs.
int GetNumGis() const
Get the number of GIs in the array.
void GetPigList(vector< TPig > &pigs) const
void GetGiList(vector< TGi > &gis) const
Get the gi list.
void AddTaxIds(const set< TTaxId > &tax_ids)
T GetKey(int index) const
bool FindGi(TGi gi) const
Test for existence of a GI.
vector< SSiOid > m_SisOids
Pairs of Seq-ids and OIDs.
SeqDB ID list for performing boolean set operations.
bool Blank() const
Check if an ID list is blank.
void Compute(EOperation op, const vector< int > &ids, bool positive=true)
Perform a logical operation on a list.
bool IsPositive()
Checks whether a positive GI list was produced.
CRef< CSeqDBNegativeList > GetNegativeList()
Retrieve a negative GI list.
CRef< CSeqDBGiList > GetPositiveList()
Retrieve a positive GI list.
bool IdToOid(Int8 id, TOid &oid)
GI or TI translation.
void UnLease()
Return any memory held by this object to the atlas.
void AddTaxIds(const set< TTaxId > &tax_ids)
void SetGiList(const vector< TGi > &new_list)
Set ID set for this negative list.
void SetPigList(const vector< TPig > &new_list)
vector< TTi > m_Tis
TIs to exclude from the SeqDB instance.
bool FindGi(TGi gi)
Test for existence of a GI.
vector< TGi > m_Gis
GIs to exclude from the SeqDB instance.
Reader for BlastDb format column files.
void GetColumnBlob(int col_id, int oid, CBlastDbBlob &blob)
Fetch the data blob for the given column and oid.
int TOID
Sequence type accepted and returned for OID indices.
static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)
Find volume paths.
bool OidToPig(int oid, int &pig) const
Translate an OID to a PIG.
Uint8 GetTotalLength() const
Returns the sum of the lengths of all available sequences.
void GetGis(int oid, vector< TGi > &gis, bool append=false) const
Gets a list of GIs for an OID.
bool PigToOid(int pig, int &oid) const
Translate a PIG to an OID.
void SetIterationRange(int oid_begin, int oid_end)
Set Iteration Range.
int GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
TGi GetSeqGI(int oid) const
Returns the first Gi (if any) of the sequence.
Uint8 GetVolumeLength() const
Returns the sum of the lengths of all volumes.
void GetAvailableMaskAlgorithms(vector< int > &algorithms)
Get a list of algorithm IDs for which mask data exists.
bool OidToGi(int oid, TGi &gi) const
Translate an OID to a GI.
const string & GetDBNameList() const
Get list of database names.
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Gets a list of sequence identifiers.
Int8 GetDiskUsage() const
Retrieve the disk usage in bytes for this BLAST database.
void ResetInternalChunkBookmark()
Resets this object's internal chunk bookmark, which is used when the oid_state argument to GetNextOID...
EOidListType
Indicates how block of OIDs was returned.
CRef< CSeq_data > GetSeqData(int oid, TSeqPos begin, TSeqPos end) const
Fetch data as a CSeq_data object.
bool GiToPig(TGi gi, int &pig) const
Translate a GI to a PIG.
void GetAliasFileValues(TAliasFileValues &afv)
Get Name/Value Data From Alias Files.
int GetMaxLength() const
Returns the length of the largest sequence in the database.
int GetSeqLength(int oid) const
Returns the sequence length in base pairs or residues.
bool PigToGi(int pig, TGi &gi) const
Translate a PIG to a GI.
ESeqType GetSequenceType() const
Returns the type of database opened - protein or nucleotide.
const CSeqDBGiList * GetGiList() const
Get GI list attached to this database.
ESeqType
Sequence types (eUnknown tries protein, then nucleotide).
bool SeqidToOid(const CSeq_id &seqid, int &oid) const
Translate a Seq-id to any matching OID.
void RetAmbigSeq(const char **buffer) const
Returns any resources associated with the sequence.
int GetOidAtOffset(int first_seq, Uint8 residue) const
Find the sequence closest to the given offset into the database.
CRef< CBioseq > GetBioseq(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence.
CRef< CBioseq > GetBioseqNoData(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence without sequence data.
void GetTaxIDs(int oid, map< TGi, TTaxId > &gi_to_taxid, bool persist=false) const
Get taxid for an OID.
void GetMaskAlgorithmDetails(int algorithm_id, objects::EBlast_filter_program &program, string &program_name, string &algo_opts)
Get information about one type of masking available here.
void GetTotals(ESummaryType sumtype, int *oid_count, Uint8 *total_length, bool use_approx=true) const
Returns the sum of the sequence lengths.
void RetSequence(const char **buffer) const
Returns any resources associated with the sequence.
string GetTitle() const
Returns the database title.
int GetNumSeqs() const
Returns the number of sequences available.
void GetTaxIdsForSeqId(const CSeq_id &seq_id, vector< TTaxId > &taxids)
Get all tax ids for a seq id.
EOidListType GetNextOIDChunk(int &begin_chunk, int &end_chunk, int oid_size, vector< int > &oid_list, int *oid_state=NULL)
Return a chunk of OIDs, and update the OID bookmark.
int GetSequence(int oid, const char **buffer) const
Get a pointer to raw sequence data.
void AccessionToOids(const string &acc, vector< int > &oids) const
Translate an Accession to a list of OIDs.
void ListColumns(vector< string > &titles)
List columns titles found in this database.
void GetTaxIdsForAccession(const string &accs, vector< TTaxId > &taxids)
Get all tax ids for an accession.
bool CheckOrFindOID(int &next_oid) const
Find an included OID, incrementing next_oid if necessary.
string GetDate() const
Returns the construction date of the database.
int GetNumSeqsStats() const
Returns the number of sequences available.
@ eUnfilteredAll
Sum of all sequences, ignoring GI and OID lists and alias files.
@ eFilteredRange
Sum of included sequences with OIDs within the iteration range.
@ eFilteredAll
Values from alias files, or summation over all included sequences.
int GetColumnId(const string &title)
Get an ID number for a given column title.
void SeqidToOids(const CSeq_id &seqid, vector< int > &oids) const
Translate a Seq-id to a list of OIDs.
int GetAmbigSeqAlloc(int oid, char **buffer, int nucl_code, ESeqDBAllocType strategy, TSequenceRanges *masks=NULL) const
Get a pointer to sequence data with ambiguities.
CSeqDBIter Begin() const
Returns a sequence iterator.
const string & GetColumnValue(int column_id, const string &key)
Look up the value for a specific column metadata key.
CRef< CBioseq > GiToBioseq(TGi gi) const
Get a CBioseq for a given GI.
static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo &info)
Get taxonomy information.
CRef< CBlast_def_line_set > GetHdr(int oid) const
Get the ASN.1 header for the sequence.
Uint8 GetTotalLengthStats() const
Returns the sum of the lengths of all available sequences.
int GetSeqLengthApprox(int oid) const
Returns an unbiased, approximate sequence length.
CRef< CBioseq > SeqidToBioseq(const CSeq_id &seqid) const
Get a CBioseq for a given Seq-id.
static CRef< CBlast_def_line_set > ExtractBlastDefline(const CBioseq &bioseq)
Extract a Blast-def-line-set object from a Bioseq retrieved by CSeqDB.
CRef< CBioseq > PigToBioseq(int pig) const
Get a CBioseq for a given PIG.
int GetAmbigSeq(int oid, const char **buffer, int nucl_code) const
Get a pointer to sequence data with ambiguities.
void GetMaskData(int oid, const vector< int > &algo_ids, TSequenceRanges &ranges)
Get masked ranges of a sequence.
bool GiToOid(TGi gi, int &oid) const
Translate a GI to an OID.
const map< string, string > & GetColumnMetaData(int column_id)
Get all metadata for the specified column.
CSeqDBIdSet GetIdSet() const
Get IdSet list attached to this database.
void GetLeafTaxIDs(int oid, map< TGi, set< TTaxId > > &gi_to_taxid_set, bool persist=false) const
Get taxid for an OID.
void Append(const char *p)
CSeqIdList(const char **str)
CSimpleGiList(const vector< TGi > &gis)
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
CRef< CSeqDBIsam > m_Isam
CTestThread(CSeqDBAtlas &atlas)
CTmpEnvironmentSetter(const char *name, const char *value=NULL)
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator find(const key_type &key) const
const_iterator end() const
Defines column reader class for SeqDB.
int GetSeqLength(const CBioseq &bioseq)
static const unsigned long CR
static const struct name_t names[]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static const char * expected[]
static const char * str(char *buf, int n)
static const column_t columns[]
#define GI_FROM(T, value)
constexpr size_t ArraySize(const Element(&)[Size])
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
SStrictId_Tax::TId TTaxId
Taxon id type.
void Set(const string &name, const string &value)
Set an environment variable by name.
const string & Get(const string &name, bool *found=NULL) const
Get environment value by name.
static void Add(const string &path)
Add the name of a dir entry; it will be deleted on (normal) exit.
const string & GetFileName(void) const
Return used file name (generated or given in the constructor).
static string ConvertToOSPath(const string &path)
Convert "path" on any OS to the current OS-dependent path.
static char GetPathSeparator(void)
Get path separator symbol specific for the current platform.
const TPrim & Get(void) const
#define MSerial_AsnText
I/O stream manipulators.
const string AsFastaString(void) const
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
void SetWidth(TSeqPos width)
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty, i.e. it points to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty, i.e. it does not point to any object and has a null value.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static Int8 StringToInt8(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to Int8.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
bool IsSetLinks(void) const
Check if a value has been assigned to Links data member.
TTaxid GetTaxid(void) const
Get the Taxid member data.
bool IsSetTaxid(void) const
Check if a value has been assigned to Taxid data member.
void SetTaxid(TTaxid value)
Assign a value to Taxid data member.
EBlast_filter_program
This defines the possible sequence filtering algorithms to be used in a BLAST database.
const Tdata & Get(void) const
Get the member data.
@ eBlast_filter_program_repeat
@ eBlast_filter_program_seg
const TTag & GetTag(void) const
Get the Tag member data.
TId GetId(void) const
Get the variant data.
TGi GetGi(void) const
Get the variant data.
const TGeneral & GetGeneral(void) const
Get the variant data.
@ e_General
for other databases
@ e_Gi
GenInfo Integrated Database.
void ResetDescr(void)
Reset Descr data member.
const TInst & GetInst(void) const
Get the Inst member data.
const TNcbi4na & GetNcbi4na(void) const
Get the variant data.
bool CanGetSeq_data(void) const
Check if it is safe to call GetSeq_data method.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
unsigned int
A callback function used to compare two keys in a database.
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is whole
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
CBioseq_Info & GetBioseq(CTSE_Info &tse, const CBioObjectId &id)
unique_ptr< CLocalTaxon > tax1
static void hex(unsigned char c)
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
static const string kDb("db")
string s_Stringify(CRef< ASNOBJ > a)
static void s_TestMaskingLimits(EMaskingType mask, unsigned first, unsigned last, unsigned lowest, unsigned highest, unsigned count)
static void s_MapAllGis(CSeqDB &db, map< int, int > &m, int change, int &total)
static void s_TestPartialAmbig(CSeqDB &db, TGi nt_gi)
void s_ApproxEqual(NUM a, NUM b, DIF epsilon, int lineno)
BOOST_AUTO_TEST_CASE(ConstructLocal)
static bool s_DbHasOID(CSeqDB &db, int &count, int oid)
static void s_ModifyMap(map< int, int > &m, int key, int c, int &total)
static void s_CheckIdLookup(CSeqDB &db, const string &acc, size_t exp_oids, size_t exp_size)
static void s_TestPartialAmbigRange(CSeqDB &db, int oid, int begin, int end)
static Uint4 s_BufHash(const char *buf_in, Uint4 length, Uint4 start=1)
string s_ToString(const A &a, const B &b, const C &c, const D &d, const E &e)
static bool s_MaskingTest(EMaskingType mask, unsigned oid)
Defines exception class and several constants for SeqDB.
Int4 TOid
Ordinal ID in BLAST databases.
const int kSeqDBNuclNcbiNA8
Used to request ambiguities in Ncbi/NA8 format.
ESeqDBIdType SeqDB_SimplifyAccession(const string &acc, Int8 &num_id, string &str_id, bool &simpler)
String id simplification.
bool SeqDB_IsBinaryGiList(const string &fname)
Read a text or binary SeqId list from a file.
unsigned SeqDB_SequenceHash(const char *sequence, int length)
Returns a path minus filename.
const int kSeqDBNuclBlastNA8
Used to request ambiguities in BLAST/NA8 format.
string SeqDB_ResolveDbPath(const string &filename)
Resolve a file path using SeqDB's path algorithms.
Defines `expert' version of CSeqDB interfaces.
ISAM index database access object.
static const char * kTaxid
static SLJIT_INLINE sljit_ins nr(sljit_gpr dst, sljit_gpr src)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
List of sequence offset ranges.
int measured_oids
Measured oid count should equal filtered if alias files are correct.
int total_oids
Total oid count, sum of all volume oid counts.
Int8 filtered_length
Filtered length, result of all filtering.
void CompareField(Int8 X, Int8 Y, string &sum, char ch)
Int8 total_length
Total length, sum of all volume lengths.
string Compare(SDbSumInfo &other)
int filtered_oids
Filtered oid count, result of all filtering.
Int8 measured_length
Measured length should equal filtered if alias files are correct.
Utility stuff for more convenient using of Boost.Test library.
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
static string kCount("Count")