32 #define NCBI_TEST_APPLICATION
43 #include "../mask_info_registry.hpp"
47 #include <boost/current_function.hpp>
55 #include <unordered_map>
57 #ifndef SKIP_DOXYGEN_PROCESSING
78 sequence.assign(
buffer, slength);
104 const vector<int> & layout,
107 BOOST_REQUIRE(layout.size());
115 for(
int i = 0;
i < (
int)raw.size();
i += width) {
116 width = layout[layout_i];
117 BOOST_REQUIRE(width);
120 mask >>= (64 - 8*width);
122 int left = raw.size() -
i;
123 int width1 = (left < width) ? left : width;
125 string sub(raw,
i, width1);
131 for(
int by = 0; by < (
int)sub.size(); by++) {
132 x = (x << 8) + (sub[by] & 0xFF);
142 layout_i = (layout_i + 1) % layout.size();
151 layout.push_back(per);
159 const vector<int> & layout,
162 ifstream
f(fname.c_str());
166 while(
f && !
f.eof()) {
170 int amt =
f.gcount();
175 raw.append(
buf, amt);
186 layout.push_back(per);
202 #define BOOST_REQUIRE_CUTPOINT(X) if (cutpoint == X) throw CNonException()
227 BOOST_REQUIRE(found);
281 BOOST_REQUIRE(found);
288 BOOST_REQUIRE(! seq.empty());
289 BOOST_REQUIRE(
ambig.empty() || is_nucl);
299 template<
class ASNOBJ>
309 template<
class ASNOBJ>
319 template<
class ASNOBJ>
339 BOOST_REQUIRE_EQUAL(s1, s2);
348 const string & title)
356 bool rv1 = dst.
OidToGi(oid, gi);
357 bool rv2 = src.
GiToOid(gi, src_oid);
368 BOOST_REQUIRE_EQUAL(dst.
GetTitle(), title);
383 for(
unsigned i = 0;
i < files.size();
i++) {
397 if (s.size() == 0)
break;
398 BOOST_REQUIRE(s2 <= s);
409 size_t pos = data.rfind(delim);
411 if (pos == string::npos)
416 data.size()-(pos + delim.size()));
424 bool need_hash =
false)
426 bool found_hash =
false;
428 for(
unsigned i = 0;
i < files.size();
i++) {
431 if (ext ==
"nsd" || ext ==
"psd") {
434 if (ext ==
"nhd" || ext ==
"phd") {
441 BOOST_REQUIRE(found_hash);
457 vector<string> files;
492 const string & src_name,
493 const string & dst_name,
494 const string & title,
503 basename += (is_protein ?
".p" :
".n");
504 const char* ext[] = {
"si",
"sd",
"og",
"ni",
"nd" };
505 for (
size_t i = 0;
i < (
sizeof(ext)/
sizeof(*ext));
i++) {
514 vector<string> files;
558 unique_ptr<CObjectIStream> ois
576 for(
TGi * ptr = gis; *ptr !=
ZERO_GI; ptr ++) {
583 for(
const char ** ptr = gis; *ptr; ptr ++) {
600 BOOST_REQUIRE(! lr->
AtEOF());
603 BOOST_REQUIRE(! entry.
Empty());
604 BOOST_REQUIRE(entry->
IsSeq());
620 78883515, 78883517, 24431485, 19110479, 15054463,
621 15054465, 15054467, 15054469, 15054471, 19570808, 18916476,
622 1669608, 1669610, 1669612, 1669614, 1669616, 10944307,
623 10944309, 10944311, 19909844, 19909846, 19909860, 19911180,
624 19911220, 19911222, 19911224, 57472140, 20126670, 20387092,
625 57639630, 57639632, 7670507, 2394289, 21280378, 21327938,
626 6518520, 20086356, 20086357, 21392391, 20086359, 19110509,
627 21623739, 21623761, 38303844, 38197377, 56788779, 57032781,
628 57870443, 56789136, 0
636 const string srcname(
"data/writedb_nucl");
637 const string dstname(
"w-nucl-bs");
638 const string title(
"bioseq nucleotide dup");
650 const string dstname2(
"w-nucl-raw");
651 const string title2(
"raw nucleotide dup");
1070 1477444, 1669609, 1669611, 1669615, 1669617, 7544146,
1071 22652804, 3114354, 3891778, 3891779, 81294290,
1072 81294330, 49089974, 62798905, 3041810, 7684357, 7684359,
1073 7684361, 7684363, 7544148, 3452560, 3452564, 6681587,
1074 6681590, 6729087, 7259315, 2326257, 3786310, 3845607,
1075 13516469, 2575863, 4049591, 3192363, 1871126, 2723484,
1076 6723181, 11125717, 2815400, 1816433, 3668177, 6552408,
1077 13365559, 8096667, 3721768, 9857600, 2190043, 3219276,
1078 10799943, 10799945, 0
1087 "data/writedb_prot",
1089 "bioseq protein dup");
1094 "data/writedb_prot",
1155 BOOST_REQUIRE(bytes.size() == sv.
size());
1197 bs1->
SetInst().ResetSeq_data();
1210 vector<string> files;
1242 db2.
GetGis(oid, gis,
false);
1244 bool found_gi =
false;
1245 for(
unsigned i = 0;
i < gis.size();
i++) {
1246 if (gis[
i] == 129295 + oid) {
1252 BOOST_REQUIRE(found_gi);
1253 BOOST_REQUIRE_EQUAL(pig-oid, 101);
1256 BOOST_REQUIRE_EQUAL(oid, 3);
1275 int gis[] = { 129295, 129296, 129297, 129299, 0 };
1277 Uint8 letter_count = 0;
1279 for(
int i = 0; gis[
i];
i++) {
1294 BOOST_REQUIRE_EQUAL(3, (
int) v.size());
1295 BOOST_REQUIRE_EQUAL(v[0],
string(
"multivol.00"));
1296 BOOST_REQUIRE_EQUAL(v[1],
string(
"multivol.01"));
1297 BOOST_REQUIRE_EQUAL(v[2],
string(
"multivol.02"));
1299 BOOST_REQUIRE_EQUAL(25, (
int)
f.size());
1310 BOOST_REQUIRE_EQUAL(oids, 4);
1311 BOOST_REQUIRE_EQUAL(letter_count, letters);
1322 vector<string> files;
1337 bdls->
Set().push_back(dl);
1339 dl->SetTitle(
"Some protein sequence");
1340 dl->SetSeqid().push_back(seqid);
1341 dl->SetTaxid(12345);
1348 BOOST_REQUIRE(files.size() != 0);
1355 BOOST_REQUIRE_EQUAL(found,
true);
1356 BOOST_REQUIRE_EQUAL(oid, 0);
1379 const char* accs[] = {
1380 "AAC77159.1",
"AAC76880.1",
1381 "AAC76230.1",
"AAC76373.1",
"AAC77137.1",
"AAC76637.2",
1382 "AAA58101.1",
"AAC76702.1",
"AAC77109.1",
1383 "AAC76757.1",
"AAA58162.1",
"AAC76604.1",
"AAC76539.1",
1384 "AAA24224.1",
"AAC76926.1",
"AAC77047.1",
1385 "AAA57930.1",
"AAC76134.1",
1386 "AAC76586.2",
"AAA58123.1",
"AAC76430.1",
"AAA58107.1",
1387 "AAA24272.1",
"AAC76396.2",
1388 "AAC76918.1",
"AAC76727.1",
"AAA57964.1",
1398 "data/writedb_prot",
1400 "test of string ISAM sortedness");
1408 const char* accs[] = {
1409 "AAC76335.1",
"AAC77159.1",
"AAA58145.1",
"AAC76880.1",
1410 "AAC76230.1",
"AAC76373.1",
"AAC77137.1",
"AAC76637.2",
1411 "AAA58101.1",
"AAC76329.1",
"AAC76702.1",
"AAC77109.1",
1412 "AAC76757.1",
"AAA58162.1",
"AAC76604.1",
"AAC76539.1",
1413 "AAA24224.1",
"AAC76351.1",
"AAC76926.1",
"AAC77047.1",
1414 "AAC76390.1",
"AAC76195.1",
"AAA57930.1",
"AAC76134.1",
1415 "AAC76586.2",
"AAA58123.1",
"AAC76430.1",
"AAA58107.1",
1416 "AAC76765.1",
"AAA24272.1",
"AAC76396.2",
"AAA24183.1",
1417 "AAC76918.1",
"AAC76727.1",
"AAC76161.1",
"AAA57964.1",
1427 "data/writedb_prot",
1429 "test of string ISAM sortedness"),
1439 TGi prot_gis[] = { 129295, 129296, 129297, 0 };
1440 TGi nucl_gis[] = { 555, 556, 405832, 0 };
1453 "test of hash ISAMs (P)",
1458 "test of hash ISAMs (N)",
1473 vector<string> files;
1474 string title =
"pdb-id";
1475 string I1(
"pdb|3E3Q|BB"), T1(
"Lower case chain b");
1485 string str =
">" + I1 +
" " + T1 +
"\n" +
"ELVISLIVES\n";
1503 vector<string> files;
1505 string title =
"pdb-id";
1508 I1(
"pdb|3E3Q|b"), T1(
"Lower case chain b");
1518 string str =
">" + I1 +
" " + T1 +
"\n" +
"ELVISLIVES\n";
1537 BOOST_REQUIRE(oids.size() == 1);
1546 vector<string> files;
1548 string title =
"from-fasta-reader";
1551 I1(
"gi|123"), T1(
"One two three."),
1552 I2(
"gi|124"), T2(
"One two four.");
1563 ">" + I1 +
" " + T1 +
1564 "\001" + I2 +
" " + T2 +
"\n" +
1587 BOOST_REQUIRE_EQUAL(bdls->
Get().size(), 1);
1588 BOOST_REQUIRE_EQUAL(bdls->
Get().front()->GetTitle(), T1);
1589 BOOST_REQUIRE_EQUAL(bdls->
Get().front()->GetSeqid().size(), 1);
1590 BOOST_REQUIRE_EQUAL(bdls->
Get().front()->GetSeqid().front()->AsFastaString(), I1);
1599 string fn4(
"test4.til"), fn8(
"test8.til");
1605 for(
int i = 0;
i<10;
i++) {
1623 "400 1000 4000 10000 40000");
1627 "0 1 0 10 0 100 0 1000 0 10000 "
1628 "0 100000 0 1000000 0 10000000 1 0 10 0");
1637 typedef pair<string, string>
TPair;
1638 vector< TPair > ids48;
1644 Int8 a4(1234), b4(2), a8(1234), b8(1000);
1646 string prefix =
"gnl|ti|";
1648 for(
int i = 0;
i < 5;
i++) {
1654 Int8 p4(a4), p8(a8);
1661 BOOST_REQUIRE(a4 > p4);
1662 BOOST_REQUIRE(a8 > p8);
1667 BOOST_REQUIRE((a4 >> 32) == 0);
1668 BOOST_REQUIRE((a8 >> 32) != 0);
1671 string dbname4 =
"test-db-short-tis";
1672 string dbname8 =
"test-db-long-tis";
1676 dbname4 +
" database.",
1681 dbname8 +
" database.",
1684 string iupac =
"GATTACA";
1686 ITERATE(vector< TPair >, iter, ids48) {
1687 string f4 =
string(
">") + iter->first +
" test\n" + iupac +
"\n";
1688 string f8 =
string(
">") + iter->second +
" test\n" + iupac +
"\n";
1703 i4(
"1 0 28 5 1 100 0 0 0 4D2 0 FFFFFFFF 0"),
1704 i8(
"1 5 3C 5 1 100 0 0 0 0 4D2 0 FFFFFFFF FFFFFFFF 0"),
1705 d4(
"1234 0 2468 1 4936 2 9872 3 19744 4"),
1706 d8(
"1234 0 1234000 1 1234000000 2 1234000000000 3 1234000000000000 4");
1708 BOOST_REQUIRE(index4 == i4);
1709 BOOST_REQUIRE(index8 == i8);
1711 vector<int> overlay;
1712 overlay.push_back(8);
1713 overlay.push_back(4);
1719 string data8 =
s_HexDumpFile(dbname8 +
".ntd", overlay, 10);
1724 BOOST_REQUIRE(data4 == d4);
1725 BOOST_REQUIRE(data8 == d8);
1728 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
1729 (!defined(NCBI_COMPILER_MIPSPRO)) )
1732 vector<string> files;
1744 meta_data[
"created-by"] =
"unit test";
1745 meta_data[
"purpose"] =
"none";
1746 meta_data[
"format"] =
"text";
1748 vector<string> column_data;
1749 column_data.push_back(
"Groucho Marx");
1750 column_data.push_back(
"Charlie Chaplain");
1751 column_data.push_back(
"");
1752 column_data.push_back(
"Abbott and Costello");
1753 column_data.push_back(
"Jackie Gleason");
1754 column_data.push_back(
"Jerry Seinfeld");
1755 column_data.back()[5] = (char) 0;
1757 string fname(
"user-column");
1758 string vname(
"user-column-db");
1759 string title(
"comedy");
1764 "User defined column");
1768 int col_id =
W.CreateUserColumn(title);
1770 ITERATE(TMeta, iter, meta_data) {
1771 CB.AddMetaData(iter->first, iter->second);
1772 W.AddColumnMetaData(col_id, iter->first, iter->second);
1779 ITERATE(vector<string>, iter, column_data) {
1780 W.AddSequence(*
R.GetBioseq(
i++));
1811 vector<int> algo_ids;
1821 ITERATE(vector<int>,
id, algo_ids) {
1822 BOOST_REQUIRE_EQUAL(
true,
registry.IsRegistered(*
id));
1832 BOOST_REQUIRE_EQUAL(id1+1, id2);
1838 size_t kMaxNumSupportedAlgorithmVariants)
1842 vector<int> algo_ids;
1843 for (
size_t i = 0;
i < kMaxNumSupportedAlgorithmVariants*2;
i++) {
1855 if (
i >= kMaxNumSupportedAlgorithmVariants) {
1856 BOOST_REQUIRE_THROW(algo_id =
registry.Add(masking_algo, options),
1859 algo_id =
registry.Add(masking_algo, options);
1861 if (algo_id != -1) {
1863 algo_ids.push_back(algo_id);
1868 BOOST_REQUIRE_EQUAL(kMaxNumSupportedAlgorithmVariants, algo_ids.size());
1869 for (
size_t i = 0;
i < algo_ids.size();
i++) {
1870 BOOST_REQUIRE_EQUAL((
int)(masking_algo +
i), algo_ids[
i]);
1874 for (
size_t i = 0;
i < kMaxNumSupportedAlgorithmVariants*2;
i++) {
1875 int algo_id = masking_algo +
i;
1876 if (
i >= kMaxNumSupportedAlgorithmVariants) {
1877 BOOST_REQUIRE_EQUAL(
false,
registry.IsRegistered(algo_id));
1879 BOOST_REQUIRE_EQUAL(
true,
registry.IsRegistered(algo_id));
1924 const int kNumSeqs = 3;
1930 for(
int i = 0;
i < kNumSeqs;
i++) {
1931 int L =
R.GetSeqLength(next_oid);
1935 L =
R.GetSeqLength(next_oid);
1938 oids.push_back(next_oid++);
1944 "-species Desmodus_rotundus");
1948 for(
int i = 0;
i < kNumSeqs;
i++) {
1950 W.AddSequence(*
R.GetBioseq(oid));
1956 ranges.back().algorithm_id = seg_id;
1958 for(
int j = 0; j < (
i+5); j++) {
1959 pair<TSeqPos, TSeqPos> rng;
1960 rng.first =
i * 13 + j * 7 + 2;
1961 rng.second = rng.first + 3 + (
i+j) % 11;
1963 ranges.back().offsets.push_back(rng);
1969 ranges.back().algorithm_id = repeat_id;
1971 for(
int j = 0; j < (
i+5); j++) {
1972 pair<TSeqPos, TSeqPos> rng;
1973 rng.first =
i * 10 + j * 5 + 2;
1974 rng.second = rng.first + 20;
1976 ranges.back().offsets.push_back(rng);
1987 W.SetMaskData(ranges, gis);
2012 int seg_repeated_id;
2013 BOOST_REQUIRE_THROW( seg_repeated_id =
2016 (void)seg_repeated_id;
2025 vector<int> algo_ids;
2028 const size_t kMaxNumSupportedAlgorithmVariants =
2030 for (
size_t i = 0;
i < kMaxNumSupportedAlgorithmVariants*2;
i++) {
2033 if (
i >= kMaxNumSupportedAlgorithmVariants) {
2034 BOOST_REQUIRE_THROW(
2035 algo_id =
W.RegisterMaskAlgorithm(masking_algorithm, options),
2038 algo_id =
W.RegisterMaskAlgorithm(masking_algorithm, options);
2040 if (algo_id != -1) {
2041 algo_ids.push_back(algo_id);
2046 BOOST_REQUIRE_EQUAL(kMaxNumSupportedAlgorithmVariants, algo_ids.size());
2047 for (
size_t i = 0;
i < algo_ids.size();
i++) {
2048 BOOST_REQUIRE_EQUAL((
int)(masking_algorithm +
i), (
int)algo_ids[
i]);
2061 "-species Aotus_vociferans");
2064 "-species Desmodus_rotundus");
2070 int L =
R.GetSeqLength(oid);
2071 W.AddSequence(*
R.GetBioseq(oid));
2078 pair<TSeqPos, TSeqPos> rng;
2082 ranges.back().offsets.push_back(rng);
2099 "-species Aotus_vociferans");
2102 "-species Desmodus_rotundus");
2108 int L =
R.GetSeqLength(oid);
2109 W.AddSequence(*
R.GetBioseq(oid));
2116 pair<TSeqPos, TSeqPos> rng;
2120 ranges.back().offsets.push_back(rng);
2168 bool retval =
false;
2170 line = line.erase(0, data.
GetKey().size()+1);
2180 ifstream alias_file(fname.c_str());
2181 if ( ! alias_file ) {
return; }
2182 while (getline(alias_file, line)) {
2204 const string kCurrentYear =
2216 CTmpFile tmp_aliasfile, tmp_gifile;
2217 const string kDbName(
"data/writedb_prot");
2218 const string kTitle(
"My alias file");
2219 string kAliasFileName(tmp_aliasfile.
GetFileName());
2222 ofstream gifile(tmp_gifile.
GetFileName().c_str());
2223 gifile <<
"129295" << endl;
2224 gifile <<
"555" << endl;
2225 gifile <<
"55" << endl;
2231 kAliasFileName +=
".pal";
2234 BOOST_REQUIRE(
CFile(kAliasFileName).Exists());
2242 BOOST_CHECK_EQUAL(
"1", alias_file_data.
m_NSeqs.
Get());
2244 BOOST_CHECK_EQUAL(
"232", alias_file_data.
m_Length.
Get());
2256 CTmpFile tmp_aliasfile, tmp_gifile;
2257 const string kDbName(
"data/writedb_prot");
2258 const string kTitle(
"My alias file");
2259 string kAliasFileName(tmp_aliasfile.
GetFileName());
2262 ofstream gifile(tmp_gifile.
GetFileName().c_str());
2263 gifile <<
"P01013.1" << endl;
2264 gifile <<
"X65215.1" << endl;
2270 kAliasFileName +=
".pal";
2273 BOOST_REQUIRE(
CFile(kAliasFileName).Exists());
2281 BOOST_CHECK_EQUAL(
"1", alias_file_data.
m_NSeqs.
Get());
2283 BOOST_CHECK_EQUAL(
"232", alias_file_data.
m_Length.
Get());
2296 const string kTitle(
"My alias file");
2298 const unsigned int kNumVols(9);
2299 const string kMyAliasDb(
"nr");
2300 const string kAliasFileName(kMyAliasDb +
".pal");
2306 BOOST_REQUIRE(
CFile(kAliasFileName).Exists());
2331 const string kDbName(
"nr");
2332 const string kTitle(
"My alias file");
2333 string kAliasFileName(tmp_aliasfile.
GetFileName());
2338 kAliasFileName +=
".pal";
2341 BOOST_REQUIRE(
CFile(kAliasFileName).Exists());
2366 const string kTitle(
"My alias file");
2367 const string kMyAliasDb(
"est");
2368 const string kAliasFileName(kMyAliasDb +
".nal");
2370 vector<string> dbs2aggregate;
2371 dbs2aggregate.push_back(
"est_human");
2372 dbs2aggregate.push_back(
"est_others");
2373 dbs2aggregate.push_back(
"est_mouse");
2378 BOOST_REQUIRE(
CFile(kAliasFileName).Exists());
2384 ITERATE(vector<string>, itr, dbs2aggregate) {
2400 const string kTitle(
"My alias file");
2402 const string kAliasFileName(kMyAliasDb +
".pal");
2405 if (
CFile(kAliasFileName).Exists()) {
2408 BOOST_REQUIRE(
CFile(kAliasFileName).Exists() ==
false);
2415 BOOST_REQUIRE(
CFile(kAliasFileName).Exists() ==
false);
2422 const string kTitle(
"My alias file");
2424 const string kAliasFileName(kMyAliasDb +
".pal");
2427 if (
CFile(kAliasFileName).Exists()) {
2430 BOOST_REQUIRE(
CFile(kAliasFileName).Exists() ==
false);
2432 vector<string> dbs2aggregate;
2433 dbs2aggregate.push_back(
"nr");
2434 dbs2aggregate.push_back(
"pataa");
2435 dbs2aggregate.push_back(
"env_nr");
2436 dbs2aggregate.push_back(
"dummy!");
2437 dbs2aggregate.push_back(
"ecoli");
2445 BOOST_REQUIRE(
CFile(kAliasFileName).Exists() ==
false);
2451 const string kTitle(
"My alias file");
2452 const string kBlastDb(
"ecoli");
2453 const string kAliasFileName(kBlastDb +
".pal");
2456 if (
CFile(kAliasFileName).Exists()) {
2459 BOOST_REQUIRE(
CFile(kAliasFileName).Exists() ==
false);
2466 BOOST_REQUIRE(
CFile(kAliasFileName).Exists() ==
false);
2471 CTmpFile tmp_aliasfile, tmp_gifile;
2472 const string kDbName(
"nr");
2473 const string kTitle(
"My alias file");
2474 string kAliasFileName(tmp_aliasfile.
GetFileName());
2477 ofstream gifile(tmp_gifile.
GetFileName().c_str());
2479 gifile <<
"556" << endl;
2480 gifile <<
"555" << endl;
2484 BOOST_REQUIRE_THROW(
2489 kAliasFileName +=
".pal";
2492 BOOST_REQUIRE(!
CFile(kAliasFileName).Exists());
2501 BOOST_REQUIRE_THROW(
2505 BOOST_REQUIRE(bd.
Empty());
2517 const string kOutput(
"/dev/null");
2519 BOOST_REQUIRE_THROW(
2523 BOOST_REQUIRE(bd.
Empty());
2525 BOOST_REQUIRE(f1.Exists() ==
false);
2526 BOOST_REQUIRE(f2.
Exists() ==
false);
2533 const string kDbName(
"foo");
2539 while (!reader.
AtEOF()) {
2542 BOOST_REQUIRE(se->
IsSeq());
2554 for (
int oid=0; oid<total; oid++)
2556 vector<TTaxId> taxids;
2558 BOOST_REQUIRE(taxids.size() == 1);
2559 BOOST_REQUIRE_EQUAL(
kTaxId, taxids.front());
2568 const string kDbName(
"foo");
2576 while (!reader.
AtEOF()) {
2579 BOOST_REQUIRE(se->
IsSeq());
2591 for (
int oid=0; oid<total; oid++)
2593 vector<TTaxId> taxids;
2595 BOOST_REQUIRE(taxids.size() == 1);
2596 BOOST_REQUIRE_EQUAL(
kTaxId, taxids.front());
2605 const string kDbName(
"foo");
2612 while (!reader.
AtEOF()) {
2615 BOOST_REQUIRE(se->
IsSeq());
2626 for (
int oid=0; oid<total; oid++)
2628 vector<TTaxId> taxids;
2630 BOOST_REQUIRE(taxids.size() == 1);
2631 BOOST_REQUIRE_EQUAL(
kTaxId, taxids.front());
2640 const string kOutput(
"a/b/c/d");
2655 vector<string> ids(1,
"129295");
2659 BOOST_REQUIRE(f1.
Exists() ==
true);
2662 BOOST_REQUIRE(f1.
Exists() ==
false);
2685 vector<string> ids(1,
"129295");
2689 BOOST_REQUIRE(f1.
Exists() ==
true);
2692 BOOST_REQUIRE(f1.
Exists() ==
false);
2700 const string title(
"fuwafuwa");
2720 ids.push_back(
"166225656");
2721 ids.push_back(
"259646160");
2723 bool success = bd->
Build(ids, &fasta_file);
2725 BOOST_REQUIRE(success);
2728 BOOST_REQUIRE(f1.
Exists() ==
true);
2731 BOOST_REQUIRE(f1.
Exists() ==
false);
2739 const string title(
"fuwafuwa");
2760 bool success = bd->
Build(ids, &fasta_file);
2762 BOOST_REQUIRE(success);
2765 BOOST_REQUIRE(f1.
Exists() ==
true);
2768 BOOST_REQUIRE(f1.
Exists() ==
false);
2821 unique_ptr<CObjectIStream> ois
2828 BOOST_REQUIRE(status ==
true);
2831 BOOST_REQUIRE(f1.
Exists() ==
true);
2841 const Int8 big_gi = 0xC0000000;
2855 const int nrecs = 10;
2859 for (
Uint4 i = 0;
i < nrecs; ++
i) {
2864 tidlist.push_back(seqid);
2865 wdb.AddIds(
i, tidlist);
2867 BOOST_FAIL(
"CSeq_id constructor threw exception");
2880 for (
int i = 0;
i < nrecs; ++
i) {
2885 rdb->IdToOid(
GI_TO(
Int8, seqid->GetGi()), oid);
2886 BOOST_REQUIRE(oid ==
i);
2888 BOOST_FAIL(
"CSeq_id constructor threw exception");
2900 string sequence =
"MASTQNIVEEVQKMLDTYDTNKDGEITKAEAVEYFKGKKAFNPER";
2902 std::unordered_map<string, CSeq_id::E_Choice> fasta_ids = {
2922 for (
auto it: fasta_ids) {
2923 ostr <<
">" << it.first << endl << sequence << endl;
2929 BOOST_REQUIRE(istr);
2930 string dbname =
"data/bare_id_test_prot";
2931 string title =
"Temporary unit test db";
2950 for (
auto it: fasta_ids) {
2951 list< CRef<CSeq_id> > ids = seqdb.
GetSeqIDs(index++);
2952 BOOST_REQUIRE_MESSAGE(ids.front()->Which() == it.second,
2953 (
string)
"Sequence id type for " +
2959 BOOST_REQUIRE_EQUAL(index, (
int)fasta_ids.size());
2968 string sequence =
"MASTQNIVEEVQKMLDTYDTNKDGEITKAEAVEYFKGKKAFNPER";
2970 std::unordered_map<string, CSeq_id::E_Choice> fasta_ids = {
2978 auto it = fasta_ids.begin();
2979 ostr <<
">" << it->first <<
" Some defline";
2981 for (; it != fasta_ids.end(); ++it) {
2982 ostr <<
'\01' << it->first <<
" Some defline";
2984 ostr << endl << sequence << endl;
2989 BOOST_REQUIRE(istr);
2990 string dbname =
"data/bare_id_test_prot2";
2991 string title =
"Temporary unit test db";
3008 list< CRef<CSeq_id> > ids = seqdb.
GetSeqIDs(0);
3009 BOOST_REQUIRE_EQUAL(ids.size(), fasta_ids.size());
3011 auto seqdb_id = ids.begin();
3012 for (
auto it: fasta_ids) {
3013 BOOST_REQUIRE_MESSAGE((*seqdb_id)->Which() == it.second,
3014 (
string)
"Sequence id type for " +
3021 BOOST_REQUIRE(seqdb_id == ids.end());
3030 string sequence =
"AACTAGTATTAGAGGCACTGCCTGCCCAGTGACAATCGTTAAACGGCCG";
3032 std::unordered_map<string, CSeq_id::E_Choice> fasta_ids = {
3044 for (
auto it: fasta_ids) {
3045 ostr <<
">" << it.first << endl << sequence << endl;
3051 BOOST_REQUIRE(istr);
3052 string dbname =
"data/bare_id_test_nucl";
3053 string title =
"Temporary unit test db";
3072 for (
auto it: fasta_ids) {
3073 list< CRef<CSeq_id> > ids = seqdb.
GetSeqIDs(index++);
3074 BOOST_REQUIRE_MESSAGE(ids.front()->Which() == it.second,
3075 (
string)
"Sequence id type for " +
3081 BOOST_REQUIRE_EQUAL(index, (
int)fasta_ids.size());
3090 string sequence =
"MASTQNIVEEVQKMLDTYDTNKDGEITKAEAVEYFKGKKAFNPER";
3092 std::unordered_map<string, CSeq_id::E_Choice> fasta_ids = {
3113 for (
auto it: fasta_ids) {
3114 ostr <<
">" << it.first << endl << sequence << endl;
3120 BOOST_REQUIRE(istr);
3121 string dbname =
"data/bare_id_test_prot_legacy";
3122 string title =
"Temporary unit test db";
3141 for (
auto it: fasta_ids) {
3142 list< CRef<CSeq_id> > ids = seqdb.
GetSeqIDs(index++);
3143 BOOST_REQUIRE_MESSAGE(ids.front()->Which() == it.second,
3144 (
string)
"Sequence id type for " +
3150 BOOST_REQUIRE_EQUAL(index, (
int)fasta_ids.size());
3159 string sequence =
"MASTQNIVEEVQKMLDTYDTNKDGEITKAEAVEYFKGKKAFNPER";
3161 std::unordered_map<string, CSeq_id::E_Choice> fasta_ids = {
3169 auto it = fasta_ids.begin();
3170 ostr <<
">" << it->first <<
" Some defline";
3172 for (; it != fasta_ids.end(); ++it) {
3173 ostr <<
'\01' << it->first <<
" Some defline";
3175 ostr << endl << sequence << endl;
3180 BOOST_REQUIRE(istr);
3181 string dbname =
"data/bare_id_test_legacy_prot2";
3182 string title =
"Temporary unit test db";
3199 list< CRef<CSeq_id> > ids = seqdb.
GetSeqIDs(0);
3200 BOOST_REQUIRE_EQUAL(ids.size(), fasta_ids.size());
3202 auto seqdb_id = ids.begin();
3203 for (
auto it: fasta_ids) {
3204 BOOST_REQUIRE_MESSAGE((*seqdb_id)->Which() == it.second,
3205 (
string)
"Sequence id type for " +
3212 BOOST_REQUIRE(seqdb_id == ids.end());
3221 string sequence =
"AACTAGTATTAGAGGCACTGCCTGCCCAGTGACAATCGTTAAACGGCCG";
3223 std::unordered_map<string, CSeq_id::E_Choice> fasta_ids = {
3231 for (
auto it: fasta_ids) {
3232 ostr <<
">" << it.first << endl << sequence << endl;
3238 BOOST_REQUIRE(istr);
3239 string dbname =
"data/bare_id_test_nucl_legacy";
3240 string title =
"Temporary unit test db";
3259 for (
auto it: fasta_ids) {
3260 list< CRef<CSeq_id> > ids = seqdb.
GetSeqIDs(index++);
3261 BOOST_REQUIRE_EQUAL(ids.front()->Which(), it.second);
3263 BOOST_REQUIRE_EQUAL(index, (
int)fasta_ids.size());
3269 vector<string> idlist;
3273 if ( !line.empty() ) {
3274 idlist.push_back(line);
3279 const string kTitle(
"Unit Test Seqidlist");
3280 const size_t num_of_ids = 12;
3284 vector<CSeqDBGiList::SSiOid> read_idlist;
3288 BOOST_REQUIRE_EQUAL(num_of_ids, list_info.
num_ids);
3289 BOOST_REQUIRE_EQUAL(num_of_ids, read_idlist.size());
3292 BOOST_REQUIRE_EQUAL(read_idlist[2].
si,
"D88758.1");
3293 BOOST_REQUIRE_EQUAL(read_idlist[7].
si,
"SRA:SRR066117.18823.2");
3294 BOOST_REQUIRE_EQUAL(read_idlist[11].
si,
"u00001.1");
3298 const string kTitle(
"Unit Test Seqidlist w DB");
3299 const size_t num_of_ids = 9;
3303 vector<CSeqDBGiList::SSiOid> read_idlist;
3308 BOOST_REQUIRE_EQUAL(num_of_ids, list_info.
num_ids);
3309 BOOST_REQUIRE_EQUAL(num_of_ids, read_idlist.size());
3313 BOOST_REQUIRE_EQUAL(read_idlist[2].
si,
"D88758.1");
3314 BOOST_REQUIRE_EQUAL(read_idlist[6].
si,
"U00001.1");
3320 const string kTitle(
"Unit Test Seqidlist Duplicate");
3321 const size_t num_of_ids = 12;
3325 vector<string> dup_list;
3326 dup_list.insert(dup_list.begin(), idlist.begin(), idlist.end());
3327 dup_list.insert(dup_list.end(), idlist.begin(), idlist.end());
3328 vector<CSeqDBGiList::SSiOid> read_idlist;
3332 BOOST_REQUIRE_EQUAL(num_of_ids, list_info.
num_ids);
3333 BOOST_REQUIRE_EQUAL(num_of_ids, read_idlist.size());
3336 BOOST_REQUIRE_EQUAL(read_idlist[2].
si,
"D88758.1");
3337 BOOST_REQUIRE_EQUAL(read_idlist[7].
si,
"SRA:SRR066117.18823.2");
3338 BOOST_REQUIRE_EQUAL(read_idlist[11].
si,
"u00001.1");
3344 static const int num_ids = 7;
3345 pair <string, CSeq_id::E_Choice> fasta_ids[num_ids] = {
3355 string dbname =
"data/multiseqids";
3356 string title =
"Temporary unit test db";
3375 list< CRef<CSeq_id> > ids = seqdb.
GetSeqIDs(0);
3376 BOOST_REQUIRE_EQUAL(ids.size(), num_ids);
3378 auto seqdb_id = ids.begin();
3379 for (
auto it: fasta_ids) {
3380 BOOST_REQUIRE_EQUAL((*seqdb_id)->Which(),it.second);
3381 BOOST_REQUIRE_EQUAL((*seqdb_id)->GetSeqIdString(
true),it.first);
3389 string dbname =
"data/pdbs_v5";
3390 string title =
"Temporary unit test db";
3401 vector<string> db_ids;
3402 vector<int> db_oids;
3406 list<CRef<CSeq_id> > seq_ids = seqdb.
GetSeqIDs(oid);
3413 vector<string> ref_ids;
3415 while (getline(ref_ids_file, line)) {
3416 ref_ids.push_back(line);
3418 vector<blastdb::TOid> oids;
3421 ITERATE(vector<blastdb::TOid>, itr, oids){
3441 string dbname =
"data/asn1_v5";
3442 string title =
"Temporary unit test db";
3456 vector<string> db_ids;
3457 vector<int> db_oids;
3461 list<CRef<CSeq_id> > seq_ids = seqdb.
GetSeqIDs(oid);
3468 BOOST_REQUIRE_EQUAL(oid, num_oids);
3470 vector<string> ref_ids;
3472 while (getline(ref_ids_file, line)) {
3473 ref_ids.push_back(line);
3475 vector<blastdb::TOid> oids;
3478 for(
unsigned int i=0;
i <oids.size();
i++){
3479 BOOST_REQUIRE_EQUAL(oids[
i],
i);
3513 const int kNumOfDeflines=4;
3514 string dbname=
"limit_df";
3519 char seq[9]={1,2,3,4,1,2,3,4,
'\0'};
3520 for(
unsigned int i=0;
i < kNumOfDeflines;
i++){
3530 static const int num_taxids[kNumOfDeflines] = {14, 107, 1, 45};
3531 static const int num_deflines[kNumOfDeflines] = {11, 107, 6, 43};
3533 for(
unsigned int i=0;
i < kNumOfDeflines;
i++){
3537 BOOST_REQUIRE_EQUAL(num_taxids[
i],
t.size());
3538 BOOST_REQUIRE_EQUAL(num_deflines[
i], new_set->
Set().size());
Code to build a database given various sources of sequence data.
Binary GI or TI List Builder.
void Write(const string &fname)
Write the list to a file.
void AppendId(const Int8 &id)
Add an identifier to the list.
`Blob' Class for SeqDB (and WriteDB).
@ eNone
Write the string as-is.
int WriteString(CTempString str, EStringFormat fmt)
Write string data to the blob.
static int GetSeqidlist(CMemoryFile &file, vector< CSeqDBGiList::SSiOid > &idlist, SBlastSeqIdListInfo &list_info)
Get seqidlist from dbv5 seqidlist file.
Build BlastDB format databases from various data sources.
bool AddSequences(IBioseqSource &src, bool add_pig=false)
Add sequences from an IBioseqSource object.
bool AddFasta(CNcbiIstream &fasta_file)
Add sequences from a file containing FASTA data.
void SetUseRemote(bool use_remote)
Specify whether to use remote fetching for locally absent IDs.
void SetSourceDb(const string &src_db_name)
Specify source database(s) via the database name(s).
void SetTaxids(CTaxIdSet &taxids)
Specify a mapping of sequence ids to taxonomic ids.
bool Build(const vector< string > &ids, CNcbiIstream *fasta_file)
Build the database.
void StartBuild()
Start building a new database.
bool EndBuild(bool erase=false)
Finish building a new database.
bool AddIds(const vector< string > &ids)
Add the specified sequences from the source database.
Base class for reading FASTA sequences.
Registry class for the sequence masking/filtering algorithms used to create masks to be added to a CW...
This represents a set of masks for a given sequence.
static CNcbiApplication * Instance(void)
Singleton method.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
void GetRawSeqAndAmbig(int oid, const char **buffer, int *seq_length, int *ambig_length) const
Raw Sequence and Ambiguity Data.
bool OidToPig(int oid, int &pig) const
Translate an OID to a PIG.
void GetGis(int oid, vector< TGi > &gis, bool append=false) const
Gets a list of GIs for an OID.
int GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
Uint8 GetVolumeLength() const
Returns the sum of the lengths of all volumes.
bool OidToGi(int oid, TGi &gi) const
Translate an OID to a GI.
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Gets a list of sequence identifiers.
int GetSeqLength(int oid) const
Returns the sequence length in base pairs or residues.
ESeqType GetSequenceType() const
Returns the type of database opened - protein or nucleotide.
bool SeqidToOid(const CSeq_id &seqid, int &oid) const
Translate a Seq-id to any matching OID.
CRef< CBioseq > GetBioseq(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence.
void GetTaxIDs(int oid, map< TGi, TTaxId > &gi_to_taxid, bool persist=false) const
Get taxid for an OID.
void GetTotals(ESummaryType sumtype, int *oid_count, Uint8 *total_length, bool use_approx=true) const
Returns the sum of the sequence lengths.
string GetTitle() const
Returns the database title.
int GetNumSeqs() const
Returns the number of sequences available.
void GetAllTaxIDs(int oid, set< TTaxId > &taxids) const
Get all tax ids for an oid.
void AccessionToOids(const string &acc, vector< int > &oids) const
Translate an Accession to a list of OIDs.
bool CheckOrFindOID(int &next_oid) const
Find an included OID, incrementing next_oid if necessary.
@ eUnfilteredAll
Sum of all sequences, ignoring GI and OID lists and alias files.
CRef< CBioseq > GiToBioseq(TGi gi) const
Get a CBioseq for a given GI.
CRef< CBlast_def_line_set > GetHdr(int oid) const
Get the ASN.1 header for the sequence.
void AccessionsToOids(const vector< string > &accs, vector< blastdb::TOid > &oids) const
bool GiToOid(TGi gi, int &oid) const
Translate a GI to an OID.
CSeqEntryGetSource(CRef< CSeq_entry > seq_entry)
CTypeIterator< CBioseq > m_Bioseq
CRef< CSeq_entry > m_entry
CRef< CObjectManager > m_objmgr
virtual CConstRef< CBioseq > GetNext()
Get a Bioseq object if there are any more to get.
Simple implementation of ILineReader for i(o)streams.
void FixTaxId(CRef< objects::CBlast_def_line_set > deflines)
Check that each defline has the specified taxid; if not, replace the defline and set the taxid.
void SetMappingFromFile(CNcbiIstream &f)
Builder for BlastDb format column files.
void ListFiles(vector< string > &files) const
List Filenames.
vector< CRef< CSeq_id > > TIdList
Type used for lists of sequence identifiers.
@ eProtein
Protein database.
@ eNucleotide
Nucleotide database.
void ListFiles(vector< string > &files)
List Filenames.
void SetPig(int pig)
Set the PIG to be used for the sequence.
void AddSequence(const CBioseq &bs)
Add a sequence as a CBioseq.
void SetMaxVolumeLetters(Uint8 letters)
Set maximum letters for output volumes.
EIndexType
Whether and what kind of indices to build.
@ eFullIndex
Use several forms of each Seq-id in the string index.
@ eAddHash
Add an index from sequence hash to OID.
@ eDefault
Like eFullIndex but also build a numeric Trace ID index.
@ eNoIndex
Build a database without any indices.
@ eFullWithTrace
Like eFullIndex but also build a numeric Trace ID index.
void ListVolumes(vector< string > &vols)
List Volumes.
static CRef< CBlast_def_line_set > ExtractBioseqDeflines(const CBioseq &bs, bool parse_ids=true, bool long_ids=false, bool scan_bioseq_4_cfastareader_usrobj=false)
Extract Deflines From Bioseq.
void SetDeflines(const CBlast_def_line_set &deflines)
Set the deflines to be used for the sequence.
void Close()
Close the Database.
Interface to a source of Bioseq objects.
iterator_bool insert(const value_type &val)
static const char si[8][64]
SStaticPair< const char *, const char * > TPair
std::ofstream out("events_result.xml")
main entry point for tests
Operators to edit gaps in sequences.
#define GI_FROM(T, value)
CNcbiEnvironment & SetEnvironment(void)
Get a non-const copy of the application's cached environment.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
SStrictId_Tax::TId TTaxId
Taxon id type.
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
@ eDiag_Fatal
Fatal error – guarantees exit(or abort)
CNcbiIstream & AsInputFile(EIfExists if_exists, IOS_BASE::openmode mode=IOS_BASE::in)
Create I/O stream on the base of our file.
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Remove a directory entry.
CNcbiOstream & AsOutputFile(EIfExists if_exists, IOS_BASE::openmode mode=IOS_BASE::out)
static void Add(const string &path)
Add the name of a dir entry; it will be deleted on (normal) exit.
Int8 GetFileSize(void) const
Get length of the mapped file.
const string & GetFileName(void) const
Return used file name (generated or given in the constructor).
virtual bool Exists(void) const
Check existence of file.
@ eIfExists_Throw
AsInputFile/AsOutputFile may be called only once; each subsequent call throws CFileException...
@ eIfExists_Reset
Delete previous stream and return reference to new object.
@ eOnlyEmpty
Directory entry only, no other files or subdirectories.
#define MSerial_AsnText
I/O stream manipulators.
@ eSerial_AsnText
ASN.1 text.
virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener=nullptr)
Read a single effective sequence, which may turn out to be a segmented set.
long TFlags
binary OR of EFlags
EFlags
Note on fAllSeqIds: some databases (notably nr) have merged identical sequences, joining their deflin...
bool AtEOF(void) const
Indicates (negatively) whether there is any more input.
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
@ fAssumeNuc
Assume nucs unless accns indicate otherwise.
@ fAssumeProt
Assume prots unless accns indicate otherwise.
TGi FindGi(const container &ids)
Return gi from id list if exists, return 0 otherwise.
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
position_type GetToOpen(void) const
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static string Int8ToString(Int8 value, TNumToStringFlags flags=0, int base=10)
Convert Int8 to string.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static string UInt8ToString(Uint8 value, TNumToStringFlags flags=0, int base=10)
Convert UInt8 to string.
@ eCurrent
Use current time. See also CCurrentTime.
EBlast_filter_program
This defines the possible sequence filtering algorithms to be used in a BLAST database.
Tdata & Set(void)
Assign a value to data member.
const Tdata & Get(void) const
Get the member data.
@ eBlast_filter_program_dust
@ eBlast_filter_program_max
@ eBlast_filter_program_repeat
@ eBlast_filter_program_seg
@ eBlast_filter_program_windowmasker
@ eBlast_filter_program_other
TFrom GetFrom(void) const
Get the From member data.
TGi GetGi(void) const
Get the variant data.
bool IsGi(void) const
Check if variant Gi is selected.
@ e_Other
for historical reasons, 'other' = 'refseq'
@ e_General
for other databases
@ e_Gi
GenInfo Integrated Database.
bool IsSeq(void) const
Check if variant Seq is selected.
TSeq & SetSeq(void)
Select the variant.
const TId & GetId(void) const
Get the Id member data.
void SetInst(TInst &value)
Assign a value to Inst data member.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
unsigned int
A callback function used to compare two keys in a database.
const std::string kOutput
Command line flag to specify the output.
double f(double x_, const double &y_)
static const char * prefix[]
static pcre_uint8 * buffer
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
bool DeleteBlastDb(const string &dbpath, CSeqDB::ESeqType seq_type)
Deletes all files associated with a BLAST database.
const blastdb::TOid kSeqDBEntryNotFound
Defines the 'expert' version of the CSeqDB interfaces.
ISAM index database access object.
int WriteBlastSeqidlistFile(const vector< string > &idlist, CNcbiOstream &os, const string &title, const CSeqDB *seqdb=NULL)
static const char * str(char *buf, int n)
Encapsulates the alias' file key-value pair.
Value(const string &name)
void Set(const string &v)
Auxiliary class to parse the contents of an alias file.
void x_Parse(const string &fname)
Parse the alias file's contents.
bool x_HasKeyword(string line, Value &data)
SAliasFileData(const string &fname)
Structure describing filtered regions created using a particular sequence filtering algorithm.
Blast DB v5 seqid list info.
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
Utility stuff for more convenient using of Boost.Test library.
static const string kTitle
CTraceGlyph inline method implementation.
static bool ambig(char c)
Defines BLAST database construction classes.
void CWriteDB_CreateAliasFile(const string &file_name, const string &db_name, CWriteDB::ESeqType seq_type, const string &gi_file_name, const string &title=string(), EAliasFileFilterType alias_type=eGiList)
Writes an alias file that restricts a database with a gi list.
@ eSeqIdList
Filter a BLAST database via a Seq-id list.
Code for database ISAM construction.
string s_HexDumpFile(const string &fname, const vector< int > &layout, int base)
void RegisterTooManyVariantsOfSameMaskingAlgorithm(EBlast_filter_program masking_algo, size_t kMaxNumSupportedAlgorithmVariants)
CRef< CSeq_id > s_GiToSeqId(TGi gi)
static void s_DupIdsBioseq(CWriteDB &w, CSeqDB &s, const TIdList &ids, int cutpoint)
vector< CRef< CSeq_id > > TIdList
void s_Unstringify(const string &s, ASNOBJ &a)
CRef< CBioseq > s_FastaStringToBioseq(const string &str, bool protein)
void s_CheckFiles(const vector< string > &files, bool need_hash=false)
void s_Stringify(const ASNOBJ &a, string &s)
static void s_DupIdsRaw(CWriteDB &w, CSeqDBExpert &seqdb, const TIdList &ids)
void s_WrapUpColumn(CWriteDB_ColumnBuilder &cb)
static CRef< CScope > s_GetScope()
void s_WrapUpDb(CWriteDB &db)
BOOST_AUTO_TEST_CASE(NuclBioseqDupI)
string s_ExtractLast(const string &data, const string &delim)
#define BOOST_REQUIRE_CUTPOINT(X)
void s_RemoveFile(const string &f)
void s_TestDatabase(CSeqDBExpert &src, const string &name, const string &title)
static void s_DupSequencesTest(const TIdList &ids, bool is_protein, bool raw_data, const string &src_name, const string &dst_name, const string &title, int cutpoint=99)
string s_HexDumpText(const string &raw, const vector< int > &layout, int base)
CRef< ASNOBJ > s_Duplicate(const ASNOBJ &a)
void s_RemoveFiles(const vector< string > &files)
void s_CheckSorted(const string &fname)
CRef< CSeq_id > s_AccToSeqId(const char *acc)
void s_TestReadPDBAsn1(CNcbiIfstream &istr, CNcbiIfstream &ref_ids_file, int num_oids)
void s_WrapUpFiles(const vector< string > &files)
static void s_BuildIds(TIdList &ids, TGi *gis)
void s_FetchRawData(CSeqDBExpert &seqdb, int oid, string &sequence, string &ambig)
static void s_NuclBioseqDupSwitch(int cutpoint)
void s_CompareBioseqs(CBioseq &src, CBioseq &dst)