75 "Invalid sequence type specified.");
122 if (prot_nucl ==
'-') {
164 "Database name is required.");
186 "Database name is required.");
189 const bool kUseAtlasLock =
true;
209 "Database name is required.");
235 "Database name is required.");
292 "Database name is required.");
306 const bool kUseAtlasLock =
true;
329 "Database name is required.");
332 const bool kUseAtlasLock =
true;
353 "Database name is required.");
356 const bool kUseAtlasLock =
true;
380 "Database name is required.");
383 const bool kUseAtlasLock =
true;
438 "Internal sequence type is not valid.");
447 TmpMap gi_to_taxid_tmp;
452 ITERATE ( TmpMap, it, gi_to_taxid_tmp ) {
453 gi_to_taxid[it->first] = it->second;
459 vector<TTaxId> & taxids,
481 TmpMap gi_to_taxid_set_tmp;
484 gi_to_taxid_set.clear();
486 ITERATE ( TmpMap, it, gi_to_taxid_set_tmp ) {
487 gi_to_taxid_set[it->first] = it->second;
494 vector<TTaxId>& taxids,
574 int end_offset)
const
602 "Invalid allocation strategy specified.");
623 "Invalid allocation strategy specified.");
646 string fmt =
"b d, Y H:m P";
649 ITERATE(vector<string>, vol, vols) {
651 ifstream
f(fn.c_str(),
ios::in|ios::binary);
654 f.seekg(8, ios::beg);
661 CTime d(
string(date), fmt);
662 if (retv.
IsEmpty() || d > retv) {
913 if (! oids.empty()) {
1030 if (! oids.empty()) {
1042 vector<string> & paths,
1043 vector<string> * alias_paths,
1078 list< CRef<CSeq_id> > seqids =
GetSeqIDs(oid);
1085 if ((**seqid).IsGi()) {
1086 gis.push_back((**seqid).GetGi());
1112 Uint8 * total_length,
1113 bool use_approx)
const
1153 if (
range.NotEmpty()) {
1161 raw.assign(
buffer, length);
1175 if (code_from == coding) {
1189 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
1190 (!defined(NCBI_COMPILER_MIPSPRO)) )
1215 const string & volname)
1244 vector<int> invalid_algo_ids, available_algo_ids;
1246 invalid_algo_ids.reserve(algorithm_ids.size());
1247 if (available_algo_ids.empty()) {
1248 copy(algorithm_ids.begin(), algorithm_ids.end(),
1249 back_inserter(invalid_algo_ids));
1250 return invalid_algo_ids;
1253 ITERATE(vector<int>, itr, algorithm_ids) {
1254 vector<int>::const_iterator pos = find(available_algo_ids.begin(),
1255 available_algo_ids.end(), *itr);
1256 if (pos == available_algo_ids.end()) {
1257 invalid_algo_ids.push_back(*itr);
1260 return invalid_algo_ids;
1265 string & program_name,
1278 string & program_name,
1330 string retval(
"Unknown");
1332 case eProtein: retval.assign(
"Protein");
break;
1333 case eNucleotide: retval.assign(
"Nucleotide");
break;
1354 const string& extn = de.
GetPath().substr(de.
GetPath().length() - 3, 1);
1360 oss <<
"\"" <<
value.m_BlastDbName <<
"\"";
1362 value.m_MoleculeType =
1374 string alias = retval +
".nal", index = retval +
".nin";
1377 string alias = retval +
".pal", index = retval +
".pin";
1400 for (
size_t i = 0;
i < finder.
m_DBs.size();
i++) {
1402 if (path[path.size()-1] !=
'l') {
1410 while (getline(
in, line)) {
1412 vector<string> tokens;
1414 for (
size_t j = 1; j < tokens.size(); j++) {
1415 dbs2remove.
insert(tokens[j]);
1424 finder.
m_DBs.end());
1428 vector<SSeqDBInitInfo>
1430 bool include_alias_files ,
1431 bool remove_redundant_dbs )
1434 vector<string> fmasks, dmasks;
1438 if (dbtype !=
"nucl") {
1439 fmasks.push_back(
"*.pin");
1440 if (include_alias_files) {
1441 fmasks.push_back(
"*.pal");
1444 if (dbtype !=
"prot") {
1445 fmasks.push_back(
"*.nin");
1446 if (include_alias_files) {
1447 fmasks.push_back(
"*.nal");
1450 dmasks.push_back(
"*");
1457 if (remove_redundant_dbs) {
1461 return dbfinder.
m_DBs;
1466 vector<string> paths;
1472 vector<string> extn;
1475 string blastdb_dirname;
1477 ITERATE(vector<string>, path, paths) {
1478 ITERATE(vector<string>, ext, extn) {
1480 if (
file.Exists()) {
1485 blastdb_dirname =
file.GetDir();
1495 if (paths.size() > 1) {
1496 _ASSERT( !blastdb_dirname.empty() );
1498 vector<string> dblist;
1500 if (dblist.size() > 1) {
1502 oss <<
"Cannot compute disk usage for multiple BLASTDBs (i.e.: '"
1503 <<
dbname <<
"') at once. Please try again using one BLASTDB "
1508 for (
const auto& ext: extn) {
1510 if (
file.Exists()) {
1537 _ASSERT(
"Unknown molecule for BLAST DB" != 0);
1544 int num_files_removed = 0;
1545 vector<string> db_files, alias_files;
1548 vector<string> extn;
1550 vector<string> lmdb_extn;
1554 oss << dbpath <<
"." << *
lmdb;
1556 if (
CFile(fname).Remove()) {
1558 num_files_removed++;
1561 unsigned int index = 0;
1564 while (
CFile(vfname).Remove()) {
1575 ITERATE(vector<string>,
f, db_files) {
1576 ITERATE(vector<string>, e, extn) {
1578 oss << *
f <<
"." << *e;
1580 if (
CFile(fname).Remove()) {
1582 num_files_removed++;
1586 ITERATE(vector<string>,
f, alias_files) {
1589 num_files_removed++;
1592 return static_cast<bool>(num_files_removed != 0);
1612 vector<string> paths;
1613 vector<string> alias;
1621 ITERATE(vector<string>,
a, alias) {
1624 string fn = user_path + af.
GetName();
1625 db_files.push_back(fn);
1635 vector<string> extn;
1639 const string kExtnMol(1, is_protein ?
'p' :
'n');
1640 const string index_ext = kExtnMol +
"in";
1641 const string seq_ext = kExtnMol +
"sq";
1643 ITERATE(vector<string>, path, paths) {
1644 ITERATE(vector<string>, ext, extn) {
1646 if (
file.Exists()) {
1647 string f = user_path +
file.GetName();
1648 db_files.push_back(
f);
1651 disk_bytes += length;
1652 if((*ext == index_ext) || (*ext == seq_ext)) {
1653 cached_bytes += length;
1664 vector<string> lmdb_list;
1667 ITERATE(vector<string>,
l, lmdb_list) {
1669 if (
file.Exists()) {
1670 string f = user_path +
file.GetName();
1671 db_files.push_back(
f);
1674 disk_bytes += length;
1678 static const char * v5_exts[]={
"os",
"ot",
"tf",
"to",
NULL};
1679 for(
const char ** p=v5_exts; *p !=
NULL; p++) {
1680 CFile v(
file.GetDir() +
file.GetBase() +
"." + kExtnMol + (*p));
1682 string vf = user_path + v.
GetName();
1683 db_files.push_back(vf);
1702 sort(db_files.begin(), db_files.end());
1709 Uint8 total_length = 0;
1712 vector<string> dblist;
1716 if (off != string::npos ) {
1717 (*itr).erase(0, off+1);
1722 m->SetDbname(dbnames);
1727 m->SetNumber_of_letters(total_length);
1728 m->SetNumber_of_sequences(num_seqs);
1731 string fmt =
"b d, Y H:m P";
1733 m->SetLast_updated(date.
AsString(timeFmt));
1735 Int8 disk_bytes(0), cached_bytes(0);
1737 m->SetBytes_total(disk_bytes);
1738 m->SetBytes_to_cache(cached_bytes);
1745 if((tax_ids.
size() > 1) || ((tax_ids.
size() == 1) && (0 != *tax_ids.
begin()))){
1746 m->SetNumber_of_taxids(
static_cast<int>(tax_ids.
size()));
void remove_if(Container &c, Predicate *__pred)
`Blob' Class for SeqDB (and WriteDB).
Functor class for FindFilesInDir.
void operator()(CDirEntry &de)
vector< SSeqDBInitInfo > m_DBs
string GetFileName(size_t idx)
Auxiliary function to get the original file name found by this object.
void SetFrame(const string &frame)
void Log(const string &name, const char *value, CDebugDumpFormatter::EValueType type=CDebugDumpFormatter::eValue, const string &comment=kEmptyStr)
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
static const string GenerateSearchPath()
Generate search path.
SeqDB ID list for performing boolean set operations.
bool Blank() const
Check if an ID list is blank.
bool IsPositive()
Checks whether a positive GI list was produced.
CRef< CSeqDBNegativeList > GetNegativeList()
Retrieve a negative GI list.
CRef< CSeqDBGiList > GetPositiveList()
Retrieve a positive GI list.
void GetTaxIDs(int oid, map< TGi, TTaxId > &gi_to_taxid, bool persist)
Get gi to taxid map for an OID.
void GetDBTaxIds(set< TTaxId > &tax_ids)
Get all unique tax ids from db.
char GetSeqType() const
Get the sequence type.
int GetOidAtOffset(int first_seq, Uint8 residue) const
Find the OID corresponding to the offset given in residues, into the database as a whole.
int GetMinLength() const
Returns the length of the smallest sequence in the database.
void GetLeafTaxIDs(int oid, map< TGi, set< TTaxId > > &gi_to_taxid_set, bool persist)
Get gi to taxid map for an OID.
void AccessionsToOids(const vector< string > &accs, vector< blastdb::TOid > &oids)
EBlastDbVersion GetBlastDbVersion() const
Return blast db version.
int GetMaskAlgorithmId(const string &algo_name)
Get the numeric ID for an algorithm name.
int GetColumnId(const string &title)
Get an ID number for a given column title.
bool GiToOidwFilterCheck(TGi gi, int &oid)
GiToOid is meant to simply return the oid for a gi if one exists. This method finds the oid and checks if...
void GetColumnBlob(int col_id, int oid, bool keep, CBlastDbBlob &blob)
Fetch the data blob for the given column and oid.
void SetIterationRange(int oid_begin, int oid_end)
Set Iteration Range.
int GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
void GetMaskAlgorithmDetails(int algorithm_id, string &program, string &program_name, string &algo_opts)
Get information about one type of masking available here.
void SetNumberOfThreads(int num_threads, bool force_mt=false)
Invoke the garbage collector to free up memory.
TGi GetSeqGI(int oid)
Look up for the GI of a sequence.
int GetMaxLength() const
Returns the length of the largest sequence in the database.
void TaxIdsToOids(set< TTaxId > &tax_ids, vector< blastdb::TOid > &rv)
Get Oid list for input tax ids.
void RetAmbigSeq(const char **buffer) const
Returns any resources associated with the sequence.
void FlushOffsetRangeCache()
Flush all offset ranges cached.
void GetAllTaxIDs(int oid, set< TTaxId > &taxids)
Get all tax ids (leaf and non-leaf) for an oid.
Uint8 GetVolumeLength() const
Returns the sum of the lengths of all volumes.
void GetLMDBFileNames(vector< string > &lmdb_list) const
const string & GetDBNameList() const
Get list of database names.
CRef< CBioseq > GetBioseq(int oid, TGi target_gi, const CSeq_id *target_seq_id, bool seqdata)
Get a CBioseq for a sequence.
static void FindVolumePaths(const string &dbname, char prot_nucl, vector< string > &paths, vector< string > *alias_paths, bool recursive, bool expand_links)
Find volume paths.
void ListColumns(vector< string > &titles)
List columns titles found in this database.
int GetAmbigPartialSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, CSeqDB::TSequenceRanges *partial_ranges, CSeqDB::TSequenceRanges *masks) const
bool OidToPig(int oid, int &pig) const
Translate an OID to a PIG.
void GetAliasFileValues(TAliasFileValues &afv)
Get Name/Value Data From Alias Files.
Uint8 GetExactTotalLength()
Returns the exact sum of the lengths of all available sequences.
void GetTaxIdsForSeqId(const CSeq_id &seq_id, vector< TTaxId > &taxids)
CRef< CSeq_data > GetSeqData(int oid, TSeqPos begin, TSeqPos end) const
Fetch data as a CSeq_data object.
const map< string, string > & GetColumnMetaData(int column_id)
Get all metadata for the specified column.
int GetSequence(int oid, const char **buffer) const
Get the sequence data for a sequence.
static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo &info)
Get taxonomy information.
void RetSequence(const char **buffer) const
Returns any resources associated with the sequence.
string GetAvailableMaskAlgorithmDescriptions()
Returns a formatted string with the list of available masking algorithms in this database for display...
bool CheckOrFindOID(int &next_oid)
Find an included OID, incrementing next_oid if necessary.
void SeqidToOids(const CSeq_id &seqid, vector< int > &oids, bool multi)
Translate a CSeq-id to a list of OIDs.
int GetNumSeqsStats() const
Returns the number of sequences available.
int GetAmbigSeq(int oid, char **buffer, int nucl_code, SSeqDBSlice *region, ESeqDBAllocType strategy, CSeqDB::TSequenceRanges *masks=NULL) const
Get a pointer to a range of sequence data with ambiguities.
void GetAvailableMaskAlgorithms(vector< int > &algorithms)
Get a list of algorithm IDs for which mask data exists.
int GetSeqLengthApprox(int oid) const
Get the approximate sequence length.
void GetTaxIdsForOids(const vector< blastdb::TOid > &oids, set< TTaxId > &tax_ids)
void GetTotals(ESummaryType sumtype, int *oid_count, Uint8 *total_length, bool use_approx)
Returns the sum of the sequence lengths.
const CSeqDBGiList * GetGiList() const
Get GI list attached to this database.
void GetMaskData(int oid, int algo_id, CSeqDB::TSequenceRanges &ranges)
Get masked ranges of a sequence.
bool TiToOid(Int8 ti, int &oid)
Translate a TI to an OID.
Uint8 GetTotalLengthStats() const
Returns the sum of the lengths of all available sequences.
void SetVolsMemBit(int mbit)
Set the membership bit of all volumes.
bool GiToOid(TGi gi, int &oid) const
Translate a GI to an OID.
void SetOffsetRanges(int oid, const TRangeList &offset_ranges, bool append_ranges, bool cache_data)
Apply a range of offsets to a database sequence.
list< CRef< CSeq_id > > GetSeqIDs(int oid)
Gets a list of sequence identifiers.
void AccessionToOids(const string &acc, vector< int > &oids)
Find OIDs matching the specified string.
CRef< CBlast_def_line_set > GetHdr(int oid)
Get the sequence header data.
int GetNumSeqs() const
Returns the number of sequences available.
Uint8 GetTotalLength() const
Returns the sum of the lengths of all available sequences.
bool PigToOid(int pig, int &oid) const
Translate a PIG to an OID.
CSeqDB::EOidListType GetNextOIDChunk(int &begin_chunk, int &end_chunk, int oid_size, vector< int > &oid_list, int *oid_state)
Return a chunk of OIDs, and update the OID bookmark.
string GetDate() const
Returns the construction date of the database.
string GetTitle() const
Returns the database title.
void ResetInternalChunkBookmark()
Restart chunk iteration at the beginning of the database.
int GetSeqLength(int oid) const
Get the sequence length.
CSeqDBIdSet GetIdSet()
Get IdSet list attached to this database.
bool OidToGi(int oid, TGi &gi)
Translate an OID to a GI.
CSeqDBIter & operator++()
Increment operator.
int m_Length
The length of this OID.
const CSeqDB * m_DB
The CSeqDB object which this object iterates over.
CSeqDBIter & operator=(const CSeqDBIter &)
Copy one iterator to another.
void x_RetSeq()
Release hold on current sequence.
const char * m_Data
The sequence data for this OID.
int m_OID
The OID this iterator is currently accessing.
void x_GetSeq()
Get data pointer and length for the current sequence.
CSeqDBIter(const CSeqDBIter &)
Construct one iterator from another.
void GetColumnBlob(int col_id, int oid, CBlastDbBlob &blob)
Fetch the data blob for the given column and oid.
int GetMinLength() const
Returns the length of the shortest sequence in the database.
void GetDBTaxIds(set< TTaxId > &tax_ids) const
Get all unique tax ids from db.
static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)
Find volume paths.
bool OidToPig(int oid, int &pig) const
Translate an OID to a PIG.
void TaxIdsToOids(set< TTaxId > &tax_ids, vector< blastdb::TOid > &rv) const
Get Oid list for input tax ids.
bool GiToOidwFilterCheck(TGi gi, int &oid) const
Translate a GI To an OID with filter check.
Uint8 GetTotalLength() const
Returns the sum of the lengths of all available sequences.
static string ESeqType2String(ESeqType type)
Converts a CSeqDB sequence type into a human readable string.
void GetGis(int oid, vector< TGi > &gis, bool append=false) const
Gets a list of GIs for an OID.
bool PigToOid(int pig, int &oid) const
Translate a PIG to an OID.
void SetIterationRange(int oid_begin, int oid_end)
Set Iteration Range.
void GetSequenceAsString(int oid, CSeqUtil::ECoding coding, string &output, TSeqRange range=TSeqRange()) const
Get a sequence in a given encoding.
int GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
static string GenerateSearchPath()
Returns the default BLAST database search path configured for this local installation of BLAST.
TGi GetSeqGI(int oid) const
Returns the first Gi (if any) of the sequence.
vector< int > ValidateMaskAlgorithms(const vector< int > &algorithm_ids)
Validates the algorithm IDs passed to this function, returning a vector of those algorithm IDs not pr...
Uint8 GetVolumeLength() const
Returns the sum of the lengths of all volumes.
void GetAvailableMaskAlgorithms(vector< int > &algorithms)
Get a list of algorithm IDs for which mask data exists.
bool OidToGi(int oid, TGi &gi) const
Translate an OID to a GI.
const string & GetDBNameList() const
Get list of database names.
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Gets a list of sequence identifiers.
Int8 GetDiskUsage() const
Retrieve the disk usage in bytes for this BLAST database.
string GetAvailableMaskAlgorithmDescriptions()
Returns a formatted string with the list of available masking algorithms in this database for display...
void ResetInternalChunkBookmark()
Resets this object's internal chunk bookmark, which is used when the oid_state argument to GetNextOID...
EOidListType
Indicates how block of OIDs was returned.
CRef< CSeq_data > GetSeqData(int oid, TSeqPos begin, TSeqPos end) const
Fetch data as a CSeq_data object.
bool GiToPig(TGi gi, int &pig) const
Translate a GI to a PIG.
void GetAliasFileValues(TAliasFileValues &afv)
Get Name/Value Data From Alias Files.
void RemoveOffsetRanges(int oid)
Remove any offset ranges for the given OID.
int GetMaxLength() const
Returns the length of the largest sequence in the database.
int GetSeqLength(int oid) const
Returns the sequence length in base pairs or residues.
bool PigToGi(int pig, TGi &gi) const
Translate a PIG to a GI.
ESeqType GetSequenceType() const
Returns the type of database opened - protein or nucleotide.
const CSeqDBGiList * GetGiList() const
Get GI list attached to this database.
ESeqType
Sequence types (eUnknown tries protein, then nucleotide).
bool SeqidToOid(const CSeq_id &seqid, int &oid) const
Translate a Seq-id to any matching OID.
void RetAmbigSeq(const char **buffer) const
Returns any resources associated with the sequence.
int GetOidAtOffset(int first_seq, Uint8 residue) const
Find the sequence closest to the given offset into the database.
void SetOffsetRanges(int oid, const TRangeList &offset_ranges, bool append_ranges, bool cache_data)
Apply a range of offsets to a database sequence.
CRef< CBioseq > GetBioseq(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence.
int GetAmbigPartialSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType strategy, TSequenceRanges *partial_ranges, TSequenceRanges *masks=NULL) const
CRef< CBioseq > GetBioseqNoData(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence without sequence data.
void GetTaxIDs(int oid, map< TGi, TTaxId > &gi_to_taxid, bool persist=false) const
Get taxid for an OID.
void SetVolsMemBit(int mbit)
Set the membership of all volumes.
void GetTaxIdsForOids(const vector< blastdb::TOid > &oids, set< TTaxId > &tax_ids) const
void GetMaskAlgorithmDetails(int algorithm_id, objects::EBlast_filter_program &program, string &program_name, string &algo_opts)
Get information about one type of masking available here.
void GetTotals(ESummaryType sumtype, int *oid_count, Uint8 *total_length, bool use_approx=true) const
Returns the sum of the sequence lengths.
void RetSequence(const char **buffer) const
Returns any resources associated with the sequence.
string GetTitle() const
Returns the database title.
int GetNumSeqs() const
Returns the number of sequences available.
void GetTaxIdsForSeqId(const CSeq_id &seq_id, vector< TTaxId > &taxids)
Get all tax ids for a seq id.
EOidListType GetNextOIDChunk(int &begin_chunk, int &end_chunk, int oid_size, vector< int > &oid_list, int *oid_state=NULL)
Return a chunk of OIDs, and update the OID bookmark.
void x_GetDBFilesMetaData(Int8 &disk_bytes, Int8 &cached_bytes, vector< string > &db_files, const string &user_path) const
CRef< CBlast_db_metadata > GetDBMetaData(string user_path=kEmptyStr)
void GetAllTaxIDs(int oid, set< TTaxId > &taxids) const
Get all tax ids for an oid.
int GetSequence(int oid, const char **buffer) const
Get a pointer to raw sequence data.
void AccessionToOids(const string &acc, vector< int > &oids) const
Translate an Accession to a list of OIDs.
void ListColumns(vector< string > &titles)
List columns titles found in this database.
void GetTaxIdsForAccession(const string &accs, vector< TTaxId > &taxids)
Get all tax ids for an accession.
bool CheckOrFindOID(int &next_oid) const
Find an included OID, incrementing next_oid if necessary.
string GetDate() const
Returns the construction date of the database.
int GetNumSeqsStats() const
Returns the number of sequences available.
ESummaryType
Types of summary information available.
@ eFilteredAll
Values from alias files, or summation over all included sequences.
int GetColumnId(const string &title)
Get an ID number for a given column title.
void SeqidToOids(const CSeq_id &seqid, vector< int > &oids) const
Translate a Seq-id to a list of OIDs.
int GetMaskAlgorithmId(const string &algo_name) const
Get the numeric algorithm ID for a string.
int GetAmbigSeqAlloc(int oid, char **buffer, int nucl_code, ESeqDBAllocType strategy, TSequenceRanges *masks=NULL) const
Get a pointer to sequence data with ambiguities.
static const string kOidNotFound
String containing the error message in exceptions thrown when a given OID cannot be found.
bool TiToOid(Int8 ti, int &oid) const
Translate a TI to an OID.
CSeqDBIter Begin() const
Returns a sequence iterator.
const string & GetColumnValue(int column_id, const string &key)
Look up the value for a specific column metadata key.
static const char * kBlastDbDateFormat
Format string for the date returned by CSeqDB::GetDate.
CRef< CBioseq > GiToBioseq(TGi gi) const
Get a CBioseq for a given GI.
class CSeqDBImpl * m_Impl
Implementation details are hidden. (See seqdbimpl.hpp).
EBlastDbVersion GetBlastDbVersion() const
Return blast db version.
void SetNumberOfThreads(int num_threads, bool force_mt=false)
Setting the number of threads.
static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo &info)
Get taxonomy information.
Uint8 GetExactTotalLength()
Returns the exact sum of the lengths of all available sequences.
CRef< CBlast_def_line_set > GetHdr(int oid) const
Get the ASN.1 header for the sequence.
void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Dump debug information for this object.
void AccessionsToOids(const vector< string > &accs, vector< blastdb::TOid > &oids) const
Uint8 GetTotalLengthStats() const
Returns the sum of the lengths of all available sequences.
int GetSeqLengthApprox(int oid) const
Returns an unbiased, approximate sequence length.
CRef< CBioseq > SeqidToBioseq(const CSeq_id &seqid) const
Get a CBioseq for a given Seq-id.
CSeqDB()
No-argument Constructor.
CRef< CBioseq > PigToBioseq(int pig) const
Get a CBioseq for a given PIG.
int GetAmbigSeq(int oid, const char **buffer, int nucl_code) const
Get a pointer to sequence data with ambiguities.
void GetMaskData(int oid, const vector< int > &algo_ids, TSequenceRanges &ranges)
Get masked ranges of a sequence.
bool GiToOid(TGi gi, int &oid) const
Translate a GI to an OID.
const map< string, string > & GetColumnMetaData(int column_id)
Get all metadata for the specified column.
CSeqDBIdSet GetIdSet() const
Get IdSet list attached to this database.
void FlushOffsetRangeCache()
Flush all offset ranges cached.
void GetLeafTaxIDs(int oid, map< TGi, set< TTaxId > > &gi_to_taxid_set, bool persist=false) const
Get taxid for an OID.
Functor object for s_RemoveAliasComponents where the path name is matched in SSeqDBInitInfo.
PathFinder(const string &p)
bool operator()(const SSeqDBInitInfo &value) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
static unsigned char depth[2 *(256+1+29)+1]
static void DLIST_NAME() append(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static SQLCHAR output[256]
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
SStrictId_Tax::TId TTaxId
Taxon id type.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST(message)
This macro is deprecated, and it is strongly recommended to move all projects (except tests) to macro...
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Trace(CExceptionArgs_Base &args)
void Info(CExceptionArgs_Base &args)
void FindFilesInDir(const CDir &dir, const vector< string > &masks, const vector< string > &masks_subdir, TFindFunc &find_func, TFindFiles flags=fFF_Default)
Find files in the specified directory.
Int8 GetLength(void) const
Get size of file.
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Remove a directory entry.
static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)
Assemble a path from basic components.
EFindFiles
File finding flags.
static char GetPathSeparator(void)
Get path separator symbol specific for the current platform.
string GetName(void) const
Get the base entry name with extension (if any).
const string & GetPath(void) const
Get entry path.
virtual bool Exists(void) const
Check existence of file.
@ fFF_Recursive
descend into sub-dirs
@ fParse_RawText
Try to ID raw non-numeric accessions.
@ fParse_ValidLocal
Treat otherwise unidentified strings as raw accessions, provided that they pass rudimentary validatio...
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
TObjectType * GetPointerOrNull(void) THROWS_NONE
Get pointer value.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static TNumeric StringToNumeric(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to a numeric value.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
@ fConvErr_NoThrow
Do not throw an exception on error.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
@ eNocase
Case insensitive compare.
string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const
Transform time to string.
bool IsEmpty(void) const
Is time object empty (date and time)?
static CTimeFormat GetPredefined(EPredefined fmt, TFlags flags=fDefault)
Get predefined format.
@ eISO8601_DateTimeSec
Y-M-DTh:m:s (eg 1997-07-16T19:20:30)
strategy
Block allocation strategies.
EBlast_filter_program
This defines the possible sequence filtering algorithms to be used in a BLAST database.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
unsigned int
A callback function used to compare two keys in a database.
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
<lmdb++.h> - C++11 wrapper for LMDB.
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
std::istream & in(std::istream &in_, double &x_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
CSeqDB::ESeqType ParseMoleculeTypeString(const string &s)
Convert a string to a CSeqDB ESeqType object.
static char s_GetSeqTypeChar(CSeqDB::ESeqType seqtype)
Helper function to translate enumerated type to character.
static CSeqDBImpl * s_SeqDBInit(const string &dbname, char prot_nucl, int oid_begin, int oid_end, bool use_atlas_lock, CSeqDBGiList *gi_list=NULL, CSeqDBNegativeList *neg_list=NULL, CSeqDBIdSet idset=CSeqDBIdSet())
Helper function to build private implementation object.
bool DeleteBlastDb(const string &dbpath, CSeqDB::ESeqType seq_type)
Deletes all files associated with a BLAST database.
vector< SSeqDBInitInfo > FindBlastDBs(const string &path, const string &dbtype, bool recurse, bool include_alias_files, bool remove_redundant_dbs)
Find BLAST DBs in the directory specified.
static void s_RemoveAliasComponents(CBlastDbFinder &finder)
Defines BLAST database access classes.
ESeqDBAllocType
Certain methods have an "Alloc" version.
void SeqDB_GetLMDBFileExtensions(bool db_is_protein, vector< string > &extn)
Retrieves file extensions for BLAST LMDB files.
const int kSeqDBNuclNcbiNA8
Used to request ambiguities in Ncbi/NA8 format.
void SeqDB_GetFileExtensions(bool db_is_protein, vector< string > &extensions, EBlastDbVersion dbver=eBDB_Version4)
Retrieves a list of all supported file extensions for BLAST databases.
EBlastDbVersion
BLAST database version.
This file defines several SeqDB utility functions related to byte order and file system portability.
void SeqDB_CombineAndQuote(const vector< string > &dbs, string &dbname)
Combine and quote list of database names.
const U & SeqDB_MapFind(const std::map< T, U > &m, const T &k, const U &dflt)
Find a map value or return a default.
T SeqDB_GetStdOrd(const T *stdord_obj)
Read a network order integer value.
The top level of the private implementation layer for SeqDB.
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
List of sequence offset ranges.
Structure to define basic information to initialize a BLAST DB.
OID-Range type to simplify interfaces.