1 #ifndef OBJTOOLS_BLAST_SEQDB_READER___SEQDB__HPP
2 #define OBJTOOLS_BLAST_SEQDB_READER___SEQDB__HPP
120 inline void x_GetSeq();
123 inline void x_RetSeq();
180 static string ESeqType2String(ESeqType
type);
254 if (_size + 1 > _capacity) {
272 bool empty()
const {
return _size == 0; }
287 if (num_elements > _capacity) {
289 (
value_type*) realloc(_data, (num_elements + 1) *
291 if ( !reallocation ) {
292 string msg(
"Failed to allocate ");
296 _data = (
TSeqPos*) reallocation;
297 _capacity = num_elements;
303 reserve(_size + num_elements);
304 memcpy(&_data[1+ 2*_size], src, num_elements *
sizeof(
value_type));
305 _size += num_elements;
310 x_reallocate_if_necessary();
339 bool use_atlas_lock =
true);
422 CSeqDB(
const vector<string> & dbs,
502 static string GenerateSearchPath();
512 TGi GetSeqGI(
int oid)
const;
521 int GetSeqLengthApprox(
int oid)
const;
572 vector<TTaxId> & taxids,
591 void GetTaxIDs(
int oid,
593 bool persist =
false)
const;
611 void GetTaxIDs(
int oid,
612 vector<TTaxId> & taxids,
613 bool persist =
false)
const;
622 void GetAllTaxIDs(
int oid,
668 ExtractBlastDefline(
const CBioseq & bioseq);
708 int GetAmbigSeq(
int oid,
const char **
buffer,
int nucl_code)
const;
729 int GetAmbigSeq(
int oid,
733 int end_offset)
const;
761 int GetAmbigSeqAlloc(
int oid,
767 int GetAmbigPartialSeq(
int oid,
787 void RetSequence(
const char **
buffer)
const;
802 void RetAmbigSeq(
const char **
buffer)
const;
813 list< CRef<CSeq_id> > GetSeqIDs(
int oid)
const;
828 void GetGis(
int oid, vector<TGi> & gis,
bool append =
false)
const;
847 string GetDate()
const;
865 int GetNumSeqs()
const;
870 int GetNumSeqsStats()
const;
873 int GetNumOIDs()
const;
881 Uint8 GetTotalLength()
const;
888 Uint8 GetExactTotalLength();
896 Uint8 GetTotalLengthStats()
const;
905 Uint8 GetVolumeLength()
const;
933 Uint8 * total_length,
934 bool use_approx =
true)
const;
940 int GetMaxLength()
const;
946 int GetMinLength()
const;
963 bool CheckOrFindOID(
int & next_oid)
const;
998 GetNextOIDChunk(
int & begin_chunk,
1001 vector<int> & oid_list,
1002 int * oid_state =
NULL);
1007 void ResetInternalChunkBookmark();
1014 const string & GetDBNameList()
const;
1041 bool PigToOid(
int pig,
int & oid)
const;
1044 bool OidToPig(
int oid,
int & pig)
const;
1047 bool TiToOid(
Int8 ti,
int & oid)
const;
1050 bool OidToGi(
int oid,
TGi & gi)
const;
1053 bool GiToOid(
TGi gi,
int & oid)
const;
1056 bool GiToOidwFilterCheck(
TGi gi,
int & oid)
const;
1059 bool GiToPig(
TGi gi,
int & pig)
const;
1062 bool PigToGi(
int pig,
TGi & gi)
const;
1065 void AccessionToOids(
const string & acc, vector<int> & oids)
const;
1067 void AccessionsToOids(
const vector<string>& accs, vector<blastdb::TOid>& oids)
const;
1070 void SeqidToOids(
const CSeq_id & seqid, vector<int> & oids)
const;
1073 bool SeqidToOid(
const CSeq_id & seqid,
int & oid)
const;
1090 int GetOidAtOffset(
int first_seq,
Uint8 residue)
const;
1149 FindVolumePaths(
const string &
dbname,
1151 vector<string> & paths,
1152 vector<string> * alias_paths =
NULL,
1153 bool recursive =
true,
1154 bool expand_links =
true);
1165 void FindVolumePaths(vector<string> & paths,
bool recursive=
true)
const;
1179 void SetIterationRange(
int oid_begin,
int oid_end);
1253 void GetSequenceAsString(
int oid,
1270 void GetSequenceAsString(
int oid,
1275 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
1276 (!defined(NCBI_COMPILER_MIPSPRO)) )
1285 void ListColumns(vector<string> & titles);
1296 int GetColumnId(
const string & title);
1322 const string & GetColumnValue(
int column_id,
const string &
key);
1336 GetColumnMetaData(
int column_id,
1337 const string & volname);
1343 void GetColumnBlob(
int col_id,
int oid,
CBlastDbBlob & blob);
1359 void GetAvailableMaskAlgorithms(vector<int> & algorithms);
1363 int GetMaskAlgorithmId(
const string &algo_name)
const;
1367 string GetAvailableMaskAlgorithmDescriptions();
1371 vector<int> ValidateMaskAlgorithms(
const vector<int>& algorithm_ids);
1387 void GetMaskAlgorithmDetails(
int algorithm_id,
1389 string & program_name,
1390 string & algo_opts);
1392 void GetMaskAlgorithmDetails(
int algorithm_id,
1394 string & program_name,
1395 string & algo_opts);
1409 const vector<int> & algo_ids,
1412 GetMaskData(oid, algo_ids[0], ranges);
1424 void GetMaskData(
int oid,
1426 TSequenceRanges &ranges);
1466 void SetOffsetRanges(
int oid,
1473 void RemoveOffsetRanges(
int oid);
1476 void FlushOffsetRangeCache();
1487 void SetNumberOfThreads(
int num_threads,
bool force_mt =
false);
1490 Int8 GetDiskUsage()
const;
1493 void SetVolsMemBit(
int mbit);
1505 void TaxIdsToOids(
set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv)
const;
1511 void GetTaxIdsForOids(
const vector<blastdb::TOid> & oids,
set<TTaxId> & tax_ids)
const;
1519 void GetTaxIdsForAccession(
const string & accs, vector<TTaxId> & taxids);
1525 void GetTaxIdsForSeqId(
const CSeq_id & seq_id, vector<TTaxId> & taxids);
1537 void x_GetDBFilesMetaData(
Int8 & disk_bytes,
Int8 & cached_bytes, vector<string> & db_files,
const string & user_path)
const;
1578 vector<SSeqDBInitInfo>
1579 FindBlastDBs(
const string& path,
const string& dbtype,
bool recurse,
1580 bool include_alias_files =
false,
1581 bool remove_redundant_dbs =
false);
1607 m_Length = m_DB->GetSequence(oid, & m_Data);
1614 m_DB->RetSequence(& m_Data);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool GetSeqData(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq, Int4 nodetype, unsigned char *seqconv, Uint1 seq_data_type)
const size_t kResizeFactor
factor by which these arrays are resized
@ eOidRange
Data is a range of contiguous ordinal ids (indices)
`Blob' Class for SeqDB (and WriteDB).
SeqDB ID list for performing boolean set operations.
virtual ~CSeqDBIter()
Destructor.
int m_Length
The length of this OID.
const CSeqDB * m_DB
The CSeqDB object which this object iterates over.
void x_RetSeq()
Release hold on current sequence.
const char * m_Data
The sequence data for this OID.
int GetLength()
Get the length (in base pairs) of the currently held sequence.
int m_OID
The OID this iterator is currently accessing.
void x_GetSeq()
Get data pointer and length for the current sequence.
const char * GetData()
Get the sequence data for the currently held sequence.
int GetOID()
Get the OID of the currently held sequence.
DECLARE_OPERATOR_BOOL(m_Length !=-1)
Returns true if the iterator points to a valid sequence.
CSeqDBSequence(const CSeqDBSequence &)
Prevent copy construct.
const char * GetData()
Get pointer to sequence data.
CRef< CSeqDB > m_DB
The CSeqDB object this sequence is from.
~CSeqDBSequence()
Destructor, returns the sequence.
const char * m_Data
The sequence data for this sequence.
CSeqDBSequence & operator=(const CSeqDBSequence &)
Prevent copy.
int m_Length
The length of this sequence.
CSeqDB::TOID TOID
Defines the type used to select which sequence to get.
CSeqDBSequence(CSeqDB *db, int oid)
Get a hold on a database sequence.
int GetLength()
Get sequence length.
int TOID
Sequence type accepted and returned for OID indices.
int TPIG
Sequence type accepted and returned for PIG indices.
EOidListType
Indicates how block of OIDs was returned.
ESeqType
Sequence types (eUnknown tries protein, then nucleotide).
void RetSequence(const char **buffer) const
Returns any resources associated with the sequence.
int GetSequence(int oid, const char **buffer) const
Get a pointer to raw sequence data.
ESummaryType
Types of summary information available.
@ eUnfilteredAll
Sum of all sequences, ignoring GI and OID lists and alias files.
@ eFilteredAll
Values from alias files, or summation over all included sequences.
static const string kOidNotFound
String containing the error message in exceptions thrown when a given OID cannot be found.
static const char * kBlastDbDateFormat
Format string for the date returned by CSeqDB::GetDate.
class CSeqDBImpl * m_Impl
Implementation details are hidden. (See seqdbimpl.hpp).
TGi TGI
Sequence type accepted and returned for GI indices.
EMmapStrategies
Permitted mmap strategies.
@ eMmap_Normal
Normal, no special behavior (should undo next two options).
@ eMmap_Sequential
Expect sequential page references.
TSeqDBAliasFileValues TAliasFileValues
Import type to allow shorter name.
EMmapFileTypes
File type for which mmap strategy may be set.
@ eMmap_IndexFile
Index files (name ends with ".pin" or ".nin").
set< pair< int, int > > TRangeList
List of sequence offset ranges.
void GetMaskData(int oid, const vector< int > &algo_ids, TSequenceRanges &ranges)
Get masked ranges of a sequence.
static unsigned char depth[2 *(256+1+29)+1]
int GetSeqLength(const CBioseq &bioseq)
static void DLIST_NAME() append(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static SQLCHAR output[256]
static const char * str(char *buf, int n)
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
unsigned int TSeqPos
Type for sequence locations and lengths.
SStrictId_Tax::TId TTaxId
Taxon id type.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
NCBI_XOBJUTIL_EXPORT string GetTitle(const CBioseq_Handle &hnd, TGetTitleFlags flags=0)
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
#define NCBI_XOBJREAD_EXPORT
strategy
Block allocation strategies.
EBlast_filter_program
This defines the possible sequence filtering algorithms to be used in a BLAST database.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
CBioseq_Info & GetBioseq(CTSE_Info &tse, const CBioObjectId &id)
range(_Ty, _Ty) -> range< _Ty >
double value_type
The numeric datatype used by the parser.
const struct ncbi::grid::netcache::search::fields::KEY key
USING_SCOPE(objects)
Include definitions from the objects namespace.
CSeqDB::ESeqType ParseMoleculeTypeString(const string &str)
Convert a string to a CSeqDB ESeqType object.
vector< SSeqDBInitInfo > FindBlastDBs(const string &path, const string &dbtype, bool recurse, bool include_alias_files=false, bool remove_redundant_dbs=false)
Find BLAST DBs in the directory specified.
bool DeleteBlastDb(const string &dbpath, CSeqDB::ESeqType seq_type)
Deletes all files associated with a BLAST database.
Defines BlastDb `Blob' class for SeqDB and WriteDB.
Defines exception class and several constants for SeqDB.
ESeqDBAllocType
Certain methods have an "Alloc" version.
EBlastDbVersion
BLAST database version.
Uint4 GetSequenceType(const CBioseq_Handle &bsh)
Return a (corrected) set of flags identifying the sequence type.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Structure to represent a range.
TOffsetPair()
Default constructor.
List of sequence offset ranges.
void append(const void *src, size_type num_elements)
Append extra elements at the end.
const_iterator end() const
value_type & operator[](size_type i)
const value_type * const_iterator
const_iterator begin() const
void reserve(size_t num_elements)
Reserves capacity for at least num_elements elements.
void push_back(const value_type &element)
Append extra element at the end.
void x_reallocate_if_necessary()
value_type * get_data() const
Structure to define basic information to initialize a BLAST DB.
string m_BlastDbName
The BLAST DB name.
bool operator<(const SSeqDBInitInfo &rhs) const
Less-than operator, to support sorting.
CRef< CSeqDB > InitSeqDb() const
Create a new CSeqDB instance from this object.
CSeqDB::ESeqType m_MoleculeType
The molecule type.
SSeqDBInitInfo()
Default constructor.