NCBI C++ ToolKit
Classes | Typedefs | Enumerations | Functions | Variables
seqdbcommon.hpp File Reference

Defines exception class and several constants for SeqDB. More...

#include <ncbiconf.h>
#include <corelib/ncbiobj.hpp>
#include <objects/seqloc/Seq_id.hpp>
+ Include dependency graph for seqdbcommon.hpp:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Go to the SVN repository for this file.

Classes

class  CSeqDBException
 CSeqDBException. More...
 
struct  SBlastSeqIdListInfo
 Blast DB v5 seqid list info. More...
 
class  CSeqDBGiList
 CSeqDBGiList. More...
 
struct  CSeqDBGiList::SGiOid
 Structure that holds GI,OID pairs. More...
 
struct  CSeqDBGiList::STiOid
 Structure that holds TI,OID pairs. More...
 
struct  CSeqDBGiList::SSiOid
 Structure that holds Seq-id,OID pairs. More...
 
struct  CSeqDBGiList::STaxIdsOids
 
struct  CSeqDBGiList::SPigOid
 
class  CSeqDBBitVector
 CSeqDBBitVector. More...
 
class  CSeqDBNegativeList
 CSeqDBNegativeList. More...
 
class  CSeqDBFileGiList
 CSeqDBFileGiList. More...
 
class  CIntersectionGiList
 GI list containing the intersection of two other lists of GIs. More...
 
class  CSeqDBIdSet_Vector
 Helper class to allow copy-on-write semantics for CSeqDBIdSet. More...
 
class  CSeqDBIdSet
 SeqDB ID list for performing boolean set operations. More...
 
struct  SSeqDBTaxInfo
 SSeqDBTaxInfo. More...
 

Typedefs

typedef Int4 TOid
 Ordinal ID in BLAST databases. More...
 
typedef Uint8 TTi
 
typedef Uint4 TPig
 
typedef map< string, string > TSeqDBAliasFileInstance
 Set of values found in one instance of one alias file. More...
 
typedef vector< TSeqDBAliasFileInstance > TSeqDBAliasFileVersions
 Contents of all instances of a particular alias file pathname. More...
 
typedef map< string, TSeqDBAliasFileVersions > TSeqDBAliasFileValues
 Contents of all alias files are returned in this type of container. More...
 

Enumerations

enum  EBlastDbVersion { eBDB_Version4 = 4 , eBDB_Version5 = 5 }
 BLAST database version. More...
 
enum  ESeqDBAllocType { eAtlas = 0 , eMalloc , eNew }
 Certain methods have an "Alloc" version. More...
 
enum  EOidMaskType { fNone = 0x0 , fExcludeModel = 0x01 }
 
enum  ESeqDBIdType {
  eGiId , eTiId , ePigId , eStringId ,
  eHashId , eOID
}
 Various identifier formats used in Id lookup. More...
 

Functions

 USING_SCOPE (objects)
 Include definitions from the objects namespace. More...
 
void SeqDB_ReadBinaryGiList (const string &name, vector< TGi > &gis)
 Read a binary-format GI list from a file. More...
 
void SeqDB_ReadMemoryGiList (const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SGiOid > &gis, bool *in_order=0)
 Read a text or binary GI list from an area of memory. More...
 
void SeqDB_ReadMemoryTiList (const char *fbeginp, const char *fendp, vector< CSeqDBGiList::STiOid > &tis, bool *in_order=0)
 Read a text or binary TI list from an area of memory. More...
 
void SeqDB_ReadMemorySiList (const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SSiOid > &sis, bool *in_order=0)
 Read a text SeqID list from an area of memory. More...
 
void SeqDB_ReadMemoryMixList (const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SGiOid > &gis, vector< CSeqDBGiList::STiOid > &tis, vector< CSeqDBGiList::SSiOid > &sis, bool *in_order)
 Read an ID list (mixed type) from an area of memory. More...
 
void SeqDB_ReadMemoryPigList (const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SPigOid > &pigs, bool *in_order=0)
 
void SeqDB_CombineAndQuote (const vector< string > &dbs, string &dbname)
 Combine and quote a list of database names. More...
 
void SeqDB_SplitQuoted (const string &dbname, vector< CTempString > &dbs, bool keep_quote=false)
 Split a (possibly) quoted list of database names into pieces. More...
 
void SeqDB_ReadGiList (const string &fname, vector< CSeqDBGiList::SGiOid > &gis, bool *in_order=0)
 Read a text or binary GI list from a file. More...
 
void SeqDB_ReadTiList (const string &fname, vector< CSeqDBGiList::STiOid > &tis, bool *in_order=0)
 Read a text or binary TI list from a file. More...
 
void SeqDB_ReadSiList (const string &fname, vector< CSeqDBGiList::SSiOid > &sis, bool *in_order, SBlastSeqIdListInfo &db_info)
 Read a text SeqId list from a file. More...
 
void SeqDB_ReadMixList (const string &fname, vector< CSeqDBGiList::SGiOid > &gis, vector< CSeqDBGiList::STiOid > &tis, vector< CSeqDBGiList::SSiOid > &sis, bool *in_order)
 Read a text SeqId list from a file. More...
 
void SeqDB_ReadPigList (const string &fname, vector< CSeqDBGiList::SPigOid > &pigs, bool *in_order=0)
 
void SeqDB_ReadGiList (const string &fname, vector< TGi > &gis, bool *in_order=0)
 Read a text or binary GI list from a file. More...
 
bool SeqDB_IsBinaryGiList (const string &fname)
 Returns true if the file name passed contains a binary GI list. More...
 
bool SeqDB_IsBinaryTiList (const string &fname)
 Returns true if the file name passed contains a binary TI list. More...
 
string SeqDB_ResolveDbPath (const string &filename)
 Resolve a file path using SeqDB's path algorithms. More...
 
string SeqDB_ResolveDbPathNoExtension (const string &filename, char dbtype='-')
 Resolve a file path using SeqDB's path algorithms. More...
 
string SeqDB_ResolveDbPathForLinkoutDB (const string &filename)
 Resolve a file path using SeqDB's path algorithms. More...
 
bool SeqDB_CompareVolume (const string &volpath1, const string &volpath2)
 Compares two volume file names and determines the volume order. More...
 
unsigned SeqDB_SequenceHash (const char *sequence, int length)
 Sequence Hashing. More...
 
unsigned SeqDB_SequenceHash (const CBioseq &sequence)
 Sequence Hashing For a CBioseq. More...
 
ESeqDBIdType SeqDB_SimplifySeqid (CSeq_id &bestid, const string *acc, Int8 &num_id, string &str_id, bool &simpler)
 Seq-id simplification. More...
 
ESeqDBIdType SeqDB_SimplifyAccession (const string &acc, Int8 &num_id, string &str_id, bool &simpler)
 String id simplification. More...
 
const string SeqDB_SimplifyAccession (const string &acc)
 String id simplification. More...
 
void SeqDB_GetFileExtensions (bool db_is_protein, vector< string > &extensions, EBlastDbVersion dbver=eBDB_Version4)
 Retrieves a list of all supported file extensions for BLAST databases. More...
 
void SeqDB_GetLMDBFileExtensions (bool db_is_protein, vector< string > &extn)
 Retrieves file extensions for BLAST LMDB files. More...
 
void SeqDB_GetMetadataFileExtension (bool db_is_protein, string &extn)
 
bool IsStringId (const CSeq_id &id)
 Determine if id is a string id. More...
 
string GetBlastSeqIdString (const CSeq_id &seqid, bool version)
 Return ID string as stored in lmdb. More...
 
const string SeqDB_GetOidMaskFileExt (bool db_is_protein, EOidMaskType t)
 

Variables

const string kSeqDBGroupAliasFileName
 The name of the group alias file expected in each directory. For more documentation, see "Group Alias Files" in source/src/objtools/blast/seqdb_reader/alias_files.txt. More...
 
const int kSeqDBNuclNcbiNA8 = 0
 Used to request ambiguities in Ncbi/NA8 format. More...
 
const int kSeqDBNuclBlastNA8 = 1
 Used to request ambiguities in BLAST/NA8 format. More...
 
const blastdb::TOid kSeqDBEntryNotFound = -1
 
const blastdb::TOid kSeqDBEntryDuplicate = -2
 

Detailed Description

Defines exception class and several constants for SeqDB.

Defines classes: CSeqDBException

Implemented for: UNIX, MS-Windows

Definition in file seqdbcommon.hpp.

Typedef Documentation

◆ TOid

typedef Int4 TOid

Ordinal ID in BLAST databases.

Definition at line 58 of file seqdbcommon.hpp.

◆ TPig

typedef Uint4 TPig

Definition at line 142 of file seqdbcommon.hpp.

◆ TSeqDBAliasFileInstance

Set of values found in one instance of one alias file.

Definition at line 1838 of file seqdbcommon.hpp.

◆ TSeqDBAliasFileValues

Contents of all alias files are returned in this type of container.

Definition at line 1844 of file seqdbcommon.hpp.

◆ TSeqDBAliasFileVersions

Contents of all instances of a particular alias file pathname.

Definition at line 1841 of file seqdbcommon.hpp.

◆ TTi

typedef Uint8 TTi

Definition at line 140 of file seqdbcommon.hpp.

Enumeration Type Documentation

◆ EBlastDbVersion

BLAST database version.

Enumerator
eBDB_Version4 
eBDB_Version5 

Definition at line 51 of file seqdbcommon.hpp.

◆ EOidMaskType

Enumerator
fNone 
fExcludeModel 

Definition at line 159 of file seqdbcommon.hpp.

◆ ESeqDBAllocType

Certain methods have an "Alloc" version.

When these methods are used, the following constants can be specified to indicate which libraries to use to allocate returned data, so the corresponding calls (delete[] vs. free()) can be used to delete the data.

Enumerator
eAtlas 
eMalloc 
eNew 

Definition at line 133 of file seqdbcommon.hpp.

◆ ESeqDBIdType

Various identifier formats used in Id lookup.

Enumerator
eGiId 
eTiId 

Genomic ID is a relatively stable numeric identifier for sequences.

ePigId 

Trace ID is a numeric identifier for Trace sequences.

eStringId 

Each PIG identifier refers to exactly one protein sequence.

eHashId 

Some sequence sources use string identifiers.

eOID 

Lookup from sequence hash values to OIDs.

The ordinal id indicates the order of the data in the volume's index file.

Definition at line 1963 of file seqdbcommon.hpp.

Function Documentation

◆ GetBlastSeqIdString()

string GetBlastSeqIdString ( const CSeq_id &  seqid,
bool  version 
)

◆ IsStringId()

bool IsStringId ( const CSeq_id &  id)

Determine if id is a string id.

Parameters
id: input id for check
Returns
Return true if id is not of type gi, ti or pig

Definition at line 2678 of file seqdbcommon.cpp.

References CDbtag_Base::CanGetDb(), CSeq_id_Base::e_General, CSeq_id_Base::e_Gi, and CDbtag_Base::GetDb().

Referenced by CSeqDBImpl::SeqidToOids(), and CBlastDBExtractor::SetSeqId().

◆ SeqDB_CombineAndQuote()

void SeqDB_CombineAndQuote ( const vector< string > &  dbs,
string &  dbname 
)

Combine and quote a list of database names.

SeqDB permits multiple databases to be opened by a single CSeqDB instance, by passing the database names as a space-delimited list to the CSeqDB constructor. To support paths and filenames with embedded spaces, surround any space-containing names with double quotes ('"'). Filenames not containing spaces may be quoted safely with no effect. (This solution prevents the use of names containing embedded double quotes.)

This method combines a list of database names into a string encoded in this way.

Parameters
dbname: Combined database name.
dbs: Database names to combine.

Combine and quote a list of database names.

Parameters
dbs: Database names to combine.
dbname: Combined database name.

Definition at line 1717 of file seqdbcommon.cpp.

References dbname(), i, int, and ncbi::grid::netcache::search::fields::size.

◆ SeqDB_CompareVolume()

bool SeqDB_CompareVolume ( const string &  volpath1,
const string &  volpath2 
)

Compares two volume file names and determines the volume order.

Parameters
volpath1: The 1st volume path
volpath2: The 2nd volume path
Returns
true if vol1 should appear before vol2

Definition at line 190 of file seqdbcommon.cpp.

References CSeqDB_Path::FindBaseName(), and CSeqDB_Substring::GetString().

Referenced by CIndexedDb_Old::CIndexedDb_Old(), and CSeqDBAliasNode::FindVolumePaths().

◆ SeqDB_GetFileExtensions()

void SeqDB_GetFileExtensions ( bool  db_is_protein,
vector< string > &  extensions,
EBlastDbVersion  dbver = eBDB_Version4 
)

Retrieves a list of all supported file extensions for BLAST databases.

Parameters
db_is_protein: set to true if the database is protein else false [in]
extensions: where the return value will be stored [in|out]
dbver: BLASTDB version to use [in]

Definition at line 2620 of file seqdbcommon.cpp.

References eBDB_Version4, eBDB_Version5, and SeqDB_GetLMDBFileExtensions().

Referenced by DeleteBlastDb(), CSeqDB::GetDiskUsage(), CBlastdbConvertApp::Run(), and CSeqDB::x_GetDBFilesMetaData().

◆ SeqDB_GetLMDBFileExtensions()

void SeqDB_GetLMDBFileExtensions ( bool  db_is_protein,
vector< string > &  extn 
)

Retrieves file extensions for BLAST LMDB files.

Parameters
db_is_protein: set to true if the database is protein else false [in]
extn: where the return value will be stored [in|out]

Definition at line 2659 of file seqdbcommon.cpp.

References NULL.

Referenced by DeleteBlastDb(), DeleteLMDBFiles(), and SeqDB_GetFileExtensions().

◆ SeqDB_GetMetadataFileExtension()

void SeqDB_GetMetadataFileExtension ( bool  db_is_protein,
string &  extn 
)

◆ SeqDB_GetOidMaskFileExt()

const string SeqDB_GetOidMaskFileExt ( bool  db_is_protein,
EOidMaskType  t 
)

Definition at line 2706 of file seqdbcommon.cpp.

References fExcludeModel, NCBI_THROW, and t.

Referenced by CWriteDB_CreateOidMaskDB(), and CSeqDBOIDList::x_ComputeFilters().

◆ SeqDB_IsBinaryGiList()

bool SeqDB_IsBinaryGiList ( const string &  fname)

Returns true if the file name passed contains a binary GI list.

The SeqIds in a file are read into the provided vector<string>. If the in_order parameter is not null, the function will test the SeqIds for orderedness. It will set the bool to which in_order points to true if so, false if not.

Parameters
fname: The name of the SeqId list file. [in]
sis: The SeqIds returned by this function. [out]
in_order: If non-null, returns true iff the SeqIds were in order. [out]

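Note: the description above appears to document a different (apparently commented-out) declaration, `void SeqDB_ReadSeqIdList(const string & fname, vector<string> & sis, bool * in_order = 0);`. For SeqDB_IsBinaryGiList itself: returns true if the file name passed contains a binary GI list.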

Parameters
fname: The name of the GI list file. [in]
Exceptions
CSeqDBException: if file is invalid or empty

Definition at line 1400 of file seqdbcommon.cpp.

References CSeqDBFileGiList::eGiList, and s_ContainsBinaryNumericIdList().

Referenced by BOOST_AUTO_TEST_CASE(), and CBlastDBAliasApp::CreateAliasFile().

◆ SeqDB_IsBinaryTiList()

bool SeqDB_IsBinaryTiList ( const string &  fname)

Returns true if the file name passed contains a binary TI list.

Parameters
fname: The name of the TI list file. [in]
Exceptions
CSeqDBException: if file is invalid or empty

Definition at line 1395 of file seqdbcommon.cpp.

References CSeqDBFileGiList::eTiList, and s_ContainsBinaryNumericIdList().

Referenced by CBlastDBAliasApp::CreateAliasFile().

◆ SeqDB_ReadBinaryGiList()

void SeqDB_ReadBinaryGiList ( const string &  name,
vector< TGi > &  gis 
)

Read a binary-format GI list from a file.

Parameters
name: The name of the file containing GIs. [in]
gis: The GIs returned by this function. [out]

Definition at line 819 of file seqdbcommon.cpp.

References CMemoryFile::GetPtr(), CMemoryFile::GetSize(), GI_FROM, NCBI_THROW, SeqDB_GetStdOrd(), and SeqDB_MakeOSPath().

◆ SeqDB_ReadGiList() [1/2]

void SeqDB_ReadGiList ( const string &  fname,
vector< CSeqDBGiList::SGiOid > &  gis,
bool *  in_order = 0 
)

Read a text or binary GI list from a file.

The GIs in a file are read into the provided SGiOid vector. The GI half of each element of the vector is assigned, but the OID half will be left as -1. If the in_order parameter is not null, the function will test the GIs for orderedness. It will set the bool to which in_order points to true if so, false if not.

Parameters
fname: The name of the GI list file. [in]
gis: The GIs returned by this function. [out]
in_order: If non-null, returns true iff the GIs were in order. [out]

Definition at line 1405 of file seqdbcommon.cpp.

References CMemoryFile::GetPtr(), CMemoryFile::GetSize(), SeqDB_MakeOSPath(), and SeqDB_ReadMemoryGiList().

Referenced by CSeqDBFileGiList::CSeqDBFileGiList(), and SeqDB_ReadGiList().

◆ SeqDB_ReadGiList() [2/2]

void SeqDB_ReadGiList ( const string &  fname,
vector< TGi > &  gis,
bool *  in_order = 0 
)

Read a text or binary GI list from a file.

The GIs in a file are read into the provided vector<int>. If the in_order parameter is not null, the function will test the GIs for orderedness. It will set the bool to which in_order points to true if so, false if not.

Parameters
fname: The name of the GI list file. [in]
gis: The GIs returned by this function. [out]
in_order: If non-null, returns true iff the GIs were in order. [out]

Definition at line 1462 of file seqdbcommon.cpp.

References ITERATE, and SeqDB_ReadGiList().

◆ SeqDB_ReadMemoryGiList()

void SeqDB_ReadMemoryGiList ( const char *  fbeginp,
const char *  fendp,
vector< CSeqDBGiList::SGiOid > &  gis,
bool *  in_order = 0 
)

Read a text or binary GI list from an area of memory.

The GIs in a memory region are read into the provided SGiOid vector. The GI half of each element of the vector is assigned, but the OID half will be left as -1. If the in_order parameter is not null, the function will test the GIs for orderedness. It will set the bool to which in_order points to true if so, false if not.

Parameters
fbeginp: The start of the memory region holding the GI list. [in]
fendp: The end of the memory region holding the GI list. [in]
gis: The GIs returned by this function. [out]
in_order: If non-null, returns true iff the GIs were in order. [out]

Definition at line 925 of file seqdbcommon.cpp.

References _ASSERT, GI_FROM, NCBI_THROW, s_ReadDigit(), s_SeqDB_IsBinaryNumericList(), SeqDB_GetStdOrd(), and ZERO_GI.

Referenced by CSeqDBNodeFileIdList::CSeqDBNodeFileIdList(), and SeqDB_ReadGiList().

◆ SeqDB_ReadMemoryMixList()

void SeqDB_ReadMemoryMixList ( const char *  fbeginp,
const char *  fendp,
vector< CSeqDBGiList::SGiOid > &  gis,
vector< CSeqDBGiList::STiOid > &  tis,
vector< CSeqDBGiList::SSiOid > &  sis,
bool *  in_order 
)

Read an ID list (mixed type) from an area of memory.

The Seq ids in a memory region are read into the provided SSeqIdOid vector. The gi, ti or seqid half of each element of the vector is assigned, but the OID half will be left as -1. If the in_order parameter is not null, the function will test the SeqIds for orderedness. It will set the bool to which in_order points to true if so, false if not.

Parameters
fbeginp: The start of the memory region holding the SeqId list. [in]
fendp: The end of the memory region holding the SeqId list. [in]
gis: The gis returned by this function. [out]
tis: The tis returned by this function. [out]
sis: The seqids returned by this function. [out]
in_order: If non-null, returns true iff the seqids were in order. [out]

Definition at line 1324 of file seqdbcommon.cpp.

References eGiId, eStringId, eTiId, GI_FROM, head, SeqDB_SimplifyAccession(), and NStr::ToLower().

Referenced by SeqDB_ReadMixList().

◆ SeqDB_ReadMemoryPigList()

void SeqDB_ReadMemoryPigList ( const char *  fbeginp,
const char *  fendp,
vector< CSeqDBGiList::SPigOid > &  pigs,
bool *  in_order = 0 
)

◆ SeqDB_ReadMemorySiList()

void SeqDB_ReadMemorySiList ( const char *  fbeginp,
const char *  fendp,
vector< CSeqDBGiList::SSiOid > &  sis,
bool *  in_order = 0 
)

Read a text SeqID list from an area of memory.

The Seqids in a memory region are read into the provided SSeqIdOid vector. The SeqId half of each element of the vector is assigned, but the OID half will be left as -1. If the in_order parameter is not null, the function will test the SeqIds for orderedness. It will set the bool to which in_order points to true if so, false if not.

Parameters
fbeginp: The start of the memory region holding the SeqId list. [in]
fendp: The end of the memory region holding the SeqId list. [in]
sis: The SeqId returned by this function. [out]
in_order: If non-null, returns true iff the seqids were in order. [out]

Definition at line 1284 of file seqdbcommon.cpp.

References NStr::eTrunc_Both, head, and NStr::TruncateSpaces().

Referenced by CSeqDBNodeFileIdList::CSeqDBNodeFileIdList(), and SeqDB_ReadSiList().

◆ SeqDB_ReadMemoryTiList()

void SeqDB_ReadMemoryTiList ( const char *  fbeginp,
const char *  fendp,
vector< CSeqDBGiList::STiOid > &  tis,
bool *  in_order = 0 
)

Read a text or binary TI list from an area of memory.

The TIs in a memory region are read into the provided STiOid vector. The TI half of each element of the vector is assigned, but the OID half will be left as -1. If the in_order parameter is not null, the function will test the TIs for orderedness. It will set the bool to which in_order points to true if so, false if not.

Parameters
fbeginp: The start of the memory region holding the TI list. [in]
fendp: The end of the memory region holding the TI list. [in]
tis: The TIs returned by this function. [out]
in_order: If non-null, returns true iff the TIs were in order. [out]

Definition at line 1149 of file seqdbcommon.cpp.

References int, NCBI_THROW, s_ReadDigit(), s_SeqDB_IsBinaryNumericList(), and SeqDB_GetStdOrd().

Referenced by CSeqDBNodeFileIdList::CSeqDBNodeFileIdList(), and SeqDB_ReadTiList().

◆ SeqDB_ReadMixList()

void SeqDB_ReadMixList ( const string &  fname,
vector< CSeqDBGiList::SGiOid > &  gis,
vector< CSeqDBGiList::STiOid > &  tis,
vector< CSeqDBGiList::SSiOid > &  sis,
bool *  in_order 
)

Read a text SeqId list from a file.

The Seqids in a file are read into the provided SSeqIdOid vector. The Gi/Ti/Si half of each element of the vector is assigned, but the OID half will be left as -1. If the in_order parameter is not null, the function will test the SeqIds for orderedness. It will set the bool to which in_order points to true if so, false if not.

Parameters
fname: The name of the SeqId list file. [in]
tis: The TIs returned by this function. [out]
sis: The SeqIds returned by this function. [out]
in_order: If non-null, returns true iff the SeqIds were in order. [out]

Definition at line 1428 of file seqdbcommon.cpp.

References CMemoryFile::GetPtr(), CMemoryFile::GetSize(), SeqDB_MakeOSPath(), and SeqDB_ReadMemoryMixList().

Referenced by CSeqDBFileGiList::CSeqDBFileGiList().

◆ SeqDB_ReadPigList()

void SeqDB_ReadPigList ( const string &  fname,
vector< CSeqDBGiList::SPigOid > &  pigs,
bool *  in_order = 0 
)

◆ SeqDB_ReadSiList()

void SeqDB_ReadSiList ( const string &  fname,
vector< CSeqDBGiList::SSiOid > &  sis,
bool *  in_order,
SBlastSeqIdListInfo &  db_info 
)

Read a text SeqId list from a file.

The Seqids in a file are read into the provided SSeqIdOid vector. The SeqId half of each element of the vector is assigned, but the OID half will be left as -1. If the in_order parameter is not null, the function will test the SeqIds for orderedness. It will set the bool to which in_order points to true if so, false if not.

Parameters
fname: The name of the SeqId list file. [in]
gis: The GIs returned by this function. [out]
sis: The SeqIds returned by this function. [out]
in_order: If non-null, returns true iff the SeqIds were in order. [out]

Definition at line 1476 of file seqdbcommon.cpp.

References CMemoryFile::GetPtr(), CBlastSeqidlistFile::GetSeqidlist(), CMemoryFile::GetSize(), SeqDB_MakeOSPath(), and SeqDB_ReadMemorySiList().

Referenced by CSeqDBFileGiList::CSeqDBFileGiList().

◆ SeqDB_ReadTiList()

void SeqDB_ReadTiList ( const string &  fname,
vector< CSeqDBGiList::STiOid > &  tis,
bool *  in_order = 0 
)

Read a text or binary TI list from a file.

The TIs in a file are read into the provided STiOid vector. The TI half of each element of the vector is assigned, but the OID half will be left as -1. If the in_order parameter is not null, the function will test the TIs for orderedness. It will set the bool to which in_order points to true if so, false if not.

Parameters
fname: The name of the TI list file. [in]
tis: The TIs returned by this function. [out]
in_order: If non-null, returns true iff the TIs were in order. [out]

Definition at line 1417 of file seqdbcommon.cpp.

References CMemoryFile::GetPtr(), CMemoryFile::GetSize(), SeqDB_MakeOSPath(), and SeqDB_ReadMemoryTiList().

Referenced by CSeqDBFileGiList::CSeqDBFileGiList().

◆ SeqDB_ResolveDbPath()

string SeqDB_ResolveDbPath ( const string &  filename)

Resolve a file path using SeqDB's path algorithms.

This finds a file using the same algorithm used by SeqDB to find blast database filenames. The filename must include the extension if any. Paths which start with '/', '\', or a drive letter (depending on operating system) will be treated as absolute paths. If the file is not found an empty string will be returned.

Parameters
filename: Name of file to find.
Returns
Resolved path or empty string if not found.

Definition at line 453 of file seqdbcommon.cpp.

References s_SeqDB_FindBlastDBPath().

Referenced by CIndexedDb_New::AddIndexInfo(), BOOST_AUTO_TEST_CASE(), CIgAnnotationInfo::CIgAnnotationInfo(), CIndexedDb_Old::CIndexedDb_Old(), CTaxDBFileInfo::CTaxDBFileInfo(), CTaxonomy4BlastSQLite::CTaxonomy4BlastSQLite(), CBlastDatabaseArgs::ExtractAlgorithmOptions(), CIgBlastArgs::ExtractAlgorithmOptions(), CBlastSeqidlistFile::GetSeqidlistInfo(), s_GetTaxIDList(), CBlastTabularInfo::x_CheckTaxDB(), CCmdLineBlastXML2ReportData::x_InitCommon(), CSeqDBGiMask::x_Open(), CVDBAliasNode::x_ResolveDBList(), and CVDBAliasNode::x_ResolveVDBList().

◆ SeqDB_ResolveDbPathForLinkoutDB()

string SeqDB_ResolveDbPathForLinkoutDB ( const string &  filename)

Resolve a file path using SeqDB's path algorithms.

Identical to SeqDB_ResolveDbPathNoExtension with the exception that this function searches for ISAM or SQLite files, specifically those storing numeric and string data (for LinkoutDB; i.e.: '.sqlite3'). This is intended to check whether the files used in LinkoutDB exist or not.

Parameters
filename: Name of file to find.
Returns
Resolved path or empty string if not found.

Definition at line 472 of file seqdbcommon.cpp.

References CSeqDBAtlas::GenerateSearchPath(), and s_SeqDB_TryPaths().

◆ SeqDB_ResolveDbPathNoExtension()

string SeqDB_ResolveDbPathNoExtension ( const string &  filename,
char  dbtype = '-' 
)

Resolve a file path using SeqDB's path algorithms.

Identical to SeqDB_ResolveDbPath with the exception that this function does not require the extension to be provided. This is intended to check whether a BLAST DB exists or not.

Parameters
filename: Name of file to find.
dbtype: Determines whether the BLAST DB is protein ('p'), nucleotide ('n'), or whether the algorithm should guess it ('-')
Returns
Resolved path or empty string if not found.

Definition at line 464 of file seqdbcommon.cpp.

References s_SeqDB_FindBlastDBPath().

Referenced by CBlastDBCmdApp::Run(), s_DoesBlastDbExist(), and CVDBAliasNode::x_ResolveDBList().

◆ SeqDB_SequenceHash() [1/2]

unsigned SeqDB_SequenceHash ( const CBioseq &  bs)

Sequence Hashing For a CBioseq.

This computes a hash of a sequence expressed as a CBioseq.

Parameters
sequence: The sequence. [in]
Returns
The 32 bit hash value.

Sequence Hashing For a CBioseq.

Parameters
bs: The Bioseq containing the sequence.
Returns
The hash value of the sequence data.

Definition at line 156 of file seqdbobj.cpp.

References SeqDB_ComputeSequenceHash().

◆ SeqDB_SequenceHash() [2/2]

unsigned SeqDB_SequenceHash ( const char *  sequence,
int  length 
)

Sequence Hashing.

Substring version of the above. This returns the part of a file Sequence Hashing

This computes a hash of a sequence. The sequence is expected to be in either ncbistdaa format (for protein) or ncbi8na format (for nucleotide). These formats are produced by CSeqDB::GetAmbigSeq() if the kSeqDBNuclNcbiNA8 encoding is selected.

Parameters
sequence: A pointer to the sequence data. [in]
length: The length of the sequence in bases. [in]
Returns
The 32 bit hash value.

Sequence Hashing.

Parameters
sequence: A sequence in ncbi8na format.
length: Length of the sequence in bases (== bytes).
Returns
The hash value of the sequence data.

Definition at line 146 of file seqdbobj.cpp.

References SeqDB_ComputeSequenceHash().

Referenced by BOOST_AUTO_TEST_CASE(), CSeqDBImpl::GetSequenceHash(), and CWriteDB_Impl::x_ComputeHash().

◆ SeqDB_SimplifyAccession() [1/2]

const string SeqDB_SimplifyAccession ( const string &  acc)

String id simplification.

This simpler version will convert string id to the standard ISAM form, and return "" if the conversion fails.

Parameters
acc: The string to look up. [in]
Returns
The resulting converted id.

Definition at line 2610 of file seqdbcommon.cpp.

References eStringId, result, and SeqDB_SimplifyAccession().

◆ SeqDB_SimplifyAccession() [2/2]

ESeqDBIdType SeqDB_SimplifyAccession ( const string &  acc,
Int8 &  num_id,
string &  str_id,
bool &  simpler 
)

String id simplification.

This routine tries to produce a numerical type from a string identifier. SeqDB can use faster lookup mechanisms if a PIG, GI, or OID type can be recognized in the string, for example. Even when the output is a string, it may be better formed for the purpose of lookup in the string ISAM file.

Parameters
acc: The string to look up. [in]
num_id: The returned identifier, if numeric. [out]
str_id: The returned identifier, if a string. [out]
simpler: Whether an adjustment was done at all. [out]
Returns
The resulting identifier type.

Definition at line 2535 of file seqdbcommon.cpp.

References CSeq_id::BestRank(), NStr::EqualNocase(), eStringId, NStr::fConvErr_NoThrow, FindBestChoice(), NULL, CSeq_id::ParseFastaIds(), result, s_SeqDB_ParseSeqIDs(), SeqDB_SimplifySeqid(), NStr::SplitInTwo(), and NStr::ToLower().

Referenced by CSeqDBVol::AccessionToOids(), CInputGiList::AppendSi(), BOOST_AUTO_TEST_CASE(), CSeqDBGiList::PreprocessIdsForISAMSiLookup(), CSeqDBNegativeList::PreprocessIdsForISAMSiLookup(), SeqDB_ReadMemoryMixList(), and SeqDB_SimplifyAccession().

◆ SeqDB_SimplifySeqid()

ESeqDBIdType SeqDB_SimplifySeqid ( CSeq_id &  bestid,
const string *  acc,
Int8 &  num_id,
string &  str_id,
bool &  simpler 
)

Seq-id simplification.

Given a Seq-id, this routine devolves it to a GI or PIG if possible. If not, it formats the Seq-id into a canonical form for lookup in the string ISAM files. If the Seq-id was parsed from an accession, it can be provided in the "acc" parameter, and it will be used if the Seq-id is not in a form this code can recognize. In the case that new Seq-id types are added, support for which has not been added to this code, this mechanism will try to use the original string.

Parameters
bestid: The Seq-id to look up. [in]
acc: The original string the Seq-id was created from (or NULL). [in]
num_id: The returned identifier, if numeric. [out]
str_id: The returned identifier, if a string. [out]
simpler: Whether an adjustment was done at all. [out]
Returns
The resulting identifier type.

Definition at line 2264 of file seqdbcommon.cpp.

References CSeq_id::AsFastaString(), CTextseq_id_Base::CanGetAccession(), CDbtag_Base::CanGetDb(), CTextseq_id_Base::CanGetName(), CDbtag_Base::CanGetTag(), CTextseq_id_Base::CanGetVersion(), NStr::CompareNocase(), CSeq_id_Base::e_Ddbj, CSeq_id_Base::e_Embl, CSeq_id_Base::e_Genbank, CSeq_id_Base::e_General, CSeq_id_Base::e_Gi, CSeq_id_Base::e_Gibbsq, CSeq_id_Base::e_Gpipe, CSeq_id_Base::e_Local, CSeq_id_Base::e_Other, CSeq_id_Base::e_Pir, CSeq_id_Base::e_Prf, CSeq_id_Base::e_Swissprot, CSeq_id_Base::e_Tpd, CSeq_id_Base::e_Tpe, CSeq_id_Base::e_Tpg, CSeq_id::eFasta, eGiId, eOID, ePigId, eStringId, eTiId, CSeq_id::fLabel_GeneralDbIsContent, CSeq_id::fLabel_Version, CTextseq_id_Base::GetAccession(), CDbtag_Base::GetDb(), CSeq_id_Base::GetGeneral(), CSeq_id_Base::GetGi(), CSeq_id_Base::GetGibbsq(), CObject_id_Base::GetId(), CSeq_id::GetLabel(), CSeq_id_Base::GetLocal(), CTextseq_id_Base::GetName(), CObject_id_Base::GetStr(), CDbtag_Base::GetTag(), CSeq_id::GetTextseq_Id(), CTextseq_id_Base::GetVersion(), GI_TO, NStr::IntToString(), CObject_id_Base::IsStr(), result, NStr::StringToInt8(), NStr::ToLower(), NStr::UIntToString(), and CSeq_id_Base::Which().

Referenced by CSeqDBGiList::FindId(), CSeqDBNegativeList::FindId(), SeqDB_SimplifyAccession(), CSeqDBVol::SeqidToOids(), and CBlastDB_BioseqFormatter::Write().

◆ SeqDB_SplitQuoted()

void SeqDB_SplitQuoted ( const string &  dbname,
vector< CTempString > &  dbs,
bool  keep_quote = false 
)

Split a (possibly) quoted list of database names into pieces.

SeqDB permits multiple databases to be opened by a single CSeqDB instance, by passing the database names as a space-delimited list to the CSeqDB constructor. To support paths and filenames with embedded spaces, surround any space-containing names with double quotes ('"'). Filenames not containing spaces may be quoted safely with no effect. (This solution prevents the use of names containing embedded double quotes.)

This method splits a string encoded in this way into individual database names. Note that the resulting vector's objects are CTempString "slice" objects, and are only valid while the original (encoded) string is unchanged.

Parameters
dbname: Combined database name.
dbs: Database names to combine.

Definition at line 1744 of file seqdbcommon.cpp.

References dbname(), ITERATE, and tmp.

◆ USING_SCOPE()

USING_SCOPE ( objects  )

Include definitions from the objects namespace.

Variable Documentation

◆ kSeqDBEntryDuplicate

const blastdb::TOid kSeqDBEntryDuplicate = -2

Definition at line 126 of file seqdbcommon.hpp.

◆ kSeqDBEntryNotFound

const blastdb::TOid kSeqDBEntryNotFound = -1

◆ kSeqDBGroupAliasFileName

const string kSeqDBGroupAliasFileName
extern

The name of the group alias file expected in each directory. For more documentation, see "Group Alias Files" in source/src/objtools/blast/seqdb_reader/alias_files.txt.

Referenced by CWriteDB_ConsolidateAliasFiles(), and CSeqDBAliasSets::x_DbToIndexName().

◆ kSeqDBNuclBlastNA8

const int kSeqDBNuclBlastNA8 = 1

Used to request ambiguities in BLAST/NA8 format.

Definition at line 123 of file seqdbcommon.hpp.

Referenced by BOOST_AUTO_TEST_CASE(), CSeqDBVol::GetAmbigPartialSeq(), and CSeqDBVol::x_GetAmbigSeq().

◆ kSeqDBNuclNcbiNA8

const int kSeqDBNuclNcbiNA8 = 0
Modified on Fri Sep 20 14:57:40 2024 by modify_doxy.py rev. 669887