NCBI C++ ToolKit
|
Search Toolkit Book for CSeqDBAliasFile
CSeqDBAliasFile class. More...
#include "seqdbalias.hpp"
(Private to src/objtools/blast/seqdb_reader.)
Public Member Functions | |
CSeqDBAliasFile (CSeqDBAtlas &atlas, const string &name_list, char prot_nucl, bool expand_links=true) | |
Constructor. More... | |
const vector< string > & | GetVolumeNames () const |
Get the list of volume names. More... | |
void | FindVolumePaths (vector< string > &vols, vector< string > *alias, bool recursive) const |
Find the base names of volumes. More... | |
string | GetTitle (const CSeqDBVolSet &volset) const |
Get the title. More... | |
Int4 | GetMinLength (const CSeqDBVolSet &volset) const |
Get the shortest sequence length. More... | |
Int8 | GetNumSeqs (const CSeqDBVolSet &volset) const |
Get the number of sequences available. More... | |
Int8 | GetNumSeqsStats (const CSeqDBVolSet &volset) const |
Get the number of sequences available, for statistics purposes. More... | |
Int8 | GetNumOIDs (const CSeqDBVolSet &volset) const |
Get the size of the OID range. More... | |
Uint8 | GetTotalLength (const CSeqDBVolSet &volset) const |
Get the total length of the set of databases. More... | |
Uint8 | GetTotalLengthStats (const CSeqDBVolSet &volset) const |
Get the total length of the set of databases, for statistics purposes. More... | |
Uint8 | GetVolumeLength (const CSeqDBVolSet &volset) const |
Get the sum of the volume lengths. More... | |
int | GetMembBit (const CSeqDBVolSet &volset) const |
Get the membership bit. More... | |
bool | NeedTotalsScan (const CSeqDBVolSet &volset) const |
Check whether a db scan is needed to compute correct totals. More... | |
bool | HasFilters () |
Check if any volume filtering exists. More... | |
CRef< CSeqDB_FilterTree > | GetFilterTree () |
Get filtering tree for all volumes. More... | |
void | GetAliasFileValues (TAliasFileValues &afv, const CSeqDBVolSet &volset) |
Get Name/Value Data From Alias Files. More... | |
bool | HasGiMask () const |
Is the top node alias file associated with Gi based masks? More... | |
void | GetMaskList (vector< string > &mask_list) |
Get Gi-based Mask Names From Alias Files. More... | |
void | DebugDump (CDebugDumpContext ddc, unsigned int depth) const |
Dump debug information for this object. More... | |
int | GetOidMaskType (const CSeqDBVolSet &volset) const |
Get the Oid Mask Type. More... | |
![]() | |
CObject (void) | |
Constructor. More... | |
CObject (const CObject &src) | |
Copy constructor. More... | |
virtual | ~CObject (void) |
Destructor. More... | |
CObject & | operator= (const CObject &src) THROWS_NONE |
Assignment operator. More... | |
bool | CanBeDeleted (void) const THROWS_NONE |
Check if object can be deleted. More... | |
bool | IsAllocatedInPool (void) const THROWS_NONE |
Check if object is allocated in memory pool (not system heap). More... | |
bool | Referenced (void) const THROWS_NONE |
Check if object is referenced. More... | |
bool | ReferencedOnlyOnce (void) const THROWS_NONE |
Check if object is referenced only once. More... | |
void | AddReference (void) const |
Add reference to object. More... | |
void | RemoveReference (void) const |
Remove reference to object. More... | |
void | ReleaseReference (void) const |
Remove reference without deleting object. More... | |
virtual void | DoNotDeleteThisObject (void) |
Mark this object as not allocated in heap – do not delete this object. More... | |
virtual void | DoDeleteThisObject (void) |
Mark this object as allocated in heap – object can be deleted. More... | |
void * | operator new (size_t size) |
Define new operator for memory allocation. More... | |
void * | operator new[] (size_t size) |
Define new[] operator for 'array' memory allocation. More... | |
void | operator delete (void *ptr) |
Define delete operator for memory deallocation. More... | |
void | operator delete[] (void *ptr) |
Define delete[] operator for memory deallocation. More... | |
void * | operator new (size_t size, void *place) |
Define new operator. More... | |
void | operator delete (void *ptr, void *place) |
Define delete operator. More... | |
void * | operator new (size_t size, CObjectMemoryPool *place) |
Define new operator using memory pool. More... | |
void | operator delete (void *ptr, CObjectMemoryPool *place) |
Define delete operator. More... | |
![]() | |
CDebugDumpable (void) | |
virtual | ~CDebugDumpable (void) |
void | DebugDumpText (ostream &out, const string &bundle, unsigned int depth) const |
void | DebugDumpFormat (CDebugDumpFormatter &ddf, const string &bundle, unsigned int depth) const |
void | DumpToConsole (void) const |
Private Types | |
typedef TSeqDBAliasFileValues | TAliasFileValues |
Import type to allow shorter name. More... | |
Private Member Functions | |
void | x_ComputeMasks () |
Compute filtering options for all volumes. More... | |
CSeqDBAliasFile & | operator= (const CSeqDBAliasFile &) |
Disable copy operator. More... | |
CSeqDBAliasFile (const CSeqDBAliasFile &) | |
Disable copy constructor. More... | |
Private Attributes | |
CSeqDBAliasSets | m_AliasSets |
Combined alias files. More... | |
CRef< CSeqDBAliasNode > | m_Node |
This is the alias node tree's "artificial" topmost node, which aggregates the user provided database names. More... | |
vector< string > | m_VolumeNames |
The cached output of the topmost node's FindVolumePaths(recursive). More... | |
vector< string > | m_AliasNames |
The cached alias names output of the topmost node's FindVolumePaths(recursive). More... | |
bool | m_IsProtein |
True if this is a protein database. More... | |
Int4 | m_MinLength |
Shortest sequence length. More... | |
Int8 | m_NumSeqs |
Number of sequences. More... | |
int | m_NumSeqsStats |
Number of sequences for statistics purposes. More... | |
Int8 | m_NumOIDs |
Number of OIDs. More... | |
Int8 | m_TotalLength |
Total length. More... | |
Int8 | m_TotalLengthStats |
Total length for statistics purposes. More... | |
Int8 | m_VolumeLength |
Total length ignoring filtering. More... | |
int | m_MembBit |
Membership bit. More... | |
bool | m_HasTitle |
True if we have the database title. More... | |
string | m_Title |
Database title. More... | |
int | m_NeedTotalsScan |
1 if we need a totals scan, 0 if not, -1 if not known. More... | |
CRef< CSeqDB_FilterTree > | m_TopTree |
Filter tree representing all alias file filtering. More... | |
bool | m_HasFilters |
Are there filters for this database? More... | |
int | m_OidMaskType |
Oid Mask Type. More... | |
Additional Inherited Members | |
![]() | |
enum | EAllocFillMode { eAllocFillNone = 1 , eAllocFillZero , eAllocFillPattern } |
Control filling of newly allocated memory. More... | |
typedef CObjectCounterLocker | TLockerType |
Default locker type for CRef. More... | |
typedef atomic< Uint8 > | TCounter |
Counter type is CAtomicCounter. More... | |
typedef Uint8 | TCount |
Alias for value type of counter. More... | |
![]() | |
static NCBI_XNCBI_EXPORT void | ThrowNullPointerException (void) |
Define method to throw null pointer exception. More... | |
static NCBI_XNCBI_EXPORT void | ThrowNullPointerException (const type_info &type) |
static EAllocFillMode | GetAllocFillMode (void) |
static void | SetAllocFillMode (EAllocFillMode mode) |
static void | SetAllocFillMode (const string &value) |
Set mode from configuration parameter value. More... | |
![]() | |
static void | EnableDebugDump (bool on) |
![]() | |
static const TCount | eCounterBitsCanBeDeleted = 1 << 0 |
Define possible object states. More... | |
static const TCount | eCounterBitsInPlainHeap = 1 << 1 |
Heap signature was found. More... | |
static const TCount | eCounterBitsPlaceMask |
Mask for 'in heap' state flags. More... | |
static const int | eCounterStep = 1 << 2 |
Skip over the "in heap" bits. More... | |
static const TCount | eCounterValid = TCount(1) << (sizeof(TCount) * 8 - 2) |
Minimal value for valid objects (reference counter is zero). Must be a single bit value. More... | |
static const TCount | eCounterStateMask |
Valid object, and object in heap. More... | |
![]() | |
virtual void | DeleteThis (void) |
Virtual method "deleting" this object. More... | |
CSeqDBAliasFile class.
This class is an interface to the alias node tree. It provides functionality to classes like CSeqDBImpl (and others) that do not need to understand alias walkers, nodes, and tree traversal.
Definition at line 982 of file seqdbalias.hpp.
|
private |
Import type to allow shorter name.
Definition at line 984 of file seqdbalias.hpp.
CSeqDBAliasFile::CSeqDBAliasFile | ( | CSeqDBAtlas & | atlas, |
const string & | name_list, | ||
char | prot_nucl, | ||
bool | expand_links = true |
||
) |
Constructor.
This builds a tree of CSeqDBAliasNode objects from a space-separated list of database names. Every database instance has at least one node, because the topmost node is an "artificial" node, which serves only to aggregate the list of databases specified to the constructor. The tree is constructed in a depth-first manner, and will be complete upon return from this constructor.
atlas | The SeqDB memory management layer. |
name_list | A space-separated list of database names. |
prot_nucl | Indicates whether the database is protein or nucleotide. |
expand_links | Indicates whether the soft links should be expanded. |
Definition at line 55 of file seqdbalias.cpp.
References CSeqDBAliasNode::FindVolumePaths(), m_AliasNames, m_AliasSets, m_Node, m_VolumeNames, and CRef< C, Locker >::Reset().
|
private |
Disable copy constructor.
|
virtual |
Dump debug information for this object.
Reimplemented from CObject.
Definition at line 1916 of file seqdbalias.cpp.
References CObject::DebugDump(), depth, i, CDebugDumpContext::Log(), m_AliasNames, m_HasFilters, m_HasTitle, m_IsProtein, m_MembBit, m_MinLength, m_NeedTotalsScan, m_NumOIDs, m_NumSeqs, m_NumSeqsStats, m_Title, m_TotalLength, m_TotalLengthStats, m_VolumeLength, m_VolumeNames, CDebugDumpContext::SetFrame(), and NStr::SizetToString().
|
inline |
Find the base names of volumes.
This method populates the vector with volume names.
vols | The vector to be populated with volume names |
recursive | If true, vols will include all volume names within the alias node tree. Otherwise, only the top-node volume names are included. |
Definition at line 1032 of file seqdbalias.hpp.
Referenced by CSeqDBImpl::FindVolumePaths().
void CSeqDBAliasFile::GetAliasFileValues | ( | TAliasFileValues & | afv, |
const CSeqDBVolSet & | volset | ||
) |
Get Name/Value Data From Alias Files.
SeqDB treats each alias file as a map from a variable name to a value. This method will return a map from the basename of the filename of each alias file, to a mapping from variable name to value for each entry in that file. For example, the value of the "DBLIST" entry in the "wgs.nal" file would be values["wgs"]["DBLIST"]. The lines returned have been processed somewhat by SeqDB, including normalizing tabs to whitespace, trimming leading and trailing whitespace, and removal of comments and other non-value lines. Care should be taken when using the values returned by this method. SeqDB uses an internal "virtual" alias file entry to aggregate the values passed into SeqDB by the user. This mapping uses a filename of "-" and contains a single entry mapping "DBLIST" to SeqDB's database name input. This entry is the root of the alias file inclusion tree. Also note that alias files that appear in several places in the alias file inclusion tree only have one entry in the returned map (and are only parsed once by SeqDB).
afv | The alias file values will be returned here. |
volset | The set of database volumes |
Definition at line 1758 of file seqdbalias.cpp.
References CSeqDBAliasNode::CompleteAliasFileValues(), map_checker< Container >::end(), map_checker< Container >::find(), CSeqDBAliasNode::GetAliasFileValues(), CSeqDBVolSet::GetNumVols(), CSeqDBVol::GetTitle(), CSeqDBVolSet::GetVol(), CSeqDBVol::GetVolName(), i, ncbi::grid::netcache::search::fields::key, m_IsProtein, and m_Node.
Referenced by CSeqDBImpl::GetAliasFileValues().
CRef< CSeqDB_FilterTree > CSeqDBAliasFile::GetFilterTree | ( | ) |
Get filtering tree for all volumes.
This method applies the filtering options found in the alias node tree to all associated volumes (iterating over the tree recursively). The virtual OID lists are not built as a result of this process, but the data necessary for virtual OID construction is copied to the volume objects.
Definition at line 1903 of file seqdbalias.cpp.
References CSeqDBAliasNode::BuildFilterTree(), CRef< C, Locker >::Empty(), m_Node, m_TopTree, CRef< C, Locker >::Reset(), and x_ComputeMasks().
Referenced by CSeqDBImpl::x_GetOidList().
|
inline |
Get Gi-based Mask Names From Alias Files.
This will return the MASKLIST field of the top alias node.
mask_list | The mask names will be returned here. |
Definition at line 1250 of file seqdbalias.hpp.
Referenced by CSeqDBImpl::CSeqDBImpl().
int CSeqDBAliasFile::GetMembBit | ( | const CSeqDBVolSet & | volset | ) | const |
Get the membership bit.
This iterates the alias node tree to find the membership bit, if there is one. If more than one alias node provides a membership bit, only one will be used. This value can only be found in alias files (volumes do not have a single internal membership bit).
volset | The set of database volumes |
Definition at line 1788 of file seqdbalias.cpp.
References CSeqDBAliasNode::GetMembBit(), m_MembBit, and m_Node.
Int4 CSeqDBAliasFile::GetMinLength | ( | const CSeqDBVolSet & | volset | ) | const |
Get the shortest sequence length.
This iterates this node and possibly subnodes of it to compute the shortest sequence length.
volset | The set of database volumes |
Definition at line 1807 of file seqdbalias.cpp.
References CSeqDBAliasNode::GetMinLength(), m_MinLength, and m_Node.
Referenced by CSeqDBImpl::x_GetMinLength().
Int8 CSeqDBAliasFile::GetNumOIDs | ( | const CSeqDBVolSet & | volset | ) | const |
Get the size of the OID range.
This iterates the alias node tree to compute the number of sequences in all volumes as encountered in traversal. Alias files cannot override this value. Filtering does not affect this value.
volset | The set of database volumes |
Definition at line 1833 of file seqdbalias.cpp.
References CSeqDBAliasNode::GetNumOIDs(), m_Node, and m_NumOIDs.
Referenced by CSeqDBImpl::x_GetNumOIDs().
Int8 CSeqDBAliasFile::GetNumSeqs | ( | const CSeqDBVolSet & | volset | ) | const |
Get the number of sequences available.
This iterates the alias node tree to compute the number of sequences available here. Alias files may override this value (stopping traversal at that depth). It is normally used to provide information on how many OIDs exist after filtering has been applied.
volset | The set of database volumes |
Definition at line 1815 of file seqdbalias.cpp.
References CSeqDBAliasNode::GetNumSeqs(), m_Node, and m_NumSeqs.
Referenced by CSeqDBImpl::x_GetNumSeqs().
Int8 CSeqDBAliasFile::GetNumSeqsStats | ( | const CSeqDBVolSet & | volset | ) | const |
Get the number of sequences available.
This iterates the alias node tree to compute the number of sequences available here. Alias files may override this value (stopping traversal at that depth). It is normally used to provide information on how many OIDs exist after filtering has been applied. This is like GetNumSeqs, but uses STATS_NSEQ.
volset | The set of database volumes |
Definition at line 1824 of file seqdbalias.cpp.
References CSeqDBAliasNode::GetNumSeqsStats(), int, m_Node, and m_NumSeqsStats.
Referenced by CSeqDBImpl::x_GetNumSeqsStats().
int CSeqDBAliasFile::GetOidMaskType | ( | const CSeqDBVolSet & | volset | ) | const |
Get the Oid Mask Type.
This iterates the alias node tree to find the oid mask type, if there is one.
volset | The set of database volumes |
Definition at line 1895 of file seqdbalias.cpp.
References CSeqDBAliasNode::GetOidMaskType(), m_Node, and m_OidMaskType.
string CSeqDBAliasFile::GetTitle | ( | const CSeqDBVolSet & | volset | ) | const |
Get the title.
This iterates the alias node tree to build and return a title string. Alias files may override this value (stopping traversal at that depth).
volset | The set of database volumes |
Definition at line 1799 of file seqdbalias.cpp.
References CSeqDBAliasNode::GetTitle(), m_HasTitle, m_Node, and m_Title.
Referenced by CSeqDBImpl::GetTitle().
Uint8 CSeqDBAliasFile::GetTotalLength | ( | const CSeqDBVolSet & | volset | ) | const |
Get the total length of the set of databases.
This iterates the alias node tree to compute the total length of all sequences in all volumes included in the database. This may count volumes several times (depending on alias tree structure). Alias files can override this value (stopping traversal at that depth). It is normally used to describe the amount of sequence data remaining after filtering has been applied.
volset | The set of database volumes |
Definition at line 1842 of file seqdbalias.cpp.
References CSeqDBAliasNode::GetTotalLength(), m_Node, and m_TotalLength.
Referenced by CSeqDBImpl::x_GetTotalLength().
Uint8 CSeqDBAliasFile::GetTotalLengthStats | ( | const CSeqDBVolSet & | volset | ) | const |
Get the total length of the set of databases.
This iterates the alias node tree to compute the total length of all sequences in all volumes included in the database. This may count volumes several times (depending on alias tree structure). Alias files can override this value (stopping traversal at that depth). It is normally used to describe the amount of sequence data remaining after filtering has been applied. This is like GetTotalLength but uses STATS_TOTLEN.
volset | The set of database volumes |
Definition at line 1851 of file seqdbalias.cpp.
References CSeqDBAliasNode::GetTotalLengthStats(), m_Node, and m_TotalLengthStats.
Referenced by CSeqDBImpl::x_GetTotalLengthStats().
Uint8 CSeqDBAliasFile::GetVolumeLength | ( | const CSeqDBVolSet & | volset | ) | const |
Get the sum of the volume lengths.
This iterates the alias node tree to compute the total length of all sequences in all volumes as encountered in traversal. This may count volumes several times (depending on alias tree structure). Alias files cannot override this value.
volset | The set of database volumes |
Definition at line 1860 of file seqdbalias.cpp.
References CSeqDBAliasNode::GetVolumeLength(), m_Node, and m_VolumeLength.
Get the list of volume names.
This method returns a reference to the vector of volume names. The vector will contain all volume names mentioned in any of the DBLIST lines in the hierarchy of the alias node tree. The volume names do not include an extension (such as .pin or .nin).
Definition at line 1019 of file seqdbalias.hpp.
|
inline |
Check if any volume filtering exists.
This method computes and caches the sequence filtering for this node and any subnodes, and returns true if any filtering exists. Subsequent calls will just return the cached value.
Definition at line 1188 of file seqdbalias.hpp.
Referenced by CSeqDBImpl::CSeqDBImpl().
|
inline |
Is the top node alias file associated with Gi based masks?
This will return true if the MASKLIST field of the top alias node is set.
Definition at line 1239 of file seqdbalias.hpp.
bool CSeqDBAliasFile::NeedTotalsScan | ( | const CSeqDBVolSet & | volset | ) | const |
Check whether a db scan is needed to compute correct totals.
This traverses this node and its subnodes to determine whether accurate estimation of the total number of sequences and bases requires a linear time scan of the index files.
volset | The set of database volumes. |
Definition at line 1869 of file seqdbalias.cpp.
References m_NeedTotalsScan, m_Node, and CSeqDBAliasNode::NeedTotalsScan().
Referenced by CSeqDBImpl::CSeqDBImpl().
|
private |
Disable copy operator.
|
inlineprivate |
Compute filtering options for all volumes.
This method applies the filtering options found in the alias node tree to all associated volumes (iterating over the tree recursively). The virtual OID lists are not built as a result of this process, but the data necessary for virtual OID construction is copied to the volume objects.
Definition at line 1278 of file seqdbalias.hpp.
Referenced by GetFilterTree().
|
private |
The cached alias names output of the topmost node's FindVolumePaths(recursive).
Definition at line 1294 of file seqdbalias.hpp.
Referenced by CSeqDBAliasFile(), and DebugDump().
|
private |
Combined alias files.
Definition at line 1284 of file seqdbalias.hpp.
Referenced by CSeqDBAliasFile().
|
private |
Are there filters for this database?
Definition at line 1336 of file seqdbalias.hpp.
Referenced by DebugDump().
|
mutableprivate |
True if we have the database title.
Definition at line 1324 of file seqdbalias.hpp.
Referenced by DebugDump(), and GetTitle().
|
private |
True if this is a protein database.
Definition at line 1297 of file seqdbalias.hpp.
Referenced by DebugDump(), and GetAliasFileValues().
|
mutableprivate |
Membership bit.
Definition at line 1321 of file seqdbalias.hpp.
Referenced by DebugDump(), and GetMembBit().
|
mutableprivate |
Shortest sequence length.
Definition at line 1300 of file seqdbalias.hpp.
Referenced by DebugDump(), and GetMinLength().
|
mutableprivate |
1 if we need a totals scan, 0 if not, -1 if not known.
Definition at line 1330 of file seqdbalias.hpp.
Referenced by DebugDump(), and NeedTotalsScan().
|
private |
This is the alias node tree's "artificial" topmost node, which aggregates the user provided database names.
Definition at line 1288 of file seqdbalias.hpp.
Referenced by CSeqDBAliasFile(), GetAliasFileValues(), GetFilterTree(), GetMembBit(), GetMinLength(), GetNumOIDs(), GetNumSeqs(), GetNumSeqsStats(), GetOidMaskType(), GetTitle(), GetTotalLength(), GetTotalLengthStats(), GetVolumeLength(), and NeedTotalsScan().
|
mutableprivate |
Number of OIDs.
Definition at line 1309 of file seqdbalias.hpp.
Referenced by DebugDump(), and GetNumOIDs().
|
mutableprivate |
Number of sequences.
Definition at line 1303 of file seqdbalias.hpp.
Referenced by DebugDump(), and GetNumSeqs().
|
mutableprivate |
Number of sequences for statistics purposes.
Definition at line 1306 of file seqdbalias.hpp.
Referenced by DebugDump(), and GetNumSeqsStats().
|
mutableprivate |
|
mutableprivate |
Database title.
Definition at line 1327 of file seqdbalias.hpp.
Referenced by DebugDump(), and GetTitle().
|
private |
Filter tree representing all alias file filtering.
Definition at line 1333 of file seqdbalias.hpp.
Referenced by GetFilterTree().
|
mutableprivate |
Total length.
Definition at line 1312 of file seqdbalias.hpp.
Referenced by DebugDump(), and GetTotalLength().
|
mutableprivate |
Total length for statistics purposes.
Definition at line 1315 of file seqdbalias.hpp.
Referenced by DebugDump(), and GetTotalLengthStats().
|
mutableprivate |
Total length ignoring filtering.
Definition at line 1318 of file seqdbalias.hpp.
Referenced by DebugDump(), and GetVolumeLength().
|
private |
The cached output of the topmost node's FindVolumePaths(recursive).
Definition at line 1291 of file seqdbalias.hpp.
Referenced by CSeqDBAliasFile(), and DebugDump().