55 x_Setup( volset, filters, gi_list, neg_list, locked, lmdb_set);
104 if(
x_ComputeFilters(volset, filters, lmdb_set, f_bits, gi_list, neg_list)) {
134 int vol_end = vol.
OIDEnd();
141 int vols =
static_cast<int>(ft->
GetVolumes().size());
156 for(
int j = 1; j < vols; j++) {
230 switch(
mask.GetType()) {
280 CFile check_file(path_str);
281 if (!check_file.
Exists()) {
353 if(oids_tax.size()) {
355 for(
unsigned int k = 0; k < oids_tax.size(); k++) {
357 taxlist_oids->
SetBit(oids_tax[k]);
371 for(
unsigned int i=0;
i < excluded_oids.size();
i++) {
396 for(
int oid = 0; oid <
max; oid++) {
421 for(
int i = 0;
i < num_gis;
i++) {
424 if (oid != prev_oid) {
425 if ((oid >= oid_start) && (oid < oid_end)) {
432 for(
int i = 0;
i < num_tis;
i++) {
435 if (oid != prev_oid) {
436 if ((oid >= oid_start) && (oid < oid_end)) {
443 for(
int i = 0;
i < num_sis;
i++) {
446 if (oid != prev_oid) {
447 if ((oid >= oid_start) && (oid < oid_end)) {
496 bitend = bitmap + (((num_oids + 31) / 32) * 4);
521 vector<const CSeqDBVolEntry * > & excluded_vols,
525 vector<bool> vol_included(num_vol,
false);
526 excluded_vols.clear();
527 for(
unsigned int i=0;
i < num_vol;
i++) {
529 if(std::find(vol_basenames.begin(), vol_basenames.end(), vol->
GetVolName()) != vol_basenames.end()) {
540 for(
unsigned int i = 0;
i < excluded_vols.size();
i++) {
549 void s_AddFilterFile(
string & name,
const string & vn, vector<string> & fnames, vector<vector<string> > & fnames_vols)
552 for(; j < fnames.size(); j++) {
553 if(fnames[j] == name) {
554 fnames_vols[j].push_back(vn);
558 if( fnames.size() == j) {
559 vector<string> p(1,vn);
560 fnames.push_back(name);
561 fnames_vols.push_back(p);
572 if (seq_id1.
Match(seq_id2)) {
579 vector<vector<string> > & fnames_vols,
586 if (fnames.size() == 0) {
589 vector<string> user_accs;
594 vector<string> neg_user_accs;
595 if ((!neg_user_list.
Empty()) && (neg_user_list->
GetNumSis() > 0)) {
596 neg_user_accs = neg_user_list->
GetSiList();
597 sort(neg_user_accs.begin(), neg_user_accs.end());
600 for(
unsigned int k=0; k < fnames.size(); k++) {
601 vector<const CSeqDBVolEntry * > excluded_vols;
602 vector<blastdb::TOid> oids;
607 if(accs.size() == 0){
610 if((user_accs.size() > 0) || (neg_user_accs.size() > 0)){
612 if (user_accs.size() > 0) {
613 vector<string> common;
614 common.resize(accs.size());
615 vector<string>::iterator itr = set_intersection(accs.begin(), accs.end(),
616 user_accs.begin(), user_accs.end(), common.begin(),
s_CompareSeqId);
617 common.resize(itr-common.begin());
618 if(common.size() == 0){
623 if(neg_user_accs.size() > 0) {
624 vector<string> difference;
625 difference.resize(accs.size());
626 vector<string>::iterator itr = set_difference(accs.begin(), accs.end(),
627 neg_user_accs.begin(), neg_user_accs.end(), difference.begin(),
s_CompareSeqId);
628 difference.resize(itr-difference.begin());
629 if(difference.size() == 0){
632 swap(accs, difference);
637 for(
unsigned int i=0;
i < accs.size();
i++) {
641 if(excluded_vols.size() != 0) {
652 vector<vector<string> > & fnames_vols,
659 if (fnames.size() == 0) {
672 for(
unsigned int k=0; k < fnames.size(); k++) {
673 vector<const CSeqDBVolEntry * > excluded_vols;
674 vector<blastdb::TOid> oids;
679 if(taxids.
size() == 0){
682 if(user_taxids.
size() > 0){
683 vector<TTaxId> common;
684 common.resize(taxids.
size());
685 vector<TTaxId>::iterator itr = set_intersection(taxids.
begin(), taxids.
end(),
686 user_taxids.
begin(), user_taxids.
end(), common.begin());
687 common.resize(itr-common.begin());
688 if( common.size() == 0) {
692 taxids.
insert(common.begin(), common.end());
694 if(neg_user_taxids.
size() > 0) {
695 vector<TTaxId> difference;
696 difference.resize(taxids.
size());
697 vector<TTaxId>::iterator itr = set_difference(taxids.
begin(), taxids.
end(),
698 neg_user_taxids.
begin(), neg_user_taxids.
end(), difference.begin());
699 difference.resize(itr-difference.begin());
700 if(difference.size() == 0){
704 taxids.
insert(difference.begin(), difference.end());
708 for(
unsigned int i=0;
i < oids.size();
i++) {
709 if(excluded_vols.size() != 0) {
727 vector<string> seqid_fnames;
728 vector<string> taxid_fnames;
729 vector< vector<string> > seqid_fnames_vols;
730 vector< vector<string> > taxid_fnames_vols;
739 string name = (*itr)->GetPath().GetPathS();
751 if (seqid_fnames.size() > 0) {
753 lmdb_set, volset, filter_bit);
755 if (taxid_fnames.size() > 0) {
757 lmdb_set, volset, filter_bit);
760 return ((seqid_fnames.size() + taxid_fnames.size()) > 0 ?
true:
false);
void SetFrame(const string &frame)
void Log(const string &name, const char *value, CDebugDumpFormatter::EValueType type=CDebugDumpFormatter::eValue, const string &comment=kEmptyStr)
const char * GetFileDataPtr(const string &fname, TIndx offset)
Get a pointer to the specified offset.
void Init(const string &filename)
Initializes a memory map object.
TListRef GetNodeIdList(const CSeqDB_Path &filename, const CSeqDBVol *volp, EGiListType list_type, CSeqDBLockHold &locked)
Get a reference to a named GI list.
int GetNumGis() const
Get the number of GIs in the array.
const SGiOid & GetGiOid(int index) const
Access an element of the array.
int GetNumSis() const
Get the number of Seq-ids in the array.
const SPigOid & GetPigOid(int index) const
int GetNumTis() const
Get the number of TIs in the array.
const SSiOid & GetSiOid(int index) const
Access an element of the array.
void GetSiList(vector< string > &sis) const
TODO Get the seqid list?
const vector< blastdb::TOid > & GetOidsForTaxIdsList()
set< TTaxId > & GetTaxIdsList()
const STiOid & GetTiOid(int index) const
Access an element of the array.
bool Empty() const
Return false if there are elements present.
void AccessionsToOids(const vector< string > &accs, vector< TOid > &oids) const
bool IsBlastDBVersion5() const
void TaxIdsToOids(set< TTaxId > &tax_ids, vector< blastdb::TOid > &rv) const
const vector< string > & GetSiList()
int GetNumTis() const
Get the number of TIs in the array.
bool GetOidStatus(int oid)
Get the inclusion status of an OID.
int GetNumGis() const
Get the number of GIs in the array.
const vector< blastdb::TOid > & GetExcludedOids()
int GetNumSis() const
Get the number of SeqIds in the array.
set< TTaxId > & GetTaxIdsList()
int GetNumOids()
Get the size of the OID array.
CSeqDBOIDList(CSeqDBAtlas &atlas, const CSeqDBVolSet &volumes, CSeqDB_FilterTree &filters, CRef< CSeqDBGiList > &gi_list, CRef< CSeqDBNegativeList > &neg_list, CSeqDBLockHold &locked, const CSeqDBLMDBSet &lmdb_set)
Constructor.
CRef< CSeqDB_BitSet > x_IdsToBitSet(const CSeqDBGiList &ids, int vol_start, int vol_end)
Load an ID (GI or TI) list file into a bitset object.
bool x_IsSet(TOID oid) const
Check if a bit is set.
void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Dump debug information for this object.
void x_Setup(const CSeqDBVolSet &volset, CSeqDB_FilterTree &filters, CRef< CSeqDBGiList > &gi_list, CRef< CSeqDBNegativeList > &neg_list, CSeqDBLockHold &locked, const CSeqDBLMDBSet &lmdb_set)
Build an oid mask in memory.
void x_ClearBitRange(int oid_start, int oid_end)
Clear all bits in a range.
CRef< CSeqDB_BitSet > x_GetOidMask(const CSeqDB_Path &fn, int vol_start, int vol_end)
Load the named OID mask file into a bitset object.
void x_ApplyUserGiList(CSeqDBGiList &gis)
Apply a user GI list to a volume.
void x_ApplyNegativeList(CSeqDBNegativeList &neg, bool is_v5)
Apply a negative user GI list to a volume.
~CSeqDBOIDList()
Destructor.
CSeqDBAtlas & m_Atlas
The memory management layer object.
CRef< CSeqDB_BitSet > x_ComputeFilters(const CSeqDB_FilterTree &ft, const CSeqDBVolEntry &vol, CSeqDBGiListSet &gis, CSeqDBLockHold &locked, bool isBlastDBv5)
Compute the oid mask bitset for a database volume.
const unsigned char TCUC
Shorthand type to clarify code that iterates over memory.
CRef< CSeqDB_BitSet > m_AllBits
An OID bit set covering all volumes.
int m_NumOIDs
The total number of OIDs represented in the bit set.
TIndx GetFileLength() const
Get the length of the file.
TIndx ReadSwapped(CSeqDBFileMemMap &lease, TIndx offset, Uint4 *value) const
Read a four byte numerical object from the file.
bool Open(const CSeqDB_Path &name)
MMap or Open a file.
const char * GetFileDataPtr(CSeqDBFileMemMap &lease, TIndx start, TIndx end) const
Get a pointer to a section of the file.
int OIDStart() const
Get the starting OID in this volume's range.
int OIDEnd() const
Get the ending OID in this volume's range.
CSeqDBVol * Vol()
Get a pointer to the underlying volume object.
const CSeqDBVolEntry * GetVolEntry(int i) const
Find a volume entry by index.
const CSeqDBVol * GetVol(int i) const
Find a volume by index.
int GetNumVols() const
Get the number of volumes.
int GetNumOIDs() const
Get the size of the OID range.
const string & GetVolName() const
Get the volume name.
void SetOidMaskType(int oid_masks) const
void AttachVolumeGiList(CRef< CSeqDBGiList > gilist) const
Filter this volume using the specified GI list.
char GetSeqType() const
Get the sequence type stored in this database.
void SetMemBit(int mbit) const
Set the MEMB_BIT filtering for this volume.
Something else yet again etc.
@ eTaxIdList
Taxonomy Id List.
@ eOidRange
OID Range [start, end).
@ eAllSet
All OIDs are set.
@ eAllClear
All OIDs are clear.
void IntersectWith(CSeqDB_BitSet &other, bool consume)
This bitset is assigned to the intersection of it and another.
void UnionWith(CSeqDB_BitSet &other, bool consume)
This bitset is assigned to the union of it and another.
void Normalize()
If this is a special case bitset, convert it to a normal one.
void ClearBit(size_t index)
Clear the specified bit (to false).
bool CheckOrFindBit(size_t &index) const
Check if a bit is true or find the next bit that is.
void AssignBitRange(size_t start, size_t end, bool value)
Store the provided value in a range of bits.
void SetBit(size_t index)
Set the specified bit (to true).
Tree of nodes describing filtering of database sequences.
bool HasFilter() const
Check whether this tree represents any volume filtering.
const vector< CRef< CSeqDB_FilterTree > > & GetNodes() const
Get child nodes attached to this node.
vector< CRef< CSeqDB_AliasMask > > TFilters
Type used to store lists of filters found here.
const TFilters & GetFilters() const
Get filters from this node.
const vector< CSeqDB_BasePath > & GetVolumes() const
Get volumes attached to this node.
CRef< CSeqDB_FilterTree > Specialize(string volname) const
Specialize this tree for the indicated volume.
const string & GetPathS() const
Get the path as a string.
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator end() const
The NCBI C++ standard methods for dealing with std::string.
static unsigned char depth[2 *(256+1+29)+1]
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Info(CExceptionArgs_Base &args)
virtual bool Exists(void) const
Check existence of file.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
@ fParse_ValidLocal
Treat otherwise unidentified strings as raw accessions, provided that they pass rudimentary validatio...
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
const blastdb::TOid kSeqDBEntryNotFound
Int4 TOid
Ordinal ID in BLAST databases.
const string SeqDB_GetOidMaskFileExt(bool db_is_protein, EOidMaskType t)
File access objects for CSeqDB.
Implementation for some assorted ID list filtering code.
void s_AddFilterFile(string &name, const string &vn, vector< string > &fnames, vector< vector< string > > &fnames_vols)
void s_GetFilteredOidRange(const CSeqDBVolSet &volset, const vector< string > &vol_basenames, vector< const CSeqDBVolEntry * > &excluded_vols, CRef< CSeqDBGiList > &si_list)
void s_ProcessTaxIdFilters(const vector< string > &fnames, vector< vector< string > > &fnames_vols, CRef< CSeqDBGiList > user_list, CRef< CSeqDBNegativeList > neg_user_list, const CSeqDBLMDBSet &lmdb_set, const CSeqDBVolSet &volset, CSeqDB_BitSet &filter_bit)
void s_ProcessSeqIdFilters(const vector< string > &fnames, vector< vector< string > > &fnames_vols, CRef< CSeqDBGiList > user_list, CRef< CSeqDBNegativeList > neg_user_list, const CSeqDBLMDBSet &lmdb_set, const CSeqDBVolSet &volset, CSeqDB_BitSet &filter_bit)
bool s_IsOidInFilteredVol(blastdb::TOid oid, vector< const CSeqDBVolEntry * > &excluded_vols)
bool s_CompareSeqId(const string &id1, const string &id2)
The SeqDB oid filtering layer.
static const sljit_gpr r2
int oid
The OID or -1 if unknown.
int oid
The OID or -1 if unknown.
int oid
The OID or -1 if unknown.
int oid
The OID or -1 if unknown.