99 const char * fendp = fbeginp + file_size;
102 bool in_order =
false;
150 vector<string> tmplist;
151 tmplist.reserve(idlist.size());
152 for(
unsigned int i=0;
i < idlist.size();
i++) {
161 tmplist.push_back(
id);
172 if (tmplist.size() == 0) {
176 sort(tmplist.begin(), tmplist.end());
177 vector<string>::iterator it = unique (tmplist.begin(), tmplist.end());
178 tmplist.resize(distance(tmplist.begin(),it));
180 idlist.swap(tmplist);
186 newInfo.
is_v4 =
false;
189 vector<string> idlist;
198 newInfo.
is_v4 =
false;
201 vector<string> idlist = user_list->
GetSiList();
210 "please run blastdb_aliastool -seqid_file_in <INPUT_FILE_NAME> " \
211 "-seqid_file_out <OUT_FILE_NAME> and use <OUT_FILE_NAME> as the argument to -seqidlist");
217 "Seqidlist is not in BLAST db v4 format");
248 vector<blastdb::TOid> oids;
251 for(
unsigned int i=0;
i < accs.size();
i++) {
271 vector<TIndexCount> OidsPerVolume;
278 TIndexCount vol_oids;
279 vol_oids.m_Index =
i;
282 OidsPerVolume.push_back(vol_oids);
291 std::sort(OidsPerVolume.begin(), OidsPerVolume.end());
293 for(
int i = 0;
i < (
int)OidsPerVolume.size();
i++) {
294 int vol_idx = OidsPerVolume[
i].m_Index;
355 for(
int v = 0; v < volset.
GetNumVols(); v++) {
364 exclude_oid_list.push_back(pig.
oid);
386 m_UserList (user_list),
387 m_NegativeList (neg_list)
414 if (gilist.
Empty()) {
425 map_ref[filename.
GetPathS()] = gilist;
482 int source_num =
source.GetNumGis();
485 int source_index = 0;
486 int target_index = 0;
488 while(source_index < source_num && target_index < target_num) {
489 TGi source_gi =
source.GetGiOid(source_index).gi;
494 if (source_gi == target_gi) {
500 }
else if (source_gi > target_gi) {
505 int test = target_index + jump;
510 test = target_index + jump;
517 int test = source_index + jump;
519 while(
test < source_num &&
source.GetGiOid(
test).gi < target_gi) {
522 test = source_index + jump;
536 int source_num =
source.GetNumTis();
539 int source_index = 0;
540 int target_index = 0;
542 while(source_index < source_num && target_index < target_num) {
543 TTi source_ti =
source.GetTiOid(source_index).ti;
548 if (source_ti == target_ti) {
551 source.GetTiOid(source_index).oid);
556 }
else if (source_ti > target_ti) {
561 int test = target_index + jump;
563 while(
test < target_num &&
568 test = target_index + jump;
575 int test = source_index + jump;
577 while(
test < source_num &&
582 test = source_index + jump;
bool GetFileSizeL(const string &fname, TIndx &length)
Get size of a file.
void RegisterExternal(CSeqDBMemReg &memreg, size_t bytes, CSeqDBLockHold &locked)
Register externally allocated memory.
CNcbiStreamoff TIndx
The type used for file offsets.
const char * GetFileDataPtr(const string &fname, TIndx offset)
Get a pointer to the specified offset.
void Clear()
Clears the memory map object.
TNodeListMap m_GINodeListMap
Map of filenames to alias node specified GI lists.
void x_ResolveNegativeList(CSeqDBAtlas &atlas, const CSeqDBVolSet &volset, CRef< CSeqDBNegativeList > neg_list, CSeqDBLockHold &locked, const CSeqDBLMDBSet &lmdb_set)
void x_ResolvePositiveList(CSeqDBAtlas &atlas, const CSeqDBVolSet &volset, CRef< CSeqDBGiList > user_list, CSeqDBLockHold &locked, const CSeqDBLMDBSet &lmdb_set)
CSeqDBGiListSet(CSeqDBAtlas &atlas, const CSeqDBVolSet &vol_set, TListRef user_list, TNegativeRef neg_list, CSeqDBLockHold &locked, const CSeqDBLMDBSet &lmdb_set)
Constructor.
TListRef m_UserList
User-specified GI list.
TNodeListMap m_SINodeListMap
Map of filenames to alias node specified SI lists.
TNegativeRef m_NegativeList
User-specified Negative GI list.
void x_TranslateFromUserList(CSeqDBGiList &gilist)
Translate a volume gilist from the user gilist.
CSeqDBAtlas & m_Atlas
Memory management layer object.
TNodeListMap m_TINodeListMap
Map of filenames to alias node specified TI lists.
void x_TranslateTisFromUserList(CSeqDBGiList &gilist)
Translate a volume gilist's TIs from the user gilist's TIs.
void x_TranslateGisFromUserList(CSeqDBGiList &gilist)
Translate a volume gilist's GIs from the user gilist's GIs.
TListRef GetNodeIdList(const CSeqDB_Path &filename, const CSeqDBVol *volp, EGiListType list_type, CSeqDBLockHold &locked)
Get a reference to a named GI list.
vector< SGiOid > m_GisOids
Pairs of GIs and OIDs.
int GetNumGis() const
Get the number of GIs in the array.
const SGiOid & GetGiOid(int index) const
Access an element of the array.
vector< SPigOid > m_PigsOids
int GetNumSis() const
Get the number of Seq-ids in the array.
const SPigOid & GetPigOid(int index) const
int GetNumTis() const
Get the number of TIs in the array.
vector< STiOid > m_TisOids
Pairs of TIs and OIDs.
void SetSiTranslation(int index, int oid)
Specify the correct OID for a Seq-id.
void SetSiList(const vector< string > &new_list)
void SetTiTranslation(int index, int oid)
Specify the correct OID for a TI.
void GetSiList(vector< string > &sis) const
TODO Get the seqid list?
void SetListInfo(const SBlastSeqIdListInfo &list_info)
TODO Reserve space for seqids?
@ eGi
The array is sorted by GI.
vector< blastdb::TOid > & SetOidsForTaxIdsList()
bool NotEmpty() const
Return true if there are elements present.
const SBlastSeqIdListInfo & GetListInfo()
set< TTaxId > & GetTaxIdsList()
void ReservePigs(size_t n)
void PreprocessIdsForISAMSiLookup()
Preprocess ids for ISAM string id lookup.
const STiOid & GetTiOid(int index) const
Access an element of the array.
void SetGiTranslation(int index, int oid)
Specify the correct OID for a GI.
void InsureOrder(ESortOrder order)
Sort if necessary to ensure order of elements.
vector< SSiOid > m_SisOids
Pairs of Seq-ids and OIDs.
ESortOrder m_CurrentOrder
Indicates the current sort order, if any, of this container.
void NegativeTaxIdsToOids(set< TTaxId > &tax_ids, vector< blastdb::TOid > &rv) const
void NegativeSeqIdsToOids(const vector< string > &ids, vector< blastdb::TOid > &rv) const
void AccessionsToOids(const vector< string > &accs, vector< TOid > &oids) const
bool IsBlastDBVersion5() const
void TaxIdsToOids(set< TTaxId > &tax_ids, vector< blastdb::TOid > &rv) const
const vector< string > & GetSiList()
const SBlastSeqIdListInfo & GetListInfo()
int GetNumTis() const
Get the number of TIs in the array.
bool NotEmpty() const
Return true if there are elements present.
void SetSiList(const vector< string > &new_list)
void PreprocessIdsForISAMSiLookup()
int GetNumGis() const
Get the number of GIs in the array.
const vector< TPig > & GetPigList()
void SetListInfo(const SBlastSeqIdListInfo &list_info)
vector< blastdb::TOid > & SetExcludedOids()
int GetNumSis() const
Get the number of SeqIds in the array.
set< TTaxId > & GetTaxIdsList()
CSeqDBMemReg m_VectorMemory
Memory associated with the m_GisOids vector.
virtual ~CSeqDBNodeFileIdList()
Destructor.
CSeqDBNodeFileIdList(CSeqDBAtlas &atlas, const CSeqDB_Path &fname, CSeqDBGiListSet::EGiListType list_type, CSeqDBLockHold &locked)
Build a GI, TI, or SI list from a memory-mapped file.
int OIDStart() const
Get the starting OID in this volume's range.
int OIDEnd() const
Get the ending OID in this volume's range.
CSeqDBVol * Vol()
Get a pointer to the underlying volume object.
const CSeqDBVolEntry * GetVolEntry(int i) const
Find a volume entry by index.
Uint8 GetVolumeSetLength() const
Find total volume length for all volumes.
int GetNumVols() const
Get the number of volumes.
void IdsToOids(CSeqDBGiList &gis, CSeqDBLockHold &locked) const
Translate Gis to Oids for the given vector of Gi/Oid pairs.
void AttachVolumeGiList(CRef< CSeqDBGiList > gilist) const
Filter this volume using the specified GI list.
const string & GetPathS() const
Get the path as a string.
#define test(a, b, c, d, e)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
void Warning(CExceptionArgs_Base &args)
const string AsFastaString(void) const
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
@ fParse_RawText
Try to ID raw non-numeric accessions.
@ fParse_PartialOK
Warn rather than throwing an exception when a FASTA-style ID set contains unparsable portions,...
@ fParse_AnyLocal
Treat otherwise unidentified strings as local accessions as long as they don't resemble FASTA-style I...
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
bool IsPrf(void) const
Check if variant Prf is selected.
bool IsGi(void) const
Check if variant Gi is selected.
bool IsPir(void) const
Check if variant Pir is selected.
unsigned int
A callback function used to compare two keys in a database.
constexpr auto sort(_Init &&init)
const CharType(& source)[N]
void SeqDB_ReadMemorySiList(const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SSiOid > &sis, bool *in_order=0)
Read a text SeqID list from an area of memory.
void SeqDB_ReadMemoryGiList(const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SGiOid > &gis, bool *in_order=0)
Read a text or binary GI list from an area of memory.
void SeqDB_ReadMemoryTiList(const char *fbeginp, const char *fendp, vector< CSeqDBGiList::STiOid > &tis, bool *in_order=0)
Read a text or binary TI list from an area of memory.
void SeqDB_ReadMemoryPigList(const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SPigOid > &pigs, bool *in_order=0)
void s_ProcessSeqIDsForV5(vector< string > &idlist)
bool s_VerifySeqidlist(const SBlastSeqIdListInfo &list_info, const CSeqDBVolSet &volset, const CSeqDBLMDBSet &lmdb_set)
void s_ProcessNegativeSeqIDsForV5(CRef< CSeqDBNegativeList > &user_list)
void s_ProcessPositiveSeqIDsForV5(CRef< CSeqDBGiList > &user_list)
TGi gi
The GI or 0 if unknown.
int oid
The OID or -1 if unknown.
int oid
The OID or -1 if unknown.
int oid
The OID or -1 if unknown.
TTi ti
The TI or 0 if unknown.
Blast DB v5 seqid list info.
Defines a pair of integers and a sort order.
bool operator<(const SSeqDB_IndexCountPair &rhs) const
Less-than operator; defines the order used when sorting these index/count pairs.
int m_Index
Index of the volume in the volume set.
int m_Count
Number of OIDs associated with this volume.