80 template<
int k_nFields>
82 SMultiIntRecord<k_nFields>& record,
85 return record.n[iField];
104 template <
typename TRecordType>
106 int n1,
int& iFirstIndex)
108 int iRecBeg = 0, iRecEnd = nRecs;
110 while (iRecBeg < iRecEnd)
112 iRecMid = (iRecBeg + iRecEnd) / 2;
115 iRecBeg = iRecMid + 1;
122 iFirstIndex = iRecEnd;
140 while (!listVals.empty() &&
141 listVals.front() == 0)
143 listVals.pop_front();
154 while (!listVals.empty() &&
157 listVals.pop_front();
183 template <
typename TRecordType>
186 list<int>& listFieldVals,
189 int iFirstIndex = -1;
192 while (iFirstIndex < nRecs &&
195 listFieldVals.push_back(
205 template <
typename TRecordType>
208 list<TGi>& listFieldVals,
211 int iFirstIndex = -1;
214 while (iFirstIndex < nRecs &&
217 listFieldVals.push_back(
237 template <
typename TRecordType>
239 TRecordType*& pRecs,
int& nRecs)
243 nRecs =
static_cast<int>(pMemFile->
GetSize() / (
sizeof(TRecordType)));
246 pRecs = (TRecordType*)(pMemFile->
GetPtr());
305 GI_TO(
int, gi), 1, listGeneIds,
false);
310 "Cannot access the memory-mapped file for "
311 "Gi to Gene ID conversion.");
335 "Cannot access the memory-mapped file for "
336 "Gene ID to Gene Info Offset conversion.");
347 "Gi to offset lookup is disabled.");
357 GI_TO(
int, gi), 1, listOffsets,
false);
362 "Cannot access the memory-mapped file for "
363 "Gi to Gene Info Offset conversion.");
379 geneId, iGiField, listGis,
true);
384 "Cannot access the memory-mapped file for "
385 "Gene ID to Gi conversion.");
402 const string& strGene2OffsetFile,
403 const string& strGi2OffsetFile,
404 const string& strAllGeneDataFile,
405 const string& strGene2GiFile,
406 bool bGiToOffsetLookup)
407 : m_strGi2GeneFile(strGi2GeneFile),
408 m_strGene2OffsetFile(strGene2OffsetFile),
409 m_strGi2OffsetFile(strGi2OffsetFile),
410 m_strGene2GiFile(strGene2GiFile),
411 m_strAllGeneDataFile(strAllGeneDataFile),
412 m_bGiToOffsetLookup(bGiToOffsetLookup)
417 "Cannot open the Gene Data file for reading: " +
449 if ( !
CDir(retval).Exists() ) {
458 #if defined(NCBI_OS_MSWIN)
468 : m_bGiToOffsetLookup(bGiToOffsetLookup)
471 if (strDirPath.length() == 0 ||
475 "Invalid path to Gene info directory: " +
489 "Cannot open the Gene Data file for reading: " +
528 bool bSuccess =
false;
533 list<int> listOffsets;
536 list<int>::const_iterator itOffset = listOffsets.begin();
537 for (; itOffset != listOffsets.end(); itOffset++)
542 infoList.push_back(
info);
550 list<int> listGeneIds;
553 list<int>::const_iterator itId = listGeneIds.begin();
554 for (; itId != listGeneIds.end(); itId++)
561 "Gene info not found for Gene ID:" +
563 " linked from valid Gi:" +
574 bool bSuccess =
false;
589 infoList.push_back(
info);
static bool CheckDirExistence(const string &strDir)
Check if a directory exists, given its name.
static bool OpenBinaryInputFile(const string &strFileName, CNcbiIfstream &in)
Open the given binary file for reading.
static void ReadGeneInfo(CNcbiIfstream &in, int nOffset, CRef< CGeneInfo > &info)
Read a Gene info object from the file.
static bool CheckExistence(const string &strFile)
Check if a file exists, given its name.
virtual bool GetGeneInfoForId(int geneId, TGeneInfoList &infoList)
GetGeneInfoForId implementation, see IGeneInfoInput.
bool x_GeneIdToOffset(int geneId, int &nOffset)
Set the offset value given a Gene ID.
string m_strGi2OffsetFile
Path to the Gi to Offset file.
bool x_GeneIdToGi(int geneId, int iGiField, list< TGi > &listGis)
Fill the Gi list given a Gene ID, and the Gi field index, which represents the Gi type to be read fro...
CNcbiIfstream m_inAllData
Input stream for the Gene data file.
virtual ~CGeneInfoFileReader()
Destructor.
unique_ptr< CMemoryFile > m_memGi2GeneFile
Memory-mapped Gi to Gene ID file.
bool m_bGiToOffsetLookup
Perform Gi to Offset lookups directly.
CGeneInfoFileReader(const string &strGi2GeneFile, const string &strGene2OffsetFile, const string &strGi2OffsetFile, const string &strAllGeneDataFile, const string &strGene2GiFile, bool bGiToOffsetLookup=true)
Construct using direct paths.
virtual bool GetGenomicGisForGeneId(int geneId, TGiList &giList)
GetGenomicGisForGeneId implementation, see IGeneInfoInput.
virtual bool GetGeneIdsForGi(TGi gi, TGeneIdList &geneIdList)
GetGeneIdsForGi implementation, see IGeneInfoInput.
void x_MapMemFiles()
Memory-map all the files.
unique_ptr< CMemoryFile > m_memGi2OffsetFile
Memory-mapped Gi to Offset file.
TGeneIdToGeneInfoMap m_mapIdToInfo
Cached map of looked up Gene Info objects.
unique_ptr< CMemoryFile > m_memGene2GiFile
Memory-mapped Gene ID to Gi file.
string m_strGene2GiFile
Path to the Gene ID to Gi file.
void x_UnmapMemFiles()
Unmap all the memory-mapped files.
string m_strGene2OffsetFile
Path to the Gene ID to Offset file.
string m_strGi2GeneFile
Path to the Gi to Gene ID file.
bool x_GiToGeneId(TGi gi, list< int > &listGeneIds)
Fill the Gene ID list given a Gi.
virtual bool GetGeneInfoForGi(TGi gi, TGeneInfoList &infoList)
GetGeneInfoForGi implementation, see IGeneInfoInput.
unique_ptr< CMemoryFile > m_memGene2OffsetFile
Memory-mapped Gene ID to Offset file.
bool x_GiToOffset(TGi gi, list< int > &listOffsets)
Set the offset value given a Gi.
virtual bool GetProteinGisForGeneId(int geneId, TGiList &giList)
GetProteinGisForGeneId implementation, see IGeneInfoInput.
virtual bool GetRNAGisForGeneId(int geneId, TGiList &giList)
GetRNAGisForGeneId implementation, see IGeneInfoInput.
bool x_OffsetToInfo(int nOffset, CRef< CGeneInfo > &info)
Read Gene data at the given offset and create the info object.
string m_strAllGeneDataFile
Path to the file containing all the Gene data.
const_iterator end() const
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
Classes to support using environment variables as a backend for the registry framework.
General file processing routines and structures.
static const int k_iGenomicGiField
Index of the Genomic Gi field in the Gene ID to Gi records.
static bool s_SearchSortedArrayGis(TRecordType *pRecs, int nRecs, int n1, int iField, list< TGi > &listFieldVals, bool bRemoveZeros)
static bool s_GetMemFilePtrAndLength(CMemoryFile *pMemFile, TRecordType *&pRecs, int &nRecs)
Interprets a memory file as a record array.
static const int k_iRNAGiField
Index of the RNA Gi field in the Gene ID to Gi records.
static const int k_iProteinGiField
Index of the Protein Gi field in the Gene ID to Gi records.
bool s_SearchSortedArray(TRecordType *pRecs, int nRecs, int n1, int &iFirstIndex)
Searches an array of records sorted by the first field.
int & s_GetField(CGeneInfoFileReader::STwoIntRecord &record, int iField)
Returns the field of a record given its index.
void s_SortAndFilter(list< int > &listVals, bool bRemoveZeros)
Sorts and filters a list of integers.
static string s_FindPathToGeneInfoFiles(void)
Find the path to the gene info files, first checking the environment variable GENE_INFO_PATH,...
void s_SortAndFilterGis(list< TGi > &listVals, bool bRemoveZeros)
Defines a class for reading Gene information from files.
#define GENE_GENE2OFFSET_FILE_NAME
Name of the processed "GeneID to Offset" file.
#define GENE_INFO_PATH_ENV_VARIABLE
Name of the environment variable holding the path to Gene info files.
#define GENE_GI2GENE_FILE_NAME
Name of the processed "Gi to GeneID" file.
#define GENE_ALL_GENE_DATA_FILE_NAME
Name of the combined "Gene Data" file.
#define GENE_GI2OFFSET_FILE_NAME
Name of the processed "Gi to Offset" file.
#define GENE_GENE2GI_FILE_NAME
Name of the processed "Gene ID to Gi" file.
#define GI_FROM(T, value)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
size_t GetSize(void) const
Get length of the mapped region.
void * GetPtr(void) const
Get pointer to beginning of data.
static string AddTrailingPathSeparator(const string &path)
Add trailing path separator, if needed.
static string ConcatPath(const string &first, const string &second)
Concatenate two parts of the path for the current OS.
static string GetCwd(void)
Get the current working directory.
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
void Add(const IRegistry &reg, TPriority prio=ePriority_Default, const string &name=kEmptyStr)
Non-empty names must be unique within each compound registry, but there is no limit to the number of ...
void AddMapper(const IEnvRegMapper &mapper, TPriority prio=ePriority_Default)
@ fWithNcbirc
Include .ncbirc (used only by CNcbiReg.)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
const char *const kSection
SMultiIntRecord - an n-tuple of integers.
STwoIntRecord - a pair of integers.
int n1
First integer field of the record.
int n2
Second integer field of the record.