134 return !(*
this < index);
219 vector<CSeq_id_Handle>& extra_ids,
221 bool extract_products)
229 ITERATE (CBioseq::TInst::TExt::TDelta::Tdata, iter,
233 for ( ; id_iter; ++id_iter) {
243 if (extract_products) {
252 for (
CFeat_CI feat_iter(bsh, sel); feat_iter; ++feat_iter) {
253 if (feat_iter->IsSetProduct()) {
255 (feat_iter->GetProduct());
256 for ( ; id_iter; ++id_iter) {
274 return removed_annot;
283 return removed_annots;
291 bool trim_large_nucprot =
false;
292 bool trimmed =
false;
306 if ((*desc_it)->IsMolinfo()) {
307 trim_large_nucprot = (*desc_it)->GetMolinfo().GetBiomol() ==
314 if (trim_large_nucprot) {
349 entry.
Reset(full_entry = trimmed_entry);
366 "Mixed input mol types not allowed with trim-large-nucprots");
390 bool extract_product,
400 m_Buffer.reserve(128 * 1024 * 1024);
410 "trapped signal, exiting");
419 if (sub_cache_locator) {
435 small_blob.
Pack(*trimmed_entry);
467 if (sub_cache_locator) {
480 small_blob.
Pack(*trimmed_entry);
497 output_idh = main_cache_locator.
m_Idh;
500 if (sub_cache_locator) {
546 bool extract_product)
556 vector<SSubcacheIndexData> &sub_cache_locator)
560 "trapped signal, exiting");
588 vector<CBioseq_Handle> relevant_seqs;
590 relevant_seqs.push_back(bsh);
593 for (
CBioseq_CI seq_ci(seh); seq_ci; ++seq_ci) {
594 relevant_seqs.push_back(*seq_ci);
597 ITERATE (vector<CBioseq_Handle>, seq_it, relevant_seqs) {
605 blob_locator.
m_Ids = seq_it->GetId();
613 sub_cache_locator.push_back(blob_locator);
660 virtual void Init(
void);
661 virtual int Run(
void);
662 virtual void Exit(
void);
665 const CDir& sub_cache_root,
670 bool extract_product,
672 bool update_existing,
673 int recursion_level);
679 const CDir & subcache_root,
681 bool extract_product );
685 const vector<CDir>& main_cache_roots,
691 const string & cache_index);
694 const CDir & cache_root);
745 arg_desc->SetUsageContext(
GetArguments().GetProgramBasename(),
746 "CArgDescriptions demo program");
748 arg_desc->AddOptionalKey(
"cache",
"Cache",
749 "Comma-separated paths of one or more main caches",
752 arg_desc->AddOptionalKey(
"cache-manifest",
"CacheManifest",
753 "manifest of paths of one or more main caches",
755 arg_desc->SetDependency(
"cache-manifest",
759 arg_desc->AddKey(
"subcache",
"Subcache",
760 "Path to the ASN.1 subcache that will be created.",
763 arg_desc->AddDefaultKey(
"i",
"SeqIds",
764 "The list of Seq-ids is read from here.",
767 arg_desc->AddAlias(
"-input",
"i");
769 arg_desc->AddOptionalKey(
"input-manifest",
"SeqIds",
770 "The list of Seq-ids is read from here.",
772 arg_desc->SetDependency(
"i",
776 arg_desc->AddOptionalKey(
"timestamp",
"Timestamp",
777 "Only GIs stamped earlier than this timestamp (YYYY-MM-DD) are cached",
780 arg_desc->AddFlag(
"extract-delta",
781 "Extract and index delta-seq far-pointers");
783 arg_desc->AddOptionalKey(
"delta-level",
"RecursionLevel",
784 "Number of levels to descend when retrieving "
785 "items in delta sequences",
788 arg_desc->AddFlag(
"skip-retrieval-failures",
789 "Skip failed retrieval of sequences, "
790 "up to any limit imposed by -max-retrieval-failures");
791 arg_desc->AddOptionalKey(
"max-retrieval-failures",
"MaximumAllowedFailures",
792 "Configures the option of -skip-failures: "
793 "Maximum number of sequences we're allowed to "
794 "fail to retrieve from ID and still consider "
795 "execution a success; does not include withdrawn "
796 "sequences, which are counted separately. The "
797 "default is unlimited.",
800 arg_desc->AddFlag(
"skip-withdrawn",
801 "Skip retrieval of withdrawn sequences, "
802 "up to any limit imposed by -max-withdrawn");
803 arg_desc->AddOptionalKey(
"max-withdrawn",
"MaximumWithdrawnSequences",
804 "Configures the option of -skip-withdrawn: "
805 "Maximum number of withdrawn sequences allowed in "
806 "the input Seq-ids. The default is unlimited.",
809 arg_desc->AddFlag(
"extract-product",
810 "Extract and index product far-pointers");
812 arg_desc->AddFlag(
"fetch-missing",
813 "Retrieve ASN.1 blobs from ID directly if a look-up in "
814 "the main cache fails");
816 arg_desc->AddFlag(
"no-update-existing",
817 "Don't update sequences that are already in the subcache");
819 arg_desc->AddFlag(
"trim-large-nucprots",
820 "Divide large nucprots into separate Seq-entry per "
821 "sequences, to avoid fetching huge blobs when only one "
822 "protein is needed");
824 arg_desc->AddFlag(
"remove-annotation",
825 "Remove all annotation from caches entries");
827 arg_desc->AddFlag(
"no-wgs-master-descs",
828 "When fetching missing WGS sequences from ID, don't add "
829 "the descriptiors from the master WGS record");
831 arg_desc->AddFlag(
"overwrite-existing-cache",
832 "If the cache already exists, overwrite its current "
833 "contents; default action is to add to them");
835 arg_desc->AddFlag(
"allow-approximate-ids",
836 "If this flag is specified, a bioseq retrieved by "
837 "provided ID is considered good even if the exact ID "
838 "does not appear in bioseq");
840 arg_desc->SetDependency(
"skip-retrieval-failures",
842 arg_desc->SetDependency(
"max-retrieval-failures",
845 arg_desc->SetDependency(
"skip-withdrawn",
847 arg_desc->SetDependency(
"max-withdrawn",
850 arg_desc->SetDependency(
"no-wgs-master-descs",
852 arg_desc->SetDependency(
"no-wgs-master-descs",
855 arg_desc->AddOptionalKey(
"oseqids",
"oseqids",
"Seqids that actually made it to the cache",
857 arg_desc->AddOptionalKey(
"seq-id-type",
"TypeOfId",
858 "Kind of sequence identifier to use; by default "
859 "use same seq-id as provided in input",
861 arg_desc->SetConstraint(
"seq-id-type",
863 "canonical",
"best"));
865 arg_desc->AddOptionalKey(
"freeze-date",
"FreezeDate",
866 "When fetching missing blobs, Get old blobs from "
867 "no later than specified date; format M/D/Y. "
868 "Supported for gis only",
871 arg_desc->AddOptionalKey(
"idstat-executable",
"IdstatExecutable",
872 "Path to idstat executable",
874 arg_desc->SetDependency(
"freeze-date",
876 arg_desc->SetDependency(
"freeze-date",
879 arg_desc->AddFlag(
"accept-non-gi",
880 "Allow non-gi seq-ids, and get the latest version of the "
881 "sequence, ignoring freeze date. Default, if freeze "
882 "date is specified without this flag, is to fail if any "
883 "of the input seq-ids are not gis");
884 arg_desc->SetDependency(
"accept-non-gi",
902 if ( line.empty() || line[0] ==
'#') {
911 &blob_locations.front()));
934 if (args[
"seq-id-type"]) {
935 m_IdType = args[
"seq-id-type"].AsString() ==
"best"
939 vector<CDir> main_cache_roots;
940 vector<string> main_cache_paths;
942 NStr::Split(args[
"cache"].AsString(),
",", main_cache_paths);
943 }
else if (args[
"cache-manifest"]) {
946 ITERATE (vector<string>, it, main_cache_paths) {
948 if (! cache_root.
Exists() ) {
950 <<
" does not exist!" );
952 }
else if ( ! cache_root.
IsDir() ) {
954 <<
"valid cache path!" );
957 main_cache_roots.push_back(cache_root);
963 main_cache_roots.push_back(
dynamic_cast<const CDir &
>(**subdir_it));
967 CDir subcache_root( args[
"subcache"].AsString() );
968 if ( subcache_root.
Exists() ) {
969 if (args[
"overwrite-existing-cache"]) {
970 if (!subcache_root.
Remove()) {
972 "Cache already exists and can't be removed");
976 <<
" already exists!" );
981 LOG_POST(
Error <<
"Unable to create a path to a subcache at "
986 if ( args[
"timestamp"].
HasValue() ) {
987 string timestamp_string( args[
"timestamp"].AsString() );
988 timestamp =
CTime( timestamp_string,
993 bool extract_delta = args[
"extract-delta"];
994 bool extract_product = args[
"extract-product"];
995 bool fetch_missing = args[
"fetch-missing"];
996 bool update_existing = !args[
"no-update-existing"];
998 << (update_existing ?
"true" :
"false"));
1000 unsigned max_retrieval_failures =
1001 args[
"skip-retrieval-failures"] ? UINT_MAX : 0;
1002 if (args[
"max-retrieval-failures"]) {
1003 max_retrieval_failures = args[
"max-retrieval-failures"].AsInteger();
1006 unsigned max_withdrawn =
1007 args[
"skip-withdrawn"] ? UINT_MAX : 0;
1008 if (args[
"max-withdrawn"]) {
1009 max_withdrawn = args[
"max-withdrawn"].AsInteger();
1018 if (args[
"freeze-date"]) {
1029 if (args[
"no-wgs-master-descs"]) {
1034 #ifdef HAVE_NCBI_VDB
1036 om->GetRegisteredNames(registered_names);
1037 for (
const string& loader_name: registered_names) {
1058 if (args[
"input-manifest"]) {
1060 CNcbiIstream& mft_istr = args[
"input-manifest"].AsInputFile();
1063 if ( fname.empty() || fname[0] ==
'#') {
1079 if (args[
"delta-level"].
HasValue()) {
1083 size_t total_count =
1085 blob_locations, timestamp.
GetTimeT(),
1086 extract_delta, extract_product, fetch_missing,
1087 update_existing, 0 );
1093 << total_count <<
" items into cache ("
1094 << e <<
" seconds, " << total_count/e <<
" items/sec)");
1101 if (fetch_missing) {
1113 if(args[
"oseqids"]) {
1114 args[
"oseqids"].AsOutputFile() <<
"#seq-id"<<endl;
1116 args[
"oseqids"].AsOutputFile() << *it << endl;
1128 const CDir& subcache_root,
1133 bool extract_product,
1135 bool update_existing,
1136 int recursion_level)
1144 string subcache_main_index =
1148 size_t input_ids = index_map.
size();
1154 if (!update_existing) {
1158 blob_locations.begin()->m_Timestamp = 0;
1169 ids_missing, timestamp);
1180 if (update_existing) {
1187 blob_locations.begin()->m_Timestamp = timestamp;
1197 if (recursion_level == 0) {
1208 if (fetch_missing) {
1211 extract_delta, extract_product );
1215 index_map.
erase(*it);
1225 extract_delta, extract_product,
m_IdType);
1230 blob_writer(it->first, *it->second, output_idh);
1232 ITERATE (vector<CSeq_id_Handle>, id_it, it->second->m_Ids) {
1233 if (*id_it != it->first.m_Idh) {
1246 LOG_POST(
Error <<
"Error trying to copy " << it->first.m_Idh.AsString());
1258 if (!index_map.count(new_id)) {
1269 extra_ids, blob_locations, timestamp,
1270 extract_delta, extract_product, fetch_missing,
1271 update_existing, recursion_level);
1296 const CDir & subcache_root,
1298 bool extract_product )
1300 if (! ids_missing.empty()) {
1302 extract_delta, extract_product);
1307 "trapped signal, exiting");
1310 if (*(*it)->second) {
1317 index_map.
erase(*it);
1318 bool is_withdrawn =
false;
1325 is_withdrawn ?
"bioseq withdrawn"
1326 :
"empty bioseq handle");
1328 if (!
GetArgs()[
"allow-approximate-ids"] &&
1329 find(bsh.
GetId().begin(), bsh.
GetId().end(), idh) == bsh.
GetId().end())
1332 "Retrieved bioseq does not have this Seq-id");
1351 vector<SSubcacheIndexData> index_data;
1352 inserter(bsh, index_data);
1354 ITERATE (vector<SSubcacheIndexData>, blob_it, index_data) {
1356 blob_locations.push_back(*blob_it);
1359 ITERATE (vector<CSeq_id_Handle>, id_it, blob_it->m_Ids) {
1360 index_map[*id_it] = &blob_locations.back();
1372 if (!index_map.count(new_id)) {
1382 if (ids.
size() <= 500) {
1386 string key1 = ids.
begin()->first.m_SeqId;
1387 string key2 = (--ids.
end())->
first.m_SeqId;
1388 size_t half_length =
max(key1.size(), key2.size())/2;
1389 return key1.substr(0, half_length) != key2.substr(0, half_length);
1396 const vector<CDir>& main_cache_roots,
1405 missing_ids.push_back(it);
1409 ITERATE (vector<CDir>, dir_it, main_cache_roots) {
1410 string cache_index =
1412 LOG_POST(
Error <<
"locate blobs in " << cache_index <<
", missing " << missing_ids.size());
1413 if (
CFile(cache_index).Exists() && !missing_ids.empty()) {
1420 TIndexRefList::iterator iter = missing_ids.begin();
1421 while (iter != missing_ids.end()) {
1426 cursor.
To <<
key.m_SeqId <<
key.m_Version;
1434 sub_cache_index_data = asn_index;
1435 main_cache_locator = asn_index;
1438 if (sub_cache_index_data) {
1441 blob_locations.push_back(sub_cache_index_data);
1442 (*iter)->second = &blob_locations.back();
1443 index_map_by_blob.push_back(
1445 iter = missing_ids.erase(iter);
1453 TIndexRefList::iterator iter = missing_ids.begin();
1458 &end = (*--missing_ids.end())->
first;
1460 cursor.
To << end.m_SeqId << end.m_Version;
1464 << end.m_SeqId <<
'.' << end.m_Version);
1467 while (valid_index && iter != missing_ids.end()) {
1469 SBlobLocator main_cache_locator((*iter)->first.m_Idh, *dir_it);
1470 for (; valid_index && (*iter)->first >= asn_index;
1473 if ((*iter)->first == asn_index &&
1478 sub_cache_index_data = asn_index;
1479 main_cache_locator = asn_index;
1482 if (sub_cache_index_data) {
1485 blob_locations.push_back(sub_cache_index_data);
1486 (*iter)->second = &blob_locations.back();
1487 index_map_by_blob.push_back(
1489 iter = missing_ids.erase(iter);
1499 sort(index_map_by_blob.begin(), index_map_by_blob.end());
1505 const string& cache_index)
1507 if (
CFile(cache_index).Exists()) {
1514 vector<TIndexRef> ids_found;
1523 cursor.
To <<
key.m_SeqId <<
key.m_Version;
1524 bool found_match =
false;
1531 if (asn_index.
GetTimestamp() >= iter->second->m_Timestamp)
1534 ids_found.push_back(iter);
1546 cursor.
To << end.m_SeqId << end.m_Version;
1550 << end.m_SeqId <<
'.' << end.m_Version);
1557 bool found_match =
false;
1558 for (; valid_index && iter->first >= asn_index;
1561 if (iter->first == asn_index &&
1568 ids_found.push_back(iter);
1578 ITERATE (vector<TIndexRef>, iter, ids_found) {
1593 index_map.
erase(*iter);
1600 const CDir & cache_root)
1602 string main_index_path =
1605 string seq_id_index_path =
1611 main_index.
Open(main_index_path,
1616 seq_id_index.
Open(seq_id_index_path,
1620 main_index.
SetSeqId( it->first.m_SeqId );
1621 main_index.
SetVersion( it->first.m_Version );
1622 main_index.
SetGi( it->second->m_Gi );
1624 main_index.
SetChunkId( it->second->m_ChunkId );
1625 main_index.
SetOffset( it->second->m_Offset );
1626 main_index.
SetSize( it->second->m_BlobSize );
1628 main_index.
SetTaxId( it->second->m_TaxId );
1631 << it->first.m_SeqId );
1634 seq_id_index.
SetSeqId( it->first.m_SeqId );
1635 seq_id_index.
SetVersion( it->first.m_Version );
1636 seq_id_index.
SetGi( it->second->m_Gi );
1638 seq_id_index.
SetOffset( it->second->m_SeqIdOffset );
1639 seq_id_index.
SetSize( it->second->m_SeqIdSize );
1642 << it->first.m_SeqId );
1656 <<
"; getting latest version");
1664 vector<string> args {
"-i",
"PUBSEQ_OS_GI64",
"-g" };
1667 ostringstream idstat_out, idstat_err;
1672 idstat_out, idstat_err, idstat_exit);
1673 cerr << idstat_err.str();
1675 if(0 != idstat_exit) {
1679 vector<string> idstat_lines;
1680 NStr::Split(idstat_out.str(),
"\n", idstat_lines);
1681 bool reached_data =
false;
1682 for (
const string &line : idstat_lines) {
1683 if (!reached_data) {
1684 if (!line.empty() && line[0] ==
'-') {
1686 reached_data =
true;
1704 : sat(0), satkey(0), gi(
ZERO_GI)
1706 if (line.empty() || line[0] ==
' ') {
1710 vector<string> tokens;
1720 CDatabase db(
"dbapi://anyone:allowed@ENTREZ_MAIN/IdMain");
1724 s_SatelliteMap[
row[
"satellite"].AsString()] =
row[
"sat_id"].AsInt4();
1733 int main(
int argc,
const char* argv[])
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Contains the class definition for CAsnCache, the main client class for accessing the ASN cache data.
void GetNormalizedSeqId(const objects::CSeq_id_Handle &id, string &id_str, Uint4 &version)
void BioseqIndexData(const objects::CBioseq &bioseq, CAsnIndex::TGi &gi, CAsnIndex::TSeqLength &seq_length, CAsnIndex::TTaxId &taxid)
Berkeley BDB file cursor.
This is a simple BDB structure holding information about a given accession and its indexed location.
TChunkId GetChunkId() const
TOffset GetOffset() const
void SetSeqId(TSeqId val)
void SetVersion(TVersion val)
void SetTaxId(TTaxId val)
void SetChunkId(TChunkId val)
TTimestamp GetTimestamp() const
void SetOffset(TOffset val)
TVersion GetVersion() const
void SetSeqLength(TSeqLength val)
TSeqLength GetSeqLength() const
void SetTimestamp(TTimestamp val)
TSeqId GetSeqId() const
accessors
TCachedSeqIds m_cached_seq_ids
size_t m_RecordsFetchedFromID
void x_FetchMissingBlobs(TIndexMapById &index_map, const TIndexRefList &missing_ids, TBlobLocationList &blob_locations, TIndexMapById &extra_ids, const CDir &subcache_root, bool extract_delta, bool extract_product)
size_t WriteBlobsInSubCache(const vector< CDir > &main_cache_roots, const CDir &sub_cache_root, TIndexMapById &index_map, TBlobLocationList &blob_locations, time_t timestamp, bool extract_delta, bool extract_product, bool fetch_missing, bool update_existing, int recursion_level)
CGBDataLoader * m_GbLoader
CAsnSubCacheCreateApplication()
TCachedSeqIds m_output_seq_ids
SSubcacheIndexData m_BlankIndexData
CBioseq_Handle x_GetBioseqHandle(const CSeq_id_Handle &idh)
string m_IdstatExecutable
sequence::EGetIdType m_IdType
virtual void Init(void)
Initialize the application.
void IndexNewBlobsInSubCache(const TIndexMapById &index_map, const CDir &cache_root)
virtual int Run(void)
Run the application.
void x_LocateBlobsInCache(TIndexMapById &index_map, TIndexMapByBlob &index_map_by_blob, const vector< CDir > &main_cache_roots, TBlobLocationList &blob_locations, TIndexRefList &missing_ids, time_t timestamp)
virtual void Exit(void)
Cleanup on application exit.
size_t m_RecordsInSubCache
size_t m_RecordsNotInMainCache
void x_EliminateIdsAlreadyInCache(TIndexMapById &index_map, const string &cache_index)
size_t m_RecordsWithdrawn
Berkeley DB file cursor class.
void UnPack(CSeq_entry &entry) const
void Pack(const CSeq_entry &entry)
void OpenForWrite(const std::string &root_path="")
void RawWrite(const char *raw_blob, size_t raw_blob_size)
void Write(const CCache_blob &cache_blob)
unsigned int GetChunkSerialNum() const
void OpenForRead(const std::string &root_path="", unsigned int chunk=0)
void RawRead(std::streampos offset, char *raw_blob, size_t raw_blob_size)
static void SetupObjectManager(const CArgs &args, objects::CObjectManager &obj_mgr, TLoaders loaders=fDefault)
Set up the standard object manager data loaders according to the arguments provided above.
static void AddArgumentDescriptions(CArgDescriptions &arg_desc, TLoaders loaders=fDefault)
Add a standard set of arguments used to configure the object manager.
Database connection object.
void Connect(void)
Explicitly (re)connect to the database server.
CQuery NewQuery(void)
Get new CQuery object for this database.
vector< string > GetAllFilePaths() const
Returns all the file paths referenced by the manifest.
void SetAddWGSMasterDescr(bool flag)
TBlobId GetBlobIdFromSatSatKey(int sat, int sat_key, int sub_sat=0) const
Object used to execute queries and stored procedures on the database server and retrieve result sets.
void OpenForWrite(const std::string &root_path="")
void Write(const objects::CBioseq::TId &seq_ids)
Template class for iteration on objects of class C (non-modifiable version)
void SetAddWGSMasterDescr(bool flag)
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
iterator_bool insert(const value_type &val)
container_type::value_type value_type
iterator_bool insert(const value_type &val)
The NCBI C++ standard methods for dealing with std::string.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
#define GI_FROM(T, value)
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
CNcbiRegistry & GetRWConfig(void)
Get the application's cached configuration parameters, accessible for read-write for an application's...
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eFollowLinks
Follow symbolic links.
@ fPreOpen
Open file right away; for eInputFile, eOutputFile, eIOFile.
@ eRequires
One argument requires another.
@ eExcludes
One argument excludes another.
@ eInputFile
Name of file (must exist and be readable)
@ eString
An arbitrary string.
@ eOutputFile
Name of file (must be writable)
@ eInteger
Convertible into an integer number (int or Int8)
void SetCacheSize(unsigned int cache_size)
Set Berkeley DB memory cache size for the file (default is 256K).
void SetCondition(ECondition cond_from, ECondition cond_to=eNotSet)
Set search condition(type of interval)
void Open(const string &filename, EOpenMode open_mode, bool support_dirty_read=false, unsigned rec_len=0)
Open file with specified access mode.
EBDB_ErrCode UpdateInsert(EAfterWrite write_flag=eDiscardData)
Update record corresponding to the current key value.
void InitMultiFetch(size_t buffer_size, EMultiFetchMode mfm=eFetchAll)
Init multi-row fetch.
EBDB_ErrCode Fetch(EFetchDirection fdir=eDefault)
Fetch record.
CBDB_ConditionHandle From
@ eReadWriteCreate
read-write, create if it doesn't exist
void PrintRequestStop(void)
Print request stop message (for request-driven applications)
CDiagContext & GetDiagContext(void)
Get diag context instance.
void PrintRequestStart(const string &message)
Print request start message (for request-driven applications)
static CRequestContext & GetRequestContext(void)
Shortcut to CDiagContextThreadData::GetThreadData().GetRequestContext()
void SetRequestStatus(int status)
const CStopWatch & GetRequestTimer(void) const
Request execution timer.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void SetDiagStream(CNcbiOstream *os, bool quick_flush=true, FDiagCleanup cleanup=0, void *cleanup_data=0, const string &stream_name="")
Set diagnostic stream.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
void Info(CExceptionArgs_Base &args)
static string NormalizePath(const string &path, EFollowLinks follow_links=eIgnoreLinks)
Normalize a path.
TEntries GetEntries(const string &mask=kEmptyStr, TGetEntriesFlags flags=0) const
Get directory entries based on the specified "mask".
bool CreatePath(TCreateFlags flags=fCreate_Default) const
Create the directory path recursively possibly more than one at a time.
virtual bool Exists(void) const
Check if directory "dirname" exists.
bool IsDir(EFollowLinks follow=eFollowLinks) const
Check whether a directory entry is a directory.
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Delete existing directory.
const string & GetPath(void) const
Get entry path.
@ fCreateObjects
Create appropriate subclasses of CDirEntry (CFile,CDir,...), not just CDirEntry objects.
#define MSerial_AsnBinary
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
@ eGetId_HandleDefault
returns the ID associated with a bioseq-handle
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
CSeq_entry_Handle GetSeq_entryHandle(CDataLoader *loader, const TBlobId &blob_id, EMissing action=eMissing_Default)
Get Seq-entry handle by its blob-id, with possible loading.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
vector< string > TRegisteredNames
void ResetDataAndHistory(void)
Clear all information in the scope except added data loaders.
TBioseqStateFlags GetState(void) const
Get state of the bioseq.
const CSeq_id_Handle & GetSeq_id_Handle(void) const
Get handle of id used to obtain this bioseq handle.
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id) const
Get Bioseq handle from the TSE of this Seq-entry.
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
const TId & GetId(void) const
TMol GetBioseqMolType(void) const
Get some values from core:
const TInst & GetInst(void) const
SAnnotSelector & SetExactDepth(bool value=true)
SetExactDepth() specifies that annotations will be searched on the segment level specified by SetReso...
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
SAnnotSelector & SetResolveDepth(int depth)
SetResolveDepth sets the limit of subsegment resolution in searching annotations.
SAnnotSelector & ExcludeNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to exclude.
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer,.
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
static EFinish ExecWait(const string &cmd, const vector< string > &args, CNcbiIstream &in, CNcbiOstream &out, CNcbiOstream &err, int &exit_code, const string &current_dir=kEmptyStr, const char *const envp[]=0, IProcessWatcher *watcher=0, const STimeout *kill_timeout=0, size_t pipe_size=0)
Execute a command with a vector of arguments, and wait for its completion.
int64_t Int8
8-byte (64-bit) signed integer
size_t total
Total memory usage.
static bool GetMemoryUsage(SMemoryUsage &usage)
Get current process memory usage.
bool Set(const string &section, const string &name, const string &value, TFlags flags=0, const string &comment=kEmptyStr)
Set the configuration parameter value.
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static Int8 StringToInt8(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to Int8.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const
Transform time to string.
bool IsEmpty(void) const
Is time object empty (date and time)?
time_t GetTimeT(void) const
Get time in time_t format.
static CTimeFormat GetPredefined(EPredefined fmt, TFlags flags=fDefault)
Get predefined format.
void Start(void)
Start the timer.
@ eCurrent
Use current time. See also CCurrentTime.
@ eISO8601_Date
Y-M-D (eg 1997-07-16)
static bool IsSignaled(TSignalMask signals=eSignal_Any)
Check that any of specified signals is received.
static void TrapSignals(TSignalMask signals)
Sets interrupt signal handling.
@ eSignal_TERM
Termination.
TTimestamp GetTimestamp(void) const
Get the Timestamp member data.
void SetTimestamp(TTimestamp value)
Assign a value to Timestamp data member.
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
TVersion GetVersion(void) const
Get the Version member data.
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
const TAccession & GetAccession(void) const
Get the Accession member data.
const TSeq & GetSeq(void) const
Get the variant data.
void SetRelease(const TRelease &value)
Assign a value to Release data member.
TSet & SetSet(void)
Select the variant.
bool IsSetColl(void) const
to identify a collection Check if a value has been assigned to Coll data member.
TClass GetClass(void) const
Get the Class member data.
bool IsSetDate(void) const
Check if a value has been assigned to Date data member.
bool IsSetRelease(void) const
Check if a value has been assigned to Release data member.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
void SetId(TId &value)
Assign a value to Id data member.
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
void ResetAnnot(void)
Reset Annot data member.
void SetLevel(TLevel value)
Assign a value to Level data member.
const TRelease & GetRelease(void) const
Get the Release member data.
bool IsSetLevel(void) const
nesting level Check if a value has been assigned to Level data member.
bool IsSetDescr(void) const
Check if a value has been assigned to Descr data member.
bool IsSet(void) const
Check if variant Set is selected.
void SetDate(TDate &value)
Assign a value to Date data member.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
void SetClass(TClass value)
Assign a value to Class data member.
void SetColl(TColl &value)
Assign a value to Coll data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
TLevel GetLevel(void) const
Get the Level member data.
@ eClass_nuc_prot
nuc acid and coded proteins
list< CRef< CSeqdesc > > Tdata
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
void ResetAnnot(void)
Reset Annot data member.
bool IsSetExt(void) const
extensions for special types Check if a value has been assigned to Ext data member.
const Tdata & Get(void) const
Get the member data.
bool IsDelta(void) const
Check if variant Delta is selected.
const TExt & GetExt(void) const
Get the Ext member data.
EMol
molecule class in living organism
const TDelta & GetDelta(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TDescr & GetDescr(void) const
Get the Descr member data.
@ eMol_not_set
> cdna = rna
constexpr auto sort(_Init &&init)
double value_type
The numeric datatype used by the parser.
const struct ncbi::grid::netcache::search::fields::KEY key
Portable class to work with a spawned process via pipes.
Defines process management classes.
Setup interrupt signal handling.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
Defines: CTimeFormat - storage class for time format.
static pcre_uint8 * buffer
Defines CRequestContext class for NCBI C++ diagnostic API.
CRef< objects::CObjectManager > om
#define row(bind, expected)
static void s_PopulateSatelliteMap()
SBlobVersion(const string &line="")
static map< string, int > s_SatelliteMap
Process memory usage information, in bytes.
const CDir & m_SubcacheRoot
const SBlobLocator * m_LastBlob
sequence::EGetIdType m_IdType
CSeqIdChunkFile m_SeqIdChunk
vector< CSeq_id_Handle > extra_ids
SBlobCopier(const CDir &subcache_root, bool extract_delta, bool extract_product, sequence::EGetIdType id_type)
CAsnIndex::TOffset m_LastBlobOffset
void operator()(const SBlobLocator &main_cache_locator, SSubcacheIndexData &sub_cache_locator, CSeq_id_Handle &output_idh)
CRef< CSeq_entry > m_CurrentNucprotSeqEntry
const CDir & m_SubcacheRoot
SBlobInserter(const CDir &subcache_root, bool extract_delta, bool extract_product)
CSeqIdChunkFile m_SeqIdChunk
void operator()(CBioseq_Handle bsh, vector< SSubcacheIndexData > &sub_cache_locator)
vector< CSeq_id_Handle > extra_ids
SBlobLocator & operator=(const CAsnIndex &main_index)
CAsnIndex::TChunkId m_ChunkId
SBlobLocator(CSeq_id_Handle idh, const CDir &root_cache)
CAsnIndex::TSize m_BlobSize
bool operator<(const SBlobLocator &k2) const
CAsnIndex::TOffset m_Offset
bool operator==(const SBlobLocator &k2) const
SSeqIdIndex(CSeq_id_Handle idh)
bool operator>=(const CAsnIndex &index) const
bool operator==(const CAsnIndex &index) const
CAsnIndex::TSeqId m_SeqId
bool operator<(const SSeqIdIndex &k2) const
bool operator<(const CAsnIndex &index) const
CAsnIndex::TVersion m_Version
CAsnIndex::TSeqLength m_SeqLength
vector< CSeq_id_Handle > m_Ids
CAsnIndex::TSize m_SeqIdSize
CAsnIndex::TTimestamp m_Timestamp
CAsnIndex::TOffset m_Offset
CAsnIndex::TSize m_BlobSize
SSubcacheIndexData & operator=(const CAsnIndex &main_index)
CAsnIndex::TTaxId m_TaxId
CAsnIndex::TChunkId m_ChunkId
CAsnIndex::TOffset m_SeqIdOffset
bool TrimEntry(CConstRef< CSeq_entry > &entry, CBioseq_Handle bsh)
If entry is a large nucprot set, Optionally create a new trimmed Seq-entry containing only the needed...
void VerifyMolType(CBioseq_Handle bsh)
bool HasNameAndAccession(const CSeq_id_Handle &idh)
static void s_ReadIdsFromFile(CNcbiIstream &istr, TIndexMapById &index_map, TBlobLocationList &blob_locations, TCachedSeqIds &cached_seq_ids)
TIndexMapById::iterator TIndexRef
static CSeq_inst::EMol s_MolType
deque< SSubcacheIndexData > TBlobLocationList
vector< pair< SBlobLocator, TBlobLocationEntry > > TIndexMapByBlob
list< TIndexRef > TIndexRefList
bool s_RemoveAnnotsFromEntry(CSeq_entry &entry)
static bool s_ShouldFetchOneByOne(TIndexMapById &ids)
set< CSeq_id_Handle, CSeq_id_Handle::PLessOrdered > TCachedSeqIds
static bool s_RemoveAnnot
map< SSeqIdIndex, TBlobLocationEntry > TIndexMapById
int main(int argc, const char *argv[])
static bool s_TrimLargeNucprots
SSubcacheIndexData * TBlobLocationEntry
void ExtractExtraIds(CBioseq_Handle bsh, vector< CSeq_id_Handle > &extra_ids, bool extract_delta, bool extract_products)
CSeq_id_Handle StrippedAccVer(const CSeq_id_Handle &idh)