73 ITERATE(vector<CSeq_id_Handle>, it, handles)
80 if(ids1.size() != ids2.size())
85 if((*it2)->Match(**it1)){
102 virtual void Init(
void);
103 virtual int Run(
void);
104 virtual void Exit(
void);
124 arg_desc->SetUsageContext(
GetArguments().GetProgramBasename(),
125 "CArgDescriptions demo program");
127 arg_desc->AddKey(
"cache",
"ASNCache",
128 "Path to ASN.1 cache",
131 arg_desc->AddDefaultKey(
"i",
"AccessionList",
132 "List of accessions to retrieve",
136 arg_desc->AddDefaultKey(
"o",
"OutputFile",
137 "File to place ASN seq-entries in",
141 arg_desc->AddFlag(
"test-loader",
"Test use of the ASN cache data loader");
143 arg_desc->AddFlag(
"raw",
"Test raw retrieval only");
144 arg_desc->AddFlag(
"text",
"Use ASN.1 text output");
145 arg_desc->AddFlag(
"find-annotated",
"Find annotated accessions");
146 arg_desc->AddFlag(
"dump-GP-7574",
"Dump comments and seq-descs");
147 arg_desc->AddFlag(
"dump-GP-8763",
"Dump qualifiers");
148 arg_desc->AddFlag(
"dump-proteins",
"Dump protein seqs");
149 arg_desc->AddFlag(
"no-serialize",
"Do not reserialize the ASN.1");
150 arg_desc->AddFlag(
"random-order",
"Retrieve sequences in random order");
151 arg_desc->AddFlag(
"test-warm",
"Retrieve sequences twice, to check differences between cold-cache and warm-cache times");
153 arg_desc->AddFlag(
"indexonly",
"Print the index entry only, do not fetch the blob." );
155 arg_desc->AddFlag(
"idonly",
"Verify that ID information in the cache is available and accurate for the listed accessions." );
156 arg_desc->AddFlag(
"verify-ids",
"Verify that ID information in the cache is available and accurate for the listed accessions." );
171 arg_desc->AddFlag(
"get-multiple",
172 "If several entries match the specified id, get all of "
173 "them, not only latest one");
190 bool readIndexOnly = args[
"indexonly" ];
191 bool raw = args[
"raw"];
193 bool verify_ids = args[
"verify-ids"];
194 bool getIdOnly = args[
"idonly"];
195 bool multiple = args[
"get-multiple"];
197 vector< CConstRef<CSeq_entry> >
entries;
198 vector< CDataLoader::TIds > id_sets;
199 vector< CSeq_id_Handle > ids;
201 unique_ptr<CObjectOStream> os;
205 if (line.empty() || line[0] ==
'#') {
214 << line <<
" to a SeqId: " << e.
what());
220 id_sets.reserve(ids.size());
233 if(args[
"test-loader"]){
238 if(args[
"dump-proteins"]){
240 string cache_path = args[
"cache"].AsString();
251 int num_cycles = args[
"test-warm"] ? 2 : 1;
252 size_t count_failed = 0;
254 for(
int cycle = 0; cycle < num_cycles; cycle++){
255 if (args[
"random-order"]) {
256 shuffle(ids.begin(), ids.end(), default_random_engine());
263 ITERATE(vector<CSeq_id_Handle>, id_it, ids){
265 if ( readIndexOnly ) {
266 vector<CAsnIndex::SIndexInfo>
info;
275 ITERATE (vector<CAsnIndex::SIndexInfo>, info_it,
info) {
276 ostr << *info_it << endl;
279 vector<CAsnCache::TBuffer>
buffer(multiple ? 0 : 1);
285 ostr.write((
const char*)&(*buf_it)[0],
291 << id_it->GetSeqId()->AsFastaString());
294 }
else if (getIdOnly) {
297 loader->
GetIds(*id_it, id_set);
302 << id_it->GetSeqId()->AsFastaString());
305 id_sets.push_back(id_set);
307 vector< CRef<CSeq_entry> > entries_for_id;
314 entries_for_id.push_back(cache->
GetEntry(*id_it));
318 if(args[
"find-annotated"]) {
322 loader->
GetIds(*id_it, id_set);
327 cout<<
"is_annotated" <<
"\t"
328 <<
"original" <<
"\t"
330 << boolalpha << is_annotated << endl;
332 cout<<
"is_annotated" <<
"\t"
335 << boolalpha << is_annotated << endl;
338 if(entry && args[
"dump-GP-7574"]) {
339 cerr << *id_it << endl;
341 switch ( desc->Which() ) {
350 if(entry && args[
"dump-GP-8763"]) {
351 cerr << *id_it << endl;
bool first=
true;
353 if(desc->GetQual() ==
"inference" ||
354 desc->GetQual() ==
"experiment"
356 if(
first) { cout << *id_it << endl; }
first=
false;
363 if(entry && args[
"dump-proteins"]) {
366 feat->IsSetProduct() ) {
374 if( (*delta)->IsLoc() ) {
383 cout << *id_it <<
"\t"
391 if( ! args[
"find-annotated"])
397 loader->
GetIds(*id_it, retrieved_ids);
400 if(retrieved_ids.empty()){
402 << id_it->GetSeqId()->AsFastaString());
407 << id_it->GetSeqId()->AsFastaString() <<
": retrieved:");
408 ITERATE(vector<CSeq_id_Handle>, it, retrieved_ids)
415 LOG_POST(
Info <<
"Succesfully retrieved " << retrieved_ids.size()
416 <<
" ids for " << id_it->GetSeqId()->AsFastaString());
420 << id_it->GetSeqId()->AsFastaString());
430 << line <<
": " << e.
what());
435 LOG_POST(
Error <<
"done cycle " << cycle+1 <<
", " <<
count <<
" seqs / " << e <<
" seconds = "
436 <<
count / e <<
" seqs/sec ("
437 << count_failed <<
" failed to retrieve)");
439 if(args[
"dump-proteins"]) {
return static_cast<int>(count_failed); }
442 ITERATE(vector< CDataLoader::TIds >, it, id_sets){
444 if(id_it != it->begin())
446 ostr << id_it->GetSeqId()->AsFastaString();
486 int main(
int argc,
const char* argv[])
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Contains the class definition for CAsnCache, the main client class for accessing the ASN cache data.
static bool s_SameIds(const CBioseq::TId &ids1, const CBioseq::TId &ids2)
static const CBioseq::TId & s_GetSeqIds(const vector< CSeq_id_Handle > &handles)
int main(int argc, const char *argv[])
CConstRef< objects::CBioseq > ExtractBioseq(CConstRef< objects::CSeq_entry > entry, const objects::CSeq_id_Handle &id)
virtual int Run(void)
Run the application.
virtual void Init(void)
Initialize the application.
bool x_FindAnnotated(const CSeq_entry &entry)
virtual void Exit(void)
Cleanup on application exit.
static string GetLoaderNameFromArgs(void)
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &db_path, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
CAsnCache is used by clients to access the ASN cache data.
vector< CRef< objects::CSeq_entry > > GetMultipleEntries(const objects::CSeq_id_Handle &id)
bool GetMultipleRaw(const objects::CSeq_id_Handle &id, vector< TBuffer > &buffer)
bool GetRaw(const objects::CSeq_id_Handle &id, TBuffer &buffer)
Return the raw blob in an unformatted buffer.
bool GetIndexEntry(const objects::CSeq_id_Handle &id, CAsnIndex::SIndexInfo &info)
Get the full ASN cache index entry.
CRef< objects::CSeq_entry > GetEntry(const objects::CSeq_id_Handle &id)
Return a blob as a CSeq_entry object.
bool GetMultipleIndexEntries(const objects::CSeq_id_Handle &id, vector< CAsnIndex::SIndexInfo > &info)
bool GetSeqIds(const objects::CSeq_id_Handle &id, vector< objects::CSeq_id_Handle > &all_ids, bool cheap_only=true)
Return the set of seq-ids associated with a given ID.
Template class for iteration on objects of class C (non-modifiable version)
const_iterator begin() const
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eExcludes
One argument excludes another.
@ eInputFile
Name of file (must exist and be readable)
@ eOutputFile
Name of file (must be writable)
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void SetDiagStream(CNcbiOstream *os, bool quick_flush=true, FDiagCleanup cleanup=0, void *cleanup_data=0, const string &stream_name="")
Set diagnostic stream.
void Error(CExceptionArgs_Base &args)
void Warning(CExceptionArgs_Base &args)
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
void Info(CExceptionArgs_Base &args)
#define MSerial_AsnText
I/O stream manipulators.
@ eSerial_AsnText
ASN.1 text.
@ eSerial_AsnBinary
ASN.1 binary.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
vector< CSeq_id_Handle > TIds
string GetName(void) const
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CDataLoader * FindDataLoader(const string &loader_name) const
Try to find a registered data loader by name.
virtual void GetIds(const CSeq_id_Handle &idh, TIds &ids)
Request for a list of all Seq-ids of a sequence.
virtual TTSE_LockSet GetRecords(const CSeq_id_Handle &idh, EChoice choice)
Request from a datasource using handles and ranges instead of seq-loc The TSEs loaded in this call wi...
@ eBioseq
main blob with complete bioseq
bool CanGetInst(void) const
const TInst & GetInst(void) const
void Reset(void)
Reset reference object.
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
void Start(void)
Start the timer.
size_t serialize(const BV &bv, unsigned char *buf, bm::word_t *temp_block=0, unsigned serialization_flags=0)
Saves bitvector into memory.
const TSeq & GetSeq(void) const
Get the variant data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
bool IsSet(void) const
Check if variant Set is selected.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
bool IsSetExt(void) const
Extensions for special types. Check if a value has been assigned to Ext data member.
list< CRef< CSeq_id > > TId
bool IsDelta(void) const
Check if variant Delta is selected.
const TExt & GetExt(void) const
Get the Ext member data.
const TDelta & GetDelta(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
list< CRef< CDelta_seq > > Tdata
@ e_User
user defined object
@ e_Comment
a more extensive comment
const CharType(& source)[N]
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
Int4 delta(size_t dimension_, const Int4 *score_)
static wxAcceleratorEntry entries[3]