96 virtual void Init(
void);
97 virtual int Run (
void);
98 virtual void Exit(
void);
129 arg_desc->AddOptionalKey
131 "GI id of the Seq-Entry to fetch",
133 arg_desc->SetConstraint
137 arg_desc->AddDefaultKey
138 (
"fmt",
"OutputFormat",
139 "Format to dump the resulting data in",
141 arg_desc->SetConstraint
143 "asn",
"asnb",
"xml",
"genbank",
"genpept",
"fasta",
144 "quality",
"docsum"));
147 arg_desc->AddDefaultKey
148 (
"out",
"ResultFile",
149 "File to dump the resulting data to",
153 arg_desc->AddOptionalKey
155 "File to post errors and messages to",
160 arg_desc->AddOptionalKey
166 arg_desc->AddOptionalKey
167 (
"ent",
"EntityNumber",
168 "(Sub)entity number (retrieval number) to dump",
170 arg_desc->SetConstraint
174 arg_desc->AddDefaultKey
178 arg_desc->SetConstraint
180 "entry",
"state",
"ids",
"history",
"revisions",
"none"));
183 arg_desc->AddOptionalKey
184 (
"in",
"RequestFile",
185 "File with list of GIs, (versioned) accessions, FASTA SeqIDs to dump",
189 arg_desc->AddOptionalKey
190 (
"maxplex",
"MaxComplexity",
191 "Maximum complexity to return",
193 arg_desc->SetConstraint
195 "entry",
"bioseq",
"bioseq-set",
"nuc-prot",
199 arg_desc->AddOptionalKey
200 (
"extfeat",
"ExtFeat",
201 "Add features, delimited by ',': "
202 "SNP, SNP_graph, CDD, MGC, HPRD, STS, tRNA, Exon",
206 arg_desc->AddOptionalKey
208 "Flattened SeqID; format can be\n"
209 "\t'type([name][,[accession][,[release][,version]]])'"
210 " [e.g., '5(HUMHBB)'],\n"
211 "\ttype=accession[.version], or type:number",
215 arg_desc->AddOptionalKey
217 "FASTA-style SeqID, in the form \"type|data\"; choices are\n"
218 "\tlcl|int lcl|str bbs|int bbm|int gim|int gb|acc|loc emb|acc|loc\n"
219 "\tpir|acc|name sp|acc|name pat|country|patent|seq ref|acc|name|rel\n"
220 "\tgnl|db|id gi|int dbj|acc|loc prf|acc|name pdb|entry|chain\n"
221 "\ttpg|acc|name tpe|acc|name tpd|acc|name",
225 arg_desc->AddOptionalKey
226 (
"query",
"EntrezQueryString",
227 "Generate GI list by Entrez query given on command line",
229 arg_desc->AddOptionalKey
230 (
"qf",
"EntrezQueryFile",
231 "Generate GI list by Entrez query in given file",
235 string prog_description =
236 "Fetch SeqEntry from ID server by its GI ID, possibly obtained from\n"
237 "its SeqID or an Entrez query";
238 arg_desc->SetUsageContext(
GetArguments().GetProgramBasename(),
239 prog_description,
false);
242 arg_desc->AddDefaultKey
244 "Repeat fetch number of times",
248 arg_desc->AddOptionalKey
249 (
"timeout",
"Timeout",
250 "Network connection timeout in seconds",
285 if ( args[
"timeout"] ) {
286 int timeout = args[
"timeout"].AsInteger();
296 const string& fmt = args[
"fmt"].AsString();
298 if (args[
"gi"]) id_count++;
299 if (args[
"in"]) id_count++;
300 if (args[
"flat"]) id_count++;
301 if (args[
"fasta"]) id_count++;
302 if (args[
"query"]) id_count++;
303 if (args[
"qf"]) id_count++;
307 "You must supply exactly one argument"
308 " indicating what to look up.");
310 if ((args[
"query"] || args[
"qf"] || fmt ==
"docsum")
312 ERR_POST(
"No Entrez database supplied. Try -db Nucleotide or "
316 if ((fmt ==
"genbank" || fmt ==
"genpept" || fmt ==
"quality")
317 && args[
"lt"].AsString() !=
"entry") {
318 ERR_POST(
"The output format '" << fmt
319 <<
"' is only available for Seq-Entries.");
335 int repeat = args[
"repeat"].AsInteger();
336 for (
int pass = 0; pass < repeat; ++pass ) {
358 while (is && !is.eof()) {
366 if (
id.find(
'|') !=
NPOS) {
368 }
else if (
id.find_first_of(
":=(") !=
NPOS) {
380 if (args[
"query"] || args[
"qf"]) {
386 e2_element->SetStr(args[
"query"].AsString());
399 query.SetExp().push_back(e2_element);
403 if ( !reply->GetCount() ) {
404 ERR_POST(
"Entrez query returned no results.");
410 = reply->GetUids().GetConstUidIterator();
425 const string& fmt = args[
"fmt"].AsString();
426 const string&
lt = args[
"lt"].AsString();
428 bool use_objmgr =
false;
433 }
else if (fmt ==
"docsum") {
445 if ( !docs->GetCount() ) {
446 ERR_POST(
"Entrez query returned no results.");
450 string caption, title;
454 if (it->GetField_name() ==
"Caption") {
455 caption = it->GetField_value();
456 }
else if (it->GetField_name() ==
"Title") {
457 title = it->GetField_value();
460 *m_OutputFile << '>
';
461 if ( !caption.empty() ) {
462 *m_OutputFile << caption;
464 *m_OutputFile << ' ';
465 if ( !title.empty() ) {
466 *m_OutputFile << title;
468 } else if (lt == "entry") {
469 if ( args["maxplex"] || args["extfeat"] ) {
470 CRef<CID1server_back> id1_reply(new CID1server_back);
471 CRef<CID1server_maxcomplex> maxcomplex(new CID1server_maxcomplex);
472 int mp = eEntry_complexities_entry;
473 if ( args["maxplex"] ) {
474 string maxplex = args["maxplex"].AsString();
475 if ( maxplex == "bioseq" ) {
476 mp = eEntry_complexities_bioseq;
478 else if ( maxplex == "bioseq-set" ) {
479 mp = eEntry_complexities_bioseq_set;
481 else if ( maxplex == "nuc-prot" ) {
482 mp = eEntry_complexities_nuc_prot;
484 else if ( maxplex == "pub-set" ) {
485 mp = eEntry_complexities_pub_set;
488 if ( args["extfeat"] ) {
490 vector<string> extfeat;
491 NStr::Split(args["extfeat"].AsString(), ",", extfeat);
492 ITERATE ( vector<string>, it, extfeat ) {
493 if ( *it == "SNP" ) {
496 else if ( *it == "SNP_graph" ) {
499 else if ( *it == "CDD" ) {
502 else if ( *it == "MGC" ) {
505 else if ( *it == "HPRD" ) {
508 else if ( *it == "STS" ) {
511 else if ( *it == "tRNA" ) {
514 else if ( *it == "Exon" ) {
518 ERR_POST("Unknown extfeat type: "<<*it);
523 maxcomplex->SetMaxplex(mp);
524 maxcomplex->SetGi(gi);
525 reply_object = m_ID1Client.AskGetsefromgi(*maxcomplex, id1_reply);
530 } else if (lt == "state") {
531 CRef<CID1server_back> id1_reply(new CID1server_back);
532 int state = m_ID1Client.AskGetgistate(gi, id1_reply);
533 if (fmt == "fasta") {
534 *m_OutputFile << "gi = " << gi << ", states: ";
535 switch (state & 0xff) {
536 case 0: *m_OutputFile << "NONEXISTENT"; break; // was "NOT EXIST"
537 case 10: *m_OutputFile << "DELETED"; break;
538 case 20: *m_OutputFile << "REPLACED"; break;
539 case 40: *m_OutputFile << "LIVE"; break;
540 default: *m_OutputFile << "UNKNOWN"; break;
543 *m_OutputFile << "|SUPPRESSED";
546 *m_OutputFile << "|WITHDRAWN";
549 *m_OutputFile << "|CONFIDENTIAL";
552 reply_object = id1_reply;
554 } else if (lt == "ids") {
556 CRef<CID1server_back> id1_reply(new CID1server_back);
557 CID1server_back::TIds ids
558 = m_ID1Client.AskGetseqidsfromgi(gi, id1_reply);
559 if (fmt == "fasta") {
562 reply_object = id1_reply;
567 } else if (lt == "history" || lt == "revisions") {
568 CRef<CID1server_back> id1_reply(new CID1server_back);
569 // ignore result -- it's simpler to use id1_reply
570 if (
lt ==
"history") {
575 if (fmt ==
"fasta") {
578 reply_object = id1_reply;
590 ERR_FATAL(
"Bioseq not found: " <<
id.DumpAsFasta());
595 ((fmt ==
"fasta" && (
lt ==
"ids" ||
lt ==
"entry")) ||
598 fmt ==
"genpept") ) {
607 ERR_FATAL(
"Bioseq not found: " <<
id.DumpAsFasta());
615 }
else if (fmt ==
"asnb") {
617 }
else if (fmt ==
"xml") {
619 }
else if (fmt ==
"fasta" &&
lt ==
"ids") {
623 }
else if (fmt ==
"fasta" &&
lt ==
"entry") {
628 }
else if (fmt ==
"quality") {
630 }
else if (fmt ==
"genbank" || fmt ==
"genpept") {
631 bool gp = fmt ==
"genpept";
651 unique_ptr<CObjectOStream> asn_output
703 string data = s.substr(pos + 1);
714 data.erase(data.end() - 1);
716 vector<string> pieces;
734 if (it != ids.begin()) {
753 string Get(
unsigned int index)
const {
770 dates.
Add(
"Loaded").
Add(
"------");
772 numbers.
Add(
"Retrieval No.").
Add(
"-------------");
778 if ( it->GetDate().IsStr() ) {
779 dates.
Add(it->GetDate().GetStr());
783 oss << setfill(
'0') << setw(2) << date.
GetMonth() <<
'/'
789 if ( (*it2)->IsGi() ) {
790 gi = (*it2)->GetGi();
791 }
else if ( (*it2)->IsGeneral() ) {
792 dbname = (*it2)->GetGeneral().GetDb();
807 for (
unsigned int n = 0;
n < gis.
Height();
n++) {
825 ->GetSeqIdString(
true);
828 string title = it->GetTitle();
829 if (title.find(
"uality") ==
NPOS) {
833 const CByte_graph& data = it->GetGraph().GetByte();
834 *m_OutputFile << '>
' << id << ' ' << title
835 << " (Length: " << it->GetNumval()
836 << ", Min: " << data.GetMin()
837 << ", Max: " << data.GetMax() << ')
' << NcbiEndl;
838 for (SIZE_TYPE n = 0; n < data.GetValues().size(); ++n) {
839 *m_OutputFile << setw(3) << static_cast<int>(data.GetValues()[n]);
841 *m_OutputFile << NcbiEndl;
857 int main(int argc, const char* argv[])
859 return CId1FetchApp().AppMain(argc, argv);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void GetDate(string *label, const string &format) const
Append a custom string representation of the date to the label.
CEntrez2_boolean_element –.
TUidIterator GetUidIterator()
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
void SetFormatGenbank(void)
CFlatFileConfig & SetHideSNPFeatures(bool val=true)
void SetMode(const TMode &mode)
CFlatFileConfig & SetShowContigFeatures(bool val=true)
CFlatFileConfig & SetShowContigSources(bool val=true)
void Generate(const CSeq_entry_Handle &entry, CFlatItemOStream &item_os, const multiout &={})
SAnnotSelector & SetAnnotSelector(void)
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
void SetAllowDeadEntries(bool ok)
@ID1server_back.hpp User-defined methods of the data storage class.
void WriteHistoryTable(const CID1server_back &id1_reply)
void WriteQualityScores(CBioseq_Handle &handle)
TGi LookUpFlatSeqID(const string &s)
CEntrez2Client m_E2Client
virtual int Run(void)
Run the application.
CRef< CObjectManager > m_ObjMgr
TGi LookUpRawSeqID(const string &s)
virtual void Init(void)
Initialize the application.
TGi LookUpFastaSeqID(const string &s)
CNcbiOstream * m_OutputFile
void WriteFastaIDs(const list< CRef< CSeq_id > > &ids)
virtual void Exit(void)
Cleanup on application exit.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
string Get(unsigned int index) const
CTextColumn & Add(string s)
vector< string > m_Strings
Template class for iteration on objects of class C (non-modifiable version)
std::ofstream out("events_result.xml")
main entry point for tests
#define GI_FROM(T, value)
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fBinary
Open as binary file; for eInputFile, eOutputFile, eIOFile.
@ fPreOpen
Open file right away; for eInputFile, eOutputFile, eIOFile.
@ eInputFile
Name of file (must exist and be readable)
@ eIntId
Convertible to TIntId (int or Int8 depending on NCBI_INT8_GI)
@ eString
An arbitrary string.
@ eOutputFile
Name of file (must be writable)
@ eInteger
Convertible into an integer number (int or Int8)
#define ERR_FATAL(message)
Posting fatal error and abort.
void SetDiagPostFlag(EDiagPostFlag flag)
Set the specified flag (globally).
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void SetDiagStream(CNcbiOstream *os, bool quick_flush=true, FDiagCleanup cleanup=0, void *cleanup_data=0, const string &stream_name="")
Set diagnostic stream.
@ eDPF_All
All flags (except for the "unusual" ones!)
@ eDiag_Info
Informational message.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
#define THROW_TRACE_ARGS(exception_class,...)
Throw trace.
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
EIO_Status SetTimeout(const STimeout *timeout, EIO_Event direction=eIO_ReadWrite)
ESerialDataFormat
Data file format.
@ eSerial_AsnText
ASN.1 text.
@ eSerial_AsnBinary
ASN.1 binary.
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
@ fParse_NoFASTA
Don't bother checking for a tag.
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
@ fInstantiateGaps
honor specified gap mode; on by default
@ fAssembleParts
assemble FAR delta sequences; on by default
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence within one TSE.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
void ResetDataAndHistory(void)
Clear all information in the scope except added data loaders.
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
SAnnotSelector & ExcludeNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to exclude.
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
size_t NcbiStreamToString(string *s, CNcbiIstream &is, size_t pos=0)
Input the entire contents of an istream into a string (NULL causes drain).
NCBI_NS_STD::string::size_type SIZE_TYPE
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
unsigned int usec
microseconds (modulo 1,000,000)
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
void SetTool(const TTool &value)
Assign a value to Tool data member.
virtual TRequest & SetDefaultRequest(void)
void SetReturn_UIDs(TReturn_UIDs value)
Assign a value to Return_UIDs data member.
virtual CRef< CEntrez2_boolean_reply > AskEval_boolean(const CEntrez2_eval_boolean &req, TReply *reply=0)
void SetDb(const TDb &value)
Assign a value to Db data member.
void SetQuery(TQuery &value)
Assign a value to Query data member.
virtual CRef< CEntrez2_docsum_list > AskGet_docsum(const CEntrez2_id_list &req, TReply *reply=0)
TYear GetYear(void) const
Get the Year member data.
TMonth GetMonth(void) const
Get the Month member data.
TDay GetDay(void) const
Get the Day member data.
virtual NCBI_NS_NCBI::TGi AskGetgi(const CSeq_id &req, TReply *reply=0)
virtual list< CRef< CID1Seq_hist > > AskGetgihist(const NCBI_NS_NCBI::TGi &req, TReply *reply=0)
virtual list< CRef< CID1Seq_hist > > AskGetgirev(const NCBI_NS_NCBI::TGi &req, TReply *reply=0)
TGi & SetGi(void)
Select the variant.
const TId & GetId(void) const
Get the Id member data.
list< CRef< CSeq_id > > TIds
char * dbname(DBPROCESS *dbproc)
Get name of current database.
USING_SCOPE(NCBI_NS_NCBI::objects)
constexpr bool empty(list< Ts... >) noexcept
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
Process information in the NCBI Registry, including working with configuration files.
static const char * str(char *buf, int n)