NCBI C++ ToolKit
id1_fetch.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: id1_fetch.cpp 96207 2022-02-24 14:23:57Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Denis Vakatov, Aleksey Grichenko, Aaron Ucko
27  *
28  * File Description:
29  * New IDFETCH network client (get data from "ID1")
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbiapp.hpp>
34 #include <corelib/ncbienv.hpp>
35 #include <corelib/ncbiargs.hpp>
36 #include <corelib/ncbireg.hpp>
37 
39 
40 #include <serial/enumvalues.hpp>
41 #include <serial/objostrasnb.hpp>
42 #include <serial/iterator.hpp>
43 
54 #include <objects/general/Date.hpp>
61 
62 #include <objmgr/graph_ci.hpp>
64 #include <objmgr/scope.hpp>
65 #include <objmgr/seq_vector.hpp>
68 
69 #include <objects/seq/Bioseq.hpp>
72 #include <objects/seq/Seqdesc.hpp>
79 #include <objmgr/util/sequence.hpp>
80 
81 #include <memory>
82 #include <algorithm>
83 #include <list>
84 
85 
87 USING_SCOPE(NCBI_NS_NCBI::objects); // MSVC requires qualification (!)
88 
89 
90 /////////////////////////////////
91 // CId1FetchApp::
92 //
93 
95 {
96  virtual void Init(void);
97  virtual int Run (void);
98  virtual void Exit(void);
99 
100 private:
101  bool LookUpGI(TGi gi);
102  TGi LookUpFastaSeqID(const string& s);
103  TGi LookUpRawSeqID(const string& s);
104  TGi LookUpFlatSeqID(const string& s);
105 
106  void WriteFastaIDs (const list< CRef< CSeq_id > >& ids);
107 
108  void WriteHistoryTable (const CID1server_back& id1_reply);
109  void WriteQualityScores(CBioseq_Handle& handle);
110 
116  bool m_ResetDiagStream = false;
117 };
118 
119 
121 {
122  // Prepare command line descriptions
123  //
124 
125  // Create
126  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
127 
128  // GI
129  arg_desc->AddOptionalKey
130  ("gi", "SeqEntryID",
131  "GI id of the Seq-Entry to fetch",
133  arg_desc->SetConstraint
134  ("gi", new CArgAllow_Int8s(0, kMax_I8));
135 
136  // Output format
137  arg_desc->AddDefaultKey
138  ("fmt", "OutputFormat",
139  "Format to dump the resulting data in",
141  arg_desc->SetConstraint
142  ("fmt", &(*new CArgAllow_Strings,
143  "asn", "asnb", "xml", "genbank", "genpept", "fasta",
144  "quality", "docsum"));
145 
146  // Output datafile
147  arg_desc->AddDefaultKey
148  ("out", "ResultFile",
149  "File to dump the resulting data to",
151 
152  // Log file
153  arg_desc->AddOptionalKey
154  ("log", "LogFile",
155  "File to post errors and messages to",
157  0);
158 
159  // Database to use
160  arg_desc->AddOptionalKey
161  ("db", "Database", // was "-d" in `idfetch'
162  "Database to use",
164 
165  // Entity number
166  arg_desc->AddOptionalKey
167  ("ent", "EntityNumber", // was "-e" in `idfetch'
168  "(Sub)entity number (retrieval number) to dump",
170  arg_desc->SetConstraint
171  ("ent", new CArgAllow_Integers(0, kMax_Int));
172 
173  // Type of lookup
174  arg_desc->AddDefaultKey
175  ("lt", "LookupType", // combination of "-i" (!) and "-n" in `idfetch'
176  "Type of lookup",
177  CArgDescriptions::eString, "entry");
178  arg_desc->SetConstraint
179  ("lt", &(*new CArgAllow_Strings,
180  "entry", "state", "ids", "history", "revisions", "none"));
181 
182  // File with list of stuff to dump
183  arg_desc->AddOptionalKey
184  ("in", "RequestFile", // was "-G" (!) in `idfetch'
185  "File with list of GIs, (versioned) accessions, FASTA SeqIDs to dump",
187 
188  // Maximum complexity
189  arg_desc->AddOptionalKey
190  ("maxplex", "MaxComplexity", // was "-c" in `idfetch'
191  "Maximum complexity to return",
193  arg_desc->SetConstraint
194  ("maxplex", &(*new CArgAllow_Strings,
195  "entry", "bioseq", "bioseq-set", "nuc-prot",
196  "pub-set"));
197 
198  // External features
199  arg_desc->AddOptionalKey
200  ("extfeat", "ExtFeat", // was "-F" in `idfetch'
201  "Add features, delimited by ',': "
202  "SNP, SNP_graph, CDD, MGC, HPRD, STS, tRNA, Exon",
204 
205  // Flattened SeqID
206  arg_desc->AddOptionalKey
207  ("flat", "FlatID", // was "-f" in `idfetch'
208  "Flattened SeqID; format can be\n"
209  "\t'type([name][,[accession][,[release][,version]]])'"
210  " [e.g., '5(HUMHBB)'],\n"
211  "\ttype=accession[.version], or type:number",
213 
214  // FASTA-style SeqID
215  arg_desc->AddOptionalKey
216  ("fasta", "FastaID", // was "-s" in `idfetch'
217  "FASTA-style SeqID, in the form \"type|data\"; choices are\n"
218  "\tlcl|int lcl|str bbs|int bbm|int gim|int gb|acc|loc emb|acc|loc\n"
219  "\tpir|acc|name sp|acc|name pat|country|patent|seq ref|acc|name|rel\n"
220  "\tgnl|db|id gi|int dbj|acc|loc prf|acc|name pdb|entry|chain\n"
221  "\ttpg|acc|name tpe|acc|name tpd|acc|name",
223 
224  // Generate GI list by Entrez query
225  arg_desc->AddOptionalKey
226  ("query", "EntrezQueryString", // was "-q" in `idfetch'
227  "Generate GI list by Entrez query given on command line",
229  arg_desc->AddOptionalKey
230  ("qf", "EntrezQueryFile", // was "-Q" in `idfetch'
231  "Generate GI list by Entrez query in given file",
233 
234  // Program description
235  string prog_description =
236  "Fetch SeqEntry from ID server by its GI ID, possibly obtained from\n"
237  "its SeqID or an Entrez query";
238  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
239  prog_description, false);
240 
241 
242  arg_desc->AddDefaultKey
243  ("repeat", "repeat",
244  "Repeat fetch number of times",
246 
247  // timeout
248  arg_desc->AddOptionalKey
249  ("timeout", "Timeout",
250  "Network connection timeout in seconds",
252 
253  // Pass argument descriptions to the application
254  //
255 
256  SetupArgDescriptions(arg_desc.release());
257 }
258 
259 
// Predicate for "replace_if": tells whether `c` is a control character.
// The value is converted to unsigned char before classification so that
// a negative `char` is never handed to iscntrl().
inline bool s_IsControl(char c)
{
    const unsigned char uc = static_cast<unsigned char>(c);
    return iscntrl(uc) != 0;
}
265 
266 
268 {
269  // Process command line args
270  const CArgs& args = GetArgs();
271 
272  // Setup and tune logging facilities
273  if ( args["log"] ) {
274  m_ResetDiagStream = true;
275  SetDiagStream( &args["log"].AsOutputFile() );
276  }
277 #ifdef _DEBUG
278  // SetDiagTrace(eDT_Enable);
281 #endif
282 
284 
285  if ( args["timeout"] ) {
286  int timeout = args["timeout"].AsInteger();
287  STimeout tmo;
288  tmo.sec = timeout;
289  tmo.usec = 0;
290  m_ID1Client.SetTimeout(&tmo);
291  }
292 
293  // Make sure the combination of arguments is valid
294  {{
295  int id_count = 0;
296  const string& fmt = args["fmt"].AsString();
297 
298  if (args["gi"]) id_count++;
299  if (args["in"]) id_count++;
300  if (args["flat"]) id_count++;
301  if (args["fasta"]) id_count++;
302  if (args["query"]) id_count++;
303  if (args["qf"]) id_count++;
304 
305  if (id_count != 1) {
306  NCBI_THROW(CArgException,eNoArg,
307  "You must supply exactly one argument"
308  " indicating what to look up.");
309  }
310  if ((args["query"] || args["qf"] || fmt == "docsum")
311  && !args["db"]) {
312  ERR_POST("No Entrez database supplied. Try -db Nucleotide or "
313  "-db Protein.");
314  return -1;
315  }
316  if ((fmt == "genbank" || fmt == "genpept" || fmt == "quality")
317  && args["lt"].AsString() != "entry") {
318  ERR_POST("The output format '" << fmt
319  << "' is only available for Seq-Entries.");
320  return -1;
321  }
322  }}
323 
324  m_E2Client.SetDefaultRequest().SetTool("id1_fetch");
325 
326  // Open output file
327  m_OutputFile = &args["out"].AsOutputFile();
328 
329  // Set up object manager
331  m_Scope = new CScope(*m_ObjMgr);
333  m_Scope->AddDefaults();
334 
335  int repeat = args["repeat"].AsInteger();
336  for ( int pass = 0; pass < repeat; ++pass ) {
337  if (args["gi"]) {
338  if ( !LookUpGI(GI_FROM(TIntId, args["gi"].AsIntId())) )
339  return -1;
340  }
341 
342  if (args["fasta"]) {
343  TGi gi = LookUpFastaSeqID(args["fasta"].AsString());
344  if (gi <= ZERO_GI || !LookUpGI(gi)) {
345  return -1;
346  }
347  }
348 
349  if (args["flat"]) {
350  TGi gi = LookUpFlatSeqID(args["flat"].AsString());
351  if (gi <= ZERO_GI || !LookUpGI(gi)) {
352  return -1;
353  }
354  }
355 
356  if (args["in"]) {
357  CNcbiIstream& is = args["in"].AsInputFile();
358  while (is && !is.eof()) {
359  string id;
360  TGi gi;
361 
362  is >> id;
363  if (id.empty()) {
364  break;
365  }
366  if (id.find('|') != NPOS) {
367  gi = LookUpFastaSeqID(id);
368  } else if (id.find_first_of(":=(") != NPOS) {
369  gi = LookUpFlatSeqID(id);
370  } else {
371  gi = LookUpRawSeqID(id);
372  }
373 
374  if (gi <= ZERO_GI || !LookUpGI(gi)) {
375  return -1;
376  }
377  }
378  }
379 
380  if (args["query"] || args["qf"]) {
381  // Form query
384 
385  if (args["query"]) {
386  e2_element->SetStr(args["query"].AsString());
387  } else {
388  string str;
389  NcbiStreamToString(&str, args["qf"].AsInputFile());
390  replace_if(str.begin(), str.end(), s_IsControl, ' ');
391  }
392 
393  // Make the actual query
395  {{
397  eb.SetReturn_UIDs(true);
399  query.SetExp().push_back(e2_element);
400  query.SetDb() = CEntrez2_db_id(args["db"].AsString());
401  reply = m_E2Client.AskEval_boolean(eb);
402  }}
403  if ( !reply->GetCount() ) {
404  ERR_POST("Entrez query returned no results.");
405  return -1;
406  }
407 
408  // Query succeeded; proceed to next stage of lookup
410  = reply->GetUids().GetConstUidIterator();
411  !it.AtEnd(); ++it) {
413  return -1;
414  }
415  }
416  }
417  }
418  return 0;
419 }
420 
421 
423 {
424  const CArgs& args = GetArgs();
425  const string& fmt = args["fmt"].AsString();
426  const string& lt = args["lt"].AsString();
427  CConstRef<CSerialObject> reply_object;
428  bool use_objmgr = false;
429 
430  if (lt == "none") {
431  *m_OutputFile << gi << NcbiEndl;
432  return true; // Done
433  } else if (fmt == "docsum") {
434  // Handling this here costs some efficiency when the GI came
435  // from an Entrez query in the first place, but wins on generality.
436  CEntrez2_id_list uids;
437  uids.SetDb() = CEntrez2_db_id(args["db"].AsString());
438  uids.Resize(1);
439  {{
442  }}
443  uids.PackUids();
445  if ( !docs->GetCount() ) {
446  ERR_POST("Entrez query returned no results.");
447  return false;
448  }
449 
450  string caption, title;
452  it; ++it) {
453  // Should this be case-insensitive?
454  if (it->GetField_name() == "Caption") {
455  caption = it->GetField_value();
456  } else if (it->GetField_name() == "Title") {
457  title = it->GetField_value();
458  }
459  }
460  *m_OutputFile << '>';
461  if ( !caption.empty() ) {
462  *m_OutputFile << caption;
463  }
464  *m_OutputFile << ' ';
465  if ( !title.empty() ) {
466  *m_OutputFile << title;
467  }
468  } else if (lt == "entry") {
469  if ( args["maxplex"] || args["extfeat"] ) {
470  CRef<CID1server_back> id1_reply(new CID1server_back);
471  CRef<CID1server_maxcomplex> maxcomplex(new CID1server_maxcomplex);
472  int mp = eEntry_complexities_entry;
473  if ( args["maxplex"] ) {
474  string maxplex = args["maxplex"].AsString();
475  if ( maxplex == "bioseq" ) {
476  mp = eEntry_complexities_bioseq;
477  }
478  else if ( maxplex == "bioseq-set" ) {
479  mp = eEntry_complexities_bioseq_set;
480  }
481  else if ( maxplex == "nuc-prot" ) {
482  mp = eEntry_complexities_nuc_prot;
483  }
484  else if ( maxplex == "pub-set" ) {
485  mp = eEntry_complexities_pub_set;
486  }
487  }
488  if ( args["extfeat"] ) {
489  int ff = 0;
490  vector<string> extfeat;
491  NStr::Split(args["extfeat"].AsString(), ",", extfeat);
492  ITERATE ( vector<string>, it, extfeat ) {
493  if ( *it == "SNP" ) {
494  ff |= 1 << 0;
495  }
496  else if ( *it == "SNP_graph" ) {
497  ff |= 1 << 2;
498  }
499  else if ( *it == "CDD" ) {
500  ff |= 1 << 3;
501  }
502  else if ( *it == "MGC" ) {
503  ff |= 1 << 4;
504  }
505  else if ( *it == "HPRD" ) {
506  ff |= 1 << 5;
507  }
508  else if ( *it == "STS" ) {
509  ff |= 1 << 6;
510  }
511  else if ( *it == "tRNA" ) {
512  ff |= 1 << 7;
513  }
514  else if ( *it == "Exon" ) {
515  ff |= 1 << 9;
516  }
517  else {
518  ERR_POST("Unknown extfeat type: "<<*it);
519  }
520  }
521  mp |= ~ff << 4;
522  }
523  maxcomplex->SetMaxplex(mp);
524  maxcomplex->SetGi(gi);
525  reply_object = m_ID1Client.AskGetsefromgi(*maxcomplex, id1_reply);
526  }
527  else {
528  use_objmgr = true;
529  }
530  } else if (lt == "state") {
531  CRef<CID1server_back> id1_reply(new CID1server_back);
532  int state = m_ID1Client.AskGetgistate(gi, id1_reply);
533  if (fmt == "fasta") {
534  *m_OutputFile << "gi = " << gi << ", states: ";
535  switch (state & 0xff) {
536  case 0: *m_OutputFile << "NONEXISTENT"; break; // was "NOT EXIST"
537  case 10: *m_OutputFile << "DELETED"; break;
538  case 20: *m_OutputFile << "REPLACED"; break;
539  case 40: *m_OutputFile << "LIVE"; break;
540  default: *m_OutputFile << "UNKNOWN"; break;
541  }
542  if (state & 0x100) {
543  *m_OutputFile << "|SUPPRESSED";
544  }
545  if (state & 0x200) {
546  *m_OutputFile << "|WITHDRAWN";
547  }
548  if (state & 0x400) {
549  *m_OutputFile << "|CONFIDENTIAL";
550  }
551  } else {
552  reply_object = id1_reply;
553  }
554  } else if (lt == "ids") {
555 #if 1
556  CRef<CID1server_back> id1_reply(new CID1server_back);
557  CID1server_back::TIds ids
558  = m_ID1Client.AskGetseqidsfromgi(gi, id1_reply);
559  if (fmt == "fasta") {
560  WriteFastaIDs(ids);
561  } else {
562  reply_object = id1_reply;
563  }
564 #else
565  use_objmgr = true;
566 #endif
567  } else if (lt == "history" || lt == "revisions") {
568  CRef<CID1server_back> id1_reply(new CID1server_back);
569  // ignore result -- it's simpler to use id1_reply
570  if (lt == "history") {
571  m_ID1Client.AskGetgihist(gi, id1_reply);
572  } else {
573  m_ID1Client.AskGetgirev(gi, id1_reply);
574  }
575  if (fmt == "fasta") {
576  WriteHistoryTable(*id1_reply);
577  } else {
578  reply_object = id1_reply;
579  }
580  }
581 
582  CBioseq_Handle handle;
583  if (use_objmgr) {
585  // What about db, ent, and maxplex?
586  CSeq_id id;
587  id.SetGi(gi);
588  handle = m_Scope->GetBioseqHandle(id);
589  if ( !handle ) {
590  ERR_FATAL("Bioseq not found: " << id.DumpAsFasta());
591  }
592  reply_object = handle.GetTopLevelEntry().GetCompleteSeq_entry();
593  }
594  else if ( dynamic_cast<const CSeq_entry*>(reply_object.GetPointer()) &&
595  ((fmt == "fasta" && (lt == "ids" || lt == "entry")) ||
596  fmt == "quality" ||
597  fmt == "genbank" ||
598  fmt == "genpept") ) {
599  // these formatting modes require CBioseq_Handle
601  const CSeq_entry& se = dynamic_cast<const CSeq_entry&>(*reply_object);
603  CSeq_id id;
604  id.SetGi(gi);
605  handle = m_Scope->GetBioseqHandleFromTSE(id, tse);
606  if ( !handle ) {
607  ERR_FATAL( "Bioseq not found: " << id.DumpAsFasta());
608  }
609  }
610 
611  // Dump server response in the specified format
613  if (fmt == "asn") {
615  } else if (fmt == "asnb") {
617  } else if (fmt == "xml") {
619  } else if (fmt == "fasta" && lt == "ids") {
620  if (use_objmgr) {
621  WriteFastaIDs(handle.GetBioseqCore()->GetId());
622  }
623  } else if (fmt == "fasta" && lt == "entry") {
627  out.Write(handle);
628  } else if (fmt == "quality") {
629  WriteQualityScores(handle);
630  } else if (fmt == "genbank" || fmt == "genpept") {
631  bool gp = fmt == "genpept";
632  CSeq_entry_Handle entry = handle.GetTopLevelEntry();
633 
634  CFlatFileConfig ff_config;
636  ff_config.SetFormatGenbank();
637  ff_config
641  if (gp) {
642  ff_config.SetViewProt();
643  }
644  CFlatFileGenerator ffg(ff_config);
646 
647  ffg.Generate(entry, *m_OutputFile);
648  }
649 
650  if (reply_object.NotEmpty() && format != eSerial_None) {
651  unique_ptr<CObjectOStream> asn_output
653  // *asn_output << *reply_object;
654  asn_output->Write(reply_object, reply_object->GetThisTypeInfo());
655  }
656 
657  if (fmt != "asnb") {
659  }
660 
661  return true; // Done
662 }
663 
664 
665 // Cleanup
667 {
668  if ( m_ResetDiagStream ) {
669  SetDiagStream(0);
670  }
671 }
672 
673 
675 {
676  CSeq_id id(s);
677  return m_ID1Client.AskGetgi(id);
678 }
679 
680 
682 {
683  try {
685  if (id.IsGi()) {
686  return id.GetGi();
687  } else {
688  return m_ID1Client.AskGetgi(id);
689  }
690  } catch (CSeqIdException&) {
691  return INVALID_GI;
692  }
693 }
694 
695 
697 {
698  CSeq_id::E_Choice type = static_cast<CSeq_id::E_Choice>(atoi(s.c_str()));
699  SIZE_TYPE pos = s.find_first_of(":=(");
700  if (pos == NPOS) {
701  THROW_TRACE_ARGS(runtime_error, "Malformatted flat ID " + s);
702  }
703  string data = s.substr(pos + 1);
704 
705  switch (s[pos]) {
706  case ':':
707  case '=':
708  {
709  CSeq_id id(type, data, kEmptyStr);
710  return m_ID1Client.AskGetgi(id);
711  }
712  case '(':
713  {
714  data.erase(data.end() - 1);
715  // remove last character, which should be ')'
716  vector<string> pieces;
717  NStr::Split(data, ",", pieces);
718  pieces.resize(4, kEmptyStr);
719  // name acc rel ver -> acc name ver rel
720  CSeq_id id(type, pieces[1], pieces[0],
721  pieces[3].empty() ? 0 : NStr::StringToInt(pieces[3]),
722  pieces[2]);
723  return m_ID1Client.AskGetgi(id);
724  }
725  default: // can't happen, but shut the compiler up
726  return INVALID_GI;
727  }
728 }
729 
730 
732 {
733  ITERATE (list< CRef< CSeq_id > >, it, ids) {
734  if (it != ids.begin()) {
735  *m_OutputFile << '|';
736  }
737  (*it)->WriteAsFasta(*m_OutputFile);
738  }
739 }
740 
741 
742 // for formatting text
744 {
745 public:
746  CTextColumn() : m_Width(0) { }
747  CTextColumn& Add(string s) {
748  m_Strings.push_back(s);
749  if (s.size() > m_Width)
750  m_Width = s.size();
751  return *this;
752  }
753  string Get(unsigned int index) const {
754  const string& s = m_Strings[index];
755  return s + string(m_Width - s.size(), ' ');
756  }
757  SIZE_TYPE Width() const { return m_Width; }
758  size_t Height() const { return m_Strings.size(); }
759 
760 private:
762  vector<string> m_Strings;
763 };
764 
765 
767 {
768  CTextColumn gis, dates, dbs, numbers;
769  gis.Add("GI").Add("--");
770  dates.Add("Loaded").Add("------");
771  dbs.Add("DB").Add("--");
772  numbers.Add("Retrieval No.").Add("-------------");
773  for (CTypeConstIterator<CSeq_hist_rec> it = ConstBegin(id1_reply);
774  it; ++it) {
775  TGi gi = ZERO_GI;
776  string dbname, number;
777 
778  if ( it->GetDate().IsStr() ) {
779  dates.Add(it->GetDate().GetStr());
780  } else {
781  CNcbiOstrstream oss;
782  const CDate_std& date = it->GetDate().GetStd();
783  oss << setfill('0') << setw(2) << date.GetMonth() << '/'
784  << setw(2) << date.GetDay() << '/' << date.GetYear();
785  dates.Add(CNcbiOstrstreamToString(oss));
786  }
787 
788  ITERATE (CSeq_hist_rec::TIds, it2, it->GetIds()) {
789  if ( (*it2)->IsGi() ) {
790  gi = (*it2)->GetGi();
791  } else if ( (*it2)->IsGeneral() ) {
792  dbname = (*it2)->GetGeneral().GetDb();
793  const CObject_id& tag = (*it2)->GetGeneral().GetTag();
794  if ( tag.IsStr() ) {
795  number = tag.GetStr();
796  } else {
797  number = NStr::IntToString(tag.GetId());
798  }
799  }
800  }
801 
802  gis.Add(NStr::NumericToString(gi));
803  dbs.Add(dbname);
804  numbers.Add(number);
805  }
806 
807  for (unsigned int n = 0; n < gis.Height(); n++) {
808  *m_OutputFile << gis.Get(n) << " " << dates.Get(n) << " "
809  << dbs.Get(n) << " " << numbers.Get(n) << NcbiEndl;
810  }
811 }
812 
813 
814 
816 {
817  /* Test case:
818  * /net/ncbi/ncbi/ftp/genbank/quality_scores/gbvrt.qscore.gz
819  * GI 13508865: gotseqentry.set.annot.data.graph
820  * >AL590146.2 Phrap Quality (Length:121086, Min: 31, Max: 99)
821  * 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99 99
822  * ...
823  */
824  string id = FindBestChoice(handle.GetBioseqCore()->GetId(), CSeq_id::Score)
825  ->GetSeqIdString(true);
826 
827  for (CGraph_CI it(handle); it; ++it) {
828  string title = it->GetTitle();
829  if (title.find("uality") == NPOS) {
830  continue;
831  }
832 
833  const CByte_graph& data = it->GetGraph().GetByte();
834  *m_OutputFile << '>' << id << ' ' << title
835  << " (Length: " << it->GetNumval()
836  << ", Min: " << data.GetMin()
837  << ", Max: " << data.GetMax() << ')' << NcbiEndl;
838  for (SIZE_TYPE n = 0; n < data.GetValues().size(); ++n) {
839  *m_OutputFile << setw(3) << static_cast<int>(data.GetValues()[n]);
840  if (n % 20 == 19) {
841  *m_OutputFile << NcbiEndl;
842  }
843  }
844  }
845 }
846 
847 END_NCBI_SCOPE
848 
849 
850 
851 /////////////////////////////////////////////////////////////////////////////
852 // MAIN
853 //
854 
855 USING_NCBI_SCOPE;
856 
857 int main(int argc, const char* argv[])
858 {
859  return CId1FetchApp().AppMain(argc, argv);
860 }
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CArgAllow_Int8s –.
Definition: ncbiargs.hpp:1706
CArgAllow_Integers –.
Definition: ncbiargs.hpp:1751
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgException –.
Definition: ncbiargs.hpp:120
CArgs –.
Definition: ncbiargs.hpp:379
CBioseq_Handle –.
CByte_graph –.
Definition: Byte_graph.hpp:66
void GetDate(string *label, const string &format) const
Append a custom string representation of the date to the label.
Definition: Date_std.cpp:159
CEntrez2_boolean_element –.
CEntrez2_boolean_exp –.
CEntrez2_db_id –.
CEntrez2_eval_boolean –.
void Resize(size_t size)
TUidIterator GetUidIterator()
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
Definition: sequence.hpp:770
void SetFormatGenbank(void)
CFlatFileConfig & SetHideSNPFeatures(bool val=true)
void SetMode(const TMode &mode)
CFlatFileConfig & SetShowContigFeatures(bool val=true)
CFlatFileConfig & SetShowContigSources(bool val=true)
void Generate(const CSeq_entry_Handle &entry, CFlatItemOStream &item_os, const multiout &={})
SAnnotSelector & SetAnnotSelector(void)
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
CGraph_CI –.
Definition: graph_ci.hpp:234
void SetAllowDeadEntries(bool ok)
Definition: id1_client.hpp:67
@ID1server_back.hpp User-defined methods of the data storage class.
void WriteHistoryTable(const CID1server_back &id1_reply)
Definition: id1_fetch.cpp:766
void WriteQualityScores(CBioseq_Handle &handle)
Definition: id1_fetch.cpp:815
TGi LookUpFlatSeqID(const string &s)
Definition: id1_fetch.cpp:696
CID1Client m_ID1Client
Definition: id1_fetch.cpp:112
CEntrez2Client m_E2Client
Definition: id1_fetch.cpp:113
virtual int Run(void)
Run the application.
Definition: id1_fetch.cpp:267
CRef< CObjectManager > m_ObjMgr
Definition: id1_fetch.cpp:114
TGi LookUpRawSeqID(const string &s)
Definition: id1_fetch.cpp:681
CRef< CScope > m_Scope
Definition: id1_fetch.cpp:115
virtual void Init(void)
Initialize the application.
Definition: id1_fetch.cpp:120
TGi LookUpFastaSeqID(const string &s)
Definition: id1_fetch.cpp:674
bool LookUpGI(TGi gi)
Definition: id1_fetch.cpp:422
bool m_ResetDiagStream
Definition: id1_fetch.cpp:116
CNcbiOstream * m_OutputFile
Definition: id1_fetch.cpp:111
void WriteFastaIDs(const list< CRef< CSeq_id > > &ids)
Definition: id1_fetch.cpp:731
virtual void Exit(void)
Cleanup on application exit.
Definition: id1_fetch.cpp:666
The CNcbiOstrstreamToString class helps convert a CNcbiOstrstream to a string. Sample usage:
Definition: ncbistre.hpp:802
CScope –.
Definition: scope.hpp:92
CSeqIdException –.
Definition: Seq_id.hpp:1001
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
string Get(unsigned int index) const
Definition: id1_fetch.cpp:753
CTextColumn & Add(string s)
Definition: id1_fetch.cpp:747
SIZE_TYPE m_Width
Definition: id1_fetch.cpp:761
vector< string > m_Strings
Definition: id1_fetch.cpp:762
size_t Height() const
Definition: id1_fetch.cpp:758
SIZE_TYPE Width() const
Definition: id1_fetch.cpp:757
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
std::ofstream out("events_result.xml")
main entry point for tests
#define true
Definition: bool.h:35
static const char * str(char *buf, int n)
Definition: stats.c:84
char data[12]
Definition: iconv.c:80
#define INVALID_GI
Definition: ncbimisc.hpp:1089
#define GI_FROM(T, value)
Definition: ncbimisc.hpp:1086
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
Int8 TIntId
Definition: ncbimisc.hpp:999
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
#define ZERO_GI
Definition: ncbimisc.hpp:1088
#define GI_TO(T, gi)
Definition: ncbimisc.hpp:1085
@ fBinary
Open as binary file; for eInputFile, eOutputFile, eIOFile.
Definition: ncbiargs.hpp:620
@ fPreOpen
Open file right away; for eInputFile, eOutputFile, eIOFile.
Definition: ncbiargs.hpp:618
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eIntId
Convertible to TIntId (int or Int8 depending on NCBI_INT8_GI)
Definition: ncbiargs.hpp:593
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
string
Definition: cgiapp.hpp:687
#define ERR_FATAL(message)
Posting fatal error and abort.
Definition: ncbidiag.hpp:240
void SetDiagPostFlag(EDiagPostFlag flag)
Set the specified flag (globally).
Definition: ncbidiag.cpp:6070
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
Definition: ncbidiag.cpp:6129
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void SetDiagStream(CNcbiOstream *os, bool quick_flush=true, FDiagCleanup cleanup=0, void *cleanup_data=0, const string &stream_name="")
Set diagnostic stream.
Definition: ncbidiag.cpp:8083
@ eDPF_All
All flags (except for the "unusual" ones!)
Definition: ncbidiag.hpp:718
@ eDiag_Info
Informational message.
Definition: ncbidiag.hpp:651
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define THROW_TRACE_ARGS(exception_class,...)
Throw trace.
Definition: ncbiexpt.hpp:269
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
EIO_Status SetTimeout(const STimeout *timeout, EIO_Event direction=eIO_ReadWrite)
Definition: rpcbase.hpp:251
ESerialDataFormat
Data file format.
Definition: serialdef.hpp:71
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
@ eSerial_Xml
XML.
Definition: serialdef.hpp:75
@ eSerial_None
Definition: serialdef.hpp:72
@ eSerial_AsnBinary
ASN.1 binary.
Definition: serialdef.hpp:74
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
Definition: Seq_id.hpp:772
@ fParse_NoFASTA
Don't bother checking for a tag.
Definition: Seq_id.hpp:91
@ fParse_AnyRaw
Definition: Seq_id.hpp:83
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
Definition: iterator.hpp:1012
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
Definition: objostr.cpp:126
@ fInstantiateGaps
honor specified gap mode; on by default
Definition: sequence.hpp:774
@ fAssembleParts
assemble FAR delta sequences; on by dflt
Definition: sequence.hpp:773
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for a sequence within one TSE.
Definition: scope.cpp:253
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
void ResetDataAndHistory(void)
Clear all information in the scope except added data loaders.
Definition: scope.cpp:331
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
SAnnotSelector & ExcludeNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to exclude.
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:1684
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:1392
#define kMax_Int
Definition: ncbi_limits.h:184
#define kMax_I8
Definition: ncbi_limits.h:221
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define NcbiEndl
Definition: ncbistre.hpp:548
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
size_t NcbiStreamToString(string *s, CNcbiIstream &is, size_t pos=0)
Input the entire contents of an istream into a string (NULL causes drain).
Definition: ncbistre.cpp:296
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
#define NPOS
Definition: ncbistr.hpp:133
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
unsigned int usec
microseconds (modulo 1,000,000)
Definition: ncbi_types.h:78
unsigned int sec
seconds
Definition: ncbi_types.h:77
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
void SetTool(const TTool &value)
Assign a value to Tool data member.
virtual TRequest & SetDefaultRequest(void)
void SetReturn_UIDs(TReturn_UIDs value)
Assign a value to Return_UIDs data member.
virtual CRef< CEntrez2_boolean_reply > AskEval_boolean(const CEntrez2_eval_boolean &req, TReply *reply=0)
void SetDb(const TDb &value)
Assign a value to Db data member.
void SetQuery(TQuery &value)
Assign a value to Query data member.
virtual CRef< CEntrez2_docsum_list > AskGet_docsum(const CEntrez2_id_list &req, TReply *reply=0)
TYear GetYear(void) const
Get the Year member data.
Definition: Date_std_.hpp:426
TMonth GetMonth(void) const
Get the Month member data.
Definition: Date_std_.hpp:473
TDay GetDay(void) const
Get the Day member data.
Definition: Date_std_.hpp:520
virtual NCBI_NS_NCBI::TGi AskGetgi(const CSeq_id &req, TReply *reply=0)
Definition: id1_client_.cpp:98
virtual list< CRef< CID1Seq_hist > > AskGetgihist(const NCBI_NS_NCBI::TGi &req, TReply *reply=0)
virtual list< CRef< CID1Seq_hist > > AskGetgirev(const NCBI_NS_NCBI::TGi &req, TReply *reply=0)
TGi & SetGi(void)
Select the variant.
Definition: Seq_id_.hpp:896
E_Choice
Choice variants.
Definition: Seq_id_.hpp:93
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
list< CRef< CSeq_id > > TIds
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
USING_SCOPE(NCBI_NS_NCBI::objects)
bool s_IsControl(char c)
Definition: id1_fetch.cpp:261
yy_size_t n
constexpr bool empty(list< Ts... >) noexcept
const char * tag
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
int iscntrl(Uchar c)
Definition: ncbictype.hpp:63
Defines unified interface to application:
Process information in the NCBI Registry, including working with configuration files.
static Format format
Definition: njn_ioutil.cpp:53
The Object manager core.
static BOOL number
Definition: pcregrep.c:193
Timeout structure.
Definition: ncbi_types.h:76
static string query
Definition: type.c:6
Modified on Wed Apr 17 13:10:29 2024 by modify_doxy.py rev. 669887