NCBI C++ ToolKit
biosample_chk.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: biosample_chk.cpp 97547 2022-07-27 16:13:18Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Colleen Bollin
27  *
28  * File Description:
29  * check biosource and structured comment descriptors against biosample database
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbistre.hpp>
36 #include <corelib/ncbiapp.hpp>
37 #include <corelib/ncbienv.hpp>
38 #include <corelib/ncbiargs.hpp>
39 #include <corelib/ncbiutil.hpp>
40 
41 #include <serial/serial.hpp>
42 #include <serial/objistr.hpp>
43 #include <serial/objectio.hpp>
44 
46 #include <connect/ncbi_util.h>
48 
49 // Objects includes
51 #include <objects/seq/Bioseq.hpp>
55 #include <objects/seq/Seq_inst.hpp>
67 #include <objects/seq/Pubdesc.hpp>
68 #include <objects/pub/Pub.hpp>
74 #include <objects/biblio/Affil.hpp>
84 #include <util/line_reader.hpp>
86 #include <util/format_guess.hpp>
87 
89 
90 // Object Manager includes
92 #include <objmgr/scope.hpp>
93 #include <objmgr/seq_descr_ci.hpp>
94 #include <objmgr/bioseq_handle.hpp>
95 #include <objmgr/bioseq_ci.hpp>
96 #include <objmgr/seqdesc_ci.hpp>
97 
99 #ifdef HAVE_NCBI_VDB
101 #endif
104 
105 
108 
109 #include <common/test_assert.h> /* This header must go last */
110 
111 
112 using namespace ncbi;
113 using namespace objects;
114 using namespace xml;
115 
116 const char * BIOSAMPLE_CHK_APP_VER = "1.0";
117 
118 /////////////////////////////////////////////////////////////////////////////
119 //
120 // Demo application
121 //
122 
123 
125 {
126 public:
128  m_ReportStream(0),
129  m_UseDevServer(false)
130  {}
131 
132  virtual ~CBiosampleHandler() {}
133 
134  virtual void ProcessBioseq(CBioseq_Handle bh) {}
135  virtual bool NeedsReportStream() { return false; }
136  virtual void AddSummary() {}
137 
138  void SetReportStream(CNcbiOstream* stream) { m_ReportStream = stream; }
139 
140 protected:
143 };
144 
145 
147 {
148 public:
151  virtual void ProcessBioseq(CBioseq_Handle bh);
152  virtual bool NeedsReportStream() { return true; }
153  virtual void AddSummary();
154 
155 protected:
157 };
158 
159 
161 {
162  vector<string> ids = biosample_util::GetBiosampleIDs(bsh);
163  if (ids.empty()) {
164  return;
165  }
166 
167  for (const auto &it : ids) {
168  if (m_Status.find(it) == m_Status.end()) {
170  m_Status.insert(new_pair);
171  }
172  }
173 }
174 
176 {
177  if (m_Status.empty()) {
178  *m_ReportStream << "No BioSample IDs found" << endl;
179  } else {
180  biosample_util::GetBiosampleStatus(m_Status, m_UseDevServer);
181  biosample_util::TStatuses::iterator it = m_Status.begin();
182  while (it != m_Status.end()) {
183  *m_ReportStream << it->first << "\t" << biosample_util::GetBiosampleStatusName(it->second) << endl;
184  ++it;
185  }
186  }
187  m_Status.clear();
188 }
189 
190 
192 {
193 public:
194  CBiosampleChkApp(void);
195 
196  virtual void Init(void);
197  virtual int Run (void);
198 
199  void ReadClassMember(CObjectIStream& in,
200  const CObjectInfo::CMemberIterator& member);
201 
202 private:
203 
204  void Setup(const CArgs& args);
205 
206  unique_ptr<CObjectIStream> OpenFile(const CArgs& args);
207  unique_ptr<CObjectIStream> OpenFile(const string &fname);
208  void SaveFile(const string &fname, bool useBinaryOutputFormat);
209 
211  void PushToRecord(CBioseq_Handle bh);
212 
213  void ProcessBioseqForUpdate(CBioseq_Handle bh);
214  void ProcessBioseqHandle(CBioseq_Handle bh);
215  void ProcessSeqEntry(CRef<CSeq_entry> se);
216  void ProcessSeqEntry(void);
217  void ProcessSet(void);
218  void ProcessSeqSubmit(void);
219  void ProcessAsnInput (void);
220  void ProcessList (const string& fname);
221  void ProcessFileList (const string& fname);
222  int ProcessOneDirectory(const string& dir_name, const string& file_suffix, const string& file_mask, bool recurse);
223  void ProcessOneFile(string fname);
224  void ProcessReleaseFile(const CArgs& args);
225  CRef<CSeq_entry> ReadSeqEntry(void);
226  CRef<CBioseq_set> ReadBioseqSet(void);
227 
228  void CreateBiosampleUpdateWebService(biosample_util::TBiosampleFieldDiffList& diffs, bool del_okay);
230  void PrintDiffs(biosample_util::TBiosampleFieldDiffList& diffs);
231  void PrintTable(CRef<CSeq_table> table);
232 
233  CRef<CScope> BuildScope(void);
234 
235  // for mode 3, biosample_push
236  void UpdateBioSource (CBioseq_Handle bh, const CBioSource& src);
237  vector<CRef<CSeqdesc> > GetBiosampleDescriptors(string fname);
238  vector<CRef<CSeqdesc> > GetBiosampleDescriptorsFromSeqSubmit();
239  vector<CRef<CSeqdesc> > GetBiosampleDescriptorsFromSeqEntry();
240  vector<CRef<CSeqdesc> > GetBiosampleDescriptorsFromSeqEntry(const CSeq_entry& se);
241 
243  unique_ptr<CObjectIStream> m_In;
245 
246  size_t m_Level;
247 
252 
253  enum E_Mode {
254  e_report_diffs = 1, // Default - report diffs between biosources on records with biosample accessions
255  // and biosample data
258  e_take_from_biosample, // update with qualifiers from BioSample, stop if conflict
259  e_take_from_biosample_force, // update with qualifiers from BioSample, no stop on conflict
260  e_report_status, // make table with list of BioSample IDs and statuses
261  e_update_with, // use web API for update (with delete)
262  e_update_no // use web API for update (no delete)
263  };
264 
265  enum E_ListType {
266  e_none = 0,
268  e_files
269  };
270 
271  int m_Mode;
278  string m_IDPrefix;
279  string m_HUPDate;
282  string m_Owner;
283  string m_Comment;
284 
286 
287  size_t m_Processed;
289 
292  vector<CRef<CSeqdesc> > m_Descriptors;
293 
295 
297 };
298 
299 
301  m_ObjMgr(0), m_Continue(false),
302  m_Level(0), m_ReportStream(0), m_NeedReportHeader(true), m_AsnOut(0),
303  m_LogStream(0), m_Mode(e_report_diffs), m_ReturnCode(0),
304  m_StructuredCommentPrefix(""), m_CompareStructuredComments(true),
305  m_FirstSeqOnly(false), m_IDPrefix(""), m_HUPDate(""),
306  m_BioSampleAccession(""), m_BioProjectAccession(""),
307  m_Owner(""), m_Comment(""),
308  m_Processed(0), m_Unprocessed(0), m_Handler(NULL)
309 {
310 }
311 
312 
314 {
315  // Prepare command line descriptions
316 
317  // Create
318  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
319 
320  arg_desc->AddOptionalKey
321  ("p", "Directory", "Path to ASN.1 Files",
323  arg_desc->AddOptionalKey
324  ("i", "InFile", "Single Input File",
326  arg_desc->AddOptionalKey(
327  "o", "OutFile", "Single Output File",
329  arg_desc->AddOptionalKey(
330  "f", "Filter", "Substring Filter",
332  arg_desc->AddDefaultKey
333  ("x", "String", "File Selection Substring", CArgDescriptions::eString, ".sqn");
334  arg_desc->AddFlag("u", "Recurse");
335  arg_desc->AddFlag("d", "Use development Biosample server");
336 
337  arg_desc->AddDefaultKey("a", "a",
338  "ASN.1 Type (a Automatic, z Any, e Seq-entry, b Bioseq, s Bioseq-set, m Seq-submit, t Batch Bioseq-set, u Batch Seq-submit) or accession list (l)",
340  "a");
341 
342  arg_desc->AddFlag("b", "Output binary ASN.1");
343  //arg_desc->AddFlag("c", "Batch File is Compressed");
344  arg_desc->AddFlag("M", "Process only first sequence in file (master)");
345  arg_desc->AddOptionalKey("R", "BioSampleIDPrefix", "BioSample ID Prefix", CArgDescriptions::eString);
346  arg_desc->AddOptionalKey("HUP", "HUPDate", "Hold Until Publish Date", CArgDescriptions::eString);
347 
348  arg_desc->AddOptionalKey(
349  "L", "OutFile", "Log File",
351 
352  arg_desc->AddDefaultKey(
353  "m", "mode", "Mode:\n"
354  "\t1 create update file\n"
355  "\t2 generate file for creating new biosample entries\n"
356  "\t3 push source info from one file (-i) to others (-p)\n"
357  "\t4 update with source qualifiers from BioSample unless conflict\n"
358  "\t5 update with source qualifiers from BioSample (continue with conflict))\n"
359  "\t6 report transaction status\n"
360  "\t7 use web API for update (with delete)\n"
361  "\t8 use web API for update (no delete)\n",
364  arg_desc->SetConstraint("m", constraint);
365 
366  arg_desc->AddOptionalKey(
367  "P", "Prefix", "StructuredCommentPrefix", CArgDescriptions::eString);
368 
369  arg_desc->AddOptionalKey(
370  "biosample", "BioSampleAccession", "BioSample Accession to use for sequences in record. Report error if sequences contain a reference to a different BioSample accession.", CArgDescriptions::eString);
371  arg_desc->AddOptionalKey(
372  "bioproject", "BioProjectAccession", "BioProject Accession to use for sequences in record. Report error if sequences contain a reference to a different BioProject accession.", CArgDescriptions::eString);
373  arg_desc->AddOptionalKey("comment", "BioSampleComment", "Comment to use for creating new BioSample xml", CArgDescriptions::eString);
374 
375  arg_desc->AddOptionalKey("apikey_file", "BioSampleWebAPIKey", "File containing Web API Key needed to update BioSample database", CArgDescriptions::eString);
376 
377  // Program description
378  string prog_description = "BioSample Checker\n";
379  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
380  prog_description, false);
381 
382  // Pass argument descriptions to the application
383  SetupArgDescriptions(arg_desc.release());
384 
385 }
386 
387 
389 {
390  // Process file based on its content
391  // Unless otherwise specified we assume the file in hand is
392  // a Seq-entry ASN.1 file; other options are a Seq-submit or NCBI
393  // Release file (batch processing) where we process each Seq-entry
394  // at a time.
395  string header = m_In->ReadFileHeader();
396 
397  bool unhandled = false;
398  try {
399  if (header == "Seq-submit" ) { // Seq-submit
401  } else if ( header == "Seq-entry" ) { // Seq-entry
402  ProcessSeqEntry();
403  } else if (header == "Bioseq-set" ) { // Bioseq-set
404  ProcessSet();
405  } else {
406  unhandled = true;
407  }
408  } catch (CException& e) {
409  if (NStr::StartsWith(e.GetMsg(), "duplicate Bioseq id")) {
410  *m_LogStream << e.GetMsg();
411  exit(4);
412  } else {
413  throw e;
414  }
415  }
416  if (unhandled) {
417  NCBI_THROW(CException, eUnknown, "Unhandled type " + header);
418  }
419 
420 }
421 
422 
423 void CBiosampleChkApp::ProcessList (const string& fname)
424 {
425  // Process file with list of accessions
426 
429 #ifdef HAVE_NCBI_VDB
431 #endif
432  CScope scope(*objmgr);
433  scope.AddDefaults();
434 
435  CRef<ILineReader> lr = ILineReader::New (fname);
436  while ( !lr->AtEOF() ) {
437  CTempString line = *++*lr;
438  if (!NStr::IsBlank(line)) {
439  try {
440  CRef<CSeq_id> id(new CSeq_id(line));
441  if (id) {
442  CBioseq_Handle bsh = scope.GetBioseqHandle(*id);
443  if (bsh) {
444  ProcessBioseqHandle(bsh);
445  } else {
446  *m_LogStream << "Unable to fetch Bioseq for " << line << endl;
447  string label = "";
448  id->GetLabel(&label);
449  *m_LogStream << " (interpreted as " << label << ")" << endl;
450  m_Unprocessed++;
451  }
452  }
453  } catch (CException& e) {
454  *m_LogStream << e.GetMsg() << endl;
455  m_Unprocessed++;
456  }
457  }
458  }
459 
460 }
461 
462 
463 void CBiosampleChkApp::ProcessFileList (const string& fname)
464 {
465  // Process file with list of files
466 
469  CScope scope(*objmgr);
470  scope.AddDefaults();
471 
472  m_ListType = e_none;
473  CRef<ILineReader> lr = ILineReader::New (fname);
474  while ( !lr->AtEOF() ) {
475  CTempString line = *++*lr;
476  if (!NStr::IsBlank(line)) {
477  ProcessOneFile(line);
478  }
479  }
481 }
482 
483 
485 {
486  const CArgs& args = GetArgs();
487 
488  bool need_to_close_report = false;
489  bool need_to_close_asn = false;
490 
491  if (!m_ReportStream &&
494  string path = fname;
495  size_t pos = NStr::Find(path, ".", NStr::eCase, NStr::eReverseSearch);
496  if (pos != string::npos) {
497  path = path.substr(0, pos);
498  }
499  path = path + ".val";
500  m_Table.Reset(new CSeq_table());
501  m_Table->SetNum_rows(0);
502  m_ReportStream = new CNcbiOfstream(path.c_str());
503  if (!m_ReportStream)
504  {
505  NCBI_THROW(CException, eUnknown, "Unable to open " + path);
506  }
507  need_to_close_report = true;
508  m_NeedReportHeader = true;
511  }
512  }
514  string path = fname;
515  size_t pos = NStr::Find(path, ".", NStr::eCase, NStr::eReverseSearch);
516  if (pos != string::npos) {
517  path = path.substr(0, pos);
518  }
519  path = path + ".out";
520  SaveFile(path, args["b"]);
521  need_to_close_asn = true;
522  }
523 
524  m_Diffs.clear();
525  switch (m_ListType) {
526  case e_accessions:
527  ProcessList (fname);
528  break;
529  case e_files:
530  ProcessFileList (fname);
531  break;
532  case e_none:
533  m_In = OpenFile(fname);
534  if (m_In.get() == nullptr) {
535  NCBI_THROW(CException, eUnknown, "Unable to open " + fname);
536  }
537  if (!m_In->InGoodState()) {
538  NCBI_THROW(CException, eUnknown, "Unable to open " + fname);
539  }
540  ProcessAsnInput();
541  break;
542  }
543 
544  if (m_Mode == e_report_diffs) {
546  }
547  if (m_Mode == e_update_with) {
549  } else if (m_Mode == e_update_no) {
551  }
552  if (m_Handler != NULL) {
554  }
555 
556  // TODO! Must free diffs
557  m_Diffs.clear();
558 
559  if (need_to_close_report) {
560  if (m_Mode == e_take_from_biosample) {
562  m_Table->Reset();
563  m_Table = new CSeq_table();
564  m_Table->SetNum_rows(0);
565  }
566  m_ReportStream->flush();
567  m_ReportStream = 0;
568  }
569  if (need_to_close_asn) {
570  m_AsnOut->flush();
571  m_AsnOut->close();
572  m_AsnOut = 0;
573  }
574 }
575 
576 
578 {
579  // Get seq-entry to process
581 
583 }
584 
585 
587 {
588  vector<CRef<CSeqdesc> > descriptors;
589 
590  CRef<CScope> scope = BuildScope();
591  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(se);
593  if (bi) {
594  CSeqdesc_CI src_desc_ci(*bi, CSeqdesc::e_Source);
595  if (src_desc_ci) {
596  CRef<CSeqdesc> src_desc(new CSeqdesc());
597  src_desc->Assign(*src_desc_ci);
598  descriptors.push_back(src_desc);
599  }
600  }
601 
602  return descriptors;
603 }
604 
605 
607 {
608  vector<CRef<CSeqdesc> > descriptors;
610 
611  // Get seq-submit to process
613 
614  // Validate Seq-submit
615  CRef<CScope> scope = BuildScope();
616  if (ss->GetData().IsEntrys() && ! ss->GetData().GetEntrys().empty()) {
617  descriptors = GetBiosampleDescriptorsFromSeqEntry(**(ss->GetData().GetEntrys().begin()));
618  }
619  return descriptors;
620 }
621 
622 
623 vector<CRef<CSeqdesc> > CBiosampleChkApp::GetBiosampleDescriptors(string fname)
624 {
625  m_In = OpenFile(fname);
626 
627  // Process file based on its content
628  // Unless otherwise specifien we assume the file in hand is
629  // a Seq-entry ASN.1 file, other option are a Seq-submit or NCBI
630  // Release file (batch processing) where we process each Seq-entry
631  // at a time.
632 
633  string header = m_In->ReadFileHeader();
634 
635  vector<CRef<CSeqdesc> > descriptors;
636  if (header == "Seq-submit" ) { // Seq-submit
637  descriptors = GetBiosampleDescriptorsFromSeqSubmit();
638  } else if ( header == "Seq-entry" ) { // Seq-entry
639  descriptors = GetBiosampleDescriptorsFromSeqEntry();
640 
641  } else {
642  NCBI_THROW(CException, eUnknown, "Unhandled type " + header);
643  }
644  return descriptors;
645 }
646 
647 
648 int CBiosampleChkApp::ProcessOneDirectory(const string& dir_name, const string& file_suffix, const string& file_mask, bool recurse)
649 {
650  int num_of_files = 0;
651 
652  CDir dir(dir_name);
653  CDir::TEntries files (dir.GetEntries(file_mask, CDir::eFile));
654  for (const auto &ii : files) {
655  string fname = ii->GetName();
656  if (ii->IsFile() &&
657  (!file_suffix.empty() || NStr::Find (fname, file_suffix) != string::npos)) {
658  ++num_of_files;
659  string fname = CDirEntry::MakePath(dir_name, ii->GetName());
660  ProcessOneFile (fname);
661  }
662  }
663  if (recurse) {
664  CDir::TEntries subdirs (dir.GetEntries("", CDir::eDir));
665  for (const auto &ii : subdirs) {
666  string subdir = ii->GetName();
667  if (ii->IsDir() && !NStr::Equal(subdir, ".") && !NStr::Equal(subdir, "..")) {
668  string subname = CDirEntry::MakePath(dir_name, ii->GetName());
669  num_of_files += ProcessOneDirectory (subname, file_suffix, file_mask, recurse);
670  }
671  }
672  }
673  if (!num_of_files)
674  {
675  NCBI_THROW(CException, eUnknown, "No input '" + file_mask + "' files found in directory '" + dir_name + "'");
676  }
677  return num_of_files;
678 }
679 
680 
682 {
683  const CArgs& args = GetArgs();
684  Setup(args);
685 
686  m_Mode = args["m"].AsInteger();
687  m_FirstSeqOnly = args["M"].AsBoolean();
688  m_IDPrefix = args["R"] ? args["R"].AsString() : "";
689  m_HUPDate = args["HUP"] ? args["HUP"].AsString() : "";
690  m_BioSampleAccession = args["biosample"] ? args["biosample"].AsString() : "";
691  m_BioProjectAccession = args["bioproject"] ? args["bioproject"].AsString() : "";
692  m_Comment = args["comment"] ? args["comment"].AsString() : "";
693 
694  string apikey_file = args["apikey_file"] ? args["apikey_file"].AsString() : "";
695  if (!apikey_file.empty()) {
696  ifstream is(apikey_file.c_str());
697  is >> m_BioSampleWebAPIKey;
698  }
699 
700  if (m_Mode == e_report_status) {
702  }
703 
704  if (args["o"]) {
706  //|| m_Mode == e_take_from_biosample
707  || (m_Handler != NULL && m_Handler->NeedsReportStream())) {
708  m_ReportStream = &(args["o"].AsOutputFile());
709  if (!m_ReportStream)
710  {
711  NCBI_THROW(CException, eUnknown, "Unable to open " + args["o"].AsString());
712  }
713  if (m_Handler) {
715  }
716  if (m_Mode == e_take_from_biosample) {
717  m_Table.Reset(new CSeq_table());
718  m_Table->SetNum_rows(0);
719  }
720  } else {
721  SaveFile(args["o"].AsString(), args["b"]);
722  }
723  } else if (m_Mode == e_update_with || m_Mode == e_update_no) {
725  if (!m_ReportStream)
726  {
727  NCBI_THROW(CException, eUnknown, "Unable to open " + args["o"].AsString());
728  }
729  if (m_Handler) {
731  }
732  if (m_Mode == e_take_from_biosample) {
733  m_Table.Reset(new CSeq_table());
734  m_Table->SetNum_rows(0);
735  }
736  }
737 
738  m_LogStream = args["L"] ? &(args["L"].AsOutputFile()) : &NcbiCout;
739  m_StructuredCommentPrefix = args["P"] ? args["P"].AsString() : "";
742  }
743 
744  m_UseDevServer = args["d"].AsBoolean();
745 
747  // error
748  *m_LogStream << "Structured comment prefix is only appropriate for generating a biosample table." << endl;
749  return 1;
750  }
751 
752  if (m_Mode == e_report_diffs) {
754  }
755 
756  // Process file based on its content
757  // Unless otherwise specified we assume the file in hand is
758  // a Seq-entry ASN.1 file, other option are a Seq-submit or NCBI
759  // Release file (batch processing) where we process each Seq-entry
760  // at a time.
761  if (NStr::Equal(args["a"].AsString(), "l")) {
763  } else if (NStr::Equal(args["a"].AsString(), "f")) {
765  } else {
766  m_ListType = e_none;
767  }
768 
769  string dir_name = (args["p"]) ? args["p"].AsString() : "";
770  string file_suffix = (args["f"]) ? args["f"].AsString() : "";
771  string file_mask = (args["x"]) ? args["x"].AsString() : ".sqn";
772  file_mask = "*" + file_mask;
773  bool dir_recurse = args["u"];
776  if (m_ReportStream) {
778  } else {
780  }
781  } else if ( m_Mode == e_push) {
782  if (m_ListType != e_none) {
783  // error
784  *m_LogStream << "List type (-a l or -a f) is not appropriate for push mode." << endl;
785  return 1;
786  } else if (!args["p"] || !args["i"]) {
787  // error
788  *m_LogStream << "Both directory containing contigs (-p) and master file (-i) are required for push mode." << endl;
789  return 1;
790  } else {
791  m_Descriptors = GetBiosampleDescriptors(args["i"].AsString());
792  ProcessOneDirectory (dir_name, file_suffix, file_mask, dir_recurse);
793  }
794  } else if ( args["p"] ) {
795  ProcessOneDirectory (dir_name, file_suffix, file_mask, dir_recurse);
796  if (m_Mode == e_take_from_biosample) {
797  if (m_Table && m_Table->GetNum_rows() > 0) {
799  }
800  }
801  } else {
802  if (args["i"]) {
803  ProcessOneFile (args["i"].AsString());
804  if (m_Mode == e_take_from_biosample) {
805  if (m_Table && m_Table->GetNum_rows() > 0) {
807  }
808  }
809  }
810  }
811 
812  if (m_Unprocessed > 0) {
813  if (m_Mode != e_report_diffs) {
814  *m_LogStream << m_Unprocessed << " results failed" << endl;
815  }
816  return 1;
817  } else {
818  return m_ReturnCode;
819  }
820 }
821 
822 
824 {
825  CRef<CScope> scope(new CScope (*m_ObjMgr));
826  scope->AddDefaults();
827 
828  return scope;
829 }
830 
831 
834  const CObjectInfo::CMemberIterator& member)
835 {
836  m_Level++;
837 
838  if ( m_Level == 1 ) {
839  size_t n = 0;
840  // Read each element separately to a local TSeqEntry,
841  // process it somehow, and... not store it in the container.
842  for ( CIStreamContainerIterator i(in, member); i; ++i ) {
843  try {
844  // Get seq-entry to process
846  i >> *se;
847 
849 
850  m_Diffs.clear();
851  ProcessSeqEntry(se);
853  // TODO! Must free diffs
854  m_Diffs.clear();
855 
856  if (m_ReportStream) {
857  *m_ReportStream << "Elapsed = " << sw.Elapsed() << endl;
858  }
859  n++;
860  } catch (std::exception e) {
861  if ( !m_Continue ) {
862  throw;
863  }
864  // should we issue some sort of warning?
865  }
866  }
867  } else {
868  in.ReadClassMember(member);
869  }
870 
871  m_Level--;
872 }
873 
874 
876 (const CArgs& args)
877 {
878  CRef<CBioseq_set> seqset(new CBioseq_set);
879 
880  // Register the Seq-entry hook
881  CObjectTypeInfo set_type = CType<CBioseq_set>();
882  set_type.FindMember("seq-set").SetLocalReadHook(*m_In, this);
883 
884  // Read the CBioseq_set, it will call the hook object each time we
885  // encounter a Seq-entry
886  *m_In >> *seqset;
887 }
888 
889 
891 {
894 
895  return se;
896 }
897 
898 
900 {
901  CRef<CBioseq_set> set(new CBioseq_set());
903 
904  return set;
905 }
906 
907 
909 {
910  if (table->GetNum_rows() == 0) {
911  // do nothing
912  return;
913  }
914 
915  for (const auto &it : table->GetColumns()) {
916  *m_ReportStream << it->GetHeader().GetTitle() << "\t";
917  }
918  *m_ReportStream << endl;
919  for (size_t row = 0; row < (size_t)table->GetNum_rows(); row++) {
920  for (const auto &it : table->GetColumns()) {
921  if (row < it->GetData().GetString().size()) {
922  *m_ReportStream << it->GetData().GetString()[row] << "\t";
923  } else {
924  *m_ReportStream << "\t";
925  }
926  }
927  *m_ReportStream << endl;
928  }
929 }
930 
931 
933 {
934  if (diffs.empty()) {
935  if (m_Processed == 0) {
936  *m_ReportStream << "No results processed" << endl;
937  } else {
938  *m_ReportStream << "No differences found" << endl;
939  }
940  } else {
941  if (m_NeedReportHeader) {
942  biosample_util::CBiosampleFieldDiff::PrintHeader(*m_ReportStream, false);
943  m_NeedReportHeader = false;
944  }
945 
946  for (const auto &it : diffs) {
947  it->Print(*m_ReportStream, false);
948  }
949  }
950  if (m_Unprocessed > 0) {
951  *m_ReportStream << m_Unprocessed << " results failed" << endl;
952  }
953 }
954 
955 
957 {
958  PrintDiffs(diffs);
959 }
960 
961 
963 {
964  if (diffs.empty()) {
965  return;
966  }
967 
968  vector< CRef<biosample_util::CBiosampleFieldDiff> > add_item;
969  vector< CRef<biosample_util::CBiosampleFieldDiff> > change_item;
970  vector< CRef<biosample_util::CBiosampleFieldDiff> > delete_item;
971  vector< CRef<biosample_util::CBiosampleFieldDiff> > change_organism;
972 
973  set<string> ids;
974 
975  for (const auto &it : diffs) {
976  string id = it->GetBioSample();
977  string smp = it->GetSampleVal();
978  string src = it->GetSrcVal();
979  string fld = it->GetFieldName();
980  bool blank_smp = NStr::IsBlank(smp);
981  bool blank_src = NStr::IsBlank(src);
982  if (blank_smp && blank_src) {
983  continue;
984  }
985  if (smp == src) {
986  continue;
987  }
988  ids.insert(id);
989  if (fld == "Organism Name") {
990  change_organism.push_back(it);
991  } else if (blank_smp) {
992  add_item.push_back(it);
993  } else if (blank_src) {
994  if (del_okay) {
995  delete_item.push_back(it);
996  }
997  } else {
998  change_item.push_back(it);
999  }
1000  }
1001 
1002  CJson_Document req;
1003  CJson_Object top_obj = req.SetObject();
1004  CJson_Array biosample_array = top_obj.insert_array("update");
1005 
1006  CJson_Object options_obj = top_obj.insert_object("options");
1007  options_obj.insert("attribute_synonyms", "true");
1008 
1009  for (auto& id : ids) {
1010  CJson_Object obj1 = biosample_array.push_back_object();
1011  obj1.insert("samples", id);
1012 
1013  if (! add_item.empty()) {
1014  CJson_Object add_obj = obj1.insert_object("add");
1015  CJson_Array add_arr = add_obj.insert_array("attribute");
1016  for (auto& itm : add_item) {
1017  CJson_Object obj2 = add_arr.push_back_object();
1018  obj2.insert("name", itm->GetFieldName());
1019  obj2.insert("new_value", itm->GetSrcVal());
1020  }
1021  }
1022 
1023  if (! delete_item.empty()) {
1024  CJson_Object del_obj = obj1.insert_object("delete");
1025  CJson_Array del_arr = del_obj.insert_array("attribute");
1026  for (auto& itm : delete_item) {
1027  CJson_Object obj2 = del_arr.push_back_object();
1028  obj2.insert("name", itm->GetFieldName());
1029  obj2.insert("old_value", itm->GetSampleVal());
1030  }
1031  }
1032 
1033  if (! change_item.empty() || ! change_organism.empty()) {
1034  CJson_Object chg_obj = obj1.insert_object("change");
1035  if (! change_organism.empty()) {
1036  CJson_Object chg_org = chg_obj.insert_object("organism");
1037  for (auto& itm : change_organism) {
1038  chg_org.insert("new_value", itm->GetSrcVal());
1039  }
1040  }
1041  if (! change_item.empty()) {
1042  CJson_Array chg_arr = chg_obj.insert_array("attribute");
1043  for (auto& itm : change_item) {
1044  string fld = itm->GetFieldName();
1045  if (fld == "Tax ID") {
1046  continue;
1047  }
1048  CJson_Object obj2 = chg_arr.push_back_object();
1049  obj2.insert("name", fld);
1050  obj2.insert("old_value", itm->GetSampleVal());
1051  obj2.insert("new_value", itm->GetSrcVal());
1052  }
1053  }
1054  }
1055  }
1056 
1057  if ( ids.size() > 1 ) {
1058  *m_LogStream << "ERROR: More than one BioSample ID is not supported by -m 7." << endl;
1059  exit(6);
1060  }
1061 
1062  string sData = req.ToString();
1063 
1064  NcbiCout << sData << endl;
1065 
1066  // BioSample update
1067  string sUrl = "https://api-int.ncbi.nlm.nih.gov/biosample/update/";
1068  if (m_UseDevServer) {
1069  sUrl = "https://dev-api-int.ncbi.nlm.nih.gov/biosample/update/";
1070  }
1071  string sContentType = "application/json; charset=utf-8";
1072 
1073  CUrl curl(sUrl);
1074  CHttpHeaders headers;
1075  headers.SetValue("NCBI-BioSample-Authorization", m_BioSampleWebAPIKey);
1076  CHttpResponse response = g_HttpPost(curl, headers, sData, sContentType);
1077 
1078  if (response.GetStatusCode() != 200) {
1079  NcbiStreamCopy(cout, response.ErrorStream());
1080  cout << endl;
1081  } else {
1082  NcbiStreamCopy(cout, response.ContentStream());
1083  cout << endl;
1084  }
1085 }
1086 
1087 
1089 {
1090  vector<string> unprocessed_ids;
1094  m_Processed,
1095  unprocessed_ids,
1099  &m_cache);
1100  if (! new_diffs.empty()) {
1101  m_Diffs.insert(m_Diffs.end(), new_diffs.begin(), new_diffs.end());
1102  for (const auto &id : unprocessed_ids) {
1103  *m_LogStream << "Failed to retrieve BioSample data for " << id << endl;
1104  }
1105  m_Unprocessed += unprocessed_ids.size();
1106  }
1107 }
1108 
1109 
1111 {
1112  for (const auto &it : m_Descriptors) {
1113  if (it->IsSource()) {
1114  UpdateBioSource(bh, it->GetSource());
1115  }
1116  }
1117 }
1118 
1119 
1121 {
1122  vector<string> biosample_ids = biosample_util::GetBiosampleIDs(bh);
1123 
1125  // error
1127  *m_LogStream << label << " has conflicting BioSample Accession " << biosample_ids[0] << endl;
1128  return;
1129  }
1130 
1131  if (biosample_ids.empty()) {
1132  // for report mode, do not report if no biosample ID
1133  return;
1134  }
1135 
1136  for (const auto &id : biosample_ids) {
1138  if (descr) {
1139  m_Descriptors.clear();
1140  copy(descr->Set().begin(), descr->Set().end(),
1141  back_inserter(m_Descriptors));
1142  PushToRecord(bh);
1143  m_Descriptors.clear();
1144  }
1145  }
1146 
1147 }
1148 
1149 
1151 {
1152  switch (m_Mode) {
1153  case e_report_diffs:
1154  GetBioseqDiffs(bh);
1155  break;
1156  case e_generate_biosample:
1157  try {
1159  bh,
1160  m_IDPrefix,
1163  m_Owner,
1164  m_HUPDate,
1165  m_Comment,
1169  } catch (CException& e) {
1170  *m_LogStream << e.GetMsg() << endl;
1171  }
1172  break;
1173  case e_push:
1174  PushToRecord(bh);
1175  break;
1176  case e_take_from_biosample:
1177  m_Diffs.clear();
1178  GetBioseqDiffs(bh);
1180  m_ReturnCode = 1;
1181  string sequence_id = biosample_util::GetBestBioseqLabel(bh);
1182  *m_LogStream << "Conflicts found for " << sequence_id << endl;
1183  try {
1185  bh, *m_Table,
1186  true,
1189  } catch (CException& e) {
1190  *m_LogStream << e.GetMsg() << endl;
1191  }
1192  } else {
1194  }
1195  break;
1198  break;
1199  case e_update_with:
1200  case e_update_no:
1201  GetBioseqDiffs(bh);
1202  break;
1203  default:
1204  if (m_Handler != NULL) {
1205  m_Handler->ProcessBioseq(bh);
1206  }
1207  break;
1208  }
1209 
1210 }
1211 
1212 
1214 {
1215  CRef<CScope> scope = BuildScope();
1216  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*se);
1217  CBioseq_CI bi(seh, CSeq_inst::eMol_na);
1218  while (bi) {
1219  ProcessBioseqHandle(*bi);
1220  if (m_FirstSeqOnly) {
1221  break;
1222  }
1223  ++bi;
1224  }
1225  scope->RemoveTopLevelSeqEntry(seh);
1226 }
1227 
1228 
1230 {
1231  // Get seq-entry to process
1233 
1234  ProcessSeqEntry(se);
1235 
1236  // write out copy after processing, if requested
1237  if (m_AsnOut) {
1238  *m_AsnOut << *se;
1239  }
1240 }
1241 
1242 
1244 {
1245  // Get Bioseq-set to process
1247  if (set && set->IsSetSeq_set()) {
1248  for (const auto &se : set->GetSeq_set()) {
1249  ProcessSeqEntry(se);
1250  }
1251  }
1252 
1253  // write out copy after processing, if requested
1254  if (m_AsnOut) {
1255  *m_AsnOut << *set;
1256  }
1257 }
1258 
1259 
// NOTE(review): the signature line, the statement that declares `ss`, and the
// bodies of both affiliation branches below are missing from this extract.
//
// Clears m_Owner, inspects the Seq-submit's submission block for an
// affiliation (citation authors first, contact second), runs
// ProcessSeqEntry() on every entry when the data holds entries, and finally
// writes the Seq-submit to the output stream if one is configured.
1261 {
1263 
1264  // Get seq-submit to process
1266 
1267  m_Owner = "";
1268  // get owner from Seq-submit to use if no pub is found
1269  if (ss->IsSetSub()) {
// First choice: affiliation attached to the citation's author list.
1270  if (ss->GetSub().IsSetCit()
1271  && ss->GetSub().GetCit().IsSetAuthors()
1272  && ss->GetSub().GetCit().GetAuthors().IsSetAffil()) {
// (branch body missing from extract; presumably assigns m_Owner -- confirm)
// Fallback: affiliation attached to the submission contact.
1274  } else if (ss->GetSub().IsSetContact() && ss->GetSub().GetContact().IsSetContact()
1275  && ss->GetSub().GetContact().GetContact().IsSetAffil()) {
// (branch body missing from extract; presumably assigns m_Owner -- confirm)
1277  }
1278  }
1279 
1280  // Process Seq-submit
// NOTE(review): `scope` is not used by any statement visible in this body --
// confirm whether BuildScope() is called here only for a side effect.
1281  CRef<CScope> scope = BuildScope();
1282  if (ss->GetData().IsEntrys()) {
1283  for (const auto &se : ss->GetData().GetEntrys()) {
1284  ProcessSeqEntry(se);
1285  }
1286  }
1287  // write out copy after processing, if requested
1288  if (m_AsnOut) {
1289  *m_AsnOut << *ss;
1290  }
1291 }
1292 
1293 static bool s_IsEmptyBioSource(const CSeqdesc& src)
1294 {
1295  return !src.GetSource().IsSetSubtype() && !src.GetSource().IsSetGenome() && !src.GetSource().IsSetOrigin() &&
1296  (!src.GetSource().IsSetOrg() || (!src.GetSource().IsSetOrgname() && !src.GetSource().IsSetTaxname() && !src.GetSource().IsSetDivision()));
1297 }
1298 
// NOTE(review): the signature line and the statement that declares `parent`
// are missing from this extract; the body uses a Bioseq handle `bh` and a
// BioSource `src`.
//
// Brings the BioSource descriptors seen from `bh` in line with `src`:
//  - empty BioSource descriptors (per s_IsEmptyBioSource) are removed;
//  - if no BioSource descriptor remains, a new one copied from `src` is added
//    (to the parent set when that set is a nuc-prot set, otherwise to the
//    Bioseq itself);
//  - otherwise the first non-empty BioSource is updated from `src` and any
//    later empty BioSource descriptors are removed.
1300 {
1301  CSeqdesc_CI src_desc_ci(bh, CSeqdesc::e_Source);
1302 
1303  CBioseq_EditHandle beh = bh.GetEditHandle();
1304  // Removes empty BioSources
// Leading empties only: the loop stops at the first non-empty BioSource.
1305  for (; src_desc_ci;) {
1306 
1307  if (s_IsEmptyBioSource(*src_desc_ci)) {
// The iterator is advanced before the descriptor it pointed at is removed.
1308  const CSeqdesc& cur_descr = *src_desc_ci;
1309  ++src_desc_ci;
1310  beh.RemoveSeqdesc(cur_descr);
1311  }
1312  else {
1313  break;
1314  }
1315  }
1316 
// Iterator exhausted: no non-empty BioSource descriptor was found.
1317  if (!src_desc_ci) {
1318  CRef<CSeqdesc> new_desc(new CSeqdesc());
1319  new_desc->SetSource().Assign(src);
1321 
// `parent` is declared on a line missing from this extract.
1322  if (parent && parent.IsSetClass() && parent.GetClass() == CBioseq_set::eClass_nuc_prot) {
1323  CBioseq_set_EditHandle bseh = parent.GetEditHandle();
1324  bseh.AddSeqdesc(*new_desc);
1325  } else {
1326  beh.AddSeqdesc(*new_desc);
1327  }
1328  } else {
1329 
// GetSource() yields a const reference; the const_cast allows the existing
// BioSource to be modified in place.
1330  const CBioSource& bs = src_desc_ci->GetSource();
1331  CBioSource* old_src = const_cast<CBioSource *> (&bs);
// Arguments are (biosample, force = true, is_local_copy = true).
1332  old_src->UpdateWithBioSample(src, true, true);
1333 
1334  // Removes the rest of empty BioSources
// Unlike the first loop, this one skips over non-empty descriptors and keeps
// going to the end.
1335  for (++src_desc_ci; src_desc_ci;) {
1336 
1337  if (s_IsEmptyBioSource(*src_desc_ci)) {
1338  const CSeqdesc& cur_descr = *src_desc_ci;
1339  ++src_desc_ci;
1340  beh.RemoveSeqdesc(cur_descr);
1341  }
1342  else {
1343  ++src_desc_ci;
1344  }
1345  }
1346  }
1347 }
1348 
1349 
// NOTE(review): the signature line and two statements (after the first and
// the last comment below) are missing from this extract; presumably this is
// the application's Setup routine -- confirm.
//
// Visible effect: installs the application's configuration registry as the
// registry used by the CONNECT library core.
1351 {
1352  // Setup application registry and logs for CONNECT library
// `false` is the pass_ownership argument of REG_cxx2c.
1354  CORE_SetREG(REG_cxx2c(&GetConfig(), false));
1355  // Setup MT-safety for CONNECT library
1356  // CORE_SetLOCK(MT_LOCK_cxx2c());
1357 
1358  // Create object manager
1360 }
1361 
1362 
1363 unique_ptr<CObjectIStream> CBiosampleChkApp::OpenFile(const CArgs& args)
1364 {
1365  string fname = args["i"].AsString();
1366  return CBiosampleChkApp::OpenFile(fname);
1367 }
1368 
// Opens `fname` and returns an object input stream for it.
//
// The file is opened in binary mode and its format is guessed. If the guess is
// gzip, bzip2 or lzo, the stream is wrapped in a decompressing stream and the
// format is guessed again on the decompressed data. An object stream is then
// created for the recognised format(s); for any other format the returned
// pointer is empty.
//
// NOTE(review): several lines of this function are missing from this extract
// (one near the top, and the `case` labels plus the definition of `format` in
// the second switch).
1369 unique_ptr<CObjectIStream> CBiosampleChkApp::OpenFile(const string &fname)
1370 {
1372 
// hold_stream owns the raw stream until ownership is handed on below.
1373  unique_ptr<CNcbiIstream> hold_stream(new CNcbiIfstream (fname.c_str(), ios::binary));
1374  CNcbiIstream* InputStream = hold_stream.get();
1375 
1376  CFormatGuess::EFormat formatGuess = CFormatGuess::Format(*InputStream);
1377 
// Map a compressed-container guess to the matching decompression method.
1378  CCompressStream::EMethod method;
1379  switch (formatGuess)
1380  {
1381  case CFormatGuess::eGZip: method = CCompressStream::eGZipFile; break;
1382  case CFormatGuess::eBZip2: method = CCompressStream::eBZip2; break;
1383  case CFormatGuess::eLzo: method = CCompressStream::eLZO; break;
1384  default: method = CCompressStream::eNone; break;
1385  }
1386  if (method != CCompressStream::eNone)
1387  {
// The decompressor is constructed with eTakeOwnership for the file stream, so
// hold_stream gives up that pointer before adopting the decompressor.
1388  CDecompressIStream* decompress(new CDecompressIStream(*InputStream, method, CCompressStream::fDefault, eTakeOwnership));
1389  hold_stream.release();
1390  hold_stream.reset(decompress);
1391  InputStream = hold_stream.get();
// Guess again, this time on the decompressed content.
1392  formatGuess = CFormatGuess::Format(*InputStream);
1393  }
1394 
1395  unique_ptr<CObjectIStream> objectStream;
1396  switch (formatGuess)
1397  {
// (case labels and the definition of `format` are missing from this extract)
// The object stream is opened with eTakeOwnership for the input stream, so
// hold_stream releases it afterwards.
1402  objectStream.reset(CObjectIStream::Open(format, *InputStream, eTakeOwnership));
1403  hold_stream.release();
1404  break;
1405  default:
// Unrecognised format: objectStream stays empty.
1406  break;
1407  }
1408  return objectStream;
1409 }
1410 
// Creates the output file stream m_AsnOut for `fname`.
//
// @param fname                  Path of the file to write.
// @param useBinaryOutputFormat  Selects between the two branches at the end of
//                               the function; their bodies are missing from
//                               this extract (presumably they choose binary
//                               vs. text ASN.1 serialization -- confirm).
1411 void CBiosampleChkApp::SaveFile(const string &fname, bool useBinaryOutputFormat)
1412 {
// NOTE(review): the open mode is ios::out regardless of
// useBinaryOutputFormat -- confirm that binary output does not need
// ios::binary here.
1413  ios::openmode mode = ios::out;
// NOTE(review): any stream previously held in m_AsnOut is overwritten here
// without being deleted in the visible code -- confirm ownership.
1414  m_AsnOut = new CNcbiOfstream(fname.c_str(), mode);
// NOTE(review): this tests the pointer returned by `new`, not the stream
// state; an ordinary `new` does not return null, so a file that failed to
// open is presumably not detected here -- consider testing the stream itself.
1415  if (!m_AsnOut)
1416  {
1417  NCBI_THROW(CException, eUnknown, "Unable to open " + fname);
1418  }
1419  if ( useBinaryOutputFormat ) {
1421  } else {
1423  }
1424 }
1425 
1426 
1427 /////////////////////////////////////////////////////////////////////////////
1428 // MAIN
1429 
1430 
1431 int main(int argc, const char* argv[])
1432 {
1433  return CBiosampleChkApp().AppMain(argc, argv, 0, eDS_Default, 0);
1434 }
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static bool s_IsEmptyBioSource(const CSeqdesc &src)
const char * BIOSAMPLE_CHK_APP_VER
int main(int argc, const char *argv[])
string OwnerFromAffil(const CAffil &affil)
vector< CRef< CBiosampleFieldDiff > > TBiosampleFieldDiffList
bool ResolveSuppliedBioSampleAccession(const string &biosample_accession, vector< string > &biosample_ids)
bool DoDiffsContainConflicts(const TBiosampleFieldDiffList &diffs, CNcbiOstream *log)
string GetBestBioseqLabel(CBioseq_Handle bsh)
EStatus GetBiosampleStatus(const string &accession, bool use_dev_server=false, TStatuses *cache=NULL)
map< string, CRef< CSeq_descr > > TBioSamples
EStatus
@ eStatus_Unknown
map< string, EStatus > TStatuses
void PrintBioseqXML(CBioseq_Handle bh, const string &id_prefix, CNcbiOstream *report_stream, const string &bioproject_accession, const string &default_owner, const string &hup_date, const string &comment, bool first_seq_only, bool report_structured_comments, const string &expected_prefix)
CRef< CSeq_descr > GetBiosampleData(const string &accession, bool use_dev_server=false, TBioSamples *cache=NULL)
string GetBiosampleStatusName(EStatus status)
TBiosampleFieldDiffList GetBioseqDiffs(CBioseq_Handle bh, const string &biosample_accession, size_t &num_processed, vector< string > &unprocessed_ids, bool use_dev_server=false, bool compare_structured_comments=false, const string &expected_prefix="", TBioSamples *cache=NULL)
pair< string, biosample_util::EStatus > TStatus
vector< string > GetBiosampleIDs(CBioseq_Handle bh)
void AddBioseqToTable(CBioseq_Handle bh, CSeq_table &table, bool with_id, bool include_comments=false, const string &expected_prefix="")
CArgAllow_Integers –.
Definition: ncbiargs.hpp:1751
CArgAllow –.
Definition: ncbiargs.hpp:1488
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
bool IsSetOrgname(void) const
Definition: BioSource.cpp:405
bool IsSetDivision(void) const
Definition: BioSource.cpp:395
void UpdateWithBioSample(const CBioSource &biosample, bool force, bool is_local_copy=false)
Definition: BioSource.cpp:704
bool IsSetTaxname(void) const
Definition: BioSource.cpp:335
void Setup(const CArgs &args)
string m_StructuredCommentPrefix
void ProcessBioseqHandle(CBioseq_Handle bh)
string m_BioProjectAccession
biosample_util::TBioSamples m_cache
void ProcessSeqSubmit(void)
void SaveFile(const string &fname, bool useBinaryOutputFormat)
void UpdateBioSource(CBioseq_Handle bh, const CBioSource &src)
vector< CRef< CSeqdesc > > GetBiosampleDescriptorsFromSeqEntry()
biosample_util::TBiosampleFieldDiffList m_Diffs
CRef< CSeq_entry > ReadSeqEntry(void)
void ProcessReleaseFile(const CArgs &args)
CNcbiOfstream * m_AsnOut
void PushToRecord(CBioseq_Handle bh)
CRef< CObjectManager > m_ObjMgr
void ProcessAsnInput(void)
CRef< CBioseq_set > ReadBioseqSet(void)
unique_ptr< CObjectIStream > OpenFile(const CArgs &args)
void CreateBiosampleUpdateWebService(biosample_util::TBiosampleFieldDiffList &diffs, bool del_okay)
vector< CRef< CSeqdesc > > m_Descriptors
void ReadClassMember(CObjectIStream &in, const CObjectInfo::CMemberIterator &member)
This method will be called at the appropriate time when the object of requested type is to be read.
CNcbiOstream * m_LogStream
void ProcessSeqEntry(void)
virtual void Init(void)
Initialize the application.
void ProcessList(const string &fname)
bool m_CompareStructuredComments
void PrintDiffs(biosample_util::TBiosampleFieldDiffList &diffs)
CNcbiOstream * m_ReportStream
int ProcessOneDirectory(const string &dir_name, const string &file_suffix, const string &file_mask, bool recurse)
void PrintResults(biosample_util::TBiosampleFieldDiffList &diffs)
void GetBioseqDiffs(CBioseq_Handle bh)
void ProcessFileList(const string &fname)
virtual int Run(void)
Run the application.
void ProcessOneFile(string fname)
vector< CRef< CSeqdesc > > GetBiosampleDescriptorsFromSeqSubmit()
vector< CRef< CSeqdesc > > GetBiosampleDescriptors(string fname)
CRef< CScope > BuildScope(void)
void ProcessBioseqForUpdate(CBioseq_Handle bh)
unique_ptr< CObjectIStream > m_In
CBiosampleHandler * m_Handler
CRef< CSeq_table > m_Table
void PrintTable(CRef< CSeq_table > table)
virtual void AddSummary()
virtual ~CBiosampleHandler()
virtual bool NeedsReportStream()
virtual void ProcessBioseq(CBioseq_Handle bh)
CNcbiOstream * m_ReportStream
void SetReportStream(CNcbiOstream *stream)
virtual bool NeedsReportStream()
virtual void AddSummary()
biosample_util::TStatuses m_Status
virtual void ProcessBioseq(CBioseq_Handle bh)
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_EditHandle –.
CBioseq_Handle –.
CBioseq_set_EditHandle –.
CBioseq_set_Handle –.
CDecompressIStream –.
CDir –.
Definition: ncbifile.hpp:1695
EFormat
The formats are checked in the same order as declared here.
@ eBZip2
bzip2 compressed file
@ eBinaryASN
Binary ASN.1.
@ eLzo
lzo compressed file
@ eGZip
GNU zip compressed file.
@ eTextASN
Text ASN.1.
static EFormat Format(const string &path, EOnError onerror=eDefault)
Guess file format.
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
HTTP response.
Reading (iterating through) elements of containers (SET OF, SEQUENCE OF).
Definition: objectio.hpp:164
CJson_Object push_back_object(void)
Add object type element to the end of the array.
std::string ToString(TJson_Write_Flags flags=fJson_Write_IndentWithSpace, unsigned int indent_char_count=4) const
Convert the contents of the node into string.
CJson_Object SetObject(void)
Get JSON object contents of the node.
CJson_Array insert_array(const CJson_Node::TKeyType &name)
Insert array type element into the object.
void insert(const CJson_Node::TKeyType &name)
Insert null element into the object.
CJson_Object insert_object(const CJson_Node::TKeyType &name)
Insert object type element into the object.
CObjectIStream –.
Definition: objistr.hpp:93
CObjectInfoMI –.
Definition: objectiter.hpp:432
CObjectTypeInfo –.
Definition: objectinfo.hpp:94
Read hook for data member of a containing object (eg, SEQUENCE)
Definition: objhook.hpp:78
CScope –.
Definition: scope.hpp:92
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
CStopWatch –.
Definition: ncbitime.hpp:1938
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CUrl –.
Definition: ncbi_url.hpp:353
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const SLoaderParams &params, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: wgsloader.cpp:85
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
size_type size() const
Definition: set.hpp:132
Include a standard set of the NCBI C++ Toolkit most basic headers.
std::ofstream out("events_result.xml")
main entry point for tests
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
static void Init(void)
Definition: cursor6.c:76
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eTakeOwnership
An object can take ownership of another.
Definition: ncbi_types.h:136
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
#define NULL
Definition: ncbistd.hpp:225
EMethod
Compression/decompression methods.
Definition: stream_util.hpp:98
@ eLZO
LZO (LZO1X)
@ eNone
no compression method (copy "as is")
Definition: stream_util.hpp:99
@ eGZipFile
.gz file (including concatenated files)
@ fDefault
Use algorithm-specific defaults.
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
Definition: ncbidiag.hpp:1790
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
TEntries GetEntries(const string &mask=kEmptyStr, TGetEntriesFlags flags=0) const
Get directory entries based on the specified "mask".
Definition: ncbifile.cpp:3846
static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)
Assemble a path from basic components.
Definition: ncbifile.cpp:413
list< TEntry > TEntries
Definition: ncbifile.hpp:1750
@ eDir
Directory.
Definition: ncbifile.hpp:784
@ eFile
Regular file.
Definition: ncbifile.hpp:783
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define MSerial_AsnBinary
Definition: serialbase.hpp:697
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
ESerialDataFormat
Data file format.
Definition: serialdef.hpp:71
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
@ eSerial_AsnBinary
ASN.1 binary.
Definition: serialdef.hpp:74
CNcbiIstream & ContentStream(void) const
Get input stream.
int GetStatusCode(void) const
Get response status code.
void SetValue(CHeaderNameConverter name, CTempString value)
Remove all existing values with the name, set the new value.
CNcbiIstream & ErrorStream(void) const
Get input stream containing error message (e.g.
CHttpResponse g_HttpPost(const CUrl &url, CTempString data, const CHttpParam &param=CHttpParam())
Shortcut for POST request.
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
Definition: line_reader.cpp:49
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
pair< TObjectPtr, TTypeInfo > ObjectInfo(C &obj)
Definition: objectinfo.hpp:762
CMemberIterator FindMember(const string &memberName) const
Find class member by its name.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
void SetLocalReadHook(CObjectIStream &stream, CReadClassMemberHook *hook) const
Definition: objectiter.cpp:96
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
void RemoveTopLevelSeqEntry(const CTSE_Handle &entry)
Revoke TSE previously added using AddTopLevelSeqEntry() or AddBioseq().
Definition: scope.cpp:376
CBioseq_set_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
TClass GetClass(void) const
CBioseq_set_Handle GetParentBioseq_set(void) const
Return a handle for the parent Bioseq-set, or null handle.
bool AddSeqdesc(CSeqdesc &d) const
CBioseq_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
bool IsSetClass(void) const
bool AddSeqdesc(CSeqdesc &d) const
CRef< CSeqdesc > RemoveSeqdesc(const CSeqdesc &d) const
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NcbiCout
Definition: ncbistre.hpp:543
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
bool NcbiStreamCopy(CNcbiOstream &os, CNcbiIstream &is)
Copy the entire contents of stream "is" to stream "os".
Definition: ncbistre.cpp:211
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
@ eReverseSearch
Search in a backward direction.
Definition: ncbistr.hpp:1947
@ eCase
Case sensitive compare.
Definition: ncbistr.hpp:1205
void Run(void)
Enter the main loop.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2776
@ eStart
Start timer immediately after creating.
Definition: ncbitime.hpp:1942
LOG LOG_cxx2c(void)
Create LOG on top of C++ Toolkit CNcbiDiag.
void CORE_SetREG(REG rg)
Set the registry (no registry if "rg" is passed zero) – to be used by the core internals.
Definition: ncbi_util.c:692
REG REG_cxx2c(IRWRegistry *reg, bool pass_ownership=false)
Convert a C++ Toolkit registry object to a REG registry.
void CORE_SetLOG(LOG lg)
Set the log handle (no logging if "lg" is passed zero) – to be used by the core internals (CORE LOG).
Definition: ncbi_util.c:123
static const char label[]
bool IsSetAffil(void) const
author affiliation Check if a value has been assigned to Affil data member.
Definition: Auth_list_.hpp:498
const TAffil & GetAffil(void) const
Get the Affil member data.
Definition: Author_.hpp:476
const TAffil & GetAffil(void) const
Get the Affil member data.
Definition: Auth_list_.hpp:510
const TAuthors & GetAuthors(void) const
Get the Authors member data.
Definition: Cit_sub_.hpp:357
bool IsSetAffil(void) const
Check if a value has been assigned to Affil data member.
Definition: Author_.hpp:464
bool IsSetAuthors(void) const
not necessarily authors of the paper Check if a value has been assigned to Authors data member.
Definition: Cit_sub_.hpp:345
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: BioSource_.hpp:527
bool IsSetOrigin(void) const
Check if a value has been assigned to Origin data member.
Definition: BioSource_.hpp:447
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
Definition: BioSource_.hpp:397
void SetNum_rows(TNum_rows value)
Assign a value to Num_rows data member.
Definition: Seq_table_.hpp:402
TNum_rows GetNum_rows(void) const
Get the Num_rows member data.
Definition: Seq_table_.hpp:393
virtual void Reset(void)
Reset the whole object.
Definition: Seq_table_.cpp:57
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
TSource & SetSource(void)
Select the variant.
Definition: Seqdesc_.cpp:572
Tdata & Set(void)
Assign a value to data member.
Definition: Seq_descr_.hpp:172
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
const TContact & GetContact(void) const
Get the Contact member data.
const TCit & GetCit(void) const
Get the Cit member data.
const TEntrys & GetEntrys(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
const TSub & GetSub(void) const
Get the Sub member data.
bool IsSetSub(void) const
Check if a value has been assigned to Sub data member.
bool IsEntrys(void) const
Check if variant Entrys is selected.
const TContact & GetContact(void) const
Get the Contact member data.
bool IsSetCit(void) const
citation for this submission Check if a value has been assigned to Cit data member.
bool IsSetContact(void) const
who to contact Check if a value has been assigned to Contact data member.
bool IsSetContact(void) const
WARNING: this will replace the above Check if a value has been assigned to Contact data member.
static CStopWatch sw
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
exit(2)
int i
yy_size_t n
Lightweight interface for getting lines of data with minimal memory copying.
static char * subname
Definition: mdb_load.c:26
constexpr auto add_item(T item, list< Ts... > l) noexcept
mdb_mode_t mode
Definition: lmdb++.h:38
Magic spell ;-) needed for some weird compilers... very empiric.
XML library namespace.
Definition: attributes.hpp:57
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
Useful/utility classes and methods.
static Format format
Definition: njn_ioutil.cpp:53
std::istream & in(std::istream &in_, double &x_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
The Object manager core.
void PrintResults(CNcbiOstream &ostream, const vector< string > &idmap, CDbIndex::TSeqNum qnum, const vector< CSRSearch::SResultData > &results, const string &idstr1, const string &idstr2="")
C++ I/O stream wrappers to compress/decompress data on-the-fly.
#define row(bind, expected)
Definition: string_bind.c:73
Definition: smp.h:26
CRef< CScope > BuildScope(void)
Modified on Wed Apr 17 13:08:16 2024 by modify_doxy.py rev. 669887