NCBI C++ ToolKit
ssub_fork.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: ssub_fork.cpp 92181 2020-12-22 18:07:28Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Justin Foley
27 *
28 * File Description:
29 * seq-submit splitter application
30 */
31 #include <ncbi_pch.hpp>
32 #include <corelib/ncbiapp.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <corelib/ncbiexpt.hpp>
35 #include <serial/serial.hpp>
36 #include <serial/objistr.hpp>
37 #include <serial/objostr.hpp>
38 #include <serial/serial.hpp>
41 #include <corelib/ncbidiag.hpp>
44 #include <objects/seq/Bioseq.hpp>
45 #include <objects/seq/Seq_inst.hpp>
48 #include "splitter_exception.hpp"
49 #include <math.h>
50 
53 
56 
57 namespace seqsubmit_split
58 {
59 
60 typedef vector<CRef<CSeq_entry> > TSeqEntryArray;
61 typedef list<CRef<CSeq_entry> > TSeqEntryList;
62 
64 {
65 public:
66  virtual CRef<CSerialObject> BuildObject() const = 0;
67  virtual TSeqEntryList& GetListOfEntries(CSerialObject& obj) const = 0;
68 };
69 
71 {
72 
73 public:
74  void Init();
75  int Run();
76 
77 private:
78  CObjectIStream* xInitInputStream() const; // Why CObjectIStream instead of NcbiIstream here?
79 
80  CObjectOStream* xInitOutputStream(const string& output_stub,
81  const TSeqPos output_index,
82  const TSeqPos pad_width,
83  const string& output_extension,
84  const bool binary) const;
85 
86 
88 
89  bool xTryReadInputFile(CRef<CSerialObject>& obj) const;
90 
91  bool xTryProcessSeqEntries(const CObjectHelper& builder, TSeqEntryArray& seq_entry_array,
92  list<CRef<CSerialObject>>& output_array) const;
93 
95  list<CRef<CSerialObject>>& output_array) const;
97  list<CRef<CSerialObject>>& output_array) const;
98 
99  void xWrapSeqEntries(TSeqEntryArray& seq_entry_array,
100  const TSeqPos& bundle_size,
101  TSeqEntryArray& wrapped_entry_array) const;
102 
104  TSeqEntryArray& seq_entry_array) const;
105 
106  void xFlattenSeqEntry(CSeq_entry& seq_entry,
107  const CSeq_descr& seq_descr,
108  TSeqEntryArray& seq_entry_array,
109  bool process_set_of_any_type = false) const;
110 
111  void xMergeSeqDescr(const CSeq_descr& src, CSeq_descr& dst) const;
112 
113 
114  string xGetFileExtension(const string& filename) const;
115 };
116 
117 
119 {
120  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
121  arg_desc->SetUsageContext(
122  GetArguments().GetProgramBasename(),
123  "Split a single large instance of Seq-submit into smaller instances",
124  false);
125  // input
126  {
127  arg_desc->AddKey("i", "InputFile",
128  "Filename for asn.1 input",
130  }
131 
132  {
133  arg_desc->AddDefaultKey("b",
134  "BOOLEAN",
135  "Input asn.1 file in binary mode [T/F]",
137  "F");
138 
139  // arg_desc->SetConstraint("b", &(*new CArgAllow_Strings, "T", "F"));
140  }
141 
142  // output
143  {
144  string description = "Filename stub for asn.1 outputs.\n";
145  description.append("Will append consecutive numbers and a file-type extension to this stub");
146 
147  arg_desc->AddKey("o", "OutputFile",
148  description,
150  }
151 
152  {
153  arg_desc->AddDefaultKey("s",
154  "BOOLEAN",
155  "Output asn.1 files in binary mode [T/F]",
157  "F");
158 
159  }
160 
161  {
162  arg_desc->AddDefaultKey("w",
163  "BOOLEAN",
164  "Wrap output Seq-entries within Seq-submits with Genbank set [T/F]",
166  "F");
167  }
168 
169  // logfile alias
170  {
171  arg_desc->AddAlias("l", "logfile");
172  }
173 
174  // parameters
175  {
176  arg_desc->AddDefaultKey("n",
177  "POSINT",
178  "Number of records in output Seq-submits",
180  "1");
181 
182  string description = "Generate output in sorted order \n";
183  description.append(" 0 - unsorted (in order of appearance in input file;\n");
184  description.append(" 1 - by sequence length from longest to shortest;\n");
185  description.append(" 2 - by sequence length from shortest to longest;\n");
186  description.append(" 3 - by contig/scaffold id.");
187 
188  arg_desc->AddDefaultKey("r",
189  "INTEGER",
190  description,
192  "0");
193 
194  arg_desc->SetConstraint("r",
195  &(*new CArgAllow_Strings,
196  "0", "1", "2", "3"));
197  }
198 
199  // treat input as Seq-entry
200  {
201  arg_desc->AddFlag("e", "Treat input as Seq-entry");
202  }
203 
204  SetupArgDescriptions(arg_desc.release());
205 }
206 
207 
209 {
210  const CArgs& args = GetArgs();
211  if (args["e"].AsBoolean() && args["w"].AsBoolean()) {
212  string err_msg = "Incompatible parameters: do not use -w when -e is used";
213  ERR_POST(err_msg);
214  return 1;
215  }
216 
217  CRef<CSerialObject> input_obj;
218  if (!xTryReadInputFile(input_obj)) {
219  string err_msg = "Could not read input file";
220  ERR_POST(err_msg);
221  return 1;
222  }
223 
224 
225  bool input_as_seq_entry = args["e"].AsBoolean();
226  list<CRef<CSerialObject>> output_array;
227 
228  bool good = input_as_seq_entry ? xTryProcessSeqEntry(input_obj, output_array) : xTryProcessSeqSubmit(input_obj, output_array);
229 
230  if (!good) {
231  string err_msg = "Could not process input file";
232  ERR_POST(err_msg);
233  return 1;
234  }
235 
236  const string output_stub = args["o"].AsString();
237 
238 
239  // Output files should have the same extension as the input file
240  string output_extension = "";
241  if (args["i"]) {
242  output_extension = xGetFileExtension(args["i"].AsString());
243  }
244 
245  int output_index = 0;
246  unique_ptr<CObjectOStream> ostr;
247  bool binary = args["s"].AsBoolean();
248 
249  const TSeqPos pad_width = static_cast<TSeqPos>(log10(output_array.size())) + 1;
250 
251  try {
252  for (auto& it: output_array) {
253  ++output_index;
254  ostr.reset(xInitOutputStream(output_stub,
255  output_index,
256  pad_width,
257  output_extension,
258  binary
259  ));
260  *ostr << *it;
261  }
262  }
263  catch (const CException& e) {
264  string err_msg = "Error while output results. ";
265  err_msg += e.what();
266  ERR_POST(err_msg);
267  return 1;
268  }
269 
270  return 0;
271 }
272 
273 
274 string CSeqSubSplitter::xGetFileExtension(const string& filename) const
275 {
276  string extension = "";
277  vector<string> arr;
278  NStr::Split(filename,".",arr);
279  if (arr.size() > 1) {
280  extension = arr.back();
281  }
282 
283  return extension;
284 }
285 
286 
288  const string& output_stub,
289  const TSeqPos output_index,
290  const TSeqPos pad_width,
291  const string& output_extension,
292  const bool binary) const
293 {
294  if (output_stub.empty()) {
296  eEmptyOutputStub,
297  "Output stub not specified");
298  }
299 
300  string padded_index;
301  {
302  const string padding = string(pad_width, '0');
303  padded_index = NStr::IntToString(output_index);
304 
305  if (padded_index.size() < pad_width) {
306  padded_index = padding.substr(0, pad_width - padded_index.size()) + padded_index;
307  }
308  }
309 
310  string filename = output_stub + "_" + padded_index + "." + output_extension;
311 
313  if (binary) {
314  serial = eSerial_AsnBinary;
315  }
316  CObjectOStream* pOstr = CObjectOStream::Open(filename, serial);
317 
318  if (!pOstr) {
320  eOutputError,
321  "Unable to open output file:" + filename);
322  }
323  return pOstr;
324 }
325 
327 {
329  if (GetArgs()["e"].AsBoolean()) {
330  ret.Reset(new CSeq_entry);
331  }
332  else {
333  ret.Reset(new CSeq_submit);
334  }
335 
336  return ret;
337 }
338 
340 {
341  unique_ptr<CObjectIStream> istr;
342  istr.reset(xInitInputStream());
343 
344  CRef<CSerialObject> input_obj = xGetInputObject();
345 
346  bool ret = true;
347  try {
348  istr->Read( { input_obj, input_obj->GetThisTypeInfo() } );
349  obj = input_obj;
350  }
351  catch (CException&) {
352  ret = false;
353  }
354 
355  return ret;
356 }
357 
358 
359 // I guess that I could make Comparison classes subclasses of CSeqSubSplitter
360 template<class Derived>
361 struct SCompare
362 {
363  bool operator()(const CRef<CSeq_entry>& e1, const CRef<CSeq_entry>& e2) const
364  {
365  const CBioseq& b1 = e1->IsSeq() ? e1->GetSeq() : e1->GetSet().GetNucFromNucProtSet();
366 
367  const CBioseq& b2 = e2->IsSeq() ? e2->GetSeq() : e2->GetSet().GetNucFromNucProtSet();
368 
369  return static_cast<const Derived*>(this)->compare_seq(b1, b2);
370  }
371 
372  bool compare_seq(const CBioseq& b1, const CBioseq& b2) const
373  {
374  return true;
375  }
376 };
377 
378 
379 struct SLongestFirstCompare : public SCompare<SLongestFirstCompare>
380 {
381  bool compare_seq(const CBioseq& b1, const CBioseq& b2) const
382  {
383  if (!b1.IsSetInst() || !b2.IsSetInst()) {
384  NCBI_THROW(CSeqSubSplitException, eInvalidSeqinst, "Bioseq inst not set");
385  }
386 
387  if (!b1.GetInst().IsSetLength() || // Length must be set
388  !b2.GetInst().IsSetLength())
389  {
390  return true;
391  }
392 
393  return (b1.GetInst().GetLength() > b2.GetInst().GetLength());
394  }
395 };
396 
397 
398 struct SShortestFirstCompare : public SCompare<SShortestFirstCompare>
399 {
400  bool compare_seq(const CBioseq& b1, const CBioseq& b2) const
401  {
402  if (!b1.IsSetInst() || !b2.IsSetInst()) {
403  NCBI_THROW(CSeqSubSplitException, eInvalidSeqinst, "Bioseq inst not set");
404  }
405 
406  if (!b1.GetInst().IsSetLength() || // Length must be set
407  !b2.GetInst().IsSetLength())
408  {
409  return true;
410  }
411  return (b1.GetInst().GetLength() < b2.GetInst().GetLength());
412  }
413 };
414 
415 
416 struct SIdCompare : public SCompare<SIdCompare>
417 {
418 
420  {
421  const CBioseq::TId& ids = bioseq.GetId();
422 
423  ITERATE(CBioseq::TId, id_itr, ids) {
424  CConstRef<CSeq_id> id = *id_itr;
425 
426  if (id && id->IsGeneral()) {
427  return id;
428  }
429  }
430 
431  return CConstRef<CSeq_id>();
432  }
433 
434  CConstRef<CSeq_id> xGetId(const CBioseq& bioseq) const
435  {
436  if (bioseq.GetLocalId()) {
437  return CConstRef<CSeq_id>(bioseq.GetLocalId());
438  }
439  return xGetGeneralId(bioseq);
440  }
441 
442 
443  bool compare_seq(const CBioseq& b1, const CBioseq& b2) const
444  {
445  if (!b1.IsSetId() || !b2.IsSetId()) {
447  eInvalidSeqid,
448  "Bioseq id not set");
449  }
450 
451  CConstRef<CSeq_id> id1 = xGetId(b1);
452  CConstRef<CSeq_id> id2 = xGetId(b2);
453 
454  if (id1.IsNull() || id2.IsNull()) {
456  eSeqIdError,
457  "Cannot access bioseq id");
458  }
459 
460  if (id1->IsGeneral() != id2->IsGeneral()) {
462  eSeqIdError,
463  "Inconsistent bioseq ids");
464  }
465 
466  return (id1->CompareOrdered(*id2) < 0);
467  }
468 };
469 
470 
471 
473  const TSeqPos& bundle_size,
474  TSeqEntryArray& wrapped_entry_array) const
475 {
476  TSeqEntryArray::iterator seq_entry_it = seq_entry_array.begin();
477  while (seq_entry_it != seq_entry_array.end()) {
478  CRef<CSeq_entry> seq_entry = Ref(new CSeq_entry());
480  for (TSeqPos i=0; i<bundle_size; ++i) {
481  seq_entry->SetSet().SetSeq_set().push_back(*seq_entry_it);
482  ++seq_entry_it;
483  if (seq_entry_it == seq_entry_array.end()) {
484  break;
485  }
486  }
487  wrapped_entry_array.push_back(seq_entry);
488  }
489 }
490 
491 
493  list<CRef<CSerialObject>>& output_array) const
494 {
495  const CArgs& args = GetArgs();
496 
497  TSeqPos bundle_size = args["n"].AsInteger();
498  TSeqPos sort_order = args["r"].AsInteger();
499  bool wrap_entries = args["w"].AsBoolean(); // Wrap the output Seq-entries
500  // within a Seq-submit in a Genbank set
501 
502  switch(sort_order) {
503  default:
505  eInvalidSortOrder,
506  "Unrecognized sort order: " + NStr::IntToString(sort_order));
507  case 0:
508  break;
509  case 1:
510  stable_sort(seq_entry_array.begin(), seq_entry_array.end(), SLongestFirstCompare());
511  break;
512  case 2:
513  stable_sort(seq_entry_array.begin(), seq_entry_array.end(), SShortestFirstCompare());
514  break;
515  case 3:
516  stable_sort(seq_entry_array.begin(), seq_entry_array.end(), SIdCompare());
517  break;
518  }
519 
520  if (wrap_entries) { // wrap the entries inside a genbank set
521  TSeqEntryArray wrapped_entry_array;
522  xWrapSeqEntries(seq_entry_array, bundle_size, wrapped_entry_array);
523  for(size_t i=0; i<wrapped_entry_array.size(); ++i) {
524  CRef<CSerialObject> seqsub = helper.BuildObject();
525  helper.GetListOfEntries(*seqsub).push_back(wrapped_entry_array[i]);
526  output_array.push_back(seqsub);
527  }
528  } else {
529  TSeqEntryArray::iterator seq_entry_it = seq_entry_array.begin();
530  while (seq_entry_it != seq_entry_array.end()) {
531 
532  CRef<CSerialObject> seqsub = helper.BuildObject();
533  for(TSeqPos i=0; i<bundle_size; ++i) {
534  helper.GetListOfEntries(*seqsub).push_back(*seq_entry_it);
535  ++seq_entry_it;
536  if (seq_entry_it == seq_entry_array.end()) {
537  break;
538  }
539  }
540  output_array.push_back(seqsub);
541  }
542  }
543  return true;
544 }
545 
546 
548 {
549 public:
551  m_seq_submit(seq_submit)
552  {}
553 
555  {
556  CRef<CSeq_submit> seqsub = Ref(new CSeq_submit());
557  seqsub->SetSub(m_seq_submit.SetSub());
558 
559  return seqsub;
560  }
561 
563  {
564  CSeq_submit& sub = dynamic_cast<CSeq_submit&>(obj);
565  return sub.SetData().SetEntrys();
566  }
567 
568 private:
570 };
571 
572 
574 {
575  CSeq_submit* input_sub = dynamic_cast<CSeq_submit*>(obj.GetPointer());
576  if (input_sub == nullptr || !input_sub->IsEntrys()) {
577  ERR_POST("Seq-submit does not contain any entries");
578  return false;
579  }
580 
581  TSeqEntryArray seq_entry_array;
582 
583  xFlattenSeqEntrys(input_sub->SetData().SetEntrys(), seq_entry_array);
584  return xTryProcessSeqEntries(CSeqSubmitHelper(*input_sub), seq_entry_array, output_array);
585 }
586 
587 
589 {
590 public:
592  m_seq_entry(seq_entry)
593  {}
594 
596  {
597  CRef<CSeq_entry> seqentry = Ref(new CSeq_entry());
598  seqentry->SetSet().SetClass(m_seq_entry.GetSet().GetClass());
599 
600  if (m_seq_entry.IsSetDescr()) {
601  seqentry->SetDescr().Assign(m_seq_entry.GetDescr());
602  }
603  return seqentry;
604  }
605 
607  {
608  CSeq_entry& entry = dynamic_cast<CSeq_entry&>(obj);
609  return entry.SetSet().SetSeq_set();
610  }
611 
612 private:
614 };
615 
616 
618 {
619  CSeq_entry* input_entry = dynamic_cast<CSeq_entry*>(obj.GetPointer());
620  if (input_entry == nullptr || !input_entry->IsSet()) {
621  ERR_POST("Seq-entry does not contain any entries");
622  return false;
623  }
624 
625  TSeqEntryArray seq_entry_array;
626  CSeq_descr seq_descr;
627 
628  CRef<CSeq_descr> upper_level_descr;
629  if (input_entry->IsSetDescr()) {
630  upper_level_descr.Reset(new CSeq_descr);
631  upper_level_descr->Assign(input_entry->GetDescr());
632  input_entry->SetDescr().Reset();
633  }
634 
635  xFlattenSeqEntry(*input_entry, seq_descr, seq_entry_array, true);
636  if (upper_level_descr.NotEmpty()) {
637  input_entry->SetDescr().Assign(*upper_level_descr);
638  upper_level_descr.Reset();
639  }
640 
641  return xTryProcessSeqEntries(CSeqEntryHelper(*input_entry), seq_entry_array, output_array);
642 }
643 
644 
646 {
647  const CArgs& args = GetArgs();
648  if (!args["i"]) {
650  eInputError,
651  "Input file unspecified");
652  }
653 
655  if (args["b"].AsBoolean()) {
656  serial = eSerial_AsnBinary;
657  }
658 
659  string infile_str = args["i"].AsString();
660  CNcbiIstream* pInputStream = new CNcbiIfstream(infile_str.c_str(), ios::in | ios::binary);
661 
662  if (pInputStream->fail())
663  {
665  eInputError,
666  "Could not create input stream for \"" + infile_str + "\"");
667  }
668 
669 
670  CObjectIStream* p_istream = CObjectIStream::Open(serial,
671  *pInputStream,
673 
674  if (!p_istream) {
676  eInputError,
677  "Unable to open input file \"" + infile_str + "\"");
678 
679  }
680 
681  return p_istream;
682 }
683 
685 {
686  static constexpr CSeqdesc::E_Choice MULTIPLE_ALLOWED[] = {
697  };
698 
699  return find(begin(MULTIPLE_ALLOWED), end(MULTIPLE_ALLOWED), choice) != end(MULTIPLE_ALLOWED);
700 }
701 
702 static bool NeedToInclude(const CSeqdesc& descr, const CSeq_descr& dst)
703 {
704  bool ret = true;
705 
706  if (dst.IsSet()) {
707 
708  CSeqdesc::E_Choice choice = descr.Which();
709 
710  for (auto& dst_descr: dst.Get()) {
711  if (dst_descr->Which() == choice) {
712 
713  if (dst_descr->Equals(descr)) {
714  ret = false;
715  break;
716  }
717  }
718  }
719  }
720  return ret;
721 }
722 
724 {
725  if (!src.IsSet()) {
726  return;
727  }
728 
729  for (auto& descr: src.Get())
730  {
731  CSeqdesc::E_Choice choice = descr->Which();
732  if (MultipleAllowed(choice)) {
733 
734  if (NeedToInclude(*descr, dst)) {
735  dst.Set().push_back(descr);
736  }
737  }
738  else {
739 
740  if (find_if(dst.Set().begin(), dst.Set().end(), [choice](const CRef<CSeqdesc>& cur_descr) { return cur_descr->Which() == choice; } ) == dst.Set().end()) {
741  dst.Set().push_back(descr);
742  }
743  }
744  }
745 }
746 
747 
749  TSeqEntryArray& seq_entry_array) const
750 {
752  CSeq_entry& seq_entry = **it;
753  CSeq_descr seq_descr;
754  xFlattenSeqEntry(seq_entry, seq_descr, seq_entry_array);
755  }
756 }
757 
758 static bool NeedToProcess(const CSeq_entry& entry, bool set_of_any_type_allowed)
759 {
760  if (entry.IsSeq()) {
761  return false;
762  }
763 
764  bool set_of_allowed_type = set_of_any_type_allowed;
765  if (!set_of_any_type_allowed) {
766  set_of_allowed_type = entry.GetSet().IsSetClass() &&
768  }
769 
770  return set_of_allowed_type;
771 }
772 
774  const CSeq_descr& seq_descr,
775  TSeqEntryArray& seq_entry_array,
776  bool process_set_of_any_type) const
777 {
778  if (NeedToProcess(entry, process_set_of_any_type)) {
779 
780  // Class has to be either genbank or pub_set,
781  // or it can be a top level Seq-entry containing set of any type.
782  // These sets should not have annotations
783  const CBioseq_set& seq_set = entry.GetSet();
784 
785  if (seq_set.IsSetAnnot()) {
786 
787  string class_string = (seq_set.GetClass() == CBioseq_set::eClass_genbank)
788  ? "Genbank set" : "Pub-set";
789 
790  string err_msg = "Wrapper " + class_string + "has non-empty annotation.";
791 
792  NCBI_THROW(CSeqSubSplitException, eInvalidAnnot, err_msg);
793  }
794 
795  CSeq_descr new_descr;
796  ITERATE(CSeq_descr::Tdata, it, seq_descr.Get()) {
797  new_descr.Set().push_back(*it);
798  }
799 
800  if (entry.GetSet().IsSetDescr()) {
801  xMergeSeqDescr(entry.GetSet().GetDescr(), new_descr);
802  }
803 
805  xFlattenSeqEntry(**it, new_descr, seq_entry_array);
806  }
807  }
808  else {
809  CRef<CSeq_entry> new_entry = Ref(&entry);
810 
811  if (seq_descr.IsSet()) {
812  CSeq_descr& entry_descr = (entry.IsSeq())
813  ? entry.SetSeq().SetDescr()
814  : entry.SetSet().SetDescr();
815 
816 
817  xMergeSeqDescr(seq_descr, entry_descr);
818  }
819 
820  seq_entry_array.push_back(new_entry);
821  }
822 }
823 
824 } // namespace
825 
827 
828 
830 
831 int main(int argc, const char** argv)
832 {
833  return seqsubmit_split::CSeqSubSplitter().AppMain(argc, argv, 0, eDS_ToStderr, 0);
834 }
835 
User-defined methods of the data storage class.
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
const CBioseq & GetNucFromNucProtSet(void) const
Definition: Bioseq_set.cpp:227
const CSeq_id * GetLocalId() const
Find a local ID if present.
Definition: Bioseq.cpp:336
CObjectIStream –.
Definition: objistr.hpp:93
CObjectOStream –.
Definition: objostr.hpp:83
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
Definition: Seq_entry.hpp:56
const CSeq_descr & GetDescr(void) const
Definition: Seq_entry.cpp:120
void SetDescr(CSeq_descr &value)
Definition: Seq_entry.cpp:134
bool IsSetDescr(void) const
Definition: Seq_entry.cpp:106
bool IsEntrys(void) const
Definition: Seq_submit.cpp:54
Base class for all serializable objects.
Definition: serialbase.hpp:150
virtual CRef< CSerialObject > BuildObject() const =0
virtual TSeqEntryList & GetListOfEntries(CSerialObject &obj) const =0
virtual CRef< CSerialObject > BuildObject() const
Definition: ssub_fork.cpp:595
virtual TSeqEntryList & GetListOfEntries(CSerialObject &obj) const
Definition: ssub_fork.cpp:606
CSeqEntryHelper(CSeq_entry &seq_entry)
Definition: ssub_fork.cpp:591
void xFlattenSeqEntrys(CSeq_submit::TData::TEntrys &entries, TSeqEntryArray &seq_entry_array) const
Definition: ssub_fork.cpp:748
bool xTryReadInputFile(CRef< CSerialObject > &obj) const
Definition: ssub_fork.cpp:339
string xGetFileExtension(const string &filename) const
Definition: ssub_fork.cpp:274
bool xTryProcessSeqSubmit(CRef< CSerialObject > &obj, list< CRef< CSerialObject >> &output_array) const
Definition: ssub_fork.cpp:573
void xFlattenSeqEntry(CSeq_entry &seq_entry, const CSeq_descr &seq_descr, TSeqEntryArray &seq_entry_array, bool process_set_of_any_type=false) const
Definition: ssub_fork.cpp:773
bool xTryProcessSeqEntries(const CObjectHelper &builder, TSeqEntryArray &seq_entry_array, list< CRef< CSerialObject >> &output_array) const
Definition: ssub_fork.cpp:492
CObjectOStream * xInitOutputStream(const string &output_stub, const TSeqPos output_index, const TSeqPos pad_width, const string &output_extension, const bool binary) const
Definition: ssub_fork.cpp:287
void xMergeSeqDescr(const CSeq_descr &src, CSeq_descr &dst) const
Definition: ssub_fork.cpp:723
CRef< CSerialObject > xGetInputObject() const
Definition: ssub_fork.cpp:326
void Init()
Initialize the application.
Definition: ssub_fork.cpp:118
void xWrapSeqEntries(TSeqEntryArray &seq_entry_array, const TSeqPos &bundle_size, TSeqEntryArray &wrapped_entry_array) const
Definition: ssub_fork.cpp:472
int Run()
Run the application.
Definition: ssub_fork.cpp:208
CObjectIStream * xInitInputStream() const
Definition: ssub_fork.cpp:645
bool xTryProcessSeqEntry(CRef< CSerialObject > &obj, list< CRef< CSerialObject >> &output_array) const
Definition: ssub_fork.cpp:617
virtual CRef< CSerialObject > BuildObject() const
Definition: ssub_fork.cpp:554
virtual TSeqEntryList & GetListOfEntries(CSerialObject &obj) const
Definition: ssub_fork.cpp:562
CSeqSubmitHelper(CSeq_submit &seq_submit)
Definition: ssub_fork.cpp:550
Include a standard set of the NCBI C++ Toolkit most basic headers.
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:832
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1208
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eTakeOwnership
An object can take ownership of another.
Definition: ncbi_types.h:136
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eBoolean
{'true', 't', 'false', 'f'}, case-insensitive
Definition: ncbiargs.hpp:590
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
string
Definition: cgiapp.hpp:690
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
@ eDS_ToStderr
To standard error stream.
Definition: ncbidiag.hpp:1782
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
ESerialDataFormat
Data file format.
Definition: serialdef.hpp:71
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
@ eSerial_AsnBinary
ASN.1 binary.
Definition: serialdef.hpp:74
int CompareOrdered(const CSeq_id &sid2) const
Definition: Seq_id.cpp:486
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
Definition: objostr.cpp:126
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:1401
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
bool IsGeneral(void) const
Check if variant General is selected.
Definition: Seq_id_.hpp:877
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
bool IsSetDescr(void) const
Check if a value has been assigned to Descr data member.
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
void SetClass(TClass value)
Assign a value to Class data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_genbank
converted genbank
@ eClass_pub_set
all the seqs from a single publication
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
Definition: Bioseq_.hpp:324
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
bool IsSetLength(void) const
length of sequence in residues Check if a value has been assigned to Length data member.
Definition: Seq_inst_.hpp:640
E_Choice
Choice variants.
Definition: Seqdesc_.hpp:109
bool IsSet(void) const
Check if a value has been assigned to data member.
Definition: Seq_descr_.hpp:154
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seqdesc_.hpp:903
bool IsSetId(void) const
equivalent identifiers Check if a value has been assigned to Id data member.
Definition: Bioseq_.hpp:278
Tdata & Set(void)
Assign a value to data member.
Definition: Seq_descr_.hpp:172
@ e_Het
cofactor, etc associated but not bound
Definition: Seqdesc_.hpp:132
@ e_Num
a numbering system
Definition: Seqdesc_.hpp:118
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Pub
a reference to the publication
Definition: Seqdesc_.hpp:122
@ e_Dbxref
xref to other databases
Definition: Seqdesc_.hpp:126
@ e_Comment
a more extensive comment
Definition: Seqdesc_.hpp:117
@ e_Modelev
model evidence for XM records
Definition: Seqdesc_.hpp:135
@ e_Region
overall region (globin locus)
Definition: Seqdesc_.hpp:123
@ e_Maploc
map location of this sequence
Definition: Seqdesc_.hpp:119
@ e_Name
a name for this sequence
Definition: Seqdesc_.hpp:114
list< CRef< CSeq_entry > > TEntrys
void SetSub(TSub &value)
Assign a value to Sub data member.
void SetData(TData &value)
Assign a value to Data data member.
n padding
int i
static bool MultipleAllowed(CSeqdesc::E_Choice choice)
Definition: ssub_fork.cpp:684
static bool NeedToInclude(const CSeqdesc &descr, const CSeq_descr &dst)
Definition: ssub_fork.cpp:702
vector< CRef< CSeq_entry > > TSeqEntryArray
Definition: ssub_fork.cpp:60
static bool NeedToProcess(const CSeq_entry &entry, bool set_of_any_type_allowed)
Definition: ssub_fork.cpp:758
list< CRef< CSeq_entry > > TSeqEntryList
Definition: ssub_fork.cpp:61
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines NCBI C++ diagnostic APIs, classes, and macros.
Defines NCBI C++ exception handling.
T log10(T x_)
std::istream & in(std::istream &in_, double &x_)
USING_SCOPE(objects)
int main(int argc, const char **argv)
Definition: ssub_fork.cpp:831
USING_NCBI_SCOPE
Definition: ssub_fork.cpp:829
bool operator()(const CRef< CSeq_entry > &e1, const CRef< CSeq_entry > &e2) const
Definition: ssub_fork.cpp:363
bool compare_seq(const CBioseq &b1, const CBioseq &b2) const
Definition: ssub_fork.cpp:372
bool compare_seq(const CBioseq &b1, const CBioseq &b2) const
Definition: ssub_fork.cpp:443
CConstRef< CSeq_id > xGetGeneralId(const CBioseq &bioseq) const
Definition: ssub_fork.cpp:419
CConstRef< CSeq_id > xGetId(const CBioseq &bioseq) const
Definition: ssub_fork.cpp:434
bool compare_seq(const CBioseq &b1, const CBioseq &b2) const
Definition: ssub_fork.cpp:381
bool compare_seq(const CBioseq &b1, const CBioseq &b2) const
Definition: ssub_fork.cpp:400
static wxAcceleratorEntry entries[3]
Modified on Fri Sep 20 14:58:21 2024 by modify_doxy.py rev. 669887