NCBI C++ ToolKit
asn_cleanup.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: asn_cleanup.cpp 101210 2023-11-16 14:19:11Z gotvyans $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Aaron Ucko, Mati Shomrat, Colleen Bollin, NCBI
27  *
28  * File Description:
29  * runs ExtendedCleanup on ASN.1 files
30  *
31  * ===========================================================================
32  */
33 #include <ncbi_pch.hpp>
34 #include <common/ncbi_source_ver.h>
35 #include <corelib/ncbiapp.hpp>
37 #include <objmgr/util/sequence.hpp>
38 
40 
45 
50 
53 #include "huge_file_cleanup.hpp"
54 
55 #include "read_hooks.hpp"
56 #include "bigfile_processing.hpp"
57 
58 #include <common/ncbi_revision.h>
59 
60 #ifndef NCBI_SC_VERSION
61 # define THIS_IS_TRUNK_BUILD
62 #elif (NCBI_SC_VERSION == 0)
63 # define THIS_IS_TRUNK_BUILD
64 #endif
65 
68 
71 
77 };
78 
79 struct TThreadState {
81  bool m_IsMultiSeq = false;
83 };
84 
85 class CCleanupApp :
86  public CNcbiApplication,
88  public ISubmitBlockHandler,
89  public IProcessorCallback
90 {
91 public:
92  CCleanupApp();
93  void Init() override;
94  int Run() override;
95 
96  bool HandleSubmitBlock(CSubmit_block& block) override;
97  bool HandleSeqEntry(CRef<CSeq_entry>& se) override;
99  bool HandleSeqID(const string& seqID);
100 
101  // IProcessorCallback interface functionality
102  void Process(CRef<CSerialObject>& obj) override;
103 
104 private:
105  // types
106 
107  void x_OpenOStream(const string& filename, const string& dir = kEmptyStr, bool remove_orig_dir = true);
108  void x_CloseOStream();
109  bool x_ProcessBigFile(unique_ptr<CObjectIStream>& is, TTypeInfo asn_type);
110  void x_ProcessOneFile(unique_ptr<CObjectIStream>& is, EProcessingMode mode, TTypeInfo asn_type);
111  void x_ProcessOneFile(const string& filename);
112  void x_ProcessOneDirectory(const string& dirname, const string& suffix);
113 
114  bool x_ProcessHugeFile(edit::CHugeFileProcess& process);
115  bool x_ProcessHugeFileBlob(edit::CHugeFileProcess& process);
116  CConstRef<CSerialObject> x_ProcessTraditionally(edit::CHugeAsnReader& reader);
117  void x_ProcessTraditionally(edit::CHugeFileProcess& process);
118 
119  void x_FeatureOptionsValid(const string& opt);
120  void x_KOptionsValid(const string& opt);
121  void x_XOptionsValid(const string& opt);
122  bool x_ProcessFeatureOptions(const string& opt, CSeq_entry_Handle seh);
124  bool x_ProcessXOptions(const string& opt, CSeq_entry_Handle seh, Uint4 options);
125  bool x_GFF3Batch(CSeq_entry_Handle seh);
130  };
132 
133  bool x_FixCDS(CSeq_entry_Handle seh, Uint4 options, const string& missing_prot_name);
135  bool x_BasicAndExtended(CSeq_entry_Handle entry, const string& label, Uint4 options = 0);
136 
137  bool x_ReportChanges(const string_view prefix, CCleanupChangeCore changes);
138 
139  // template<typename T> void x_WriteToFile(const T& s);
140 
141  // data
142  unique_ptr<edit::CRemoteUpdater> m_remote_updater;
143 
144  unique_ptr<CObjectOStream> m_Out; // output
145  CRef<CObjectManager> m_Objmgr; // Object Manager
146  bool m_do_basic = false;
147  bool m_do_extended = false;
149  bool m_IsHugeSet = false;
150 };
151 
152 
154 {
156 }
157 
159 {
160  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
161  arg_desc->SetUsageContext("", "Perform ExtendedCleanup on an ASN.1 Seq-entry into a flat report");
162 
163  // input
164  {
165  // name
166  arg_desc->AddOptionalKey("i", "InputFile",
167  "Input file name", CArgDescriptions::eInputFile);
168 
169  // input file serial format (AsnText\AsnBinary\XML, default: AsnText)
170  arg_desc->AddOptionalKey("serial", "SerialFormat", "Obsolete; Input file format is now autodetected",
172 
173  // output file serial format (AsnText\AsnBinary\XML, default: AsnText)
174  arg_desc->AddOptionalKey("outformat", "OutputSerialFormat", "Output file format",
176  arg_desc->SetConstraint("outformat", &(*new CArgAllow_Strings,
177  "text", "binary", "XML", "JSON"));
178 
179  // id
180  arg_desc->AddOptionalKey("id", "ID",
181  "Specific ID to display", CArgDescriptions::eString);
182 
183  // input type:
184  arg_desc->AddOptionalKey("type", "AsnType", "Obsolete; ASN.1 object type is now autodetected",
186 
187  // path
188  arg_desc->AddOptionalKey("indir", "path", "Path to files", CArgDescriptions::eDirectory);
189 
190  // suffix
191  arg_desc->AddDefaultKey("x", "suffix", "File Selection Suffix", CArgDescriptions::eString, ".ent");
192 
193  // results
194  arg_desc->AddOptionalKey("outdir", "results", "Path for Results", CArgDescriptions::eDirectory);
195  }
196 
197  // batch processing
198  {
199  arg_desc->AddFlag("batch", "Process NCBI release file (Deprecated)",
201  }
202 
203  // big file processing
204  {
205  arg_desc->AddFlag("bigfile", "Process big files containing many bioseqs");
206  }
207 
208  // output
209  {
210  // name
211  arg_desc->AddOptionalKey("o", "OutputFile",
212  "Output file name", CArgDescriptions::eOutputFile);
213  }
214 
215  // normal cleanup options (will replace -nocleanup and -basic)
216  {
217  arg_desc->AddOptionalKey("K", "Cleanup", "Systemic Cleaning Options\n"
218  "\tb Basic\n"
219  "\ts Extended\n"
220  "\tn Normalize Descriptor Order\n"
221  "\tu Remove Cleanup User-object\n",
223  }
224 
225  // extra cleanup options
226  {
227  arg_desc->AddOptionalKey("F", "Feature", "Feature Cleaning Options\n"
228  "\tr Remove Redundant Gene xref\n"
229  "\ta Adjust for Missing Stop Codon\n"
230  "\tp Clear internal partials\n"
231  "\tz Delete or Update EC Numbers\n"
232  "\td Remove duplicate features\n",
234 
235  arg_desc->AddOptionalKey("X", "Miscellaneous", "Other Cleaning Options\n"
236  "\td Automatic Definition Line\n"
237  "\tw GFF/WGS Genome Cleanup\n"
238  "\tr Regenerate Definition Lines\n"
239  "\tb Batch Cleanup of Multireader Output\n"
240  "\ta Remove Assembly Gaps\n"
241  "\ti Make Influenza Small Genome Sets\n"
242  "\tf Make IRD misc_feats\n",
244 
245  arg_desc->AddFlag("T", "TaxonomyLookup");
246  }
247 
248  // misc
249  {
250  // no-cleanup
251  arg_desc->AddFlag("nocleanup",
252  "Do not perform extended data cleanup prior to formatting");
253  arg_desc->AddFlag("basic",
254  "Perform basic data cleanup prior to formatting");
255  arg_desc->AddFlag("noobj",
256  "Do not create Ncbi_cleanup object");
257 
258  // show progress
259  arg_desc->AddFlag("showprogress",
260  "List ID for which cleanup is occuring");
261  arg_desc->AddFlag("debug", "Save before.sqn");
262 
263  // huge mode
264  arg_desc->AddFlag("huge",
265  "Process file in huge files mode");
266  arg_desc->AddFlag("disable-huge",
267  "Explicitly disable huge files mode");
268  arg_desc->SetDependency("disable-huge",
270  "huge");
271  }
272 
273  // remote
275 
276  SetupArgDescriptions(arg_desc.release());
277 }
278 
279 
280 void CCleanupApp::x_FeatureOptionsValid(const string& opt)
281 {
282  if (NStr::IsBlank(opt)) {
283  return;
284  }
285  string unrecognized;
286  for (char c : opt) {
287  if (! isspace(c)) {
288  if (c != 'r' && c != 'a' && c != 'p' && c != 'z' && c != 'd') {
289  unrecognized += c;
290  }
291  }
292  }
293  if (! unrecognized.empty()) {
294  NCBI_THROW(CArgException, eInvalidArg, "Invalid -F arguments:" + unrecognized);
295  }
296 }
297 
298 
299 void CCleanupApp::x_KOptionsValid(const string& opt)
300 {
301  if (NStr::IsBlank(opt)) {
302  return;
303  }
304  string unrecognized;
305  for (char c : opt) {
306  if (! isspace(c)) {
307  if (c != 'b' && c != 's' && c != 'u' && c != 'n') {
308  unrecognized += c;
309  }
310  }
311  }
312  if (! unrecognized.empty()) {
313  NCBI_THROW(CArgException, eInvalidArg, "Invalid -K arguments:" + unrecognized);
314  }
315 }
316 
317 
318 void CCleanupApp::x_XOptionsValid(const string& opt)
319 {
320  if (NStr::IsBlank(opt)) {
321  return;
322  }
323  string unrecognized;
324  for (char c : opt) {
325  if (! isspace(c)) {
326  if (c != 'w' && c != 'r' && c != 'b' && c != 'a' &&
327  c != 'i' && c != 'f' && c != 'd') {
328  unrecognized += c;
329  }
330  }
331  }
332  if (! unrecognized.empty()) {
333  NCBI_THROW(CArgException, eInvalidArg, "Invalid -X arguments:" + unrecognized);
334  }
335 }
336 
337 
338 bool CCleanupApp::x_ProcessBigFile(unique_ptr<CObjectIStream>& is, TTypeInfo asn_type)
339 {
340  EBigFileContentType content_type = eContentUndefined;
341  if (asn_type == CSeq_entry::GetTypeInfo()) {
342  content_type = eContentSeqEntry;
343  } else if (asn_type == CBioseq_set::GetTypeInfo()) {
344  content_type = eContentBioseqSet;
345  } else if (asn_type == CSeq_submit::GetTypeInfo()) {
346  content_type = eContentSeqSubmit;
347  } else {
348  _ASSERT(0);
349  }
350 
351  return ProcessBigFile(*is, *m_Out, *this, content_type);
352 }
353 
354 void CCleanupApp::x_ProcessOneFile(unique_ptr<CObjectIStream>& is, EProcessingMode mode, TTypeInfo asn_type)
355 {
356  if (mode == eModeBatch) {
357  CGBReleaseFile in(*is.release());
358  in.RegisterHandler([this](CRef<CSeq_entry>& entry) -> bool
359  {
360  return HandleSeqEntry(entry);
361  });
362  in.Read(); // HandleSeqEntry will be called from this function
363  } else if (mode == eModeBigfile) {
364  x_ProcessBigFile(is, asn_type);
365  }
366 }
367 
368 static bool s_IsHugeMode(const CArgs& args, const CNcbiRegistry& cfg)
369 {
370  if (args["disable-huge"])
371  return false;
372  if (args["huge"])
373  return true;
374  return cfg.GetBool("asn_cleanup", "UseHugeFiles", false);
375 }
376 
377 void CCleanupApp::x_ProcessOneFile(const string& filename)
378 {
379  const CArgs& args = GetArgs();
380 
381  m_state = TThreadState();
384 
385  _ASSERT(! NStr::IsBlank(filename));
386 
387  if (args["type"]) {
388  cerr << "Warning: -type argument should not be used; ASN.1 object type is now autodetected." << endl;
389  }
390  if (args["serial"]) {
391  cerr << "Warning: -serial argument should not be used; Input file format is now autodetected." << endl;
392  }
393 
394  CCleanupHugeAsnReader::TOptions options{ 0 };
395  if (m_do_extended) {
397  }
398  if (args["noobj"]) {
400  }
401 
402  if (args["X"] && (NStr::Find(args["X"].AsString(), "i") != NPOS)) {
404  }
405 
406  edit::CHugeFileProcess huge_process(new CCleanupHugeAsnReader(options));
407  huge_process.OpenFile(filename);
408 
409  TTypeInfo asn_type = huge_process.GetFile().m_content;
410  if (! asn_type) {
411  string msg = "Unable to open input file " + filename + ". Content not recognized.";
412  NCBI_THROW(CArgException, eInvalidArg, msg);
413  }
414 
415  // need to set output if -o not specified
416  bool opened_output = false;
417 
418  if (! args["o"] && args["outdir"]) {
419  x_OpenOStream(filename, args["outdir"].AsString());
420  opened_output = true;
421  }
422 
424  m_state.m_IsMultiSeq = false;
425  if (asn_type == CBioseq::GetTypeInfo()) {
426  // always regular mode
427  mode = eModeRegular;
428  } else if (s_IsHugeMode(args, GetConfig())) {
430  } else if (args["batch"]) {
431  mode = eModeBatch;
432  } else if (args["bigfile"]) {
433  mode = eModeBigfile;
434  }
435 
436  if (mode == eModeHugefile) {
437  huge_process.OpenReader();
438  x_ProcessHugeFile(huge_process);
439  } else if (mode == eModeRegular) {
440  huge_process.OpenReader();
441  x_ProcessTraditionally(huge_process);
442  } else {
443  unique_ptr<CObjectIStream> is = huge_process.GetFile().MakeObjStream(0);
444  x_ProcessOneFile(is, mode, asn_type);
445  }
446 
447  m_state.m_changes += dynamic_cast<CCleanupHugeAsnReader&>(huge_process.GetReader()).GetChanges();
448 
449  if (opened_output) {
450  // close output file if we opened one
451  x_CloseOStream();
452  }
453 }
454 
455 void CCleanupApp::x_ProcessTraditionally(edit::CHugeFileProcess& process)
456 {
457  bool proceed = true;
458  size_t num_cleaned = 0;
459  auto reader = process.GetReader();
460 
461  while (proceed) {
462 
463  auto anytop = x_ProcessTraditionally(reader);
464  proceed = anytop;
465 
466  if (anytop) {
467  *m_Out << *anytop;
468  }
469 
470  if (proceed) {
471  ++num_cleaned;
472  }
473  }
474 
475  // if (num_cleaned == 0 || (! first_only && (is->GetFailFlags() & CObjectIStream::fEOF) != CObjectIStream::fEOF)) {
476  // NCBI_THROW(CArgException, eInvalidArg, "Unable to construct Seq-entry object");
477  // }
478 }
479 
481 {
482  auto anytop = reader.ReadAny();
483  if (anytop.Empty())
484  return {};
485 
486  CConstRef<CSerialObject> topobject;
487  CRef<CSeq_entry> topentry;
488 
489  if (anytop->GetThisTypeInfo() == CSeq_entry::GetTypeInfo()) {
491  HandleSeqEntry(topentry);
492  topobject = topentry;
493  } else if (anytop->GetThisTypeInfo() == CSeq_submit::GetTypeInfo()) {
494  auto submit = Ref(CTypeConverter<CSeq_submit>::SafeCast(anytop));
495  if (submit->IsEntrys()) {
496  topentry = submit->SetData().SetEntrys().front();
497  if (submit->IsSetSub()) {
498  HandleSubmitBlock(submit->SetSub());
499  }
500  submit->SetData().SetEntrys().clear();
501  HandleSeqEntry(topentry);
502  submit->SetData().SetEntrys().push_back(topentry);
503  }
504  topobject = submit;
505  } else if (anytop->GetThisTypeInfo() == CBioseq_set::GetTypeInfo()) {
506  auto bioset = Ref(CTypeConverter<CBioseq_set>::SafeCast(anytop));
507  topentry = Ref(new CSeq_entry);
508  topentry->SetSet(*bioset);
509  bioset.Reset();
510  HandleSeqEntry(topentry);
511  if (topentry->IsSet())
512  topobject.Reset(&topentry->GetSet());
513  else
514  topobject.Reset(&topentry->GetSeq());
515  } else if (anytop->GetThisTypeInfo() == CBioseq::GetTypeInfo()) {
516  auto bioseq = Ref(CTypeConverter<CBioseq>::SafeCast(anytop));
517  topentry = Ref(new CSeq_entry);
518  topentry->SetSeq(*bioseq);
519  bioseq.Reset();
520  HandleSeqEntry(topentry);
521  if (topentry->IsSet())
522  topobject.Reset(&topentry->GetSet());
523  else
524  topobject.Reset(&topentry->GetSeq());
525  } else {
526  //_ASSERT(0);
527  }
528  return topobject;
529 }
530 
531 bool CCleanupApp::x_ProcessHugeFileBlob(edit::CHugeFileProcess& process)
532 {
534 
535  CConstRef<CSerialObject> topobject; // top object is used to write output, can be submit, entry, bioseq, bioseq_set
536 
537  CRef<CSeq_submit> submit;
538  CRef<CSeq_entry> topentry;
539 
541 
542  topentry = Ref(new CSeq_entry);
543  auto& reader = dynamic_cast<CCleanupHugeAsnReader&>(process.GetReader());
544 
545  if (reader.GetTopEntry()) {
546  topentry->Assign(*reader.GetTopEntry());
547  } else {
549  topentry->SetSet().SetSeq_set().clear();
550  }
552 
553  if (reader.GetSubmitBlock()) {
554  submit.Reset(new CSeq_submit);
555  submit->SetSub().Assign(*reader.GetSubmitBlock());
556  submit->SetData().SetEntrys().clear();
557  submit->SetData().SetEntrys().push_back(topentry);
558  HandleSubmitBlock(submit->SetSub());
559  }
560 
561  if (submit)
562  topobject = submit;
563  else
564  topobject = topentry;
565 
566  writer.StartWriter(topobject);
567  try {
568  bool proceed = process.ForEachEntry(
570  [this, &writer](CSeq_entry_Handle seh) -> bool {
571  HandleSeqEntry(seh.GetEditHandle());
572  writer.PushNextEntry(seh.GetCompleteSeq_entry());
573  return true;
574  });
575  writer.FinishWriter();
576  return proceed;
577  } catch(...) {
578  writer.CancelWriter();
579  throw;
580  }
581 }
582 
583 bool CCleanupApp::x_ProcessHugeFile(edit::CHugeFileProcess& process)
584 {
585  return process.ForEachBlob([this](edit::CHugeFileProcess& p_process) -> bool {
586  m_state.m_IsMultiSeq = p_process.GetReader().IsMultiSequence();
587  if (m_state.m_IsMultiSeq) {
588  bool proceed = x_ProcessHugeFileBlob(p_process);
589  if (! proceed)
590  return false;
591  } else {
592  auto topobject = x_ProcessTraditionally(p_process.GetReader());
593  m_Out->ResetLocalHooks();
594  *m_Out << *topobject;
595  }
596  return true;
597  });
598 }
599 
600 
601 void CCleanupApp::x_ProcessOneDirectory(const string& dirname, const string& suffix)
602 {
603  CDir dir(dirname);
604 
605  string mask = "*" + suffix;
606  size_t num_files = 0;
607 
609  for (CDir::TEntry ii : files) {
610  if (ii->IsFile()) {
611  string fname = CDirEntry::MakePath(dirname, ii->GetName());
612  x_ProcessOneFile(fname);
613  num_files++;
614  }
615  }
616  if (num_files == 0) {
617  NCBI_THROW(CArgException, eInvalidArg, "No files found!");
618  }
619 }
620 
621 
623 {
624  // initialize conn library
626 
627  const CArgs& args = GetArgs();
628 
629  // flag validation
630  if (args["F"]) {
631  x_FeatureOptionsValid(args["F"].AsString());
632  }
633  if (args["K"]) {
634  x_KOptionsValid(args["K"].AsString());
635  }
636  if (args["X"]) {
637  x_XOptionsValid(args["X"].AsString());
638  }
639  if (args["batch"] && args["bigfile"]) {
640  NCBI_THROW(CArgException, eInvalidArg, "\"batch\" and \"bigfile\" arguments are incompatible. Only one of them may be used.");
641  }
642  if (args["X"] && args["bigfile"]) {
643  NCBI_THROW(CArgException, eInvalidArg, "\"X\" and \"bigfile\" arguments are incompatible. Only one of them may be used.");
644  }
645 
646  if (args["K"]) {
647  if (NStr::Find(args["K"].AsString(), "b") != string::npos) {
648  m_do_basic = true;
649  }
650  if (NStr::Find(args["K"].AsString(), "s") != string::npos) {
651  m_do_basic = true;
652  m_do_extended = true;
653  }
654  } else if (args["X"]) {
655  m_do_basic = true;
656  if (NStr::Find(args["X"].AsString(), "w") != string::npos) {
657  // Extended Cleanup is part of -X w
658  m_do_extended = false;
659  }
660  } else if (args["F"]) {
661  m_do_basic = true;
662  } else {
663  if (args["basic"]) {
664  m_do_basic = true;
665  }
666  if (! args["nocleanup"]) {
667  m_do_extended = true;
668  }
669  }
670 
671  // create object manager
673  if ( !m_Objmgr ) {
674  NCBI_THROW(CArgException, eInvalidArg, "Could not create object manager");
675  }
676 
678 
679  m_remote_updater.reset(new edit::CRemoteUpdater(nullptr));
680 
681  // need to set output (-o) if specified, if not -o and not -outdir need to use standard output
682  bool opened_output = false;
683  if (args["o"]) {
684  string abs_output_path = CDirEntry::CreateAbsolutePath(args["o"].AsString());
685  if (args["i"]) {
686  string fname = args["i"].AsString();
687  if (args["indir"]) {
688  fname = CDirEntry::MakePath(args["indir"].AsString(), fname);
689  }
690  if (abs_output_path == CDirEntry::CreateAbsolutePath(fname)) {
691  ERR_POST("Input and output files should be different");
692  return 1;
693  }
694  }
695  x_OpenOStream(args["o"].AsString(),
696  args["outdir"] ? args["outdir"].AsString() : kEmptyStr,
697  false);
698  opened_output = true;
699  } else if (! args["outdir"] || args["id"]) {
701  opened_output = true;
702  }
703 
704  if (args["id"]) {
705  string seqID = args["id"].AsString();
706  HandleSeqID(seqID);
707  } else if (args["i"]) {
708  string fname = args["i"].AsString();
709  if (args["indir"]) {
710  fname = CDirEntry::MakePath(args["indir"].AsString(), fname);
711  }
712  x_ProcessOneFile(fname);
713  } else if (args["outdir"]) {
714  x_ProcessOneDirectory(args["indir"].AsString(), args["x"].AsString());
715  } else {
716  cerr << "Error: stdin is no longer supported; please use -i" << endl;
717  }
718 
719  if (opened_output) {
720  // close output file if we opened one
721  x_CloseOStream();
722  }
723 
724  if (m_do_basic && !m_do_extended)
725  x_ReportChanges("BasicCleanup", m_state.m_changes);
726  if (m_do_extended)
727  x_ReportChanges("ExtendedCleanup", m_state.m_changes);
728 
729  #ifdef THIS_IS_TRUNK_BUILD
730  m_remote_updater->ReportStats(std::cerr);
731  #endif
732 
733  return 0;
734 }
735 
736 bool CCleanupApp::HandleSeqID(const string& seq_id)
737 {
738  CRef<CScope> scope(new CScope(*m_Objmgr));
739  scope->AddDefaults();
740 
741  CBioseq_Handle bsh;
742  try {
743  CSeq_id SeqId(seq_id);
744  bsh = scope->GetBioseqHandle(SeqId);
745  } catch (CException&) {
746  ERR_FATAL("The ID " << seq_id << " is not a valid seq ID.");
747  }
748 
749  if (! bsh) {
750  ERR_FATAL("Sequence for " << seq_id << " cannot be retrieved.");
751  return false;
752  }
753 
754  CRef<CSeq_entry> entry(new CSeq_entry());
756  HandleSeqEntry(entry);
757  *m_Out << *entry;
758 
759  return true;
760 }
761 
763 {
764  if (NStr::IsBlank(opt)) {
765  return false;
766  }
767  bool any_changes = false;
768  if (NStr::Find(opt, "r") != string::npos) {
769  any_changes |= CCleanup::RemoveUnnecessaryGeneXrefs(seh);
770  }
771  if (NStr::Find(opt, "a") != string::npos) {
772  any_changes |= x_FixCDS(seh, eFixCDS_ExtendToStop, kEmptyStr);
773  }
774  if (NStr::Find(opt, "p") != string::npos) {
775  any_changes |= CCleanup::ClearInternalPartials(seh);
776  }
777  if (NStr::Find(opt, "z") != string::npos) {
778  any_changes |= CCleanup::FixECNumbers(seh);
779  }
780  if (NStr::Find(opt, "d") != string::npos) {
781  any_changes |= x_RemoveDuplicateFeatures(seh);
782  }
783  return any_changes;
784 }
785 
787 {
788  bool any_change = false;
790  if (deleted_feats.empty()) {
791  return false;
792  }
794  for (auto df : deleted_feats) {
796  eh.Remove();
797  any_change = true;
798  }
799  for (auto orph : orphans) {
800  CBioseq_EditHandle eh(orph);
801  eh.Remove();
802  any_change = true;
803  }
804  any_change |= CCleanup::RenormalizeNucProtSets(seh);
805  return any_change;
806 
807 }
808 
809 bool CCleanupApp::x_ProcessXOptions(const string& opt, CSeq_entry_Handle seh, Uint4 options)
810 {
811  bool any_changes = false;
812  if (NStr::Find(opt, "w") != string::npos) {
813  any_changes = CCleanup::WGSCleanup(seh, true, options);
814  }
815  if (NStr::Find(opt, "r") != string::npos) {
816  bool change_defline = CAutoDefWithTaxonomy::RegenerateDefLines(seh);
817  if (change_defline) {
818  any_changes = true;
820  }
821  }
822  if (NStr::Find(opt, "b") != string::npos) {
823  any_changes |= x_GFF3Batch(seh);
824  }
825  if (NStr::Find(opt, "a") != string::npos) {
826  any_changes |= CCleanup::ConvertDeltaSeqToRaw(seh);
827  }
828  if (! m_IsHugeSet && (NStr::Find(opt, "i") != string::npos)) {
829  if (CCleanup::MakeSmallGenomeSet(seh) > 0) {
830  any_changes = true;
831  }
832  }
833  if (NStr::Find(opt, "f") != string::npos) {
835  any_changes = true;
836  }
837  }
838  if (NStr::Find(opt, "d") != string::npos) {
839  CCleanup::AutodefId(seh);
840  any_changes = true;
841  }
842  return any_changes;
843 }
844 
845 
847 {
848  if (! sf.GetData().IsCdregion()) {
849  // not coding region
850  return false;
851  }
852  if (sequence::IsPseudo(sf, b.GetScope())) {
853  return false;
854  }
855 
856  // check for existing stop codon
857  string translation;
858  try {
859  CSeqTranslator::Translate(sf, b.GetScope(), translation, true);
860  } catch (CSeqMapException& e) {
861  cout << e.what() << endl;
862  return false;
863  } catch (CSeqVectorException& e) {
864  cout << e.what() << endl;
865  return false;
866  }
867  if (NStr::EndsWith(translation, "*")) {
868  // already has stop codon
869  return false;
870  }
871 
872  if (CCleanup::ExtendToStopCodon(sf, b, 50)) {
873  feature::RetranslateCDS(sf, b.GetScope());
874  return true;
875  } else {
876  return false;
877  }
878 }
879 
880 
881 bool CCleanupApp::x_FixCDS(CSeq_entry_Handle seh, Uint4 options, const string& missing_prot_name)
882 {
883  bool any_changes = false;
884  for (CBioseq_CI bi(seh, CSeq_inst::eMol_na); bi; ++bi) {
885  any_changes |= CCleanup::SetGeneticCodes(*bi);
887  CConstRef<CSeq_feat> orig = fi->GetSeq_feat();
888  CRef<CSeq_feat> sf(new CSeq_feat());
889  sf->Assign(*orig);
890  bool feat_change = false;
891  if ((options & eFixCDS_FrameFromLoc) &&
892  CCleanup::SetFrameFromLoc(sf->SetData().SetCdregion(), sf->GetLocation(), bi.GetScope())) {
893  feat_change = true;
894  }
895  if ((options & eFixCDS_Retranslate)) {
896  feat_change |= feature::RetranslateCDS(*sf, bi.GetScope());
897  }
898  if ((options & eFixCDS_ExtendToStop) &&
899  x_BatchExtendCDS(*sf, *bi)) {
901  if (mrna && CCleanup::LocationMayBeExtendedToMatch(mrna->GetLocation(), sf->GetLocation())) {
902  CRef<CSeq_feat> new_mrna(new CSeq_feat());
903  new_mrna->Assign(*mrna);
904  if (CCleanup::ExtendStopPosition(*new_mrna, sf)) {
906  efh.Replace(*new_mrna);
907  }
908  }
910  if (gene && CCleanup::LocationMayBeExtendedToMatch(gene->GetLocation(), sf->GetLocation())) {
911  CRef<CSeq_feat> new_gene(new CSeq_feat());
912  new_gene->Assign(*gene);
913  if (CCleanup::ExtendStopPosition(*new_gene, sf)) {
915  efh.Replace(*new_gene);
916  }
917  }
918 
919  feat_change = true;
920  }
921  if (feat_change) {
923  ofh.Replace(*sf);
924  any_changes = true;
925  }
926  // also set protein name if missing, change takes place on protein bioseq
927  if (! NStr::IsBlank(missing_prot_name)) {
928  string current_name = CCleanup::GetProteinName(*sf, seh);
929  if (NStr::IsBlank(current_name)) {
930  CCleanup::SetProteinName(*sf, missing_prot_name, false, seh.GetScope());
931  any_changes = true;
932  }
933  }
934  }
935  }
936  return any_changes;
937 }
938 
939 
941 {
942  bool any_changes = x_FixCDS(seh, kGFF3CDSFixOptions, kEmptyStr);
944  cleanup.SetScope(&(seh.GetScope()));
946  auto changes = cleanup.BasicCleanup(seh, options);
947  any_changes |= (! changes.Empty());
948  changes = cleanup.ExtendedCleanup(seh, options);
949  any_changes |= (! changes.Empty());
950  any_changes |= x_FixCDS(seh, 0, "unnamed protein product");
951 
952  return any_changes;
953 }
954 
955 
957 {
958  if (! m_do_basic && ! m_do_extended) {
959  return false;
960  }
961 
962  bool any_changes = false;
964  cleanup.SetScope(&(entry.GetScope()));
965 
966  if (m_state.m_IsMultiSeq) {
968  // if (submit)
969  // options |= CCleanup::eScope_UseInPlace; // RW-1070 - CCleanup::eScope_UseInPlace is essential
970  }
971 
972  if (m_do_basic && ! m_do_extended) {
973  // perform BasicCleanup
974  try {
975  auto changes = *cleanup.BasicCleanup(entry, options);
976  m_state.m_changes += changes;
977  any_changes = ! changes.Empty();
978  } catch (CException& e) {
979  LOG_POST(Error << "error in basic cleanup: " << e.GetMsg() << label);
980  }
981  }
982 
983  if (m_do_extended) {
984  // perform ExtendedCleanup
985  try {
986  auto changes = *cleanup.ExtendedCleanup(entry, options);
987  m_state.m_changes += changes;
988  any_changes = ! changes.Empty();
989  } catch (CException& e) {
990  LOG_POST(Error << "error in extended cleanup: " << e.GetMsg() << label);
991  }
992  }
993  return any_changes;
994 }
995 
996 
998 {
1000  bool any_changes = false;
1001  try {
1002  auto changes = *cleanup.BasicCleanup(block);
1003  any_changes = x_ReportChanges("BasicCleanup of SubmitBlock", changes);
1004  } catch (CException& e) {
1005  LOG_POST(Error << "error in cleanup of SubmitBlock: " << e.GetMsg());
1006  }
1007  return any_changes;
1008 }
1009 
1010 
1012 {
1013  string label;
1015 
1016  const CArgs& args = GetArgs();
1017 
1018  if (args["showprogress"]) {
1019  LOG_POST(Error << label + "\n");
1020  }
1021 
1022  if (args["debug"]) {
1023  ESerialDataFormat outFormat = eSerial_AsnText;
1024 
1025  unique_ptr<CObjectOStream> debug_out(CObjectOStream::Open(outFormat, "before.sqn",
1027 
1028  *debug_out << *(entry.GetCompleteSeq_entry());
1029  }
1030 
1031  bool any_changes = false;
1032 
1033  if (args["T"]) {
1034  validator::CTaxValidationAndCleanup tval(m_remote_updater->GetUpdateFunc());
1035  any_changes |= tval.DoTaxonomyUpdate(entry, true);
1036  }
1037 
1038  if (args["K"] && NStr::Find(args["K"].AsString(), "u") != string::npos) {
1039  CRef<CSeq_entry> se(const_cast<CSeq_entry*>(entry.GetCompleteSeq_entry().GetPointer()));
1040  any_changes |= CCleanup::RemoveNcbiCleanupObject(*se);
1041  }
1042 
1043  Uint4 options = 0;
1044  if (args["noobj"]) {
1046  }
1047  if (m_IsHugeSet) {
1048  options |= CCleanup::eClean_InHugeSeqSet;
1049  }
1050 
1051  any_changes |= x_BasicAndExtended(entry, label, options);
1052 
1053  if (args["F"]) {
1054  any_changes |= x_ProcessFeatureOptions(args["F"].AsString(), entry);
1055  }
1056  if (args["X"]) {
1057  any_changes |= x_ProcessXOptions(args["X"].AsString(), entry, options);
1058  }
1059  if (args["K"] && NStr::Find(args["K"].AsString(), "n") != string::npos && ! m_do_extended) {
1060  any_changes |= CCleanup::NormalizeDescriptorOrder(entry);
1061  }
1062 
1063  return true;
1064 }
1065 
1067 {
1068  if (! se) {
1069  return false;
1070  }
1071 
1072  auto entryHandle = m_state.m_Scope->AddTopLevelSeqEntry(*se);
1073  if (! entryHandle) {
1074  NCBI_THROW(CArgException, eInvalidArg, "Failed to insert entry to scope.");
1075  }
1076 
1077  if (HandleSeqEntry(entryHandle)) {
1078  if (entryHandle.GetCompleteSeq_entry().GetPointer() != se.GetPointer()) {
1079  se->Assign(*entryHandle.GetCompleteSeq_entry());
1080  }
1081  m_state.m_Scope->RemoveTopLevelSeqEntry(entryHandle);
1082  return true;
1083  }
1084  m_state.m_Scope->RemoveTopLevelSeqEntry(entryHandle);
1085  return false;
1086 }
1087 
1088 void CCleanupApp::x_OpenOStream(const string& filename, const string& dir, bool remove_orig_dir)
1089 {
1090  ESerialDataFormat outFormat = eSerial_AsnText;
1091 
1092  const CArgs& args = GetArgs();
1093  if (args["outformat"]) {
1094  if (args["outformat"].AsString() == "binary") {
1095  outFormat = eSerial_AsnBinary;
1096  } else if (args["outformat"].AsString() == "XML") {
1097  outFormat = eSerial_Xml;
1098  } else if (args["outformat"].AsString() == "JSON") {
1099  outFormat = eSerial_Json;
1100  }
1101  }
1102 
1103  if (NStr::IsBlank(filename)) {
1104  m_Out.reset(CObjectOStream::Open(outFormat, cout));
1105  } else if (! NStr::IsBlank(dir)) {
1106  string base = filename;
1107  if (remove_orig_dir) {
1108  const char buf[2] = { CDirEntry::GetPathSeparator(), 0 };
1109  size_t pos = NStr::Find(base, buf, NStr::eCase, NStr::eReverseSearch);
1110  if (pos != string::npos) {
1111  base = base.substr(pos + 1);
1112  }
1113  }
1114  string fname = CDirEntry::MakePath(dir, base);
1115  m_Out.reset(CObjectOStream::Open(outFormat, fname, eSerial_StdWhenAny));
1116  } else {
1117  m_Out.reset(CObjectOStream::Open(outFormat, filename, eSerial_StdWhenAny));
1118  }
1119 }
1120 
1121 
1123 {
1124  m_Out->Close();
1125  m_Out.reset();
1126 }
1127 
1128 // IProcessorCallback interface functionality
1130 {
1131  // static long long cnt;
1132  // cerr << ++cnt << ' ' << obj->GetThisTypeInfo()->GetName() << '\n';
1133  if (obj->GetThisTypeInfo() == CSeq_entry::GetTypeInfo()) {
1134  CRef<CSeq_entry> entry(dynamic_cast<CSeq_entry*>(obj.GetPointer()));
1135  HandleSeqEntry(entry);
1136  }
1137 }
1138 
1140 {
1141  bool any_changes = false;
1142  auto changes_str = changes.GetDescriptions();
1143  if (changes_str.empty()) {
1144  LOG_POST(Error << "No changes from " << prefix << "\n");
1145  } else {
1146  LOG_POST(Error << "Changes from " << prefix << ":\n");
1147  for (auto it : changes_str) {
1148  LOG_POST(Error << it);
1149  }
1150  any_changes = true;
1151  }
1152  return any_changes;
1153 }
1154 
1156 
1158 
1159 
1160 /////////////////////////////////////////////////////////////////////////////
1161 //
1162 // Main
1163 
1164 int main(int argc, const char** argv)
1165 {
1166  // scan and replace deprecated arguments; RW-1324
1167  for (int i = 1; i < argc; ++i) {
1168  string a = argv[i];
1169  if (a == "-r") {
1170  if ((i+1) < argc) {
1171  string param = argv[i+1];
1172  if (!param.empty() && param[0] != '-') {
1173  argv[i] = "-outdir";
1174  ++i; // skip parameter
1175  cerr << "Warning: deprecated use of -r argument. Please use -outdir instead." << endl;
1176  }
1177  }
1178  } else if (a == "-p") {
1179  argv[i] = "-indir";
1180  cerr << "Warning: argument -p is deprecated. Please use -indir instead." << endl;
1181  } else if (a == "-R") {
1182  argv[i] = "-r";
1183  cerr << "Warning: argument -R is deprecated. Please use -r instead." << endl;
1184  } else if (a == "-gbload") {
1185  argv[i] = "-genbank";
1186  cerr << "Warning: argument -gbload is deprecated. Please use -genbank instead." << endl;
1187  }
1188  }
1189 
1190  // this code converts single argument into multiple, just to simplify testing
1191  list<string> split_args;
1192  vector<const char*> new_argv;
1193 
1194  if (argc==2 && argv && argv[1] && strchr(argv[1], ' ')) {
1195  NStr::Split(argv[1], " ", split_args);
1196 
1197  auto it = split_args.begin();
1198  while (it != split_args.end()) {
1199  auto next = it; ++next;
1200  if (next != split_args.end() &&
1201  ((it->front() == '"' && it->back() != '"') ||
1202  (it->front() == '\'' && it->back() != '\'')))
1203  {
1204  it->append(" "); it->append(*next);
1205  next = split_args.erase(next);
1206  } else
1207  it = next;
1208  }
1209  for (auto& rec: split_args) {
1210  if (rec.front()=='\'' && rec.back()=='\'')
1211  rec=rec.substr(1, rec.length()-2);
1212  }
1213  argc = 1 + int(split_args.size());
1214  new_argv.reserve(argc);
1215  new_argv.push_back(argv[0]);
1216  for (const string& s : split_args) {
1217  new_argv.push_back(s.c_str());
1218  std::cerr << s.c_str() << " ";
1219  }
1220  std::cerr << "\n";
1221 
1222  argv = new_argv.data();
1223  }
1224 
1225  return CCleanupApp().AppMain(argc, argv);
1226 }
USING_SCOPE(objects)
int main(int argc, const char **argv)
EProcessingMode
Definition: asn_cleanup.cpp:72
@ eModeHugefile
Definition: asn_cleanup.cpp:76
@ eModeBatch
Definition: asn_cleanup.cpp:74
@ eModeRegular
Definition: asn_cleanup.cpp:73
@ eModeBigfile
Definition: asn_cleanup.cpp:75
static bool s_IsHugeMode(const CArgs &args, const CNcbiRegistry &cfg)
static const CDataLoadersUtil::TLoaders default_loaders
Definition: asn_cleanup.cpp:69
USING_NCBI_SCOPE
bool ProcessBigFile(CObjectIStream &in, CObjectOStream &out, IProcessorCallback &callback, EBigFileContentType content_type)
EBigFileContentType
@ eContentUndefined
@ eContentSeqSubmit
@ eContentSeqEntry
@ eContentBioseqSet
ncbi::TMaskedQueryRegions mask
AutoPtr –.
Definition: ncbimisc.hpp:401
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgException –.
Definition: ncbiargs.hpp:120
CArgs –.
Definition: ncbiargs.hpp:379
static bool RegenerateDefLines(CSeq_entry_Handle se)
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_EditHandle –.
CBioseq_Handle –.
bool HandleSeqID(const string &seqID)
void x_CloseOStream()
unique_ptr< edit::CRemoteUpdater > m_remote_updater
void x_FeatureOptionsValid(const string &opt)
const Uint4 kGFF3CDSFixOptions
void x_ProcessOneDirectory(const string &dirname, const string &suffix)
unique_ptr< CObjectOStream > m_Out
bool x_ProcessHugeFile(edit::CHugeFileProcess &process)
bool HandleSubmitBlock(CSubmit_block &block) override
user code for handling a Submit-block goes here.
bool x_ProcessFeatureOptions(const string &opt, CSeq_entry_Handle seh)
bool x_RemoveDuplicateFeatures(CSeq_entry_Handle seh)
int Run() override
Run the application.
bool x_ReportChanges(const string_view prefix, CCleanupChangeCore changes)
bool m_do_extended
void x_OpenOStream(const string &filename, const string &dir=kEmptyStr, bool remove_orig_dir=true)
bool x_ProcessXOptions(const string &opt, CSeq_entry_Handle seh, Uint4 options)
void Process(CRef< CSerialObject > &obj) override
void x_KOptionsValid(const string &opt)
bool x_BasicAndExtended(CSeq_entry_Handle entry, const string &label, Uint4 options=0)
bool x_FixCDS(CSeq_entry_Handle seh, Uint4 options, const string &missing_prot_name)
bool x_ProcessHugeFileBlob(edit::CHugeFileProcess &process)
void x_XOptionsValid(const string &opt)
void Init() override
Initialize the application.
bool x_BatchExtendCDS(CSeq_feat &, CBioseq_Handle)
bool x_GFF3Batch(CSeq_entry_Handle seh)
bool x_ProcessBigFile(unique_ptr< CObjectIStream > &is, TTypeInfo asn_type)
void x_ProcessOneFile(unique_ptr< CObjectIStream > &is, EProcessingMode mode, TTypeInfo asn_type)
CRef< CObjectManager > m_Objmgr
bool HandleSeqEntry(CRef< CSeq_entry > &se) override
user code for handling a Seq-entry goes here.
CConstRef< CSerialObject > x_ProcessTraditionally(edit::CHugeAsnReader &reader)
TThreadState m_state
All the changes made during cleanup.
vector< string_view > GetDescriptions() const
Definition: cleanup.cpp:300
static bool ConvertDeltaSeqToRaw(CSeq_entry_Handle seh, CSeq_inst::EMol filter=CSeq_inst::eMol_not_set)
Definition: cleanup.cpp:4408
static bool RenormalizeNucProtSets(CSeq_entry_Handle seh)
Convert nuc-prot sets with just one sequence to just the sequence; can't be done during the explore ph...
Definition: cleanup.cpp:4063
static void SetProteinName(CProt_ref &prot, const string &protein_name, bool append)
Definition: cleanup.cpp:1345
static bool FixECNumbers(CSeq_entry_Handle entry)
Fix EC numbers.
Definition: cleanup.cpp:1713
static size_t MakeSmallGenomeSet(CSeq_entry_Handle entry)
Definition: cleanup.cpp:4583
static bool RemoveNcbiCleanupObject(CSeq_entry &seq_entry)
Removes NcbiCleanup User Objects in the Seq-entry.
Definition: cleanup.cpp:1898
static bool ClearInternalPartials(CSeq_loc &loc, bool is_first=true, bool is_last=true)
Clear internal partials.
Definition: cleanup.cpp:1553
static bool ExtendStopPosition(CSeq_feat &f, const CSeq_feat *cdregion, size_t extension=0)
Definition: cleanup.cpp:1072
@ eClean_KeepSingleSeqSet
Definition: cleanup.hpp:79
@ eClean_InHugeSeqSet
Definition: cleanup.hpp:80
@ eClean_NoNcbiUserObjects
Definition: cleanup.hpp:75
@ eClean_KeepTopSet
Definition: cleanup.hpp:78
static bool ExtendToStopCodon(CSeq_feat &f, CBioseq_Handle bsh, size_t limit)
Extends a feature up to limit nt to a stop codon, or to the end of the sequence if limit == 0 (partia...
Definition: cleanup.cpp:1113
static const string & GetProteinName(const CProt_ref &prot)
Definition: cleanup.cpp:1467
static bool SetFrameFromLoc(CCdregion &cdregion, const CSeq_loc &loc, CScope &scope)
Chooses best frame based on location 1.
Definition: cleanup.cpp:1253
static bool NormalizeDescriptorOrder(CSeq_descr &descr)
Normalize Descriptor Order on a specific Seq-entry.
Definition: cleanup.cpp:3000
static bool WGSCleanup(CSeq_entry_Handle entry, bool instantiate_missing_proteins=true, Uint4 options=0, bool run_extended_cleanup=true)
Performs WGS specific cleanup.
Definition: cleanup.cpp:2653
static bool LocationMayBeExtendedToMatch(const CSeq_loc &orig, const CSeq_loc &improved)
Checks whether it is possible to extend the original location up to improved one.
Definition: cleanup.cpp:1333
static bool SetGeneticCodes(CBioseq_Handle bsh)
Sets genetic codes for coding regions on Bioseq-Handle.
Definition: cleanup.cpp:2122
static bool RemoveUnnecessaryGeneXrefs(CSeq_feat &f, CScope &scope)
Removes unnecessary Gene-xrefs.
Definition: cleanup.cpp:779
static bool MakeIRDFeatsFromSourceXrefs(CSeq_entry_Handle entry)
From SQD-4329 For each sequence with a source that has an IRD db_xref, create a misc_feature across t...
Definition: cleanup.cpp:4654
static void AutodefId(CSeq_entry_Handle seh)
Definition: cleanup.cpp:4942
static void SetupObjectManager(const CArgs &args, objects::CObjectManager &obj_mgr, TLoaders loaders=fDefault)
Set up the standard object manager data loaders according to the arguments provided above.
static void AddArgumentDescriptions(CArgDescriptions &arg_desc, TLoaders loaders=fDefault)
Add a standard set of arguments used to configure the object manager.
CDir –.
Definition: ncbifile.hpp:1695
CFeat_CI –.
Definition: feat_ci.hpp:64
Interface for handling Seq-entry objects.
CGBReleaseFile is a utility class to ease the processing of Genbank release files one Seq-entry at a ...
void StartWriter(CConstRef< CSerialObject > topobject)
CNcbiRegistry –.
Definition: ncbireg.hpp:913
CScope –.
Definition: scope.hpp:92
SeqMap related exceptions.
SeqVector related exceptions.
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
@ eBoth
Definition: Seq_entry.hpp:94
void GetLabel(string *label, ELabelType type) const
Definition: Seq_entry.cpp:274
CSeq_feat_EditHandle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSubmit_block –.
CTypeInfo class contains all information about C++ types (both basic and classes): members and layout...
Definition: typeinfo.hpp:76
CVersionInfo –.
Definition: set.hpp:45
bool empty() const
Definition: set.hpp:133
set< CBioseq_Handle > ListOrphanProteins(CSeq_entry_Handle seh, bool force_refseq=false)
Definition: dup_feats.cpp:142
set< CSeq_feat_Handle > GetDuplicateFeaturesForRemoval(CSeq_entry_Handle seh)
Definition: dup_feats.cpp:62
static void cleanup(void)
Definition: ct_dynamic.c:30
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
void SetVersion(const CVersionInfo &version)
Set the version number for the program.
Definition: ncbiapp.cpp:1155
@ fHidden
Hide it in Usage.
Definition: ncbiargs.hpp:662
@ eExcludes
One argument excludes another.
Definition: ncbiargs.hpp:957
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eDirectory
Name of file directory.
Definition: ncbiargs.hpp:598
#define ERR_FATAL(message)
Posting fatal error and abort.
Definition: ncbidiag.hpp:240
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
TEntries GetEntries(const string &mask=kEmptyStr, TGetEntriesFlags flags=0) const
Get directory entries based on the specified "mask".
Definition: ncbifile.cpp:3846
static string CreateAbsolutePath(const string &path, ERelativeToWhat rtw=eRelativeToCwd)
Get an absolute path from some, possibly relative, path.
Definition: ncbifile.cpp:665
static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)
Assemble a path from basic components.
Definition: ncbifile.cpp:413
static char GetPathSeparator(void)
Get path separator symbol specific for the current platform.
Definition: ncbifile.cpp:433
list< TEntry > TEntries
Definition: ncbifile.hpp:1750
@ eFile
Regular file.
Definition: ncbifile.hpp:783
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
ESerialDataFormat
Data file format.
Definition: serialdef.hpp:71
@ eSerial_StdWhenAny
Definition: serialdef.hpp:132
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
@ eSerial_Xml
XML.
Definition: serialdef.hpp:75
@ eSerial_Json
JSON.
Definition: serialdef.hpp:76
@ eSerial_AsnBinary
ASN.1 binary.
Definition: serialdef.hpp:74
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
Definition: objostr.cpp:126
bool RetranslateCDS(const CSeq_feat &cds, CScope &scope)
RetranslateCDS A function to replace the protein Bioseq pointed to by cds.product with the current tr...
Definition: feature.cpp:4121
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether given feature is pseudo, using gene associated with feature if necessary. Checks to...
Definition: sequence.cpp:1428
CConstRef< CSeq_feat > GetGeneForFeature(const CSeq_feat &feat, CScope &scope)
Finds gene for feature, but obeys SeqFeatXref directives.
Definition: sequence.cpp:1529
CConstRef< CSeq_feat > GetmRNAforCDS(const CSeq_feat &cds, CScope &scope)
GetmRNAforCDS A function to find a CSeq_feat representing the appropriate mRNA for a given CDS.
Definition: sequence.cpp:1261
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:4095
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
Definition: scope.cpp:200
void RemoveTopLevelSeqEntry(const CTSE_Handle &entry)
Revoke TSE previously added using AddTopLevelSeqEntry() or AddBioseq().
Definition: scope.cpp:376
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
void Remove(void) const
Remove the feature from Seq-annot.
void Remove(ERemoveMode mode=eRemoveSeq_entry) const
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CScope & GetScope(void) const
Get scope this handle belongs to.
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:1684
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
virtual bool GetBool(const string &section, const string &name, bool default_value, TFlags flags=0, EErrAction err_action=eThrow) const
Get boolean value of specified parameter name.
Definition: ncbireg.cpp:391
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
@ eReverseSearch
Search in a backward direction.
Definition: ncbistr.hpp:1947
@ eCase
Case sensitive compare.
Definition: ncbistr.hpp:1205
void CONNECT_Init(const IRWRegistry *reg=0, CRWLock *lock=0, TConnectInitFlags flag=eConnectInit_OwnNothing, FSSLSetup ssl=0)
Init [X]CONNECT library with the specified "reg" and "lock" (ownership for either or both can be deta...
static const char label[]
bool IsCdregion(void) const
Check if variant Cdregion is selected.
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
void SetClass(TClass value)
Assign a value to Class data member.
virtual void Reset(void)
Reset the whole object.
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_genbank
converted genbank
virtual void Reset(void)
Reset the whole object.
Definition: Bioseq_.cpp:97
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
void SetSub(TSub &value)
Assign a value to Sub data member.
void SetData(TData &value)
Assign a value to Data data member.
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
char * buf
int i
mdb_mode_t mode
Definition: lmdb++.h:38
#define NCBI_SC_VERSION_PROXY
#define NCBI_TEAMCITY_BUILD_NUMBER_PROXY
unsigned int a
Definition: ncbi_localip.c:102
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
int isspace(Uchar c)
Definition: ncbictype.hpp:69
std::istream & in(std::istream &in_, double &x_)
double df(double x_, const double &y_)
Definition: njn_root.hpp:189
#define fi
static const char * suffix[]
Definition: pcregrep.c:408
static const char * prefix[]
Definition: pcregrep.c:405
CCleanupChangeCore m_changes
Definition: asn_cleanup.cpp:82
CRef< CScope > m_Scope
Definition: asn_cleanup.cpp:80
#define _ASSERT
Modified on Thu Apr 25 08:20:37 2024 by modify_doxy.py rev. 669887