NCBI C++ ToolKit
multireader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: multireader.cpp 101955 2024-03-08 17:00:27Z gotvyans $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Frank Ludwig, NCBI
27 *
28 * File Description:
29 * Reader for selected data file formats
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
36 #include <corelib/ncbiargs.hpp>
37 #include <corelib/ncbistl.hpp>
38 #include <corelib/ncbi_system.hpp>
39 #include <util/format_guess.hpp>
40 #include <util/line_reader.hpp>
41 
42 #include <serial/iterator.hpp>
43 #include <serial/objistr.hpp>
44 #include <serial/objostr.hpp>
45 #include <serial/objostrasn.hpp>
46 #include <serial/serial.hpp>
47 
53 
76 //#include <misc/hgvs/hgvs_reader.hpp>
77 
80 
84 
86 
87 #include "multifile_source.hpp"
89 
92 
93 //class CGff3LocationMerger;
94 
96 {
97  return FORMAT(
98  "At ID '" << error.GetID() << "' "
99  "in category '" << static_cast<int>(error.GetCategory()) << "' "
100  "at line " << error.GetLineNum() << ": "
101  << error.GetMsg() << "'");
102 }
103 
104 
105 // ============================================================================
106 class TestCanceler: public ICanceled
107 // ============================================================================
108 {
109  bool IsCanceled() const { return false; };
110 };
111 
112 
113 // ============================================================================
115  public CReaderListener
116 // ============================================================================
117 {
118 public:
120  const IObjtoolsMessage& message)
121  {
122  const CReaderMessage* pReaderMessage =
123  dynamic_cast<const CReaderMessage*>(&message);
124  if (!pReaderMessage || pReaderMessage->Severity() == eDiag_Fatal) {
125  throw;
126  }
127  pReaderMessage->Write(cerr);
128  return true;
129  };
130 };
131 
132 // ============================================================================
134 // ============================================================================
135  : public CNcbiApplication
136 {
137 public:
139  {
140  SetVersion(CVersionInfo(1, 0, 2));
141  }
142 
143  // Create quick simple messages
145  EDiagSev eDiagSev, const string & msg);
147  ostream & ostr, const ILineError & line_error_p);
148  bool ShowingProgress() const { return m_showingProgress; };
149 protected:
150 
151 private:
152  void Init() override;
153  int Run() override;
154 
159  bool xProcessBed(const CArgs&, CNcbiIstream&, CNcbiOstream&);
162  void xProcessGtf(const CArgs&, CNcbiIstream&, CNcbiOstream&);
164  void xProcessGff3(const CArgs&, CNcbiIstream&, CNcbiOstream&);
165  void xProcessGff2(const CArgs&, CNcbiIstream&, CNcbiOstream&);
166  void xProcessGvf(const CArgs&, CNcbiIstream&, CNcbiOstream&);
168  void xProcessAgp(const CArgs&, CNcbiIstream&, CNcbiOstream&);
170  void xProcessFasta(const CArgs&, CNcbiIstream&, CNcbiOstream&);
171  void xProcessRmo(const CArgs&, CNcbiIstream&, CNcbiOstream&);
172  //void xProcessHgvs(const CArgs&, CNcbiIstream&, CNcbiOstream&);
173 
174  void xSetFormat(const CArgs&, CNcbiIstream&);
175  void xSetFlags(const CArgs&, const string&);
176  void xSetFlags(const CArgs&, CNcbiIstream&);
177  void xSetMapper(const CArgs&);
178  void xSetMessageListener(const CArgs&);
179 
180  void xPostProcessAnnot(const CArgs&, CSeq_annot&, const CGff3LocationMerger* =nullptr);
181  void xWriteObject(const CArgs&, CSerialObject&, CNcbiOstream&);
182  void xDumpErrors(CNcbiOstream& );
183 
187  long m_iFlags;
188  string m_AnnotName;
189  string m_AnnotTitle;
192 
193  unique_ptr<CIdMapper> m_pMapper;
194  unique_ptr<CMessageListenerBase> m_pErrors;
195  unique_ptr<CObjtoolsListener> m_pEditErrors;
196 };
197 
198 
199 
200 // ============================================================================
202 // ============================================================================
203  public CMessageListenerBase
204 {
205 public:
207  int iMaxCount,
208  int iMaxLevel,
209  CMultiReaderApp & multi_reader_app)
210  : m_iMaxCount(iMaxCount), m_iMaxLevel(iMaxLevel),
211  m_multi_reader_app(multi_reader_app)
212  {};
213 
215 
216  bool
218  const IObjtoolsMessage& message)
219  {
220  StoreMessage(message);
221  return (message.GetSeverity() <= m_iMaxLevel) && (Count() < m_iMaxCount);
222  };
223 
224  bool
226  const ILineError& err)
227  {
230  return true;
231  }
232  StoreError(err);
233  return (err.Severity() <= m_iMaxLevel) && (Count() < m_iMaxCount);
234  }
235 
236  void
238  const string& msg,
239  const Uint8 bytesDone,
240  const Uint8 dummy)
241  {
243  return;
244  }
245  AutoPtr<ILineError> line_error_p =
247  eDiag_Info,
248  FORMAT("Progress: " << bytesDone << " bytes done."));
249  m_multi_reader_app.WriteMessageImmediately(cerr, *line_error_p);
250  //if (bytesDone > 1000000) {
251  // bIsCanceled = true;
252  //}
253  };
254 
255 protected:
256  size_t m_iMaxCount;
259 };
260 
261 // ============================================================================
263 // ============================================================================
264  public CMessageListenerLevel
265 {
266 public:
268  int level, CMultiReaderApp & multi_reader_app)
269  : CMessageListenerLevel(level),
270  m_multi_reader_app(multi_reader_app) {};
271 
272  void
274  const string& msg,
275  const Uint8 bytesDone,
276  const Uint8 dummy)
277  {
279  return;
280  }
281  AutoPtr<ILineError> line_error_p =
283  eDiag_Info,
284  FORMAT(msg << " (" << bytesDone << " bytes)"));
285  m_multi_reader_app.WriteMessageImmediately(cerr, *line_error_p);
286  //if (bytesDone > 1000000) {
287  // bIsCanceled = true;
288  //}
289  };
290 
291 protected:
292  size_t m_iMaxCount;
295 };
296 
297 // ============================================================================
299 // ============================================================================
300 
301 // ----------------------------------------------------------------------------
303 // ----------------------------------------------------------------------------
304 {
305  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
306 
307  arg_desc->SetUsageContext("", "C++ multi format file reader");
308 
309  //
310  // input / output:
311  //
312 
313  arg_desc->SetCurrentGroup("INPUT / OUTPUT");
314 
315  arg_desc->AddDefaultKey(
316  "input",
317  "File_In",
318  "Input filename",
320  "-");
321  arg_desc->AddAlias("i", "input");
322 
323  arg_desc->AddDefaultKey(
324  "output",
325  "File_Out",
326  "Output filename",
328  arg_desc->AddAlias("o", "output");
329 
330  arg_desc->AddDefaultKey(
331  "indir",
332  "Dir_In",
333  "Input directory",
335  "");
336  arg_desc->AddAlias("p", "indir");
337 
338  arg_desc->AddDefaultKey(
339  "outdir",
340  "Dir_Out",
341  "Output directory",
343  "");
344  arg_desc->AddAlias("r", "outdir");
345 
346  arg_desc->AddDefaultKey(
347  "format",
348  "STRING",
349  "Input file format",
351  "guess");
352  arg_desc->SetConstraint(
353  "format",
354  &(*new CArgAllow_Strings,
355  "bed",
356  "microarray", "bed15",
357  "wig", "wiggle", "bedgraph",
358  "gtf", "gff3", "gff2", "augustus",
359  "gvf",
360  "agp",
361  "newick", "tree", "tre",
362  "vcf",
363  "aln", "align",
364  "fasta",
365  "5colftbl",
366  "ucsc",
367  "hgvs",
368  "psl",
369  "rmo",
370  "guess") );
371 
372  arg_desc->AddDefaultKey("out-format", "FORMAT",
373  "This sets how the output of this program will be formatted. "
374  "Note that for some formats some or all values might have no effect.",
375  CArgDescriptions::eString, "asn_text");
376  arg_desc->SetConstraint(
377  "out-format",
378  &(*new CArgAllow_Strings,
379  "asn_text",
380  "asn_binary",
381  "xml",
382  "json" ) );
383 
384 
385  arg_desc->AddDefaultKey(
386  "flags",
387  "STRING",
388  "Additional flags passed to the reader, as a single flag integer or comma separated flag names",
390  "0" );
391 
392  arg_desc->AddDefaultKey(
393  "name",
394  "STRING",
395  "Name for annotation",
397  "");
398  arg_desc->AddDefaultKey(
399  "title",
400  "STRING",
401  "Title for annotation",
403  "");
404 
405  //
406  // ID mapping:
407  //
408 
409  arg_desc->SetCurrentGroup("ID MAPPING");
410 
411  arg_desc->AddDefaultKey(
412  "mapfile",
413  "File_In",
414  "IdMapper config filename",
416 
417  arg_desc->AddDefaultKey(
418  "genome",
419  "STRING",
420  "UCSC build number",
422  "" );
423 
424  //
425  // Error policy:
426  //
427 
428  arg_desc->SetCurrentGroup("ERROR POLICY");
429 
430  arg_desc->AddFlag(
431  "dumpstats",
432  "write record counts to stderr",
433  true );
434 
435  arg_desc->AddFlag(
436  "xmlmessages",
437  "where possible, print errors, warnings, etc. as XML",
438  true );
439 
440  arg_desc->AddFlag(
441  "checkonly",
442  "check for errors only",
443  true );
444 
445  arg_desc->AddFlag(
446  "noerrors",
447  "suppress error display",
448  true );
449 
450  arg_desc->AddFlag(
451  "lenient",
452  "accept all input format errors",
453  true );
454 
455  arg_desc->AddFlag(
456  "strict",
457  "accept no input format errors",
458  true );
459 
460  arg_desc->AddDefaultKey(
461  "max-error-count",
462  "INTEGER",
463  "Maximum permissible error count",
465  "-1" );
466 
467  arg_desc->AddDefaultKey(
468  "max-error-level",
469  "STRING",
470  "Maximum permissible error level",
472  "warning" );
473 
474  arg_desc->SetConstraint(
475  "max-error-level",
476  &(*new CArgAllow_Strings,
477  "info", "warning", "error" ) );
478 
479  arg_desc->AddFlag("show-progress",
480  "This will show progress messages on stderr, if the underlying "
481  "reader supports that.");
482 
483  //
484  // bed and gff reader specific arguments:
485  //
486 
487  arg_desc->SetCurrentGroup("BED AND GFF READER SPECIFIC");
488 
489  arg_desc->AddFlag(
490  "all-ids-as-local",
491  "turn all ids into local ids",
492  true );
493 
494  arg_desc->AddFlag(
495  "numeric-ids-as-local",
496  "turn integer ids into local ids",
497  true );
498 
499  arg_desc->AddFlag(
500  "3ff",
501  "use BED three feature format",
502  true);
503 
504  arg_desc->AddFlag(
505  "dfm",
506  "use BED directed feature model",
507  true);
508 
509  arg_desc->AddFlag(
510  "genbank",
511  "clean up output for genbank submission",
512  true);
513 
514  arg_desc->AddFlag(
515  "genbank-no-locus-tags",
516  "clean up output for genbank submission, no locus-ag needed",
517  true);
518 
519  arg_desc->AddFlag(
520  "cleanup",
521  "clean up output but without genbank specific extensions",
522  true);
523 
524  arg_desc->AddFlag(
525  "euk",
526  "in -genbank mode, generate any missing mRNA features",
527  true);
528 
529  arg_desc->AddFlag(
530  "gene-xrefs",
531  "generate parent-child xrefs involving genes",
532  true);
533 
534  arg_desc->AddDefaultKey(
535  "locus-tag",
536  "STRING",
537  "Prefix or starting tag for auto generated locus tags",
539  "" );
540 
541  arg_desc->AddOptionalKey(
542  "autosql",
543  "FILENAME",
544  "BED autosql definition file",
546 
547  //
548  // wiggle reader specific arguments:
549  //
550 
551  arg_desc->SetCurrentGroup("WIGGLE READER SPECIFIC");
552 
553  arg_desc->AddFlag(
554  "join-same",
555  "join abutting intervals",
556  true );
557 
558  arg_desc->AddFlag(
559  "as-byte",
560  "generate byte compressed data",
561  true );
562 
563  arg_desc->AddFlag(
564  "as-real",
565  "generate real value data",
566  true );
567 
568  arg_desc->AddFlag(
569  "as-graph",
570  "generate graph object",
571  true );
572 
573  arg_desc->AddFlag(
574  "raw",
575  "iteratively return raw track data",
576  true );
577 
578  //
579  // gff reader specific arguments:
580  //
581 
582  arg_desc->SetCurrentGroup("GFF READER SPECIFIC");
583 
584  arg_desc->AddFlag( // no longer used, retained for backward compatibility
585  "new-code",
586  "use new gff3 reader implementation",
587  true );
588  arg_desc->AddFlag(
589  "old-code",
590  "use old gff3 reader implementation",
591  true );
592 
593  //
594 // gtf reader specific arguments:
595  //
596 
597  arg_desc->SetCurrentGroup("GTF READER SPECIFIC");
598 
599  arg_desc->AddFlag( // no longer used, retained for backward compatibility
600  "child-links",
601  "generate gene->mrna and gene->cds xrefs",
602  true );
603 
604  //
605  // alignment reader specific arguments:
606  //
607 
608  arg_desc->SetCurrentGroup("ALIGNMENT READER SPECIFIC");
609 
610  arg_desc->AddDefaultKey(
611  "aln-gapchar",
612  "STRING",
613  "Alignment gap character",
615  "-");
616 
617  arg_desc->AddDefaultKey(
618  "aln-missing",
619  "STRING",
620  "Alignment missing indicator",
622  "");
623 
624  arg_desc->AddDefaultKey(
625  "aln-alphabet",
626  "STRING",
627  "Alignment alphabet",
629  "prot");
630  arg_desc->SetConstraint(
631  "aln-alphabet",
632  &(*new CArgAllow_Strings,
633  "nuc",
634  "prot") );
635 
636  arg_desc->AddDefaultKey(
637  "aln-idval",
638  "STRING",
639  "Alignment sequence ID validation scheme",
641  "");
642 
643  arg_desc->AddFlag(
644  "force-local-ids",
645  "treat all IDs as local IDs",
646  true);
647 
648  arg_desc->AddFlag(
649  "ignore-nexus-info",
650  "ignore char settings in NEXUS format block",
651  true);
652  //
653  // FASTA reader specific arguments:
654  //
655 
656  arg_desc->SetCurrentGroup("FASTA READER SPECIFIC");
657 
658  arg_desc->AddFlag(
659  "parse-mods",
660  "Parse FASTA modifiers on deflines.");
661 
662  arg_desc->AddFlag(
663  "parse-gaps",
664  "Make a delta sequence if gaps found.");
665 
666  arg_desc->AddDefaultKey(
667  "max-id-length",
668  "INTEGER",
669  "Maximum permissible ID length",
671  "0" );
672  arg_desc->SetCurrentGroup("");
673 
674  SetupArgDescriptions(arg_desc.release());
675 }
676 
677 // ----------------------------------------------------------------------------
678 int
680 // ----------------------------------------------------------------------------
681 {
682  m_iFlags = 0;
683 
684  const CArgs& args = GetArgs();
685  string argInFile = args["input"].AsString();
686  string argOutFile = args["output"].AsString();
687  string argInDir = args["indir"].AsString();
688  string argOutDir = args["outdir"].AsString();
689 
690  if ((argInFile != "-") && !argInDir.empty()) {
691  cerr << "multireader: command line args -input and -indir are incompatible."
692  << endl;
693  return 1;
694  }
695  if ((argOutFile != "-") && !argOutDir.empty()) {
696  cerr << "multireader: command line args -output and -outdir are incompatible."
697  << endl;
698  return 1;
699  }
700  if (argInDir.empty() && !argOutDir.empty()) {
701  cerr << "multireader: command line arg -outdir requires -indir."
702  << endl;
703  return 1;
704  }
705  if (argOutDir.empty() && !argInDir.empty()) {
706  cerr << "multireader: command line arg -indir requires -outdir."
707  << endl;
708  return 1;
709  }
710  if (args["genbank"].AsBoolean() && args["genbank-no-locus-tags"].AsBoolean()) {
711  cerr << "multireader: flags -genbank and -genbank-no-locus-tags are mutually "
712  "exclusive"
713  << endl;
714  return 1;
715  }
716  if (!args["locus-tag"].AsString().empty() && args["genbank-no-locus-tags"].AsBoolean()) {
717  cerr << "multireader: flags -locus-tag and -genbank-no-locus-tags are mutually "
718  "exclusive"
719  << endl;
720  return 1;
721  }
722  if (argInFile == "-" && args["format"].AsString() == "guess") {
723  cerr << "multireader: must specify input format (\"-format ...\") if input comes from "
724  "console or pipe"
725  << endl;
726  return 1;
727  }
728 
729  xSetMapper(args);
730  xSetMessageListener(args);
731 
732  if (!argInDir.empty()) {
733  // with tests above, establishes multifile operation
734  string inFilePattern = CDirEntry::MakePath(argInDir, "*", "gff3");
735  string inFile, outFile;
736  CMultiFileSource fileSource(inFilePattern);
737  CMultiFileDestination fileDestination(argOutDir);
738  bool retIn = fileSource.Next(inFile);
739  while (retIn) {
740  if (!fileDestination.Next(inFile, outFile)) {
741  cerr << "multireader: unable to create output file "
742  << outFile << "." << endl;
743  return 1;
744  }
745  CNcbiIfstream istr(inFile, IOS_BASE::binary);
746  CNcbiOfstream ostr(outFile);
747  if (!xProcessSingleFile(args, istr, ostr)) {
748  return 1;
749  }
750  retIn = fileSource.Next(inFile);
751  }
752  }
753  else {
754  // at this point, implies single file operation
755  CNcbiIstream& istr = args["input"].AsInputFile(CArgValue::fBinary);
756  CNcbiOstream& ostr = args["output"].AsOutputFile();
757  if (!xProcessSingleFile(args, istr, ostr)) {
758  return 1;
759  }
760  }
761  return 0;
762 }
763 
764 // -----------------------------------------------------------------------------
765 bool
767  const CArgs& args,
768  CNcbiIstream& istr,
769  CNcbiOstream& ostr)
770 // -----------------------------------------------------------------------------
771 {
772  bool retCode = true;
773 
774  try {
775  xSetFlags(args, args["input"].AsString());
776  switch( m_uFormat ) {
777  default:
778  xProcessDefault(args, istr, ostr);
779  break;
782  xProcessWiggleRaw(args, istr, ostr);
783  }
784  else {
785  xProcessWiggle(args, istr, ostr);
786  }
787  break;
788  case CFormatGuess::eBed:
790  xProcessBedRaw(args, istr, ostr);
791  }
792  else {
793  retCode = xProcessBed(args, istr, ostr);
794  }
795  break;
797  xProcessUCSCRegion(args, istr, ostr);
798  break;
799  case CFormatGuess::eGtf:
802  xProcessGtf(args, istr, ostr);
803  break;
805  xProcessNewick(args, istr, ostr);
806  break;
807  case CFormatGuess::eGff3:
808  xProcessGff3(args, istr, ostr);
809  break;
810  case CFormatGuess::eGff2:
811  xProcessGff2(args, istr, ostr);
812  break;
813  case CFormatGuess::eGvf:
814  xProcessGvf(args, istr, ostr);
815  break;
816  case CFormatGuess::eAgp:
817  xProcessAgp(args, istr, ostr);
818  break;
820  xProcessAlignment(args, istr, ostr);
821  break;
823  xProcess5ColFeatTable(args, istr, ostr);
824  break;
826  xProcessFasta(args, istr, ostr);
827  break;
828  case CFormatGuess::eRmo:
829  xProcessRmo(args, istr, ostr);
830  break;
831  //case CFormatGuess::eHgvs:
832  // xProcessHgvs(args, istr, ostr);
833  // break;
834  }
835  }
836  catch(const CReaderMessage& message) {
837  message.Dump(cerr);
838  retCode = false;
839  }
840  catch(const ILineError&) {
841  AutoPtr<ILineError> line_error_p =
843  eDiag_Fatal, "Reading aborted due to fatal error.");
844  //m_pErrors->PutError(reader_ex); // duplicate!
845  m_pErrors->PutError(*line_error_p);
846  retCode = false;
847  }
848  catch(const CException& e) {
849  CNcbiOstrstream os;
850  os << e.GetMsg();
851  CNcbiOstrstream osEx;
852  e.ReportExtra(osEx);
853  if (!IsOssEmpty(osEx)) {
854  os << " (" << CNcbiOstrstreamToString(osEx) << ')';
855  }
856  AutoPtr<ILineError> line_error_p =
858  "Reading aborted due to fatal error: " + CNcbiOstrstreamToString(os));
859  m_pErrors->PutError(*line_error_p);
860  retCode = false;
861  }
862  catch(const std::exception & std_ex) {
863  AutoPtr<ILineError> line_error_p =
865  eDiag_Fatal,
866  FORMAT(
867  "Reading aborted due to fatal error: " << std_ex.what()));
868  m_pErrors->PutError(*line_error_p);
869  retCode = false;
870  }
871  catch(int) {
872  // hack on top of hackish reporting system
873  retCode = false;
874  }
875  catch(...) {
876  AutoPtr<ILineError> line_error_p =
878  eDiag_Fatal, "Unknown Fatal Error occurred");
879  m_pErrors->PutError(*line_error_p);
880  retCode = false;
881  }
882  xDumpErrors( cerr );
883  return retCode;
884 }
885 
886 // ----------------------------------------------------------------------------
888  const CArgs& args,
889  CNcbiIstream& istr,
890  CNcbiOstream& ostr)
891 // ----------------------------------------------------------------------------
892 {
893  typedef list<CRef<CSeq_annot> > ANNOTS;
894  ANNOTS annots;
895 
896  unique_ptr<CReaderBase> pReader(
898  if (!pReader) {
900  eDiag_Fatal, 1, "File format not supported");
901  throw(fatal);
902  }
903  if (ShowingProgress()) {
904  pReader->SetProgressReportInterval(10);
905  }
906  //TestCanceler canceler;
907  //pReader->SetCanceler(&canceler);
908  pReader->ReadSeqAnnots(annots, istr, m_pErrors.get());
909  for (CRef<CSeq_annot> cit : annots) {
910  xWriteObject(args, *cit, ostr);
911  }
912 }
913 
914 // ----------------------------------------------------------------------------
916  const CArgs& args,
917  CNcbiIstream& istr,
918  CNcbiOstream& ostr)
919 // ----------------------------------------------------------------------------
920 {
921  typedef list<CRef<CSeq_annot> > ANNOTS;
922  ANNOTS annots;
923 
925  if (ShowingProgress()) {
926  reader.SetProgressReportInterval(10);
927  }
928  //TestCanceler canceler;
929  //reader.SetCanceler(&canceler);
930  reader.ReadSeqAnnots(annots, istr, m_pErrors.get());
931  for (CRef<CSeq_annot> cit : annots) {
932  xWriteObject(args, *cit, ostr);
933  }
934 }
935 
936 // ----------------------------------------------------------------------------
938  const CArgs& args,
939  CNcbiIstream& istr,
940  CNcbiOstream& ostr)
941 // ----------------------------------------------------------------------------
942 {
943  CWiggleReader reader(m_iFlags);
944  CStreamLineReader lr(istr);
945  CRawWiggleTrack raw;
946  while (reader.ReadTrackData(lr, raw)) {
947  raw.Dump(cerr);
948  }
949 }
950 
951 // ----------------------------------------------------------------------------
953  const CArgs& args,
954  CNcbiIstream& istr,
955  CNcbiOstream& ostr)
956 {
957  // Use ReadSeqAnnot() over ReadSeqAnnots() to keep memory footprint down.
958  CUCSCRegionReader reader(m_iFlags);
959  CStreamLineReader lr(istr);
960  CRef<CSerialObject> pAnnot = reader.ReadObject(lr, m_pErrors.get());
961  if (pAnnot) {
962  xWriteObject(args, *pAnnot, ostr);
963  }
964 }
965 // ----------------------------------------------------------------------------
967  const CArgs& args,
968  CNcbiIstream& istr,
969  CNcbiOstream& ostr)
970 // ----------------------------------------------------------------------------
971 {
972  // Use ReadSeqAnnot() over ReadSeqAnnots() to keep memory footprint down.
974  if (args["autosql"]) {
975  if (!reader.SetAutoSql(args["autosql"].AsString())) {
976  return false;
977  }
978  }
979  if (ShowingProgress()) {
980  reader.SetProgressReportInterval(10);
981  }
982  //TestCanceler canceler;
983  //reader.SetCanceler(&canceler);
984  CStreamLineReader lr( istr );
985  CRef<CSeq_annot> pAnnot = reader.ReadSeqAnnot(lr, m_pErrors.get());
986  while(pAnnot) {
987  xWriteObject(args, *pAnnot, ostr);
988  pAnnot.Reset();
989  pAnnot = reader.ReadSeqAnnot(lr, m_pErrors.get());
990  }
991  return true;
992 }
993 
994 // ----------------------------------------------------------------------------
996  const CArgs& args,
997  CNcbiIstream& istr,
998  CNcbiOstream& ostr)
999 // ----------------------------------------------------------------------------
1000 {
1002  CStreamLineReader lr(istr);
1003  CRawBedTrack raw;
1004  while (reader.ReadTrackData(lr, raw)) {
1005  raw.Dump(cerr);
1006  }
1007 }
1008 
1009 // ----------------------------------------------------------------------------
1011  const CArgs& args,
1012  CNcbiIstream& istr,
1013  CNcbiOstream& ostr)
1014 // ----------------------------------------------------------------------------
1015 {
1016  typedef CGff2Reader::TAnnotList ANNOTS;
1017  ANNOTS annots;
1018 
1019  if (args["format"].AsString() == "gff2") { // process as plain GFF2
1020  return xProcessGff2(args, istr, ostr);
1021  }
1023  if (ShowingProgress()) {
1024  reader.SetProgressReportInterval(10);
1025  }
1026  //TestCanceler canceler;
1027  //reader.SetCanceler(&canceler);
1028  reader.ReadSeqAnnots(annots, istr, m_pErrors.get());
1029  for (CRef<CSeq_annot> it : annots) {
1030  xPostProcessAnnot(args, *it);
1031  xWriteObject(args, *it, ostr);
1032  }
1033 }
1034 
1035 // ----------------------------------------------------------------------------
1037  const CArgs& args,
1038  CNcbiIstream& istr,
1039  CNcbiOstream& ostr)
1040 // ----------------------------------------------------------------------------
1041 {
1042  typedef CGff2Reader::TAnnotList ANNOTS;
1043  ANNOTS annots;
1044 
1045  if (args["format"].AsString() == "gff2") { // process as plain GFF2
1046  return xProcessGff2(args, istr, ostr);
1047  }
1050  if (ShowingProgress()) {
1051  reader.SetProgressReportInterval(10);
1052  }
1053  //TestCanceler canceler;
1054  //reader.SetCanceler(&canceler);
1055  reader.ReadSeqAnnots(annots, istr, m_pErrors.get());
1056  for (CRef<CSeq_annot> it : annots) {
1057  const auto& data = it->GetData();
1058  if (data.IsFtable()) {
1059  const auto& features = it->GetData().GetFtable();
1060  if (features.empty()) {
1061  continue;
1062  }
1063  auto pLocationMerger = reader.GetLocationMerger();
1064  xPostProcessAnnot(args, *it, pLocationMerger.get());
1065  }
1066  else {
1067  xPostProcessAnnot(args, *it);
1068  }
1069  xWriteObject(args, *it, ostr);
1070  }
1071 }
1072 
1073 // ----------------------------------------------------------------------------
1075  const CArgs& args,
1076  CNcbiIstream& istr,
1077  CNcbiOstream& ostr)
1078 // ----------------------------------------------------------------------------
1079 {
1080  typedef CGff2Reader::TAnnotList ANNOTS;
1081  ANNOTS annots;
1082 
1084  reader.ReadSeqAnnots(annots, istr, m_pErrors.get());
1085  for (CRef<CSeq_annot> cit : annots) {
1086  xWriteObject(args, *cit, ostr);
1087  }
1088 }
1089 
1090 /*// ----------------------------------------------------------------------------
1091 void CMultiReaderApp::xProcessHgvs(
1092  const CArgs& args,
1093  CNcbiIstream& istr,
1094  CNcbiOstream& ostr)
1095 // ----------------------------------------------------------------------------
1096 {
1097  typedef vector<CRef<CSeq_annot> > ANNOTS;
1098  ANNOTS annots;
1099 
1100  CHgvsReader reader;
1101  reader.ReadSeqAnnots(annots, istr, m_pErrors);
1102  for (CRef<CSeq_annot> cit : annots) {
1103  xWriteObject(args, *cit, ostr);
1104  }
1105 }*/
1106 
1107 // ----------------------------------------------------------------------------
1109  const CArgs& args,
1110  CNcbiIstream& istr,
1111  CNcbiOstream& ostr)
1112 // ----------------------------------------------------------------------------
1113 {
1114  typedef CGff2Reader::TAnnotList ANNOTS;
1115  ANNOTS annots;
1116 
1117  if (args["format"].AsString() == "gff2") { // process as plain GFF2
1118  return xProcessGff2(args, istr, ostr);
1119  }
1120  if (args["format"].AsString() == "gff3") { // process as plain GFF3
1121  return xProcessGff3(args, istr, ostr);
1122  }
1124  if (ShowingProgress()) {
1125  reader.SetProgressReportInterval(10);
1126  }
1127  //TestCanceler canceler;
1128  //reader.SetCanceler(&canceler);
1129  reader.ReadSeqAnnots(annots, istr, m_pErrors.get());
1130  for (CRef<CSeq_annot> cit : annots) {
1131  xWriteObject(args, *cit, ostr);
1132  }
1133 }
1134 
1135 // ----------------------------------------------------------------------------
1137  const CArgs& args,
1138  CNcbiIstream& istr,
1139  CNcbiOstream& ostr)
1140 // ----------------------------------------------------------------------------
1141 {
1142  while (!istr.eof()) {
1143  unique_ptr<TPhyTreeNode> pTree(ReadNewickTree(istr));
1145  pTree.get());
1146  xWriteObject(args, *btc, ostr);
1147  }
1148 }
1149 
1150 
1151 // ----------------------------------------------------------------------------
1153  const CArgs& args,
1154  CNcbiIstream& istr,
1155  CNcbiOstream& ostr)
1156 // ----------------------------------------------------------------------------
1157 {
1158  CAgpToSeqEntry reader(m_iFlags);
1159 
1160  const int iErrCode = reader.ReadStream(istr);
1161  if( iErrCode != 0 ) {
1163  "AGP reader failed with code " +
1164  NStr::NumericToString(iErrCode), 0 );
1165  }
1166 
1168  xWriteObject(args, **it, ostr);
1169  }
1170 }
1171 
1172 // ----------------------------------------------------------------------------
1174  const CArgs& args,
1175  CNcbiIstream& istr,
1176  CNcbiOstream& ostr)
1177 // ----------------------------------------------------------------------------
1178 {
1179  if (!istr) {
1180  return;
1181  }
1183  CRef<ILineReader> pLineReader = ILineReader::New(istr);
1184  while(!pLineReader->AtEOF()) {
1185  CRef<CSeq_annot> pSeqAnnot =
1186  reader.ReadSeqAnnot(*pLineReader, m_pErrors.get());
1187  if( pSeqAnnot &&
1188  pSeqAnnot->IsFtable() &&
1189  !pSeqAnnot->GetData().GetFtable().empty()) {
1190  xWriteObject(args, *pSeqAnnot, ostr);
1191  }
1192  }
1193 }
1194 
1195 // ----------------------------------------------------------------------------
1197  const CArgs& args,
1198  CNcbiIstream& istr,
1199  CNcbiOstream& ostr)
1200 // ----------------------------------------------------------------------------
1201 {
1202  CRepeatMaskerReader reader;
1203  CRef<ILineReader> pLineReader = ILineReader::New(istr);
1204  while(istr) {
1205  CRef<CSeq_annot> pSeqAnnot =
1206  reader.ReadSeqAnnot(*pLineReader, m_pErrors.get());
1207  if( ! pSeqAnnot || ! pSeqAnnot->IsFtable() ||
1208  pSeqAnnot->GetData().GetFtable().empty() )
1209  {
1210  // empty annot
1211  break;
1212  }
1213  xWriteObject(args, *pSeqAnnot, ostr);
1214  }
1215 }
1216 
1217 // ----------------------------------------------------------------------------
1219  const CArgs& args,
1220  CNcbiIstream& istr,
1221  CNcbiOstream& ostr)
1222 // ----------------------------------------------------------------------------
1223 {
1224  CStreamLineReader line_reader(istr);
1225 
1226  CFastaReader reader(line_reader, m_iFlags);
1227  auto maxIdLength = args["max-id-length"].AsInteger();
1228  if (maxIdLength != 0) {
1229  reader.SetMaxIDLength(maxIdLength);
1230  }
1231 
1232  CRef<CSeq_entry> pSeqEntry = reader.ReadSeqEntry(line_reader, m_pErrors.get());
1233  xWriteObject(args, *pSeqEntry, ostr);
1234 }
1235 
1236 // ----------------------------------------------------------------------------
1238  const CArgs& args,
1239  CNcbiIstream& istr,
1240  CNcbiOstream& ostr)
1241 // ----------------------------------------------------------------------------
1242 {
1243  CFastaReader::TFlags fFlags = 0;
1244  if( args["parse-mods"] ) {
1245  fFlags |= CFastaReader::fAddMods;
1246  }
1247  CAlnReader reader(istr);
1249  if (args["aln-alphabet"].AsString() == "nuc") {
1251  }
1252  try {
1254  (args["all-ids-as-local"].AsBoolean() ?
1257  reader.Read(flags, m_pErrors.get());
1258  CRef<CSeq_entry> pEntry = reader.GetSeqEntry(fFlags, m_pErrors.get());
1259  if (pEntry) {
1260  xWriteObject(args, *pEntry, ostr);
1261  }
1262  }
1263  catch (std::exception&) {
1264  }
1265 }
1266 
1267 // ----------------------------------------------------------------------------
1269  const CArgs& args,
1270  CNcbiIstream& istr )
1271 // ----------------------------------------------------------------------------
1272 {
1274  string format = args["format"].AsString();
1275  const string& strProgramName = GetProgramDisplayName();
1276 
1277  if (NStr::StartsWith(strProgramName, "wig") || format == "wig" ||
1278  format == "wiggle" || format == "bedgraph") {
1280  return;
1281  }
1282  if (NStr::StartsWith(strProgramName, "bed") || format == "bed") {
1284  return;
1285  }
1286  if (NStr::StartsWith(strProgramName, "b15") || format == "bed15" ||
1287  format == "microarray") {
1289  return;
1290  }
1291  if (NStr::StartsWith(strProgramName, "gtf") || format == "gtf") {
1293  return;
1294  }
1295  if (NStr::StartsWith(strProgramName, "gtf") || format == "augustus") {
1297  return;
1298  }
1299  if (NStr::StartsWith(strProgramName, "gff3") || format == "gff3") {
1301  return;
1302  }
1303  if (NStr::StartsWith(strProgramName, "gff2") || format =="gff2") {
1305  return;
1306  }
1307  if (NStr::StartsWith(strProgramName, "agp")) {
1309  return;
1310  }
1311 
1312  if (NStr::StartsWith(strProgramName, "newick") ||
1313  format == "newick" || format == "tree" || format == "tre") {
1315  return;
1316  }
1317  if (NStr::StartsWith(strProgramName, "gvf") || format == "gvf") {
1319  return;
1320  }
1321  if (NStr::StartsWith(strProgramName, "aln") || format == "align" ||
1322  format == "aln") {
1324  return;
1325  }
1326  if (NStr::StartsWith(strProgramName, "hgvs") || format == "hgvs") {
1328  return;
1329  }
1330  if( NStr::StartsWith(strProgramName, "fasta") || format == "fasta" ) {
1332  return;
1333  }
1334  if( NStr::StartsWith(strProgramName, "feattbl") || format == "5colftbl" ) {
1336  return;
1337  }
1338  if( NStr::StartsWith(strProgramName, "vcf") || format == "vcf" ) {
1340  return;
1341  }
1342  if( NStr::StartsWith(strProgramName, "ucsc") || format == "ucsc" ) {
1344  return;
1345  }
1346  if ( NStr::StartsWith(strProgramName, "psl") || format == "psl" ) {
1348  return;
1349  }
1352  }
1353 }
1354 
1355 // ----------------------------------------------------------------------------
1357  const CArgs& args,
1358  const string& filename )
1359 // ----------------------------------------------------------------------------
1360 {
1361  CNcbiIfstream istr(filename);
1362  xSetFlags(args, istr);
1363  istr.close();
1364 }
1365 
1366 // ----------------------------------------------------------------------------
1368  const CArgs& args,
1369  CNcbiIstream& istr)
1370 // ----------------------------------------------------------------------------
1371 {
1373  xSetFormat(args, istr);
1374  }
1375 
1376  m_AnnotName = args["name"].AsString();
1377  m_AnnotTitle = args["title"].AsString();
1378  m_bCheckOnly = args["checkonly"];
1379  m_bXmlMessages = args["xmlmessages"];
1380 
1381  switch( m_uFormat ) {
1382 
1383  case CFormatGuess::eWiggle:
1385  args["flags"].AsString(), NStr::fConvErr_NoThrow, 16 );
1386  if ( args["join-same"] ) {
1388  }
1389  //by default now. But still tolerate if explicitly specified.
1390  if (!args["as-real"]) {
1392  }
1393  if ( args["as-graph"] ) {
1395  }
1396 
1397  if ( args["raw"] ) {
1399  }
1400  break;
1401 
1402  case CFormatGuess::eBed:
1404  args["flags"].AsString(), NStr::fConvErr_NoThrow, 16 );
1405  if ( args["all-ids-as-local"] ) {
1407  }
1408  if ( args["numeric-ids-as-local"] ) {
1410  }
1411  if ( args["raw"] ) {
1413  }
1414  if ( args["3ff"] ) {
1416  }
1417  if ( args["dfm"] ) {
1419  }
1420  break;
1421 
1422  case CFormatGuess::eGtf:
1424  args["flags"].AsString(), NStr::fConvErr_NoThrow, 16 );
1425  if ( args["all-ids-as-local"] ) {
1427  }
1428  if ( args["numeric-ids-as-local"] ) {
1430  }
1431  if ( args["child-links"] ) {
1433  }
1434  if (args["genbank-no-locus-tags"]) {
1436  }
1437  if (args["genbank"]) {
1439  if (args["locus-tag"]) {
1441  }
1442  }
1443  break;
1444 
1445  case CFormatGuess::eGff3:
1447  args["flags"].AsString(), NStr::fConvErr_NoThrow, 16 );
1448  if ( args["gene-xrefs"] ) {
1450  }
1451  if (args["genbank-no-locus-tags"]) {
1454  }
1455  if ( args["genbank"] ) {
1458  if (args["locus-tag"]) {
1460  }
1461  }
1462  break;
1463 
1464  case CFormatGuess::eFasta: {
1465  auto flagsStr = args["flags"].AsString();
1467  if( args["parse-mods"] ) {
1469  }
1470  if( args["parse-gaps"] ) {
1472  }
1473 
1474  try {
1475  m_iFlags |= NStr::StringToInt(flagsStr, 0, 16);
1476  }
1477  catch(const CStringException&) {
1478  list<string> stringFlags;
1479  NStr::Split(flagsStr, ",", stringFlags);
1480  CFastaReader::AddStringFlags(stringFlags, m_iFlags);
1481  }
1482  break;
1483  }
1484 
1486  auto flagsStr = args["flags"].AsString();
1487  try {
1488  m_iFlags |= NStr::StringToInt(flagsStr, 0, 16);
1489  }
1490  catch (const CStringException&) {
1491  list<string> stringFlags;
1492  NStr::Split(flagsStr, ",", stringFlags);
1494  }
1495  break;
1496  }
1497 
1498  default:
1500  args["flags"].AsString(), NStr::fConvErr_NoThrow, 16 );
1501  break;
1502  }
1503 }
1504 
1505 // ----------------------------------------------------------------------------
1507  const CArgs& args,
1508  CSeq_annot& annot,
1509  const CGff3LocationMerger* pLocationMerger)
1510  // ----------------------------------------------------------------------------
1511 {
1512  static unsigned int startingLocusTagNumber = 1;
1513  static unsigned int startingFeatureId = 1;
1514 
1515  if (!args["genbank"].AsBoolean() && !args["genbank-no-locus-tags"].AsBoolean()) {
1516  if (args["cleanup"]) {
1517  CCleanup cleanup;
1518  cleanup.BasicCleanup(annot);
1519  }
1520  return;
1521  }
1522 
1523  // all other processing only applies to feature tables
1524  if (!annot.GetData().IsFtable()) {
1525  return;
1526  }
1527 
1528  string prefix, offset;
1529  if (NStr::SplitInTwo(args["locus-tag"].AsString(), "_", prefix, offset)) {
1531  if (tail != -1) {
1532  startingLocusTagNumber = tail;
1533  }
1534  else {
1535  if (!offset.empty()) {
1536  //bads news
1538  "Invalid locus tag: Only one \"_\", and suffix must be numeric", 0);
1539  }
1540  }
1541  }
1542  else {
1543  prefix = args["locus-tag"].AsString();
1544  }
1545 
1546  edit::CFeatTableEdit fte(
1547  annot, 0, prefix, startingLocusTagNumber, startingFeatureId, m_pErrors.get());
1548  fte.InferPartials();
1549  fte.GenerateMissingParentFeatures(args["euk"].AsBoolean(), pLocationMerger);
1550  if (args["genbank"].AsBoolean() && !fte.AnnotHasAllLocusTags()) {
1551  if (!prefix.empty()) {
1552  fte.GenerateLocusTags();
1553  }
1554  else {
1555  AutoPtr<ILineError> line_error_p =
1557  eDiag_Fatal, "Need prefix to generate missing locus tags but none was provided");
1558  this->WriteMessageImmediately(cerr, *line_error_p);
1559  throw(0);
1560  }
1561  }
1562  fte.GenerateProteinAndTranscriptIds();
1563  //fte.InstantiateProducts();
1564  fte.ProcessCodonRecognized();
1565  fte.EliminateBadQualifiers();
1566  fte.SubmitFixProducts();
1567 
1568  startingLocusTagNumber = fte.PendingLocusTagNumber();
1569  startingFeatureId = fte.PendingFeatureId();
1570 
1571  CCleanup cleanup;
1572  cleanup.BasicCleanup(annot);
1573 }
1574 
1575 
1576 // ----------------------------------------------------------------------------
1578  EDiagSev eDiagSev, const string & msg)
1579 // ----------------------------------------------------------------------------
1580 {
1581  // For creating quick messages generated by CMultiReaderApp itself
1582  class CLineErrorForMsg : public CLineError
1583  {
1584  public:
1585  CLineErrorForMsg(EDiagSev eDiagSev, const string & msg)
1586  : CLineError(
1588  eDiagSev,
1590  NStr::TruncateSpaces(msg),
1592  };
1593  return AutoPtr<ILineError>(new CLineErrorForMsg(eDiagSev, msg));
1594 }
1595 
1596 
1597 // ----------------------------------------------------------------------------
1599  ostream & ostr, const ILineError & line_error)
1600 // ----------------------------------------------------------------------------
1601 {
1602  // For example, progress messages and fatal errors should be written
1603  // immediately.
1604  if( m_bXmlMessages ) {
1605  line_error.DumpAsXML(ostr);
1606  } else {
1607  line_error.Dump(ostr);
1608  }
1609  ostr.flush();
1610 }
1611 
1612 // ----------------------------------------------------------------------------
1614  const CArgs & args,
1615  CSerialObject& object, // potentially modified by mapper
1616  CNcbiOstream& ostr)
1617 // ----------------------------------------------------------------------------
1618 {
1619  if (m_pMapper.get()) {
1620  m_pMapper->MapObject(object);
1621  }
1622  if (m_bCheckOnly) {
1623  return;
1624  }
1625  const string out_format = args["out-format"].AsString();
1626  unique_ptr<MSerial_Format> pOutFormat;
1627  if( out_format == "asn_text" ) {
1628  pOutFormat.reset( new MSerial_Format_AsnText );
1629  } else if( out_format == "asn_binary" ) {
1630  pOutFormat.reset( new MSerial_Format_AsnBinary );
1631  } else if( out_format == "xml" ) {
1632  pOutFormat.reset( new MSerial_Format_Xml );
1633  } else if( out_format == "json" ) {
1634  pOutFormat.reset( new MSerial_Format_Json );
1635  } else {
1636  NCBI_USER_THROW_FMT("Unsupported out-format: " << out_format);
1637  }
1638  ostr << *pOutFormat << object;
1639  ostr.flush();
1640 }
1641 
1642 // ----------------------------------------------------------------------------
1643 void
1645  const CArgs& args)
1646 // ----------------------------------------------------------------------------
1647 {
1648  string strBuild = args["genome"].AsString();
1649  string strMapFile = args["mapfile"].AsString();
1650 
1651  if (strBuild.empty() && strMapFile.empty()) {
1652  return;
1653  }
1654  if (!strMapFile.empty()) {
1655  CNcbiIfstream* pMapFile = new CNcbiIfstream(strMapFile);
1656  m_pMapper.reset(
1657  new CIdMapperConfig(*pMapFile, strBuild, false, m_pErrors.get()));
1658  }
1659  else {
1660  m_pMapper.reset(new CIdMapperBuiltin(strBuild, false, m_pErrors.get()));
1661  }
1662 }
1663 
1664 // ----------------------------------------------------------------------------
1665 void
1667  const CArgs& args )
1668 // ----------------------------------------------------------------------------
1669 {
1670 
1671  //
1672  // By default, allow all errors up to the level of "warning" but nothing
1673  // more serious. -strict trumps everything else, -lenient is the second
1674  // strongest. In the absence of -strict and -lenient, -max-error-count and
1675  // -max-error-level become additive, i.e. both are enforced.
1676  //
1677  if ( args["noerrors"] ) { // not using error policy at all
1678  return;
1679  }
1680  m_showingProgress = args["show-progress"];
1681 
1682  if ( args["strict"] ) {
1683  m_pErrors.reset(new CMessageListenerStrict());
1684  } else if ( args["lenient"] ) {
1685  m_pErrors.reset(new CMessageListenerLenient());
1686  } else {
1687  int iMaxErrorCount = args["max-error-count"].AsInteger();
1688  int iMaxErrorLevel = eDiag_Warning;
1689  string strMaxErrorLevel = args["max-error-level"].AsString();
1690  if ( strMaxErrorLevel == "info" ) {
1691  iMaxErrorLevel = eDiag_Info;
1692  }
1693  else if ( strMaxErrorLevel == "error" ) {
1694  iMaxErrorLevel = eDiag_Error;
1695  }
1696 
1697  if ( iMaxErrorCount == -1 ) {
1698  m_pErrors.reset(
1699  new CMyMessageListenerCustomLevel(iMaxErrorLevel, *this));
1700  } else {
1701  m_pErrors.reset(
1703  iMaxErrorCount, iMaxErrorLevel, *this));
1704  }
1705  }
1706  // if progress requested, wrap the m_pErrors so that progress is shown
1707  if (ShowingProgress()) {
1708  m_pErrors->SetProgressOstream( &cerr );
1709  }
1710 }
1711 
1712 // ----------------------------------------------------------------------------
1714  CNcbiOstream& ostr)
1715 // ----------------------------------------------------------------------------
1716 {
1717  if (m_pErrors && m_pErrors->Count() > 0 ) {
1718  if( m_bXmlMessages ) {
1719  m_pErrors->DumpAsXML(ostr);
1720  } else {
1721  m_pErrors->Dump(ostr);
1722  }
1723  }
1724 }
1725 
1726 // ----------------------------------------------------------------------------
1727 int main(int argc, const char* argv[])
1728 // ----------------------------------------------------------------------------
1729 {
1730  // Execute main application function
1731  return CMultiReaderApp().AppMain(argc, argv);
1732 }
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static void fatal(const char *msg,...)
Definition: attributes.c:18
static CBioSource dummy
AutoPtr –.
Definition: ncbimisc.hpp:401
virtual int ReadStream(CNcbiIstream &is, EFinalize eFinalize=eFinalize_Yes)
Read an AGP file from the given input stream.
Definition: agp_util.cpp:1084
This class is used to turn an AGP file into a vector of Seq-entry's.
vector< CRef< objects::CSeq_entry > > TSeqEntryRefVec
This is the way the results will be returned Each Seq-entry contains just one Bioseq,...
TSeqEntryRefVec & GetResult(void)
This gets the results found, but don't call before finalizing.
class CAlnReader supports importing a large variety of text-based alignment formats into standard dat...
Definition: aln_reader.hpp:100
void Read(bool guess, bool generate_local_ids=false, objects::ILineErrorListener *pErrorListener=nullptr)
EReadFlags
Read the file. These are the main functions.
Definition: aln_reader.hpp:208
void SetAlphabet(const string &value)
Definition: aln_reader.hpp:371
CRef< objects::CSeq_entry > GetSeqEntry(TFastaFlags fasta_flags=objects::CFastaReader::fAddMods, objects::ILineErrorListener *pErrorListener=nullptr)
Definition: aln_reader.cpp:722
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
CReaderBase implementation that reads BED data files, either a single object or all objects found.
Definition: bed_reader.hpp:109
virtual bool ReadTrackData(ILineReader &, CRawBedTrack &, ILineErrorListener *=nullptr)
virtual bool SetAutoSql(const string &)
Definition: bed_reader.cpp:278
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors=nullptr) override
Read a single object from given line reader containing BED data.
Definition: bed_reader.cpp:267
@ fDirectedFeatureModel
Definition: bed_reader.hpp:127
Base class for reading FASTA sequences.
Definition: fasta.hpp:80
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors) override
Read an object from a given line reader, render it as a single Seq-annot, if possible.
Definition: readfeat.cpp:3606
static void AddStringFlags(const list< string > &stringFlags, TFlags &baseFlags)
Definition: readfeat.cpp:3738
Class implements different ad-hoc unreliable file format identifications.
EFormat
The formats are checked in the same order as declared here.
@ eFiveColFeatureTable
Five-column feature table.
@ eVcf
VCF, CVcfReader.
@ eGff2
GFF2, CGff2Reader, any GFF-like that doesn't fit the others.
@ eBed
UCSC BED file format, CBedReader.
@ eGtf
New GTF, CGtfReader.
@ eGvf
GVF, CGvfReader.
@ eHgvs
HGVS, CHgvsParser.
@ eAgp
AGP format assembly, AgpRead.
@ eGff3
GFF3, CGff3Reader.
@ eGtf_POISENED
Old and Dead GFF/GTF style annotations.
@ eNewick
Newick file.
@ eFasta
FASTA format sequence record, CFastaReader.
@ eUnknown
unknown format
@ eGffAugustus
GFFish output of Augustus Gene Prediction.
@ eRmo
RepeatMasker Output.
@ eUCSCRegion
UCSC Region file format.
@ eAlignment
Text alignment.
@ ePsl
PSL alignment format.
@ eBed15
UCSC BED15 or microarray format.
@ eWiggle
UCSC WIGGLE file format.
static EFormat Format(const string &path, EOnError onerror=eDefault)
Guess file format.
void ReadSeqAnnots(TAnnotList &, CNcbiIstream &, ILineErrorListener *=nullptr) override
Read all objects from given input stream, returning them as a vector of Seq-annots.
shared_ptr< CGff3LocationMerger > GetLocationMerger()
@ fGenerateChildXrefs
Definition: gtf_reader.hpp:218
IdMapper implementation using hardcoded values.
Definition: idmapper.hpp:275
IdMapper implementation using an external configuration file.
Definition: idmapper.hpp:189
size_t Count() const override
void StoreError(const ILineError &err)
void StoreMessage(const IObjtoolsMessage &message)
bool Next(const std::string &, string &)
bool Next(std::string &)
void xProcessGff2(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessWiggle(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xDumpErrors(CNcbiOstream &)
unique_ptr< CMessageListenerBase > m_pErrors
void xSetMessageListener(const CArgs &)
void xSetMapper(const CArgs &)
void xWriteObject(const CArgs &, CSerialObject &, CNcbiOstream &)
void xProcessAgp(const CArgs &, CNcbiIstream &, CNcbiOstream &)
bool xProcessBed(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void WriteMessageImmediately(ostream &ostr, const ILineError &line_error_p)
void xProcessAlignment(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessRmo(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessDefault(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessFasta(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessBedRaw(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessGtf(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessGff3(const CArgs &, CNcbiIstream &, CNcbiOstream &)
unique_ptr< CObjtoolsListener > m_pEditErrors
void xProcessUCSCRegion(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xPostProcessAnnot(const CArgs &, CSeq_annot &, const CGff3LocationMerger *=nullptr)
CFormatGuess::EFormat m_uFormat
int Run() override
Run the application.
void xSetFormat(const CArgs &, CNcbiIstream &)
void Init() override
Initialize the application.
void xSetFlags(const CArgs &, const string &)
bool ShowingProgress() const
unique_ptr< CIdMapper > m_pMapper
bool xProcessSingleFile(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessNewick(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessWiggleRaw(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessGvf(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcess5ColFeatTable(const CArgs &, CNcbiIstream &, CNcbiOstream &)
static AutoPtr< ILineError > sCreateSimpleMessage(EDiagSev eDiagSev, const string &msg)
bool PutMessage(const IObjtoolsMessage &message)
CMultiReaderApp & m_multi_reader_app
CMyMessageListenerCustomLevel(int level, CMultiReaderApp &multi_reader_app)
void PutProgress(const string &msg, const Uint8 bytesDone, const Uint8 dummy)
This is used for processing progress messages.
CMyMessageListenerCustom(int iMaxCount, int iMaxLevel, CMultiReaderApp &multi_reader_app)
bool PutMessage(const IObjtoolsMessage &message)
void PutProgress(const string &msg, const Uint8 bytesDone, const Uint8 dummy)
This is used for processing progress messages.
bool PutError(const ILineError &err)
Store error in the container, and return true if error was stored fine, and return false if the calle...
CMultiReaderApp & m_multi_reader_app
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
virtual void Dump(CNcbiOstream &out) const
Definition: message.cpp:71
void Dump(CNcbiOstream &ostr) const
Definition: bed_reader.cpp:224
void Dump(CNcbiOstream &ostr) const
static CRef< CSeq_id > AsSeqId(const string &rawId, long flags=0, bool localInts=true)
Convert a raw ID string to a Seq-id, based in given customization flags.
Definition: read_util.cpp:89
@ fNumericIdsAsLocal
numeric identifiers are local IDs
Definition: reader_base.hpp:76
@ fAllIdsAsLocal
all identifiers are local IDs
Definition: reader_base.hpp:78
list< CRef< CSeq_annot > > TAnnotList
Definition: reader_base.hpp:90
void SetProgressReportInterval(unsigned int intv)
virtual void ReadSeqAnnots(TAnnots &annots, CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read all objects from given input stream, returning them as a vector of Seq-annots.
static CReaderBase * GetReader(CFormatGuess::EFormat format, TReaderFlags flags=0, CReaderListener *=nullptr)
Allocate a CReaderBase derived reader object based on the given file format.
virtual void Write(CNcbiOstream &out) const override
virtual EDiagSev Severity() const
Implements a concrete class for reading RepeatMasker output from tabular form and rendering it as ASN...
Definition: rm_reader.hpp:690
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pMessageListener=0)
Read an object from a given line reader, render it as a single Seq-annot, if possible.
Definition: rm_reader.cpp:775
bool IsFtable(void) const
Definition: Seq_annot.cpp:177
Base class for all serializable objects.
Definition: serialbase.hpp:150
Simple implementation of ILineReader for i(o)streams.
CStringException –.
Definition: ncbistr.hpp:4506
CRef< CSerialObject > ReadObject(ILineReader &lr, ILineErrorListener *pErrors=nullptr) override
Read an object from a given line reader, render it as the most appropriate Genbank object.
CVersionInfo –.
virtual bool ReadTrackData(ILineReader &, CRawWiggleTrack &, ILineErrorListener *=nullptr)
Interface for testing cancellation request in a long lasting operation.
Definition: icanceled.hpp:51
virtual void DumpAsXML(CNcbiOstream &out) const
Definition: line_error.hpp:372
virtual void Dump(CNcbiOstream &out) const
Definition: line_error.hpp:362
virtual EDiagSev Severity(void) const
Definition: line_error.hpp:370
@ eProblem_GeneralParsingError
Definition: line_error.hpp:106
@ eProblem_ProgressInfo
Definition: line_error.hpp:104
vector< unsigned int > TVecOfLines
Definition: line_error.hpp:128
virtual EProblem Problem(void) const =0
virtual EDiagSev GetSeverity(void) const =0
bool IsCanceled() const
static void cleanup(void)
Definition: ct_dynamic.c:30
static uch flags
CRef< objects::CBioTreeContainer > MakeDistanceSensitiveBioTreeContainer(const TPhyTreeNode *tree)
Conversion from TPhyTreeNode to CBioTreeContainer, potentially without dist feature key.
Operators to edit gaps in sequences.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:285
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:799
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1175
const string & GetProgramDisplayName(void) const
Get the application's "display" name.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
void SetVersion(const CVersionInfo &version)
Set the version number for the program.
Definition: ncbiapp.cpp:1135
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
@ eDirectory
Name of file directory.
Definition: ncbiargs.hpp:598
@ fBinary
Open file in binary mode.
Definition: ncbiargs.hpp:263
EDiagSev
Severity level for the posted diagnostics.
Definition: ncbidiag.hpp:650
@ eDiag_Info
Informational message.
Definition: ncbidiag.hpp:651
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
@ eDiag_Fatal
Fatal error – guarantees exit(or abort)
Definition: ncbidiag.hpp:655
virtual void ReportExtra(ostream &out) const
Report "non-standard" attributes.
Definition: ncbiexpt.cpp:428
#define NCBI_USER_THROW_FMT(message)
Throw a "user exception" with message processed as output to ostream.
Definition: ncbiexpt.hpp:724
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
Definition: ncbiexpt.hpp:1754
#define FORMAT(message)
Format message using iostreams library.
Definition: ncbiexpt.hpp:672
static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)
Assemble a path from basic components.
Definition: ncbifile.cpp:413
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
Definition: line_reader.cpp:49
long TFlags
binary OR of EFlags
Definition: fasta.hpp:117
virtual CRef< CSeq_entry > ReadSeqEntry(ILineReader &lr, ILineErrorListener *pErrors)
Read an object from a given line reader, render it as a single Seq-entry, if possible.
Definition: fasta.cpp:304
static void AddStringFlags(const list< string > &stringFlags, TFlags &baseFlags)
Definition: fasta.cpp:222
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
void SetMaxIDLength(Uint4 max_len)
If this is set, an exception will be thrown if a Sequence ID exceeds the given length.
Definition: fasta.cpp:485
@ fAddMods
Parse defline mods and add to SeqEntry.
Definition: fasta.hpp:104
@ fNoSplit
Don't split out ambiguous sequence regions.
Definition: fasta.hpp:99
@ fParseGaps
Make a delta sequence if gaps found.
Definition: fasta.hpp:91
@ fDisableParseRange
No ranges in seq-ids. Ranges part of seq-id instead.
Definition: fasta.hpp:114
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
bool IsOssEmpty(CNcbiOstrstream &oss)
Definition: ncbistre.hpp:831
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static int StringToNonNegativeInt(const CTempString str, TStringToNumFlags flags=0)
Convert string to non-negative integer value.
Definition: ncbistr.cpp:457
#define kEmptyStr
Definition: ncbistr.hpp:123
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3550
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3182
@ fConvErr_NoThrow
Do not throw an exception on error.
Definition: ncbistr.hpp:285
const TFtable & GetFtable(void) const
Get the variant data.
Definition: Seq_annot_.hpp:621
bool IsFtable(void) const
Check if variant Ftable is selected.
Definition: Seq_annot_.hpp:615
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
Lightweight interface for getting lines of data with minimal memory copying.
USING_SCOPE(objects)
string s_AlnErrorToString(const CAlnError &error)
Definition: multireader.cpp:95
CMultiReaderMessageListener newStyleMessageListener
int main(int argc, const char *argv[])
USING_NCBI_SCOPE
Definition: multireader.cpp:90
constexpr bool empty(list< Ts... >) noexcept
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
#define nullptr
Definition: ncbimisc.hpp:45
The NCBI C++/STL use hints.
static Format format
Definition: njn_ioutil.cpp:53
static const char * prefix[]
Definition: pcregrep.c:405
TPhyTreeNode * ReadNewickTree(CNcbiIstream &is)
Newick format input.
int offset
Definition: replacements.h:160
static const char *const features[]
Modified on Thu Mar 28 17:11:47 2024 by modify_doxy.py rev. 669887