NCBI C++ ToolKit
multireader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: multireader.cpp 100597 2023-08-15 14:28:46Z foleyjp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Frank Ludwig, NCBI
27 *
28 * File Description:
29 * Reader for selected data file formats
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
36 #include <corelib/ncbiargs.hpp>
37 #include <corelib/ncbistl.hpp>
38 #include <corelib/ncbi_system.hpp>
39 #include <util/format_guess.hpp>
40 #include <util/line_reader.hpp>
41 
42 #include <serial/iterator.hpp>
43 #include <serial/objistr.hpp>
44 #include <serial/objostr.hpp>
45 #include <serial/objostrasn.hpp>
46 #include <serial/serial.hpp>
47 
53 
75 //#include <misc/hgvs/hgvs_reader.hpp>
76 
79 
83 
85 
86 #include "multifile_source.hpp"
88 
91 
92 //class CGff3LocationMerger;
93 
95 {
96  return FORMAT(
97  "At ID '" << error.GetID() << "' "
98  "in category '" << static_cast<int>(error.GetCategory()) << "' "
99  "at line " << error.GetLineNum() << ": "
100  << error.GetMsg() << "'");
101 }
102 
103 
104 // ============================================================================
105 class TestCanceler: public ICanceled
106 // ============================================================================
107 {
108  bool IsCanceled() const { return false; };
109 };
110 
111 
112 // ============================================================================
114  public CReaderListener
115 // ============================================================================
116 {
117 public:
119  const IObjtoolsMessage& message)
120  {
121  const CReaderMessage* pReaderMessage =
122  dynamic_cast<const CReaderMessage*>(&message);
123  if (!pReaderMessage || pReaderMessage->Severity() == eDiag_Fatal) {
124  throw;
125  }
126  pReaderMessage->Write(cerr);
127  return true;
128  };
129 };
130 
131 // ============================================================================
133 // ============================================================================
134  : public CNcbiApplication
135 {
136 public:
138  {
139  SetVersion(CVersionInfo(1, 0, 2));
140  }
141 
142  // Create quick simple messages
144  EDiagSev eDiagSev, const string & msg);
146  ostream & ostr, const ILineError & line_error_p);
147  bool ShowingProgress() const { return m_showingProgress; };
148 protected:
149 
150 private:
151  void Init() override;
152  int Run() override;
153 
158  bool xProcessBed(const CArgs&, CNcbiIstream&, CNcbiOstream&);
161  void xProcessGtf(const CArgs&, CNcbiIstream&, CNcbiOstream&);
163  void xProcessGff3(const CArgs&, CNcbiIstream&, CNcbiOstream&);
164  void xProcessGff2(const CArgs&, CNcbiIstream&, CNcbiOstream&);
165  void xProcessGvf(const CArgs&, CNcbiIstream&, CNcbiOstream&);
167  void xProcessAgp(const CArgs&, CNcbiIstream&, CNcbiOstream&);
169  void xProcessFasta(const CArgs&, CNcbiIstream&, CNcbiOstream&);
170  void xProcessRmo(const CArgs&, CNcbiIstream&, CNcbiOstream&);
171  //void xProcessHgvs(const CArgs&, CNcbiIstream&, CNcbiOstream&);
172 
173  void xSetFormat(const CArgs&, CNcbiIstream&);
174  void xSetFlags(const CArgs&, const string&);
175  void xSetFlags(const CArgs&, CNcbiIstream&);
176  void xSetMapper(const CArgs&);
177  void xSetMessageListener(const CArgs&);
178 
179  void xPostProcessAnnot(const CArgs&, CSeq_annot&, const CGff3LocationMerger* =nullptr);
180  void xWriteObject(const CArgs&, CSerialObject&, CNcbiOstream&);
181  void xDumpErrors(CNcbiOstream& );
182 
186  long m_iFlags;
187  string m_AnnotName;
188  string m_AnnotTitle;
191 
192  unique_ptr<CIdMapper> m_pMapper;
193  unique_ptr<CMessageListenerBase> m_pErrors;
194  unique_ptr<CObjtoolsListener> m_pEditErrors;
195 };
196 
197 
198 
199 // ============================================================================
201 // ============================================================================
202  public CMessageListenerBase
203 {
204 public:
206  int iMaxCount,
207  int iMaxLevel,
208  CMultiReaderApp & multi_reader_app)
209  : m_iMaxCount(iMaxCount), m_iMaxLevel(iMaxLevel),
210  m_multi_reader_app(multi_reader_app)
211  {};
212 
214 
215  bool
217  const IObjtoolsMessage& message)
218  {
219  StoreMessage(message);
220  return (message.GetSeverity() <= m_iMaxLevel) && (Count() < m_iMaxCount);
221  };
222 
223  bool
225  const ILineError& err)
226  {
229  return true;
230  }
231  StoreError(err);
232  return (err.Severity() <= m_iMaxLevel) && (Count() < m_iMaxCount);
233  }
234 
235  void
237  const string& msg,
238  const Uint8 bytesDone,
239  const Uint8 dummy)
240  {
242  return;
243  }
244  AutoPtr<ILineError> line_error_p =
246  eDiag_Info,
247  FORMAT("Progress: " << bytesDone << " bytes done."));
248  m_multi_reader_app.WriteMessageImmediately(cerr, *line_error_p);
249  //if (bytesDone > 1000000) {
250  // bIsCanceled = true;
251  //}
252  };
253 
254 protected:
255  size_t m_iMaxCount;
258 };
259 
260 // ============================================================================
262 // ============================================================================
263  public CMessageListenerLevel
264 {
265 public:
267  int level, CMultiReaderApp & multi_reader_app)
268  : CMessageListenerLevel(level),
269  m_multi_reader_app(multi_reader_app) {};
270 
271  void
273  const string& msg,
274  const Uint8 bytesDone,
275  const Uint8 dummy)
276  {
278  return;
279  }
280  AutoPtr<ILineError> line_error_p =
282  eDiag_Info,
283  FORMAT(msg << " (" << bytesDone << " bytes)"));
284  m_multi_reader_app.WriteMessageImmediately(cerr, *line_error_p);
285  //if (bytesDone > 1000000) {
286  // bIsCanceled = true;
287  //}
288  };
289 
290 protected:
291  size_t m_iMaxCount;
294 };
295 
296 // ============================================================================
298 // ============================================================================
299 
300 // ----------------------------------------------------------------------------
302 // ----------------------------------------------------------------------------
303 {
304  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
305 
306  arg_desc->SetUsageContext("", "C++ multi format file reader");
307 
308  //
309  // input / output:
310  //
311 
312  arg_desc->SetCurrentGroup("INPUT / OUTPUT");
313 
314  arg_desc->AddDefaultKey(
315  "input",
316  "File_In",
317  "Input filename",
319  "-");
320  arg_desc->AddAlias("i", "input");
321 
322  arg_desc->AddDefaultKey(
323  "output",
324  "File_Out",
325  "Output filename",
327  arg_desc->AddAlias("o", "output");
328 
329  arg_desc->AddDefaultKey(
330  "indir",
331  "Dir_In",
332  "Input directory",
334  "");
335  arg_desc->AddAlias("p", "indir");
336 
337  arg_desc->AddDefaultKey(
338  "outdir",
339  "Dir_Out",
340  "Output directory",
342  "");
343  arg_desc->AddAlias("r", "outdir");
344 
345  arg_desc->AddDefaultKey(
346  "format",
347  "STRING",
348  "Input file format",
350  "guess");
351  arg_desc->SetConstraint(
352  "format",
353  &(*new CArgAllow_Strings,
354  "bed",
355  "microarray", "bed15",
356  "wig", "wiggle", "bedgraph",
357  "gtf", "gff3", "gff2", "augustus",
358  "gvf",
359  "agp",
360  "newick", "tree", "tre",
361  "vcf",
362  "aln", "align",
363  "fasta",
364  "5colftbl",
365  "ucsc",
366  "hgvs",
367  "psl",
368  "rmo",
369  "guess") );
370 
371  arg_desc->AddDefaultKey("out-format", "FORMAT",
372  "This sets how the output of this program will be formatted. "
373  "Note that for some formats some or all values might have no effect.",
374  CArgDescriptions::eString, "asn_text");
375  arg_desc->SetConstraint(
376  "out-format",
377  &(*new CArgAllow_Strings,
378  "asn_text",
379  "asn_binary",
380  "xml",
381  "json" ) );
382 
383 
384  arg_desc->AddDefaultKey(
385  "flags",
386  "STRING",
387  "Additional flags passed to the reader, as a single flag integer or comma separated flag names",
389  "0" );
390 
391  arg_desc->AddDefaultKey(
392  "name",
393  "STRING",
394  "Name for annotation",
396  "");
397  arg_desc->AddDefaultKey(
398  "title",
399  "STRING",
400  "Title for annotation",
402  "");
403 
404  //
405  // ID mapping:
406  //
407 
408  arg_desc->SetCurrentGroup("ID MAPPING");
409 
410  arg_desc->AddDefaultKey(
411  "mapfile",
412  "File_In",
413  "IdMapper config filename",
415 
416  arg_desc->AddDefaultKey(
417  "genome",
418  "STRING",
419  "UCSC build number",
421  "" );
422 
423  //
424  // Error policy:
425  //
426 
427  arg_desc->SetCurrentGroup("ERROR POLICY");
428 
429  arg_desc->AddFlag(
430  "dumpstats",
431  "write record counts to stderr",
432  true );
433 
434  arg_desc->AddFlag(
435  "xmlmessages",
436  "where possible, print errors, warnings, etc. as XML",
437  true );
438 
439  arg_desc->AddFlag(
440  "checkonly",
441  "check for errors only",
442  true );
443 
444  arg_desc->AddFlag(
445  "noerrors",
446  "suppress error display",
447  true );
448 
449  arg_desc->AddFlag(
450  "lenient",
451  "accept all input format errors",
452  true );
453 
454  arg_desc->AddFlag(
455  "strict",
456  "accept no input format errors",
457  true );
458 
459  arg_desc->AddDefaultKey(
460  "max-error-count",
461  "INTEGER",
462  "Maximum permissible error count",
464  "-1" );
465 
466  arg_desc->AddDefaultKey(
467  "max-error-level",
468  "STRING",
469  "Maximum permissible error level",
471  "warning" );
472 
473  arg_desc->SetConstraint(
474  "max-error-level",
475  &(*new CArgAllow_Strings,
476  "info", "warning", "error" ) );
477 
478  arg_desc->AddFlag("show-progress",
479  "This will show progress messages on stderr, if the underlying "
480  "reader supports that.");
481 
482  //
483  // bed and gff reader specific arguments:
484  //
485 
486  arg_desc->SetCurrentGroup("BED AND GFF READER SPECIFIC");
487 
488  arg_desc->AddFlag(
489  "all-ids-as-local",
490  "turn all ids into local ids",
491  true );
492 
493  arg_desc->AddFlag(
494  "numeric-ids-as-local",
495  "turn integer ids into local ids",
496  true );
497 
498  arg_desc->AddFlag(
499  "3ff",
500  "use BED three feature format",
501  true);
502 
503  arg_desc->AddFlag(
504  "dfm",
505  "use BED directed feature model",
506  true);
507 
508  arg_desc->AddFlag(
509  "genbank",
510  "clean up output for genbank submission",
511  true);
512 
513  arg_desc->AddFlag(
514  "genbank-no-locus-tags",
515  "clean up output for genbank submission, no locus-ag needed",
516  true);
517 
518  arg_desc->AddFlag(
519  "cleanup",
520  "clean up output but without genbank specific extensions",
521  true);
522 
523  arg_desc->AddFlag(
524  "euk",
525  "in -genbank mode, generate any missing mRNA features",
526  true);
527 
528  arg_desc->AddFlag(
529  "gene-xrefs",
530  "generate parent-child xrefs involving genes",
531  true);
532 
533  arg_desc->AddDefaultKey(
534  "locus-tag",
535  "STRING",
536  "Prefix or starting tag for auto generated locus tags",
538  "" );
539 
540  arg_desc->AddOptionalKey(
541  "autosql",
542  "FILENAME",
543  "BED autosql definition file",
545 
546  //
547  // wiggle reader specific arguments:
548  //
549 
550  arg_desc->SetCurrentGroup("WIGGLE READER SPECIFIC");
551 
552  arg_desc->AddFlag(
553  "join-same",
554  "join abutting intervals",
555  true );
556 
557  arg_desc->AddFlag(
558  "as-byte",
559  "generate byte compressed data",
560  true );
561 
562  arg_desc->AddFlag(
563  "as-real",
564  "generate real value data",
565  true );
566 
567  arg_desc->AddFlag(
568  "as-graph",
569  "generate graph object",
570  true );
571 
572  arg_desc->AddFlag(
573  "raw",
574  "iteratively return raw track data",
575  true );
576 
577  //
578  // gff reader specific arguments:
579  //
580 
581  arg_desc->SetCurrentGroup("GFF READER SPECIFIC");
582 
583  arg_desc->AddFlag( // no longer used, retained for backward compatibility
584  "new-code",
585  "use new gff3 reader implementation",
586  true );
587  arg_desc->AddFlag(
588  "old-code",
589  "use old gff3 reader implementation",
590  true );
591 
592  //
593  // gff reader specific arguments:
594  //
595 
596  arg_desc->SetCurrentGroup("GTF READER SPECIFIC");
597 
598  arg_desc->AddFlag( // no longer used, retained for backward compatibility
599  "child-links",
600  "generate gene->mrna and gene->cds xrefs",
601  true );
602 
603  //
604  // alignment reader specific arguments:
605  //
606 
607  arg_desc->SetCurrentGroup("ALIGNMENT READER SPECIFIC");
608 
609  arg_desc->AddDefaultKey(
610  "aln-gapchar",
611  "STRING",
612  "Alignment gap character",
614  "-");
615 
616  arg_desc->AddDefaultKey(
617  "aln-missing",
618  "STRING",
619  "Alignment missing indicator",
621  "");
622 
623  arg_desc->AddDefaultKey(
624  "aln-alphabet",
625  "STRING",
626  "Alignment alphabet",
628  "prot");
629  arg_desc->SetConstraint(
630  "aln-alphabet",
631  &(*new CArgAllow_Strings,
632  "nuc",
633  "prot") );
634 
635  arg_desc->AddDefaultKey(
636  "aln-idval",
637  "STRING",
638  "Alignment sequence ID validation scheme",
640  "");
641 
642  arg_desc->AddFlag(
643  "force-local-ids",
644  "treat all IDs as local IDs",
645  true);
646 
647  arg_desc->AddFlag(
648  "ignore-nexus-info",
649  "ignore char settings in NEXUS format block",
650  true);
651  //
652  // FASTA reader specific arguments:
653  //
654 
655  arg_desc->SetCurrentGroup("FASTA READER SPECIFIC");
656 
657  arg_desc->AddFlag(
658  "parse-mods",
659  "Parse FASTA modifiers on deflines.");
660 
661  arg_desc->AddFlag(
662  "parse-gaps",
663  "Make a delta sequence if gaps found.");
664 
665  arg_desc->AddDefaultKey(
666  "max-id-length",
667  "INTEGER",
668  "Maximum permissible ID length",
670  "0" );
671  arg_desc->SetCurrentGroup("");
672 
673  SetupArgDescriptions(arg_desc.release());
674 }
675 
676 // ----------------------------------------------------------------------------
677 int
679 // ----------------------------------------------------------------------------
680 {
681  m_iFlags = 0;
682 
683  const CArgs& args = GetArgs();
684  string argInFile = args["input"].AsString();
685  string argOutFile = args["output"].AsString();
686  string argInDir = args["indir"].AsString();
687  string argOutDir = args["outdir"].AsString();
688 
689  if ((argInFile != "-") && !argInDir.empty()) {
690  cerr << "multireader: command line args -input and -indir are incompatible."
691  << endl;
692  return 1;
693  }
694  if ((argOutFile != "-") && !argOutDir.empty()) {
695  cerr << "multireader: command line args -output and -outdir are incompatible."
696  << endl;
697  return 1;
698  }
699  if (argInDir.empty() && !argOutDir.empty()) {
700  cerr << "multireader: command line arg -outdir requires -indir."
701  << endl;
702  return 1;
703  }
704  if (argOutDir.empty() && !argInDir.empty()) {
705  cerr << "multireader: command line arg -indir requires -outdir."
706  << endl;
707  return 1;
708  }
709  if (args["genbank"].AsBoolean() && args["genbank-no-locus-tags"].AsBoolean()) {
710  cerr << "multireader: flags -genbank and -genbank-no-locus-tags are mutually "
711  "exclusive"
712  << endl;
713  return 1;
714  }
715  if (!args["locus-tag"].AsString().empty() && args["genbank-no-locus-tags"].AsBoolean()) {
716  cerr << "multireader: flags -locus-tag and -genbank-no-locus-tags are mutually "
717  "exclusive"
718  << endl;
719  return 1;
720  }
721  if (argInFile == "-" && args["format"].AsString() == "guess") {
722  cerr << "multireader: must specify input format (\"-format ...\") if input comes from "
723  "console or pipe"
724  << endl;
725  return 1;
726  }
727 
728  xSetMapper(args);
729  xSetMessageListener(args);
730 
731  if (!argInDir.empty()) {
732  // with tests above, establishes multifile operation
733  string inFilePattern = CDirEntry::MakePath(argInDir, "*", "gff3");
734  string inFile, outFile;
735  CMultiFileSource fileSource(inFilePattern);
736  CMultiFileDestination fileDestination(argOutDir);
737  bool retIn = fileSource.Next(inFile);
738  while (retIn) {
739  if (!fileDestination.Next(inFile, outFile)) {
740  cerr << "multireader: unable to create output file "
741  << outFile << "." << endl;
742  return 1;
743  }
744  CNcbiIfstream istr(inFile, IOS_BASE::binary);
745  CNcbiOfstream ostr(outFile);
746  if (!xProcessSingleFile(args, istr, ostr)) {
747  return 1;
748  }
749  retIn = fileSource.Next(inFile);
750  }
751  }
752  else {
753  // at this point, implies single file operation
754  CNcbiIstream& istr = args["input"].AsInputFile(CArgValue::fBinary);
755  CNcbiOstream& ostr = args["output"].AsOutputFile();
756  if (!xProcessSingleFile(args, istr, ostr)) {
757  return 1;
758  }
759  }
760  return 0;
761 }
762 
763 // -----------------------------------------------------------------------------
764 bool
766  const CArgs& args,
767  CNcbiIstream& istr,
768  CNcbiOstream& ostr)
769 // -----------------------------------------------------------------------------
770 {
771  bool retCode = true;
772 
773  try {
774  xSetFlags(args, args["input"].AsString());
775  switch( m_uFormat ) {
776  default:
777  xProcessDefault(args, istr, ostr);
778  break;
781  xProcessWiggleRaw(args, istr, ostr);
782  }
783  else {
784  xProcessWiggle(args, istr, ostr);
785  }
786  break;
787  case CFormatGuess::eBed:
789  xProcessBedRaw(args, istr, ostr);
790  }
791  else {
792  retCode = xProcessBed(args, istr, ostr);
793  }
794  break;
796  xProcessUCSCRegion(args, istr, ostr);
797  break;
798  case CFormatGuess::eGtf:
801  xProcessGtf(args, istr, ostr);
802  break;
804  xProcessNewick(args, istr, ostr);
805  break;
806  case CFormatGuess::eGff3:
807  xProcessGff3(args, istr, ostr);
808  break;
809  case CFormatGuess::eGff2:
810  xProcessGff2(args, istr, ostr);
811  break;
812  case CFormatGuess::eGvf:
813  xProcessGvf(args, istr, ostr);
814  break;
815  case CFormatGuess::eAgp:
816  xProcessAgp(args, istr, ostr);
817  break;
819  xProcessAlignment(args, istr, ostr);
820  break;
822  xProcess5ColFeatTable(args, istr, ostr);
823  break;
825  xProcessFasta(args, istr, ostr);
826  break;
827  case CFormatGuess::eRmo:
828  xProcessRmo(args, istr, ostr);
829  break;
830  //case CFormatGuess::eHgvs:
831  // xProcessHgvs(args, istr, ostr);
832  // break;
833  }
834  }
835  catch(const CReaderMessage& message) {
836  message.Dump(cerr);
837  retCode = false;
838  }
839  catch(const ILineError&) {
840  AutoPtr<ILineError> line_error_p =
842  eDiag_Fatal, "Reading aborted due to fatal error.");
843  //m_pErrors->PutError(reader_ex); // duplicate!
844  m_pErrors->PutError(*line_error_p);
845  retCode = false;
846  } catch(const std::exception & std_ex) {
847  AutoPtr<ILineError> line_error_p =
849  eDiag_Fatal,
850  FORMAT(
851  "Reading aborted due to fatal error: " << std_ex.what()));
852  m_pErrors->PutError(*line_error_p);
853  retCode = false;
854  }
855  catch(int) {
856  // hack on top of hackish reporting system
857  retCode = false;
858  }
859  catch(...) {
860  AutoPtr<ILineError> line_error_p =
862  eDiag_Fatal, "Unknown Fatal Error occurred");
863  m_pErrors->PutError(*line_error_p);
864  retCode = false;
865  }
866  xDumpErrors( cerr );
867  return retCode;
868 }
869 
870 // ----------------------------------------------------------------------------
872  const CArgs& args,
873  CNcbiIstream& istr,
874  CNcbiOstream& ostr)
875 // ----------------------------------------------------------------------------
876 {
877  typedef list<CRef<CSeq_annot> > ANNOTS;
878  ANNOTS annots;
879 
880  unique_ptr<CReaderBase> pReader(
882  if (!pReader) {
884  eDiag_Fatal, 1, "File format not supported");
885  throw(fatal);
886  }
887  if (ShowingProgress()) {
888  pReader->SetProgressReportInterval(10);
889  }
890  //TestCanceler canceler;
891  //pReader->SetCanceler(&canceler);
892  pReader->ReadSeqAnnots(annots, istr, m_pErrors.get());
893  for (CRef<CSeq_annot> cit : annots) {
894  xWriteObject(args, *cit, ostr);
895  }
896 }
897 
898 // ----------------------------------------------------------------------------
900  const CArgs& args,
901  CNcbiIstream& istr,
902  CNcbiOstream& ostr)
903 // ----------------------------------------------------------------------------
904 {
905  typedef list<CRef<CSeq_annot> > ANNOTS;
906  ANNOTS annots;
907 
909  if (ShowingProgress()) {
910  reader.SetProgressReportInterval(10);
911  }
912  //TestCanceler canceler;
913  //reader.SetCanceler(&canceler);
914  reader.ReadSeqAnnots(annots, istr, m_pErrors.get());
915  for (CRef<CSeq_annot> cit : annots) {
916  xWriteObject(args, *cit, ostr);
917  }
918 }
919 
920 // ----------------------------------------------------------------------------
922  const CArgs& args,
923  CNcbiIstream& istr,
924  CNcbiOstream& ostr)
925 // ----------------------------------------------------------------------------
926 {
927  CWiggleReader reader(m_iFlags);
928  CStreamLineReader lr(istr);
929  CRawWiggleTrack raw;
930  while (reader.ReadTrackData(lr, raw)) {
931  raw.Dump(cerr);
932  }
933 }
934 
935 // ----------------------------------------------------------------------------
937  const CArgs& args,
938  CNcbiIstream& istr,
939  CNcbiOstream& ostr)
940 {
941  // Use ReadSeqAnnot() over ReadSeqAnnots() to keep memory footprint down.
942  CUCSCRegionReader reader(m_iFlags);
943  CStreamLineReader lr(istr);
944  CRef<CSerialObject> pAnnot = reader.ReadObject(lr, m_pErrors.get());
945  if (pAnnot) {
946  xWriteObject(args, *pAnnot, ostr);
947  }
948 }
949 // ----------------------------------------------------------------------------
951  const CArgs& args,
952  CNcbiIstream& istr,
953  CNcbiOstream& ostr)
954 // ----------------------------------------------------------------------------
955 {
956  // Use ReadSeqAnnot() over ReadSeqAnnots() to keep memory footprint down.
958  if (args["autosql"]) {
959  if (!reader.SetAutoSql(args["autosql"].AsString())) {
960  return false;
961  }
962  }
963  if (ShowingProgress()) {
964  reader.SetProgressReportInterval(10);
965  }
966  //TestCanceler canceler;
967  //reader.SetCanceler(&canceler);
968  CStreamLineReader lr( istr );
969  CRef<CSeq_annot> pAnnot = reader.ReadSeqAnnot(lr, m_pErrors.get());
970  while(pAnnot) {
971  xWriteObject(args, *pAnnot, ostr);
972  pAnnot.Reset();
973  pAnnot = reader.ReadSeqAnnot(lr, m_pErrors.get());
974  }
975  return true;
976 }
977 
978 // ----------------------------------------------------------------------------
980  const CArgs& args,
981  CNcbiIstream& istr,
982  CNcbiOstream& ostr)
983 // ----------------------------------------------------------------------------
984 {
986  CStreamLineReader lr(istr);
987  CRawBedTrack raw;
988  while (reader.ReadTrackData(lr, raw)) {
989  raw.Dump(cerr);
990  }
991 }
992 
993 // ----------------------------------------------------------------------------
995  const CArgs& args,
996  CNcbiIstream& istr,
997  CNcbiOstream& ostr)
998 // ----------------------------------------------------------------------------
999 {
1000  typedef CGff2Reader::TAnnotList ANNOTS;
1001  ANNOTS annots;
1002 
1003  if (args["format"].AsString() == "gff2") { // process as plain GFF2
1004  return xProcessGff2(args, istr, ostr);
1005  }
1007  if (ShowingProgress()) {
1008  reader.SetProgressReportInterval(10);
1009  }
1010  //TestCanceler canceler;
1011  //reader.SetCanceler(&canceler);
1012  reader.ReadSeqAnnots(annots, istr, m_pErrors.get());
1013  for (CRef<CSeq_annot> it : annots) {
1014  xPostProcessAnnot(args, *it);
1015  xWriteObject(args, *it, ostr);
1016  }
1017 }
1018 
1019 // ----------------------------------------------------------------------------
1021  const CArgs& args,
1022  CNcbiIstream& istr,
1023  CNcbiOstream& ostr)
1024 // ----------------------------------------------------------------------------
1025 {
1026  typedef CGff2Reader::TAnnotList ANNOTS;
1027  ANNOTS annots;
1028 
1029  if (args["format"].AsString() == "gff2") { // process as plain GFF2
1030  return xProcessGff2(args, istr, ostr);
1031  }
1034  if (ShowingProgress()) {
1035  reader.SetProgressReportInterval(10);
1036  }
1037  //TestCanceler canceler;
1038  //reader.SetCanceler(&canceler);
1039  reader.ReadSeqAnnots(annots, istr, m_pErrors.get());
1040  for (CRef<CSeq_annot> it : annots) {
1041  const auto& data = it->GetData();
1042  if (data.IsFtable()) {
1043  const auto& features = it->GetData().GetFtable();
1044  if (features.empty()) {
1045  continue;
1046  }
1047  auto pLocationMerger = reader.GetLocationMerger();
1048  xPostProcessAnnot(args, *it, pLocationMerger.get());
1049  }
1050  else {
1051  xPostProcessAnnot(args, *it);
1052  }
1053  xWriteObject(args, *it, ostr);
1054  }
1055 }
1056 
1057 // ----------------------------------------------------------------------------
1059  const CArgs& args,
1060  CNcbiIstream& istr,
1061  CNcbiOstream& ostr)
1062 // ----------------------------------------------------------------------------
1063 {
1064  typedef CGff2Reader::TAnnotList ANNOTS;
1065  ANNOTS annots;
1066 
1068  reader.ReadSeqAnnots(annots, istr, m_pErrors.get());
1069  for (CRef<CSeq_annot> cit : annots) {
1070  xWriteObject(args, *cit, ostr);
1071  }
1072 }
1073 
1074 /*// ----------------------------------------------------------------------------
1075 void CMultiReaderApp::xProcessHgvs(
1076  const CArgs& args,
1077  CNcbiIstream& istr,
1078  CNcbiOstream& ostr)
1079 // ----------------------------------------------------------------------------
1080 {
1081  typedef vector<CRef<CSeq_annot> > ANNOTS;
1082  ANNOTS annots;
1083 
1084  CHgvsReader reader;
1085  reader.ReadSeqAnnots(annots, istr, m_pErrors);
1086  for (CRef<CSeq_annot> cit : annots) {
1087  xWriteObject(args, *cit, ostr);
1088  }
1089 }*/
1090 
1091 // ----------------------------------------------------------------------------
1093  const CArgs& args,
1094  CNcbiIstream& istr,
1095  CNcbiOstream& ostr)
1096 // ----------------------------------------------------------------------------
1097 {
1098  typedef CGff2Reader::TAnnotList ANNOTS;
1099  ANNOTS annots;
1100 
1101  if (args["format"].AsString() == "gff2") { // process as plain GFF2
1102  return xProcessGff2(args, istr, ostr);
1103  }
1104  if (args["format"].AsString() == "gff3") { // process as plain GFF3
1105  return xProcessGff3(args, istr, ostr);
1106  }
1108  if (ShowingProgress()) {
1109  reader.SetProgressReportInterval(10);
1110  }
1111  //TestCanceler canceler;
1112  //reader.SetCanceler(&canceler);
1113  reader.ReadSeqAnnots(annots, istr, m_pErrors.get());
1114  for (CRef<CSeq_annot> cit : annots) {
1115  xWriteObject(args, *cit, ostr);
1116  }
1117 }
1118 
1119 // ----------------------------------------------------------------------------
1121  const CArgs& args,
1122  CNcbiIstream& istr,
1123  CNcbiOstream& ostr)
1124 // ----------------------------------------------------------------------------
1125 {
1126  while (!istr.eof()) {
1127  unique_ptr<TPhyTreeNode> pTree(ReadNewickTree(istr));
1129  pTree.get());
1130  xWriteObject(args, *btc, ostr);
1131  }
1132 }
1133 
1134 
1135 // ----------------------------------------------------------------------------
1137  const CArgs& args,
1138  CNcbiIstream& istr,
1139  CNcbiOstream& ostr)
1140 // ----------------------------------------------------------------------------
1141 {
1142  CAgpToSeqEntry reader(m_iFlags);
1143 
1144  const int iErrCode = reader.ReadStream(istr);
1145  if( iErrCode != 0 ) {
1147  "AGP reader failed with code " +
1148  NStr::NumericToString(iErrCode), 0 );
1149  }
1150 
1152  xWriteObject(args, **it, ostr);
1153  }
1154 }
1155 
1156 // ----------------------------------------------------------------------------
1158  const CArgs& args,
1159  CNcbiIstream& istr,
1160  CNcbiOstream& ostr)
1161 // ----------------------------------------------------------------------------
1162 {
1163  if (!istr) {
1164  return;
1165  }
1167  CRef<ILineReader> pLineReader = ILineReader::New(istr);
1168  while(!pLineReader->AtEOF()) {
1169  CRef<CSeq_annot> pSeqAnnot =
1170  reader.ReadSeqAnnot(*pLineReader, m_pErrors.get());
1171  if( pSeqAnnot &&
1172  pSeqAnnot->IsFtable() &&
1173  !pSeqAnnot->GetData().GetFtable().empty()) {
1174  xWriteObject(args, *pSeqAnnot, ostr);
1175  }
1176  }
1177 }
1178 
1179 // ----------------------------------------------------------------------------
1181  const CArgs& args,
1182  CNcbiIstream& istr,
1183  CNcbiOstream& ostr)
1184 // ----------------------------------------------------------------------------
1185 {
1186  CRepeatMaskerReader reader;
1187  CRef<ILineReader> pLineReader = ILineReader::New(istr);
1188  while(istr) {
1189  CRef<CSeq_annot> pSeqAnnot =
1190  reader.ReadSeqAnnot(*pLineReader, m_pErrors.get());
1191  if( ! pSeqAnnot || ! pSeqAnnot->IsFtable() ||
1192  pSeqAnnot->GetData().GetFtable().empty() )
1193  {
1194  // empty annot
1195  break;
1196  }
1197  xWriteObject(args, *pSeqAnnot, ostr);
1198  }
1199 }
1200 
1201 // ----------------------------------------------------------------------------
1203  const CArgs& args,
1204  CNcbiIstream& istr,
1205  CNcbiOstream& ostr)
1206 // ----------------------------------------------------------------------------
1207 {
1208  CStreamLineReader line_reader(istr);
1209 
1210  CFastaReader reader(line_reader, m_iFlags);
1211  auto maxIdLength = args["max-id-length"].AsInteger();
1212  if (maxIdLength != 0) {
1213  reader.SetMaxIDLength(maxIdLength);
1214  }
1215 
1216  CRef<CSeq_entry> pSeqEntry = reader.ReadSeqEntry(line_reader, m_pErrors.get());
1217  xWriteObject(args, *pSeqEntry, ostr);
1218 }
1219 
1220 // ----------------------------------------------------------------------------
1222  const CArgs& args,
1223  CNcbiIstream& istr,
1224  CNcbiOstream& ostr)
1225 // ----------------------------------------------------------------------------
1226 {
1227  CFastaReader::TFlags fFlags = 0;
1228  if( args["parse-mods"] ) {
1229  fFlags |= CFastaReader::fAddMods;
1230  }
1231  CAlnReader reader(istr);
1233  if (args["aln-alphabet"].AsString() == "nuc") {
1235  }
1236  try {
1238  (args["all-ids-as-local"].AsBoolean() ?
1241  reader.Read(flags, m_pErrors.get());
1242  CRef<CSeq_entry> pEntry = reader.GetSeqEntry(fFlags, m_pErrors.get());
1243  if (pEntry) {
1244  xWriteObject(args, *pEntry, ostr);
1245  }
1246  }
1247  catch (std::exception&) {
1248  }
1249 }
1250 
1251 // ----------------------------------------------------------------------------
1253  const CArgs& args,
1254  CNcbiIstream& istr )
1255 // ----------------------------------------------------------------------------
1256 {
1258  string format = args["format"].AsString();
1259  const string& strProgramName = GetProgramDisplayName();
1260 
1261  if (NStr::StartsWith(strProgramName, "wig") || format == "wig" ||
1262  format == "wiggle" || format == "bedgraph") {
1264  return;
1265  }
1266  if (NStr::StartsWith(strProgramName, "bed") || format == "bed") {
1268  return;
1269  }
1270  if (NStr::StartsWith(strProgramName, "b15") || format == "bed15" ||
1271  format == "microarray") {
1273  return;
1274  }
1275  if (NStr::StartsWith(strProgramName, "gtf") || format == "gtf") {
1277  return;
1278  }
1279  if (NStr::StartsWith(strProgramName, "gtf") || format == "augustus") {
1281  return;
1282  }
1283  if (NStr::StartsWith(strProgramName, "gff3") || format == "gff3") {
1285  return;
1286  }
1287  if (NStr::StartsWith(strProgramName, "gff2") || format =="gff2") {
1289  return;
1290  }
1291  if (NStr::StartsWith(strProgramName, "agp")) {
1293  return;
1294  }
1295 
1296  if (NStr::StartsWith(strProgramName, "newick") ||
1297  format == "newick" || format == "tree" || format == "tre") {
1299  return;
1300  }
1301  if (NStr::StartsWith(strProgramName, "gvf") || format == "gvf") {
1303  return;
1304  }
1305  if (NStr::StartsWith(strProgramName, "aln") || format == "align" ||
1306  format == "aln") {
1308  return;
1309  }
1310  if (NStr::StartsWith(strProgramName, "hgvs") || format == "hgvs") {
1312  return;
1313  }
1314  if( NStr::StartsWith(strProgramName, "fasta") || format == "fasta" ) {
1316  return;
1317  }
1318  if( NStr::StartsWith(strProgramName, "feattbl") || format == "5colftbl" ) {
1320  return;
1321  }
1322  if( NStr::StartsWith(strProgramName, "vcf") || format == "vcf" ) {
1324  return;
1325  }
1326  if( NStr::StartsWith(strProgramName, "ucsc") || format == "ucsc" ) {
1328  return;
1329  }
1330  if ( NStr::StartsWith(strProgramName, "psl") || format == "psl" ) {
1332  return;
1333  }
1336  }
1337 }
1338 
1339 // ----------------------------------------------------------------------------
1341  const CArgs& args,
1342  const string& filename )
1343 // ----------------------------------------------------------------------------
1344 {
1345  CNcbiIfstream istr(filename);
1346  xSetFlags(args, istr);
1347  istr.close();
1348 }
1349 
1350 // ----------------------------------------------------------------------------
1352  const CArgs& args,
1353  CNcbiIstream& istr)
1354 // ----------------------------------------------------------------------------
1355 {
1357  xSetFormat(args, istr);
1358  }
1359 
1360  m_AnnotName = args["name"].AsString();
1361  m_AnnotTitle = args["title"].AsString();
1362  m_bCheckOnly = args["checkonly"];
1363  m_bXmlMessages = args["xmlmessages"];
1364 
1365  switch( m_uFormat ) {
1366 
1367  case CFormatGuess::eWiggle:
1369  args["flags"].AsString(), NStr::fConvErr_NoThrow, 16 );
1370  if ( args["join-same"] ) {
1372  }
1373  //by default now. But still tolerate if explicitly specified.
1374  if (!args["as-real"]) {
1376  }
1377  if ( args["as-graph"] ) {
1379  }
1380 
1381  if ( args["raw"] ) {
1383  }
1384  break;
1385 
1386  case CFormatGuess::eBed:
1388  args["flags"].AsString(), NStr::fConvErr_NoThrow, 16 );
1389  if ( args["all-ids-as-local"] ) {
1391  }
1392  if ( args["numeric-ids-as-local"] ) {
1394  }
1395  if ( args["raw"] ) {
1397  }
1398  if ( args["3ff"] ) {
1400  }
1401  if ( args["dfm"] ) {
1403  }
1404  break;
1405 
1406  case CFormatGuess::eGtf:
1408  args["flags"].AsString(), NStr::fConvErr_NoThrow, 16 );
1409  if ( args["all-ids-as-local"] ) {
1411  }
1412  if ( args["numeric-ids-as-local"] ) {
1414  }
1415  if ( args["child-links"] ) {
1417  }
1418  if (args["genbank-no-locus-tags"]) {
1420  }
1421  if (args["genbank"]) {
1423  if (args["locus-tag"]) {
1425  }
1426  }
1427  break;
1428 
1429  case CFormatGuess::eGff3:
1431  args["flags"].AsString(), NStr::fConvErr_NoThrow, 16 );
1432  if ( args["gene-xrefs"] ) {
1434  }
1435  if (args["genbank-no-locus-tags"]) {
1438  }
1439  if ( args["genbank"] ) {
1442  if (args["locus-tag"]) {
1444  }
1445  }
1446  break;
1447 
1448  case CFormatGuess::eFasta: {
1449  auto flagsStr = args["flags"].AsString();
1451  if( args["parse-mods"] ) {
1453  }
1454  if( args["parse-gaps"] ) {
1456  }
1457 
1458  try {
1459  m_iFlags |= NStr::StringToInt(flagsStr, 0, 16);
1460  }
1461  catch(const CStringException&) {
1462  list<string> stringFlags;
1463  NStr::Split(flagsStr, ",", stringFlags);
1464  CFastaReader::AddStringFlags(stringFlags, m_iFlags);
1465  }
1466  break;
1467  }
1468 
1470  auto flagsStr = args["flags"].AsString();
1471  try {
1472  m_iFlags |= NStr::StringToInt(flagsStr, 0, 16);
1473  }
1474  catch (const CStringException&) {
1475  list<string> stringFlags;
1476  NStr::Split(flagsStr, ",", stringFlags);
1478  }
1479  break;
1480  }
1481 
1482  default:
1484  args["flags"].AsString(), NStr::fConvErr_NoThrow, 16 );
1485  break;
1486  }
1487 }
1488 
1489 // ----------------------------------------------------------------------------
1491  const CArgs& args,
1492  CSeq_annot& annot,
1493  const CGff3LocationMerger* pLocationMerger)
1494  // ----------------------------------------------------------------------------
1495 {
1496  static unsigned int startingLocusTagNumber = 1;
1497  static unsigned int startingFeatureId = 1;
1498 
1499  if (!args["genbank"].AsBoolean() && !args["genbank-no-locus-tags"].AsBoolean()) {
1500  if (args["cleanup"]) {
1501  CCleanup cleanup;
1502  cleanup.BasicCleanup(annot);
1503  }
1504  return;
1505  }
1506 
1507  // all other processing only applies to feature tables
1508  if (!annot.GetData().IsFtable()) {
1509  return;
1510  }
1511 
1512  string prefix, offset;
1513  if (NStr::SplitInTwo(args["locus-tag"].AsString(), "_", prefix, offset)) {
1515  if (tail != -1) {
1516  startingLocusTagNumber = tail;
1517  }
1518  else {
1519  if (!offset.empty()) {
1520  //bads news
1522  "Invalid locus tag: Only one \"_\", and suffix must be numeric", 0);
1523  }
1524  }
1525  }
1526  else {
1527  prefix = args["locus-tag"].AsString();
1528  }
1529 
1530  edit::CFeatTableEdit fte(
1531  annot, 0, prefix, startingLocusTagNumber, startingFeatureId, m_pErrors.get());
1532  fte.InferPartials();
1533  fte.GenerateMissingParentFeatures(args["euk"].AsBoolean(), pLocationMerger);
1534  if (args["genbank"].AsBoolean() && !fte.AnnotHasAllLocusTags()) {
1535  if (!prefix.empty()) {
1536  fte.GenerateLocusTags();
1537  }
1538  else {
1539  AutoPtr<ILineError> line_error_p =
1541  eDiag_Fatal, "Need prefix to generate missing locus tags but none was provided");
1542  this->WriteMessageImmediately(cerr, *line_error_p);
1543  throw(0);
1544  }
1545  }
1546  fte.GenerateProteinAndTranscriptIds();
1547  //fte.InstantiateProducts();
1548  fte.ProcessCodonRecognized();
1549  fte.EliminateBadQualifiers();
1550  fte.SubmitFixProducts();
1551 
1552  startingLocusTagNumber = fte.PendingLocusTagNumber();
1553  startingFeatureId = fte.PendingFeatureId();
1554 
1555  CCleanup cleanup;
1556  cleanup.BasicCleanup(annot);
1557 }
1558 
1559 
1560 // ----------------------------------------------------------------------------
1562  EDiagSev eDiagSev, const string & msg)
1563 // ----------------------------------------------------------------------------
1564 {
1565  // For creating quick messages generated by CMultiReaderApp itself
1566  class CLineErrorForMsg : public CLineError
1567  {
1568  public:
1569  CLineErrorForMsg(EDiagSev eDiagSev, const string & msg)
1570  : CLineError(
1572  eDiagSev,
1574  NStr::TruncateSpaces(msg),
1576  };
1577  return AutoPtr<ILineError>(new CLineErrorForMsg(eDiagSev, msg));
1578 }
1579 
1580 
1581 // ----------------------------------------------------------------------------
1583  ostream & ostr, const ILineError & line_error)
1584 // ----------------------------------------------------------------------------
1585 {
1586  // For example, progress messages and fatal errors should be written
1587  // immediately.
1588  if( m_bXmlMessages ) {
1589  line_error.DumpAsXML(ostr);
1590  } else {
1591  line_error.Dump(ostr);
1592  }
1593  ostr.flush();
1594 }
1595 
1596 // ----------------------------------------------------------------------------
1598  const CArgs & args,
1599  CSerialObject& object, // potentially modified by mapper
1600  CNcbiOstream& ostr)
1601 // ----------------------------------------------------------------------------
1602 {
1603  if (m_pMapper.get()) {
1604  m_pMapper->MapObject(object);
1605  }
1606  if (m_bCheckOnly) {
1607  return;
1608  }
1609  const string out_format = args["out-format"].AsString();
1610  unique_ptr<MSerial_Format> pOutFormat;
1611  if( out_format == "asn_text" ) {
1612  pOutFormat.reset( new MSerial_Format_AsnText );
1613  } else if( out_format == "asn_binary" ) {
1614  pOutFormat.reset( new MSerial_Format_AsnBinary );
1615  } else if( out_format == "xml" ) {
1616  pOutFormat.reset( new MSerial_Format_Xml );
1617  } else if( out_format == "json" ) {
1618  pOutFormat.reset( new MSerial_Format_Json );
1619  } else {
1620  NCBI_USER_THROW_FMT("Unsupported out-format: " << out_format);
1621  }
1622  ostr << *pOutFormat << object;
1623  ostr.flush();
1624 }
1625 
1626 // ----------------------------------------------------------------------------
1627 void
1629  const CArgs& args)
1630 // ----------------------------------------------------------------------------
1631 {
1632  string strBuild = args["genome"].AsString();
1633  string strMapFile = args["mapfile"].AsString();
1634 
1635  if (strBuild.empty() && strMapFile.empty()) {
1636  return;
1637  }
1638  if (!strMapFile.empty()) {
1639  CNcbiIfstream* pMapFile = new CNcbiIfstream(strMapFile);
1640  m_pMapper.reset(
1641  new CIdMapperConfig(*pMapFile, strBuild, false, m_pErrors.get()));
1642  }
1643  else {
1644  m_pMapper.reset(new CIdMapperBuiltin(strBuild, false, m_pErrors.get()));
1645  }
1646 }
1647 
1648 // ----------------------------------------------------------------------------
1649 void
1651  const CArgs& args )
1652 // ----------------------------------------------------------------------------
1653 {
1654 
1655  //
1656  // By default, allow all errors up to the level of "warning" but nothing
1657  // more serious. -strict trumps everything else, -lenient is the second
1658  // strongest. In the absence of -strict and -lenient, -max-error-count and
1659  // -max-error-level become additive, i.e. both are enforced.
1660  //
1661  if ( args["noerrors"] ) { // not using error policy at all
1662  return;
1663  }
1664  m_showingProgress = args["show-progress"];
1665 
1666  if ( args["strict"] ) {
1667  m_pErrors.reset(new CMessageListenerStrict());
1668  } else if ( args["lenient"] ) {
1669  m_pErrors.reset(new CMessageListenerLenient());
1670  } else {
1671  int iMaxErrorCount = args["max-error-count"].AsInteger();
1672  int iMaxErrorLevel = eDiag_Warning;
1673  string strMaxErrorLevel = args["max-error-level"].AsString();
1674  if ( strMaxErrorLevel == "info" ) {
1675  iMaxErrorLevel = eDiag_Info;
1676  }
1677  else if ( strMaxErrorLevel == "error" ) {
1678  iMaxErrorLevel = eDiag_Error;
1679  }
1680 
1681  if ( iMaxErrorCount == -1 ) {
1682  m_pErrors.reset(
1683  new CMyMessageListenerCustomLevel(iMaxErrorLevel, *this));
1684  } else {
1685  m_pErrors.reset(
1687  iMaxErrorCount, iMaxErrorLevel, *this));
1688  }
1689  }
1690  // if progress requested, wrap the m_pErrors so that progress is shown
1691  if (ShowingProgress()) {
1692  m_pErrors->SetProgressOstream( &cerr );
1693  }
1694 }
1695 
1696 // ----------------------------------------------------------------------------
1698  CNcbiOstream& ostr)
1699 // ----------------------------------------------------------------------------
1700 {
1701  if (m_pErrors && m_pErrors->Count() > 0 ) {
1702  if( m_bXmlMessages ) {
1703  m_pErrors->DumpAsXML(ostr);
1704  } else {
1705  m_pErrors->Dump(ostr);
1706  }
1707  }
1708 }
1709 
1710 // ----------------------------------------------------------------------------
1711 int main(int argc, const char* argv[])
1712 // ----------------------------------------------------------------------------
1713 {
1714  // Execute main application function
1715  return CMultiReaderApp().AppMain(argc, argv);
1716 }
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static void fatal(const char *msg,...)
Definition: attributes.c:18
static CBioSource dummy
AutoPtr –.
Definition: ncbimisc.hpp:401
virtual int ReadStream(CNcbiIstream &is, EFinalize eFinalize=eFinalize_Yes)
Read an AGP file from the given input stream.
Definition: agp_util.cpp:1082
This class is used to turn an AGP file into a vector of Seq-entry's.
vector< CRef< objects::CSeq_entry > > TSeqEntryRefVec
This is the way the results will be returned Each Seq-entry contains just one Bioseq,...
TSeqEntryRefVec & GetResult(void)
This gets the results found, but don't call before finalizing.
class CAlnReader supports importing a large variety of text-based alignment formats into standard dat...
Definition: aln_reader.hpp:100
void Read(bool guess, bool generate_local_ids=false, objects::ILineErrorListener *pErrorListener=nullptr)
EReadFlags
Read the file This are the main functions.
Definition: aln_reader.hpp:208
void SetAlphabet(const string &value)
Definition: aln_reader.hpp:371
CRef< objects::CSeq_entry > GetSeqEntry(TFastaFlags fasta_flags=objects::CFastaReader::fAddMods, objects::ILineErrorListener *pErrorListener=nullptr)
Definition: aln_reader.cpp:722
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
CReaderBase implementation that reads BED data files, either a single object or all objects found.
Definition: bed_reader.hpp:109
virtual bool ReadTrackData(ILineReader &, CRawBedTrack &, ILineErrorListener *=nullptr)
virtual bool SetAutoSql(const string &)
Definition: bed_reader.cpp:278
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors=nullptr) override
Read a single object from given line reader containing BED data.
Definition: bed_reader.cpp:267
@ fDirectedFeatureModel
Definition: bed_reader.hpp:127
Base class for reading FASTA sequences.
Definition: fasta.hpp:80
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors) override
Read an object from a given line reader, render it as a single Seq-annot, if possible.
Definition: readfeat.cpp:3605
static void AddStringFlags(const list< string > &stringFlags, TFlags &baseFlags)
Definition: readfeat.cpp:3737
Class implements different ad-hoc unreliable file format identifications.
EFormat
The formats are checked in the same order as declared here.
@ eFiveColFeatureTable
Five-column feature table.
@ eVcf
VCF, CVcfReader.
@ eGff2
GFF2, CGff2Reader, any GFF-like that doesn't fit the others.
@ eBed
UCSC BED file format, CBedReader.
@ eGtf
New GTF, CGtfReader.
@ eGvf
GVF, CGvfReader.
@ eHgvs
HGVS, CHgvsParser.
@ eAgp
AGP format assembly, AgpRead.
@ eGff3
GFF3, CGff3Reader.
@ eGtf_POISENED
Old and Dead GFF/GTF style annotations.
@ eNewick
Newick file.
@ eFasta
FASTA format sequence record, CFastaReader.
@ eUnknown
unknown format
@ eGffAugustus
GFFish output of Augustus Gene Prediction.
@ eRmo
RepeatMasker Output.
@ eUCSCRegion
USCS Region file format.
@ eAlignment
Text alignment.
@ ePsl
PSL alignment format.
@ eBed15
UCSC BED15 or microarray format.
@ eWiggle
UCSC WIGGLE file format.
static EFormat Format(const string &path, EOnError onerror=eDefault)
Guess file format.
void ReadSeqAnnots(TAnnotList &, CNcbiIstream &, ILineErrorListener *=nullptr) override
Read all objects from given insput stream, returning them as a vector of Seq-annots.
shared_ptr< CGff3LocationMerger > GetLocationMerger()
@ fGenerateChildXrefs
Definition: gtf_reader.hpp:218
IdMapper implementation using hardcoded values.
Definition: idmapper.hpp:275
IdMapper implementation using an external configuration file.
Definition: idmapper.hpp:189
size_t Count() const override
void StoreError(const ILineError &err)
void StoreMessage(const IObjtoolsMessage &message)
bool Next(const std::string &, string &)
bool Next(std::string &)
void xProcessGff2(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessWiggle(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xDumpErrors(CNcbiOstream &)
unique_ptr< CMessageListenerBase > m_pErrors
void xSetMessageListener(const CArgs &)
void xSetMapper(const CArgs &)
void xWriteObject(const CArgs &, CSerialObject &, CNcbiOstream &)
void xProcessAgp(const CArgs &, CNcbiIstream &, CNcbiOstream &)
bool xProcessBed(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void WriteMessageImmediately(ostream &ostr, const ILineError &line_error_p)
void xProcessAlignment(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessRmo(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessDefault(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessFasta(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessBedRaw(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessGtf(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessGff3(const CArgs &, CNcbiIstream &, CNcbiOstream &)
unique_ptr< CObjtoolsListener > m_pEditErrors
void xProcessUCSCRegion(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xPostProcessAnnot(const CArgs &, CSeq_annot &, const CGff3LocationMerger *=nullptr)
CFormatGuess::EFormat m_uFormat
int Run() override
Run the application.
void xSetFormat(const CArgs &, CNcbiIstream &)
void Init() override
Initialize the application.
void xSetFlags(const CArgs &, const string &)
bool ShowingProgress() const
unique_ptr< CIdMapper > m_pMapper
bool xProcessSingleFile(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessNewick(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessWiggleRaw(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessGvf(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcess5ColFeatTable(const CArgs &, CNcbiIstream &, CNcbiOstream &)
static AutoPtr< ILineError > sCreateSimpleMessage(EDiagSev eDiagSev, const string &msg)
bool PutMessage(const IObjtoolsMessage &message)
CMultiReaderApp & m_multi_reader_app
CMyMessageListenerCustomLevel(int level, CMultiReaderApp &multi_reader_app)
void PutProgress(const string &msg, const Uint8 bytesDone, const Uint8 dummy)
CMyMessageListenerCustom(int iMaxCount, int iMaxLevel, CMultiReaderApp &multi_reader_app)
bool PutMessage(const IObjtoolsMessage &message)
void PutProgress(const string &msg, const Uint8 bytesDone, const Uint8 dummy)
bool PutError(const ILineError &err)
CMultiReaderApp & m_multi_reader_app
virtual void Dump(CNcbiOstream &out) const
Definition: message.cpp:71
void Dump(CNcbiOstream &ostr) const
Definition: bed_reader.cpp:224
void Dump(CNcbiOstream &ostr) const
static CRef< CSeq_id > AsSeqId(const string &rawId, long flags=0, bool localInts=true)
Convert a raw ID string to a Seq-id, based in given customization flags.
Definition: read_util.cpp:89
@ fNumericIdsAsLocal
numeric identifiers are local IDs
Definition: reader_base.hpp:76
@ fAllIdsAsLocal
all identifiers are local IDs
Definition: reader_base.hpp:78
list< CRef< CSeq_annot > > TAnnotList
Definition: reader_base.hpp:90
void SetProgressReportInterval(unsigned int intv)
virtual void ReadSeqAnnots(TAnnots &annots, CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read all objects from given insput stream, returning them as a vector of Seq-annots.
static CReaderBase * GetReader(CFormatGuess::EFormat format, TReaderFlags flags=0, CReaderListener *=nullptr)
Allocate a CReaderBase derived reader object based on the given file format.
virtual void Write(CNcbiOstream &out) const override
virtual EDiagSev Severity() const
Implements a concrete class for reading RepeatMasker output from tabular form and rendering it as ASN...
Definition: rm_reader.hpp:690
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pMessageListener=0)
Read an object from a given line reader, render it as a single Seq-annot, if possible.
Definition: rm_reader.cpp:775
bool IsFtable(void) const
Definition: Seq_annot.cpp:177
Base class for all serializable objects.
Definition: serialbase.hpp:150
Simple implementation of ILineReader for i(o)streams.
CStringException –.
Definition: ncbistr.hpp:4505
CRef< CSerialObject > ReadObject(ILineReader &lr, ILineErrorListener *pErrors=nullptr) override
Read an object from a given line reader, render it as the most appropriate Genbank object.
CVersionInfo –.
virtual bool ReadTrackData(ILineReader &, CRawWiggleTrack &, ILineErrorListener *=nullptr)
Interface for testing cancellation request in a long lasting operation.
Definition: icanceled.hpp:51
virtual void DumpAsXML(CNcbiOstream &out) const
Definition: line_error.hpp:372
virtual void Dump(CNcbiOstream &out) const
Definition: line_error.hpp:362
virtual EDiagSev Severity(void) const
Definition: line_error.hpp:370
@ eProblem_GeneralParsingError
Definition: line_error.hpp:106
@ eProblem_ProgressInfo
Definition: line_error.hpp:104
vector< unsigned int > TVecOfLines
Definition: line_error.hpp:128
virtual EProblem Problem(void) const =0
virtual EDiagSev GetSeverity(void) const =0
bool IsCanceled() const
static void cleanup(void)
Definition: ct_dynamic.c:30
static uch flags
CRef< objects::CBioTreeContainer > MakeDistanceSensitiveBioTreeContainer(const TPhyTreeNode *tree)
Conversion from TPhyTreeNode to CBioTreeContainer, potentially without dist feature key.
Operators to edit gaps in sequences.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:285
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:799
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1175
const string & GetProgramDisplayName(void) const
Get the application's "display" name.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
void SetVersion(const CVersionInfo &version)
Set the version number for the program.
Definition: ncbiapp.cpp:1135
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
@ eDirectory
Name of file directory.
Definition: ncbiargs.hpp:598
@ fBinary
Open file in binary mode.
Definition: ncbiargs.hpp:263
EDiagSev
Severity level for the posted diagnostics.
Definition: ncbidiag.hpp:650
@ eDiag_Info
Informational message.
Definition: ncbidiag.hpp:651
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
@ eDiag_Fatal
Fatal error – guarantees exit(or abort)
Definition: ncbidiag.hpp:655
#define NCBI_USER_THROW_FMT(message)
Throw a "user exception" with message processed as output to ostream.
Definition: ncbiexpt.hpp:724
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
Definition: ncbiexpt.hpp:1754
#define FORMAT(message)
Format message using iostreams library.
Definition: ncbiexpt.hpp:672
static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)
Assemble a path from basic components.
Definition: ncbifile.cpp:413
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
Definition: line_reader.cpp:49
long TFlags
binary OR of EFlags
Definition: fasta.hpp:117
virtual CRef< CSeq_entry > ReadSeqEntry(ILineReader &lr, ILineErrorListener *pErrors)
Read an object from a given line reader, render it as a single Seq-entry, if possible.
Definition: fasta.cpp:304
static void AddStringFlags(const list< string > &stringFlags, TFlags &baseFlags)
Definition: fasta.cpp:222
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
void SetMaxIDLength(Uint4 max_len)
If this is set, an exception will be thrown if a Sequence ID exceeds the given length.
Definition: fasta.cpp:485
@ fAddMods
Parse defline mods and add to SeqEntry.
Definition: fasta.hpp:104
@ fNoSplit
Don't split out ambiguous sequence regions.
Definition: fasta.hpp:99
@ fParseGaps
Make a delta sequence if gaps found.
Definition: fasta.hpp:91
@ fDisableParseRange
No ranges in seq-ids. Ranges part of seq-id instead.
Definition: fasta.hpp:114
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static int StringToNonNegativeInt(const CTempString str, TStringToNumFlags flags=0)
Convert string to non-negative integer value.
Definition: ncbistr.cpp:457
#define kEmptyStr
Definition: ncbistr.hpp:123
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5411
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3550
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3182
@ fConvErr_NoThrow
Do not throw an exception on error.
Definition: ncbistr.hpp:285
const TFtable & GetFtable(void) const
Get the variant data.
Definition: Seq_annot_.hpp:621
bool IsFtable(void) const
Check if variant Ftable is selected.
Definition: Seq_annot_.hpp:615
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
Lightweight interface for getting lines of data with minimal memory copying.
USING_SCOPE(objects)
string s_AlnErrorToString(const CAlnError &error)
Definition: multireader.cpp:94
CMultiReaderMessageListener newStyleMessageListener
int main(int argc, const char *argv[])
USING_NCBI_SCOPE
Definition: multireader.cpp:89
constexpr bool empty(list< Ts... >) noexcept
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
#define nullptr
Definition: ncbimisc.hpp:45
The NCBI C++/STL use hints.
static Format format
Definition: njn_ioutil.cpp:53
static const char * prefix[]
Definition: pcregrep.c:405
TPhyTreeNode * ReadNewickTree(CNcbiIstream &is)
Newick format input.
int offset
Definition: replacements.h:160
static const char *const features[]
Modified on Sat Dec 09 04:48:40 2023 by modify_doxy.py rev. 669887