97 "At ID '" <<
error.GetID() <<
"' "
98 "in category '" <<
static_cast<int>(
error.GetCategory()) <<
"' "
99 "at line " <<
error.GetLineNum() <<
": "
100 <<
error.GetMsg() <<
"'");
126 pReaderMessage->
Write(cerr);
144 EDiagSev eDiagSev,
const string & msg);
146 ostream & ostr,
const ILineError & line_error_p);
151 void Init()
override;
238 const Uint8 bytesDone,
247 FORMAT(
"Progress: " << bytesDone <<
" bytes done."));
274 const Uint8 bytesDone,
283 FORMAT(msg <<
" (" << bytesDone <<
" bytes)"));
306 arg_desc->SetUsageContext(
"",
"C++ multi format file reader");
312 arg_desc->SetCurrentGroup(
"INPUT / OUTPUT");
314 arg_desc->AddDefaultKey(
320 arg_desc->AddAlias(
"i",
"input");
322 arg_desc->AddDefaultKey(
327 arg_desc->AddAlias(
"o",
"output");
329 arg_desc->AddDefaultKey(
335 arg_desc->AddAlias(
"p",
"indir");
337 arg_desc->AddDefaultKey(
343 arg_desc->AddAlias(
"r",
"outdir");
345 arg_desc->AddDefaultKey(
351 arg_desc->SetConstraint(
355 "microarray",
"bed15",
356 "wig",
"wiggle",
"bedgraph",
357 "gtf",
"gff3",
"gff2",
"augustus",
360 "newick",
"tree",
"tre",
371 arg_desc->AddDefaultKey(
"out-format",
"FORMAT",
372 "This sets how the output of this program will be formatted. "
373 "Note that for some formats some or all values might have no effect.",
375 arg_desc->SetConstraint(
384 arg_desc->AddDefaultKey(
387 "Additional flags passed to the reader, as a single flag integer or comma separated flag names",
391 arg_desc->AddDefaultKey(
394 "Name for annotation",
397 arg_desc->AddDefaultKey(
400 "Title for annotation",
408 arg_desc->SetCurrentGroup(
"ID MAPPING");
410 arg_desc->AddDefaultKey(
413 "IdMapper config filename",
416 arg_desc->AddDefaultKey(
427 arg_desc->SetCurrentGroup(
"ERROR POLICY");
431 "write record counts to stderr",
436 "where possible, print errors, warnings, etc. as XML",
441 "check for errors only",
446 "suppress error display",
451 "accept all input format errors",
456 "accept no input format errors",
459 arg_desc->AddDefaultKey(
462 "Maximum permissible error count",
466 arg_desc->AddDefaultKey(
469 "Maximum permissible error level",
473 arg_desc->SetConstraint(
476 "info",
"warning",
"error" ) );
478 arg_desc->AddFlag(
"show-progress",
479 "This will show progress messages on stderr, if the underlying "
480 "reader supports that.");
486 arg_desc->SetCurrentGroup(
"BED AND GFF READER SPECIFIC");
490 "turn all ids into local ids",
494 "numeric-ids-as-local",
495 "turn integer ids into local ids",
500 "use BED three feature format",
505 "use BED directed feature model",
510 "clean up output for genbank submission",
514 "genbank-no-locus-tags",
515 "clean up output for genbank submission, no locus-ag needed",
520 "clean up output but without genbank specific extensions",
525 "in -genbank mode, generate any missing mRNA features",
530 "generate parent-child xrefs involving genes",
533 arg_desc->AddDefaultKey(
536 "Prefix or starting tag for auto generated locus tags",
540 arg_desc->AddOptionalKey(
543 "BED autosql definition file",
550 arg_desc->SetCurrentGroup(
"WIGGLE READER SPECIFIC");
554 "join abutting intervals",
559 "generate byte compressed data",
564 "generate real value data",
569 "generate graph object",
574 "iteratively return raw track data",
581 arg_desc->SetCurrentGroup(
"GFF READER SPECIFIC");
585 "use new gff3 reader implementation",
589 "use old gff3 reader implementation",
596 arg_desc->SetCurrentGroup(
"GTF READER SPECIFIC");
600 "generate gene->mrna and gene->cds xrefs",
607 arg_desc->SetCurrentGroup(
"ALIGNMENT READER SPECIFIC");
609 arg_desc->AddDefaultKey(
612 "Alignment gap character",
616 arg_desc->AddDefaultKey(
619 "Alignment missing indicator",
623 arg_desc->AddDefaultKey(
626 "Alignment alphabet",
629 arg_desc->SetConstraint(
635 arg_desc->AddDefaultKey(
638 "Alignment sequence ID validation scheme",
644 "treat all IDs as local IDs",
649 "ignore char settings in NEXUS format block",
655 arg_desc->SetCurrentGroup(
"FASTA READER SPECIFIC");
659 "Parse FASTA modifiers on deflines.");
663 "Make a delta sequence if gaps found.");
665 arg_desc->AddDefaultKey(
668 "Maximum permissible ID length",
671 arg_desc->SetCurrentGroup(
"");
684 string argInFile = args[
"input"].AsString();
685 string argOutFile = args[
"output"].AsString();
686 string argInDir = args[
"indir"].AsString();
687 string argOutDir = args[
"outdir"].AsString();
689 if ((argInFile !=
"-") && !argInDir.empty()) {
690 cerr <<
"multireader: command line args -input and -indir are incompatible."
694 if ((argOutFile !=
"-") && !argOutDir.empty()) {
695 cerr <<
"multireader: command line args -output and -outdir are incompatible."
699 if (argInDir.empty() && !argOutDir.empty()) {
700 cerr <<
"multireader: command line arg -outdir requires -indir."
704 if (argOutDir.empty() && !argInDir.empty()) {
705 cerr <<
"multireader: command line arg -indir requires -outdir."
709 if (args[
"genbank"].AsBoolean() && args[
"genbank-no-locus-tags"].AsBoolean()) {
710 cerr <<
"multireader: flags -genbank and -genbank-no-locus-tags are mutually "
715 if (!args[
"locus-tag"].AsString().
empty() && args[
"genbank-no-locus-tags"].AsBoolean()) {
716 cerr <<
"multireader: flags -locus-tag and -genbank-no-locus-tags are mutually "
721 if (argInFile ==
"-" && args[
"format"].AsString() ==
"guess") {
722 cerr <<
"multireader: must specify input format (\"-format ...\") if input comes from "
731 if (!argInDir.empty()) {
734 string inFile, outFile;
737 bool retIn = fileSource.
Next(inFile);
739 if (!fileDestination.
Next(inFile, outFile)) {
740 cerr <<
"multireader: unable to create output file "
741 << outFile <<
"." << endl;
749 retIn = fileSource.
Next(inFile);
774 xSetFlags(args, args[
"input"].AsString());
842 eDiag_Fatal,
"Reading aborted due to fatal error.");
846 }
catch(
const std::exception & std_ex) {
851 "Reading aborted due to fatal error: " << std_ex.what()));
877 typedef list<CRef<CSeq_annot> > ANNOTS;
880 unique_ptr<CReaderBase> pReader(
888 pReader->SetProgressReportInterval(10);
892 pReader->ReadSeqAnnots(annots, istr,
m_pErrors.get());
905 typedef list<CRef<CSeq_annot> > ANNOTS;
958 if (args[
"autosql"]) {
959 if (!reader.
SetAutoSql(args[
"autosql"].AsString())) {
1003 if (args[
"format"].AsString() ==
"gff2") {
1029 if (args[
"format"].AsString() ==
"gff2") {
1041 const auto& data = it->GetData();
1042 if (data.IsFtable()) {
1043 const auto&
features = it->GetData().GetFtable();
1101 if (args[
"format"].AsString() ==
"gff2") {
1104 if (args[
"format"].AsString() ==
"gff3") {
1126 while (!istr.eof()) {
1144 const int iErrCode = reader.
ReadStream(istr);
1145 if( iErrCode != 0 ) {
1147 "AGP reader failed with code " +
1168 while(!pLineReader->
AtEOF()) {
1191 if( ! pSeqAnnot || ! pSeqAnnot->
IsFtable() ||
1211 auto maxIdLength = args[
"max-id-length"].AsInteger();
1212 if (maxIdLength != 0) {
1228 if( args[
"parse-mods"] ) {
1233 if (args[
"aln-alphabet"].AsString() ==
"nuc") {
1238 (args[
"all-ids-as-local"].AsBoolean() ?
1247 catch (std::exception&) {
1258 string format = args[
"format"].AsString();
1271 format ==
"microarray") {
1342 const string& filename )
1370 if ( args[
"join-same"] ) {
1374 if (!args[
"as-real"]) {
1377 if ( args[
"as-graph"] ) {
1381 if ( args[
"raw"] ) {
1389 if ( args[
"all-ids-as-local"] ) {
1392 if ( args[
"numeric-ids-as-local"] ) {
1395 if ( args[
"raw"] ) {
1398 if ( args[
"3ff"] ) {
1401 if ( args[
"dfm"] ) {
1409 if ( args[
"all-ids-as-local"] ) {
1412 if ( args[
"numeric-ids-as-local"] ) {
1415 if ( args[
"child-links"] ) {
1418 if (args[
"genbank-no-locus-tags"]) {
1421 if (args[
"genbank"]) {
1423 if (args[
"locus-tag"]) {
1432 if ( args[
"gene-xrefs"] ) {
1435 if (args[
"genbank-no-locus-tags"]) {
1439 if ( args[
"genbank"] ) {
1442 if (args[
"locus-tag"]) {
1449 auto flagsStr = args[
"flags"].AsString();
1451 if( args[
"parse-mods"] ) {
1454 if( args[
"parse-gaps"] ) {
1462 list<string> stringFlags;
1470 auto flagsStr = args[
"flags"].AsString();
1475 list<string> stringFlags;
1496 static unsigned int startingLocusTagNumber = 1;
1497 static unsigned int startingFeatureId = 1;
1499 if (!args[
"genbank"].AsBoolean() && !args[
"genbank-no-locus-tags"].AsBoolean()) {
1500 if (args[
"cleanup"]) {
1516 startingLocusTagNumber = tail;
1522 "Invalid locus tag: Only one \"_\", and suffix must be numeric", 0);
1527 prefix = args[
"locus-tag"].AsString();
1530 edit::CFeatTableEdit fte(
1531 annot, 0,
prefix, startingLocusTagNumber, startingFeatureId,
m_pErrors.get());
1532 fte.InferPartials();
1533 fte.GenerateMissingParentFeatures(args[
"euk"].AsBoolean(), pLocationMerger);
1534 if (args[
"genbank"].AsBoolean() && !fte.AnnotHasAllLocusTags()) {
1536 fte.GenerateLocusTags();
1541 eDiag_Fatal,
"Need prefix to generate missing locus tags but none was provided");
1546 fte.GenerateProteinAndTranscriptIds();
1548 fte.ProcessCodonRecognized();
1549 fte.EliminateBadQualifiers();
1550 fte.SubmitFixProducts();
1552 startingLocusTagNumber = fte.PendingLocusTagNumber();
1553 startingFeatureId = fte.PendingFeatureId();
1562 EDiagSev eDiagSev,
const string & msg)
1569 CLineErrorForMsg(
EDiagSev eDiagSev,
const string & msg)
1583 ostream & ostr,
const ILineError & line_error)
1591 line_error.
Dump(ostr);
1609 const string out_format = args[
"out-format"].AsString();
1610 unique_ptr<MSerial_Format> pOutFormat;
1611 if( out_format ==
"asn_text" ) {
1613 }
else if( out_format ==
"asn_binary" ) {
1615 }
else if( out_format ==
"xml" ) {
1617 }
else if( out_format ==
"json" ) {
1622 ostr << *pOutFormat << object;
1632 string strBuild = args[
"genome"].AsString();
1633 string strMapFile = args[
"mapfile"].AsString();
1635 if (strBuild.empty() && strMapFile.empty()) {
1638 if (!strMapFile.empty()) {
1661 if ( args[
"noerrors"] ) {
1666 if ( args[
"strict"] ) {
1668 }
else if ( args[
"lenient"] ) {
1671 int iMaxErrorCount = args[
"max-error-count"].AsInteger();
1673 string strMaxErrorLevel = args[
"max-error-level"].AsString();
1674 if ( strMaxErrorLevel ==
"info" ) {
1677 else if ( strMaxErrorLevel ==
"error" ) {
1681 if ( iMaxErrorCount == -1 ) {
1687 iMaxErrorCount, iMaxErrorLevel, *
this));
1711 int main(
int argc,
const char* argv[])
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static void fatal(const char *msg,...)
virtual int ReadStream(CNcbiIstream &is, EFinalize eFinalize=eFinalize_Yes)
Read an AGP file from the given input stream.
This class is used to turn an AGP file into a vector of Seq-entry's.
vector< CRef< objects::CSeq_entry > > TSeqEntryRefVec
This is the way the results will be returned. Each Seq-entry contains just one Bioseq,...
TSeqEntryRefVec & GetResult(void)
This gets the results found, but don't call before finalizing.
class CAlnReader supports importing a large variety of text-based alignment formats into standard dat...
void Read(bool guess, bool generate_local_ids=false, objects::ILineErrorListener *pErrorListener=nullptr)
EReadFlags
Read the file. These are the main functions.
void SetAlphabet(const string &value)
CRef< objects::CSeq_entry > GetSeqEntry(TFastaFlags fasta_flags=objects::CFastaReader::fAddMods, objects::ILineErrorListener *pErrorListener=nullptr)
CReaderBase implementation that reads BED data files, either a single object or all objects found.
virtual bool ReadTrackData(ILineReader &, CRawBedTrack &, ILineErrorListener *=nullptr)
virtual bool SetAutoSql(const string &)
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors=nullptr) override
Read a single object from given line reader containing BED data.
Base class for reading FASTA sequences.
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors) override
Read an object from a given line reader, render it as a single Seq-annot, if possible.
static void AddStringFlags(const list< string > &stringFlags, TFlags &baseFlags)
void ReadSeqAnnots(TAnnotList &, CNcbiIstream &, ILineErrorListener *=nullptr) override
Read all objects from given input stream, returning them as a vector of Seq-annots.
shared_ptr< CGff3LocationMerger > GetLocationMerger()
IdMapper implementation using hardcoded values.
IdMapper implementation using an external configuration file.
size_t Count() const override
void StoreError(const ILineError &err)
void StoreMessage(const IObjtoolsMessage &message)
bool Next(const std::string &, string &)
void xProcessGff2(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessWiggle(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xDumpErrors(CNcbiOstream &)
unique_ptr< CMessageListenerBase > m_pErrors
void xSetMessageListener(const CArgs &)
void xSetMapper(const CArgs &)
void xWriteObject(const CArgs &, CSerialObject &, CNcbiOstream &)
void xProcessAgp(const CArgs &, CNcbiIstream &, CNcbiOstream &)
bool xProcessBed(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void WriteMessageImmediately(ostream &ostr, const ILineError &line_error_p)
void xProcessAlignment(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessRmo(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessDefault(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessFasta(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessBedRaw(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessGtf(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessGff3(const CArgs &, CNcbiIstream &, CNcbiOstream &)
unique_ptr< CObjtoolsListener > m_pEditErrors
void xProcessUCSCRegion(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xPostProcessAnnot(const CArgs &, CSeq_annot &, const CGff3LocationMerger *=nullptr)
CFormatGuess::EFormat m_uFormat
int Run() override
Run the application.
void xSetFormat(const CArgs &, CNcbiIstream &)
void Init() override
Initialize the application.
void xSetFlags(const CArgs &, const string &)
bool ShowingProgress() const
unique_ptr< CIdMapper > m_pMapper
bool xProcessSingleFile(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessNewick(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessWiggleRaw(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcessGvf(const CArgs &, CNcbiIstream &, CNcbiOstream &)
void xProcess5ColFeatTable(const CArgs &, CNcbiIstream &, CNcbiOstream &)
static AutoPtr< ILineError > sCreateSimpleMessage(EDiagSev eDiagSev, const string &msg)
bool PutMessage(const IObjtoolsMessage &message)
CMultiReaderApp & m_multi_reader_app
CMyMessageListenerCustomLevel(int level, CMultiReaderApp &multi_reader_app)
void PutProgress(const string &msg, const Uint8 bytesDone, const Uint8 dummy)
CMyMessageListenerCustom(int iMaxCount, int iMaxLevel, CMultiReaderApp &multi_reader_app)
bool PutMessage(const IObjtoolsMessage &message)
void PutProgress(const string &msg, const Uint8 bytesDone, const Uint8 dummy)
~CMyMessageListenerCustom()
bool PutError(const ILineError &err)
CMultiReaderApp & m_multi_reader_app
void Dump(CNcbiOstream &ostr) const
void Dump(CNcbiOstream &ostr) const
static CRef< CSeq_id > AsSeqId(const string &rawId, long flags=0, bool localInts=true)
Convert a raw ID string to a Seq-id, based in given customization flags.
@ fNumericIdsAsLocal
numeric identifiers are local IDs
@ fAllIdsAsLocal
all identifiers are local IDs
list< CRef< CSeq_annot > > TAnnotList
void SetProgressReportInterval(unsigned int intv)
virtual void ReadSeqAnnots(TAnnots &annots, CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read all objects from given input stream, returning them as a vector of Seq-annots.
static CReaderBase * GetReader(CFormatGuess::EFormat format, TReaderFlags flags=0, CReaderListener *=nullptr)
Allocate a CReaderBase derived reader object based on the given file format.
virtual void Write(CNcbiOstream &out) const override
virtual EDiagSev Severity() const
Implements a concrete class for reading RepeatMasker output from tabular form and rendering it as ASN...
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pMessageListener=0)
Read an object from a given line reader, render it as a single Seq-annot, if possible.
bool IsFtable(void) const
Base class for all serializable objects.
Simple implementation of ILineReader for i(o)streams.
CRef< CSerialObject > ReadObject(ILineReader &lr, ILineErrorListener *pErrors=nullptr) override
Read an object from a given line reader, render it as the most appropriate Genbank object.
virtual bool ReadTrackData(ILineReader &, CRawWiggleTrack &, ILineErrorListener *=nullptr)
Interface for testing cancellation request in a long lasting operation.
virtual void DumpAsXML(CNcbiOstream &out) const
virtual void Dump(CNcbiOstream &out) const
virtual EDiagSev Severity(void) const
@ eProblem_GeneralParsingError
vector< unsigned int > TVecOfLines
virtual EProblem Problem(void) const =0
static void cleanup(void)
CRef< objects::CBioTreeContainer > MakeDistanceSensitiveBioTreeContainer(const TPhyTreeNode *tree)
Conversion from TPhyTreeNode to CBioTreeContainer, potentially without dist feature key.
Operators to edit gaps in sequences.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
const string & GetProgramDisplayName(void) const
Get the application's "display" name.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void SetVersion(const CVersionInfo &version)
Set the version number for the program.
@ eInputFile
Name of file (must exist and be readable)
@ eString
An arbitrary string.
@ eOutputFile
Name of file (must be writable)
@ eInteger
Convertible into an integer number (int or Int8)
@ eDirectory
Name of file directory.
@ fBinary
Open file in binary mode.
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Fatal
Fatal error – guarantees exit (or abort)
#define NCBI_USER_THROW_FMT(message)
Throw a "user exception" with message processed as output to ostream.
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
#define FORMAT(message)
Format message using iostreams library.
static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)
Assemble a path from basic components.
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
long TFlags
binary OR of EFlags
virtual CRef< CSeq_entry > ReadSeqEntry(ILineReader &lr, ILineErrorListener *pErrors)
Read an object from a given line reader, render it as a single Seq-entry, if possible.
static void AddStringFlags(const list< string > &stringFlags, TFlags &baseFlags)
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
void SetMaxIDLength(Uint4 max_len)
If this is set, an exception will be thrown if a Sequence ID exceeds the given length.
@ fAddMods
Parse defline mods and add to SeqEntry.
@ fNoSplit
Don't split out ambiguous sequence regions.
@ fParseGaps
Make a delta sequence if gaps found.
@ fDisableParseRange
No ranges in seq-ids. Ranges part of seq-id instead.
void Reset(void)
Reset reference object.
uint64_t Uint8
8-byte (64-bit) unsigned integer
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static int StringToNonNegativeInt(const CTempString str, TStringToNumFlags flags=0)
Convert string to non-negative integer value.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
@ fConvErr_NoThrow
Do not throw an exception on error.
const TFtable & GetFtable(void) const
Get the variant data.
bool IsFtable(void) const
Check if variant Ftable is selected.
const TData & GetData(void) const
Get the Data member data.
Lightweight interface for getting lines of data with minimal memory copying.
string s_AlnErrorToString(const CAlnError &error)
CMultiReaderMessageListener newStyleMessageListener
int main(int argc, const char *argv[])
constexpr bool empty(list< Ts... >) noexcept
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
The NCBI C++/STL use hints.
static const char * prefix[]
TPhyTreeNode * ReadNewickTree(CNcbiIstream &is)
Newick format input.
static const char *const features[]