NCBI C++ ToolKit
Classes | Public Types | Public Member Functions | Static Public Member Functions | Protected Member Functions | Static Protected Member Functions | Protected Attributes | Private Member Functions | Static Private Member Functions | List of all members
CFormatGuess Class Reference

Search Toolkit Book for CFormatGuess

Class implements different ad-hoc unreliable file format identifications. More...

#include <util/format_guess.hpp>

+ Collaboration diagram for CFormatGuess:

Classes

class  CFormatHints
 Hints for guessing formats. More...
 

Public Types

enum  EFormat {
  eUnknown = 0 , eBinaryASN = 1 , eRmo = 2 , eGtf_POISENED = 3 ,
  eGlimmer3 = 4 , eAgp = 5 , eXml = 6 , eWiggle = 7 ,
  eBed = 8 , eBed15 = 9 , eNewick = 10 , eAlignment = 11 ,
  eDistanceMatrix = 12 , eFlatFileSequence = 13 , eFiveColFeatureTable = 14 , eSnpMarkers = 15 ,
  eFasta = 16 , eTextASN = 17 , eTaxplot = 18 , ePhrapAce = 19 ,
  eTable = 20 , eGtf = 21 , eGff3 = 22 , eGff2 = 23 ,
  eHgvs = 24 , eGvf = 25 , eZip = 26 , eGZip = 27 ,
  eBZip2 = 28 , eLzo = 29 , eSra = 30 , eBam = 31 ,
  eVcf = 32 , eUCSCRegion = 33 , eGffAugustus = 34 , eJSON = 35 ,
  ePsl = 36 , eAltGraphX = 37 , eBed5FloatScore = 38 , eBedGraph = 39 ,
  eBedRnaElements = 40 , eBigBarChart = 41 , eBigBed = 42 , eBigPsl = 43 ,
  eBigChain = 44 , eBigMaf = 45 , eBigWig = 46 , eBroadPeak = 47 ,
  eChain = 48 , eClonePos = 49 , eColoredExon = 50 , eCtgPos = 51 ,
  eDownloadsOnly = 52 , eEncodeFiveC = 53 , eExpRatio = 54 , eFactorSource = 55 ,
  eGenePred = 56 , eLd2 = 57 , eNarrowPeak = 58 , eNetAlign = 59 ,
  ePeptideMapping = 60 , eRmsk = 61 , eSnake = 62 , eVcfTabix = 63 ,
  eWigMaf = 64 , eFlatFileGenbank = 65 , eFlatFileEna = 66 , eFlatFileUniProt = 67 ,
  eZstd = 68 , eFormat_max
}
 The formats are checked in the same order as declared here. More...
 
enum  ESequenceType { eUndefined , eNucleotide , eProtein }
 
enum  EMode { eQuick , eThorough }
 
enum  ESTStrictness { eST_Lax , eST_Default , eST_Strict }
 
enum  EOnError { eDefault = 0 , eThrowOnBadSource }
 

Public Member Functions

 CFormatGuess ()
 
 CFormatGuess (const string &fname)
 
 CFormatGuess (CNcbiIstream &input)
 
 ~CFormatGuess ()
 
EFormat GuessFormat (EMode)
 
bool TestFormat (EFormat, EMode)
 
EFormat GuessFormat (EOnError onerror=eDefault)
 
bool TestFormat (EFormat, EOnError onerror=eDefault)
 
CFormatHints & GetFormatHints (void)
 Get format hints. More...
 
bool IsEnabled (EFormat format) const
 Check whether testing is enabled for given format. More...
 

Static Public Member Functions

static bool IsSupportedFormat (EFormat format)
 
static ESequenceType SequenceType (const char *str, unsigned length=0, ESTStrictness strictness=eST_Default)
 Guess sequence type. More...
 
static const char * GetFormatName (EFormat format)
 
static EFormat Format (const string &path, EOnError onerror=eDefault)
 Guess file format. More...
 
static EFormat Format (CNcbiIstream &input, EOnError onerror=eDefault)
 Format prediction based on an input stream. More...
 

Protected Member Functions

void Initialize ()
 
bool EnsureTestBuffer ()
 
bool EnsureStats ()
 
bool EnsureSplitLines ()
 
bool IsAllComment ()
 
bool IsAsciiText ()
 
bool TestFormatRepeatMasker (EMode)
 
bool TestFormatPhrapAce (EMode)
 
bool TestFormatGtf (EMode)
 
bool TestFormatGvf (EMode)
 
bool TestFormatGff3 (EMode)
 
bool TestFormatGff2 (EMode)
 
bool TestFormatGlimmer3 (EMode)
 
bool TestFormatAgp (EMode)
 
bool TestFormatNewick (EMode)
 
bool TestFormatXml (EMode)
 
bool TestFormatAlignment (EMode)
 
bool TestFormatCLUSTAL (void)
 
bool TestFormatBinaryAsn (EMode)
 
bool TestFormatDistanceMatrix (EMode)
 
bool TestFormatTaxplot (EMode)
 
bool TestFormatFlatFileSequence (EMode)
 
bool TestFormatFiveColFeatureTable (EMode)
 
bool TestFormatTable (EMode)
 
bool TestFormatFasta (EMode)
 
bool TestFormatTextAsn (EMode)
 
bool TestFormatSnpMarkers (EMode)
 
bool TestFormatBed (EMode)
 
bool TestFormatBed15 (EMode)
 
bool TestFormatWiggle (EMode)
 
bool TestFormatHgvs (EMode)
 
bool TestFormatZip (EMode)
 
bool TestFormatGZip (EMode)
 
bool TestFormatZstd (EMode)
 
bool TestFormatBZip2 (EMode)
 
bool TestFormatLzo (EMode)
 
bool TestFormatSra (EMode)
 
bool TestFormatBam (EMode)
 
bool TestFormatVcf (EMode)
 
bool TestFormatAugustus (EMode)
 
bool TestFormatJson (EMode)
 
bool TestFormatPsl (EMode)
 
bool TestFormatFlatFileGenbank (EMode)
 
bool TestFormatFlatFileEna (EMode)
 
bool TestFormatFlatFileUniProt (EMode)
 
bool IsInputRepeatMaskerWithoutHeader ()
 
bool IsInputRepeatMaskerWithHeader ()
 

Static Protected Member Functions

static bool IsLineFlatFileSequence (const std::string &)
 
static bool IsSampleNewick (const std::string &)
 
static bool IsLabelNewick (const std::string &)
 
static bool IsLineAgp (const std::string &)
 
static bool IsLineGlimmer3 (const std::string &)
 
static bool IsLineGtf (const std::string &)
 
static bool IsLineGvf (const std::string &)
 
static bool IsLineGff3 (const std::string &)
 
static bool IsLineGff2 (const std::string &)
 
static bool IsLineAugustus (const std::string &)
 
static bool IsLinePhrapId (const std::string &)
 
static bool IsLineRmo (const std::string &)
 
static bool IsAsnComment (const vector< string > &)
 
static bool IsLineHgvs (const std::string &)
 
static bool IsLinePsl (const std::string &, bool ignoreFirstColumn)
 

Protected Attributes

CNcbiIstream & m_Stream
 
bool m_bOwnsStream
 
char * m_pTestBuffer
 
streamsize m_iTestBufferSize
 
streamsize m_iTestDataSize
 
bool m_bStatsAreValid
 
bool m_bSplitDone
 
unsigned int m_iStatsCountData
 
unsigned int m_iStatsCountAlNumChars
 
unsigned int m_iStatsCountDnaChars
 
unsigned int m_iStatsCountAaChars
 
unsigned int m_iStatsCountBraces
 
std::list< std::string > m_TestLines
 
CFormatHints m_Hints
 

Private Member Functions

bool x_TestFormat (EFormat format, EMode mode)
 
bool x_TestTableDelimiter (const string &delims)
 
bool x_CheckJsonStart (const string &testString) const
 
void x_StripJsonStrings (string &testString) const
 
size_t x_FindNextJsonStringStop (const string &input, const size_t from_pos) const
 
void x_FindJsonStringLimits (const string &testString, list< size_t > &limits) const
 
bool x_CheckStripJsonPunctuation (string &testString) const
 
size_t x_StripJsonPunctuation (string &testString) const
 
void x_StripJsonKeywords (string &testString) const
 
bool x_CheckStripJsonNumbers (string &testString) const
 
bool x_IsTruncatedJsonNumber (const string &testString) const
 
bool x_IsTruncatedJsonKeyword (const string &testString) const
 
bool x_IsNumber (const string &testString) const
 
bool x_IsBlankOrNumbers (const string &testString) const
 
bool x_TryProcessCLUSTALSeqData (const string &line, string &id, size_t &seg_length) const
 
bool x_LooksLikeCLUSTALConservedInfo (const string &line) const
 

Static Private Member Functions

static bool x_TestInput (CNcbiIstream &input, EOnError onerror)
 

Detailed Description

Class implements different ad-hoc unreliable file format identifications.

Definition at line 50 of file format_guess.hpp.

Member Enumeration Documentation

◆ EFormat

The formats are checked in the same order as declared here.

Enumerator
eUnknown 

unknown format

eBinaryASN 

Binary ASN.1.

eRmo 

RepeatMasker Output.

eGtf_POISENED 

Old and Dead GFF/GTF style annotations.

eGlimmer3 

Glimmer3 predictions.

eAgp 

AGP format assembly, AgpRead.

eXml 

XML.

eWiggle 

UCSC WIGGLE file format.

eBed 

UCSC BED file format, CBedReader.

eBed15 

UCSC BED15 or microarray format.

eNewick 

Newick file.

eAlignment 

Text alignment.

eDistanceMatrix 

Distance matrix file.

eFlatFileSequence 

GenBank/GenPept/DDBJ/EMBL flat-file sequence portion.

eFiveColFeatureTable 

Five-column feature table.

eSnpMarkers 

SNP Marker flat file.

eFasta 

FASTA format sequence record, CFastaReader.

eTextASN 

Text ASN.1.

eTaxplot 

Taxplot file.

ePhrapAce 

Phrap ACE assembly file.

eTable 

Generic table.

eGtf 

New GTF, CGtfReader.

eGff3 

GFF3, CGff3Reader.

eGff2 

GFF2, CGff2Reader, any GFF-like that doesn't fit the others.

eHgvs 

HGVS, CHgvsParser.

eGvf 

GVF, CGvfReader.

eZip 

zip compressed file

eGZip 

GNU zip compressed file.

eBZip2 

bzip2 compressed file

eLzo 

lzo compressed file

eSra 

INSDC Sequence Read Archive file.

eBam 

Binary alignment/map file.

eVcf 

VCF, CVcfReader.

eUCSCRegion 

UCSC Region file format.

eGffAugustus 

GFFish output of Augustus Gene Prediction.

eJSON 

JSON.

ePsl 

PSL alignment format.

eAltGraphX 
eBed5FloatScore 
eBedGraph 
eBedRnaElements 
eBigBarChart 
eBigBed 
eBigPsl 
eBigChain 
eBigMaf 
eBigWig 
eBroadPeak 
eChain 
eClonePos 
eColoredExon 
eCtgPos 
eDownloadsOnly 
eEncodeFiveC 
eExpRatio 
eFactorSource 
eGenePred 
eLd2 
eNarrowPeak 
eNetAlign 
ePeptideMapping 
eRmsk 
eSnake 
eVcfTabix 
eWigMaf 
eFlatFileGenbank 
eFlatFileEna 
eFlatFileUniProt 
eZstd 

Zstandard (zstd) compressed data.

eFormat_max 

Max value of EFormat.

Definition at line 54 of file format_guess.hpp.

◆ EMode

Enumerator
eQuick 
eThorough 

Definition at line 150 of file format_guess.hpp.

◆ EOnError

Enumerator
eDefault 

Return eUnknown.

eThrowOnBadSource 

Throw an exception if the data source (stream, file) can't be read.

Definition at line 161 of file format_guess.hpp.

◆ ESequenceType

Enumerator
eUndefined 
eNucleotide 
eProtein 

Definition at line 144 of file format_guess.hpp.

◆ ESTStrictness

Enumerator
eST_Lax 

Implement historic behavior, risking false positives.

eST_Default 

Be relatively strict, but still allow for typos.

eST_Strict 

Require 100% encodability of printable non-digits.

Definition at line 155 of file format_guess.hpp.

Constructor & Destructor Documentation

◆ CFormatGuess() [1/3]

CFormatGuess::CFormatGuess ( )

Definition at line 397 of file format_guess.cpp.

References Initialize().

◆ CFormatGuess() [2/3]

CFormatGuess::CFormatGuess ( const string & fname)

Definition at line 406 of file format_guess.cpp.

References Initialize().

◆ CFormatGuess() [3/3]

CFormatGuess::CFormatGuess ( CNcbiIstream & input)
Note
Data format detection methods GuessFormat() and TestFormat() take care to push whatever data they read back to the stream using CStreamUtils::Stepback()

Definition at line 415 of file format_guess.cpp.

References Initialize().

◆ ~CFormatGuess()

CFormatGuess::~CFormatGuess ( )

Definition at line 424 of file format_guess.cpp.

References m_bOwnsStream, m_pTestBuffer, and m_Stream.

Member Function Documentation

◆ EnsureSplitLines()

bool CFormatGuess::EnsureSplitLines ( )
protected

◆ EnsureStats()

bool CFormatGuess::EnsureStats ( )
protected

◆ EnsureTestBuffer()

bool CFormatGuess::EnsureTestBuffer ( )
protected

◆ Format() [1/2]

CFormatGuess::EFormat CFormatGuess::Format ( CNcbiIstream & input,
EOnError  onerror = eDefault 
)
static

Format prediction based on an input stream.

Note
On completion, the function pushes whatever data it had to read (in order to detect data format) back to the stream – using CStreamUtils::Stepback()

Definition at line 385 of file format_guess.cpp.

References GuessFormat(), and input().

◆ Format() [2/2]

CFormatGuess::EFormat CFormatGuess::Format ( const string & path,
EOnError  onerror = eDefault 
)
static

◆ GetFormatHints()

CFormatHints& CFormatGuess::GetFormatHints ( void  )
inline

◆ GetFormatName()

const char * CFormatGuess::GetFormatName ( EFormat  format)
static

◆ GuessFormat() [1/2]

CFormatGuess::EFormat CFormatGuess::GuessFormat ( EMode  )

◆ GuessFormat() [2/2]

CFormatGuess::EFormat CFormatGuess::GuessFormat ( EOnError  onerror = eDefault)
Note
If the instance of the class is built upon std::istream, then on completion this function pushes whatever data it had to read (in order to detect data format) back to the stream – using CStreamUtils::Stepback()

Definition at line 453 of file format_guess.cpp.

References eNewick, EnsureTestBuffer(), eQuick, eUnknown, f(), CFormatGuess::CFormatHints::IsDisabled(), CFormatGuess::CFormatHints::IsEmpty(), CFormatGuess::CFormatHints::IsPreferred(), m_Hints, m_Stream, sm_CheckOrder, sm_CheckOrder_Size, TestFormatNewick(), x_TestFormat(), and x_TestInput().

◆ Initialize()

void CFormatGuess::Initialize ( void  )
protected

◆ IsAllComment()

bool CFormatGuess::IsAllComment ( )
protected

◆ IsAsciiText()

bool CFormatGuess::IsAsciiText ( )
protected

Definition at line 3748 of file format_guess.cpp.

References i, isprint(), m_iTestDataSize, and m_pTestBuffer.

Referenced by IsAllComment(), and TestFormatTable().

◆ IsAsnComment()

bool CFormatGuess::IsAsnComment ( const vector< string > &  Fields)
static protected

Definition at line 3681 of file format_guess.cpp.

References NStr::StartsWith().

Referenced by TestFormatTextAsn().

◆ IsEnabled()

bool CFormatGuess::IsEnabled ( EFormat  format) const
inline

Check whether testing is enabled for given format.

Definition at line 266 of file format_guess.hpp.

References format, CFormatGuess::CFormatHints::IsDisabled(), and m_Hints.

◆ IsInputRepeatMaskerWithHeader()

bool CFormatGuess::IsInputRepeatMaskerWithHeader ( )
protected

Definition at line 2851 of file format_guess.cpp.

References NStr::FindCase(), i, m_TestLines, NPOS, and NStr::TruncateSpacesInPlace().

Referenced by TestFormatRepeatMasker().

◆ IsInputRepeatMaskerWithoutHeader()

bool CFormatGuess::IsInputRepeatMaskerWithoutHeader ( )
protected

Definition at line 2917 of file format_guess.cpp.

References IsLineRmo(), ITERATE, m_TestLines, str(), and NStr::TruncateSpaces().

Referenced by TestFormatRepeatMasker().

◆ IsLabelNewick()

bool CFormatGuess::IsLabelNewick ( const std::string & )
static protected

Definition at line 3103 of file format_guess.cpp.

References label, and NPOS.

◆ IsLineAgp()

bool CFormatGuess::IsLineAgp ( const std::string & )
static protected

◆ IsLineAugustus()

bool CFormatGuess::IsLineAugustus ( const std::string & )
static protected

◆ IsLineFlatFileSequence()

bool CFormatGuess::IsLineFlatFileSequence ( const std::string & )
static protected

Definition at line 3074 of file format_guess.cpp.

References i, isalpha(), isspace(), and NPOS.

Referenced by TestFormatFlatFileSequence().

◆ IsLineGff2()

bool CFormatGuess::IsLineGff2 ( const std::string & )
static protected

◆ IsLineGff3()

bool CFormatGuess::IsLineGff3 ( const std::string & )
static protected

◆ IsLineGlimmer3()

bool CFormatGuess::IsLineGlimmer3 ( const std::string & )
static protected

◆ IsLineGtf()

bool CFormatGuess::IsLineGtf ( const std::string & )
static protected

◆ IsLineGvf()

bool CFormatGuess::IsLineGvf ( const std::string & )
static protected

◆ IsLineHgvs()

bool CFormatGuess::IsLineHgvs ( const std::string & )
static protected

Definition at line 3795 of file format_guess.cpp.

References isalnum(), and ITERATE.

Referenced by TestFormatHgvs().

◆ IsLinePhrapId()

bool CFormatGuess::IsLinePhrapId ( const std::string & )
static protected

◆ IsLinePsl()

bool CFormatGuess::IsLinePsl ( const std::string & ,
bool  ignoreFirstColumn 
)
static protected

◆ IsLineRmo()

bool CFormatGuess::IsLineRmo ( const std::string & )
static protected

◆ IsSampleNewick()

bool CFormatGuess::IsSampleNewick ( const std::string & )
static protected

Definition at line 2947 of file format_guess.cpp.

References NStr::TruncateSpaces().

Referenced by TestFormatNewick().

◆ IsSupportedFormat()

bool CFormatGuess::IsSupportedFormat ( EFormat  format)
static

Definition at line 434 of file format_guess.cpp.

References format, i, sm_CheckOrder, and sm_CheckOrder_Size.

◆ SequenceType()

CFormatGuess::ESequenceType CFormatGuess::SequenceType ( const char *  str,
unsigned  length = 0,
ESTStrictness  strictness = eST_Default 
)
static

Guess sequence type.

The function examines the alphabet used in the input and determines whether the source is a nucleotide or a protein sequence.

Definition at line 308 of file format_guess.cpp.

References eNucleotide, eProtein, eST_Default, eST_Lax, eST_Strict, eUndefined, fAlpha, fDigit, fDNA_Ambig_Alphabet, fDNA_Main_Alphabet, fProtein_Alphabet, fSpace, i, init_symbol_type_table(), str(), and symbol_type_table.

Referenced by CFastaReader::AssignMolType(), CPsiBlastValidate::QueryFactory(), CAlnReader::x_GetSequenceMolType(), and x_TryProcessCLUSTALSeqData().

◆ TestFormat() [1/2]

bool CFormatGuess::TestFormat ( EFormat  format,
EMode   
)

Definition at line 495 of file format_guess.cpp.

References eDefault, and format.

Referenced by BOOST_AUTO_TEST_CASE().

◆ TestFormat() [2/2]

bool CFormatGuess::TestFormat ( EFormat  format,
EOnError  onerror = eDefault 
)
Note
If the instance of the class is built upon std::istream, then on completion this function pushes whatever data it had to read (in order to detect data format) back to the stream – using CStreamUtils::Stepback()

Definition at line 501 of file format_guess.cpp.

References eQuick, eUnknown, format, m_Stream, x_TestFormat(), and x_TestInput().

◆ TestFormatAgp()

bool CFormatGuess::TestFormatAgp ( EMode  )
protected

Definition at line 1042 of file format_guess.cpp.

References EnsureSplitLines(), EnsureTestBuffer(), IsLineAgp(), ITERATE, and m_TestLines.

Referenced by x_TestFormat().

◆ TestFormatAlignment()

bool CFormatGuess::TestFormatAlignment ( EMode  )
protected

Definition at line 1317 of file format_guess.cpp.

References EnsureSplitLines(), EnsureTestBuffer(), ITERATE, m_TestLines, NPOS, and TestFormatCLUSTAL().

Referenced by x_TestFormat().

◆ TestFormatAugustus()

bool CFormatGuess::TestFormatAugustus ( EMode  )
protected

◆ TestFormatBam()

bool CFormatGuess::TestFormatBam ( EMode  mode)
protected

Definition at line 2065 of file format_guess.cpp.

Referenced by x_TestFormat().

◆ TestFormatBed()

bool CFormatGuess::TestFormatBed ( EMode  )
protected

◆ TestFormatBed15()

bool CFormatGuess::TestFormatBed15 ( EMode  )
protected

◆ TestFormatBinaryAsn()

bool CFormatGuess::TestFormatBinaryAsn ( EMode  )
protected

Definition at line 1162 of file format_guess.cpp.

References eMaybe, eNo, EnsureTestBuffer(), eYes, i, isgraph(), isspace(), m_iTestDataSize, and m_pTestBuffer.

Referenced by x_TestFormat().

◆ TestFormatBZip2()

bool CFormatGuess::TestFormatBZip2 ( EMode  )
protected

Definition at line 1995 of file format_guess.cpp.

References EnsureTestBuffer(), m_iTestDataSize, and m_pTestBuffer.

Referenced by x_TestFormat().

◆ TestFormatCLUSTAL()

bool CFormatGuess::TestFormatCLUSTAL ( void  )
protected

◆ TestFormatDistanceMatrix()

bool CFormatGuess::TestFormatDistanceMatrix ( EMode  )
protected

◆ TestFormatFasta()

bool CFormatGuess::TestFormatFasta ( EMode  )
protected

◆ TestFormatFiveColFeatureTable()

bool CFormatGuess::TestFormatFiveColFeatureTable ( EMode  )
protected

Definition at line 1255 of file format_guess.cpp.

References EnsureSplitLines(), EnsureTestBuffer(), ITERATE, and m_TestLines.

Referenced by x_TestFormat().

◆ TestFormatFlatFileEna()

bool CFormatGuess::TestFormatFlatFileEna ( EMode  )
protected

◆ TestFormatFlatFileGenbank()

bool CFormatGuess::TestFormatFlatFileGenbank ( EMode  )
protected

◆ TestFormatFlatFileSequence()

bool CFormatGuess::TestFormatFlatFileSequence ( EMode  )
protected

◆ TestFormatFlatFileUniProt()

bool CFormatGuess::TestFormatFlatFileUniProt ( EMode  )
protected

◆ TestFormatGff2()

bool CFormatGuess::TestFormatGff2 ( EMode  )
protected

◆ TestFormatGff3()

bool CFormatGuess::TestFormatGff3 ( EMode  )
protected

◆ TestFormatGlimmer3()

bool CFormatGuess::TestFormatGlimmer3 ( EMode  )
protected

Definition at line 1013 of file format_guess.cpp.

References EnsureSplitLines(), EnsureTestBuffer(), IsLineGlimmer3(), and m_TestLines.

Referenced by x_TestFormat().

◆ TestFormatGtf()

bool CFormatGuess::TestFormatGtf ( EMode  )
protected

◆ TestFormatGvf()

bool CFormatGuess::TestFormatGvf ( EMode  )
protected

◆ TestFormatGZip()

bool CFormatGuess::TestFormatGZip ( EMode  )
protected

Definition at line 1954 of file format_guess.cpp.

References EnsureTestBuffer(), m_iTestDataSize, and m_pTestBuffer.

Referenced by x_TestFormat().

◆ TestFormatHgvs()

bool CFormatGuess::TestFormatHgvs ( EMode  )
protected

◆ TestFormatJson()

bool CFormatGuess::TestFormatJson ( EMode  )
protected

◆ TestFormatLzo()

bool CFormatGuess::TestFormatLzo ( EMode  )
protected

Definition at line 2020 of file format_guess.cpp.

References EnsureTestBuffer(), m_iTestDataSize, and m_pTestBuffer.

Referenced by x_TestFormat().

◆ TestFormatNewick()

bool CFormatGuess::TestFormatNewick ( EMode  )
protected

◆ TestFormatPhrapAce()

bool CFormatGuess::TestFormatPhrapAce ( EMode  )
protected

◆ TestFormatPsl()

bool CFormatGuess::TestFormatPsl ( EMode  mode)
protected

◆ TestFormatRepeatMasker()

bool CFormatGuess::TestFormatRepeatMasker ( EMode  )
protected

◆ TestFormatSnpMarkers()

bool CFormatGuess::TestFormatSnpMarkers ( EMode  )
protected

Definition at line 1728 of file format_guess.cpp.

References EnsureSplitLines(), EnsureTestBuffer(), ITERATE, m_TestLines, and str().

Referenced by x_TestFormat().

◆ TestFormatSra()

bool CFormatGuess::TestFormatSra ( EMode  )
protected

Definition at line 2047 of file format_guess.cpp.

References EnsureTestBuffer(), m_iTestDataSize, and m_pTestBuffer.

Referenced by x_TestFormat().

◆ TestFormatTable()

bool CFormatGuess::TestFormatTable ( EMode  )
protected

Definition at line 1580 of file format_guess.cpp.

References EnsureSplitLines(), EnsureTestBuffer(), IsAsciiText(), and x_TestTableDelimiter().

Referenced by x_TestFormat().

◆ TestFormatTaxplot()

bool CFormatGuess::TestFormatTaxplot ( EMode  )
protected

Definition at line 1720 of file format_guess.cpp.

Referenced by x_TestFormat().

◆ TestFormatTextAsn()

bool CFormatGuess::TestFormatTextAsn ( EMode  )
protected

◆ TestFormatVcf()

bool CFormatGuess::TestFormatVcf ( EMode  )
protected

Definition at line 2500 of file format_guess.cpp.

References EnsureSplitLines(), EnsureStats(), ITERATE, m_TestLines, and NStr::StartsWith().

Referenced by x_TestFormat().

◆ TestFormatWiggle()

bool CFormatGuess::TestFormatWiggle ( EMode  )
protected

◆ TestFormatXml()

bool CFormatGuess::TestFormatXml ( EMode  )
protected

◆ TestFormatZip()

bool CFormatGuess::TestFormatZip ( EMode  )
protected

Definition at line 1929 of file format_guess.cpp.

References EnsureTestBuffer(), m_iTestDataSize, and m_pTestBuffer.

Referenced by x_TestFormat().

◆ TestFormatZstd()

bool CFormatGuess::TestFormatZstd ( EMode  )
protected

Definition at line 1973 of file format_guess.cpp.

References EnsureTestBuffer(), m_iTestDataSize, and m_pTestBuffer.

Referenced by x_TestFormat().

◆ x_CheckJsonStart()

bool CFormatGuess::x_CheckJsonStart ( const string & testString) const
private

Definition at line 2789 of file format_guess.cpp.

References NPOS, and NStr::StartsWith().

Referenced by TestFormatJson().

◆ x_CheckStripJsonNumbers()

bool CFormatGuess::x_CheckStripJsonNumbers ( string & testString) const
private

◆ x_CheckStripJsonPunctuation()

bool CFormatGuess::x_CheckStripJsonPunctuation ( string & testString) const
private

Definition at line 2741 of file format_guess.cpp.

References x_StripJsonPunctuation().

Referenced by TestFormatJson().

◆ x_FindJsonStringLimits()

void CFormatGuess::x_FindJsonStringLimits ( const string & testString,
list< size_t > &  limits 
) const
private

Definition at line 2567 of file format_guess.cpp.

References NStr::Find(), input(), limits, NPOS, and x_FindNextJsonStringStop().

Referenced by x_StripJsonStrings().

◆ x_FindNextJsonStringStop()

size_t CFormatGuess::x_FindNextJsonStringStop ( const string & input,
const size_t  from_pos 
) const
private

Referenced by x_FindJsonStringLimits().

◆ x_IsBlankOrNumbers()

bool CFormatGuess::x_IsBlankOrNumbers ( const string & testString) const
private

◆ x_IsNumber()

bool CFormatGuess::x_IsNumber ( const string & testString) const
private

◆ x_IsTruncatedJsonKeyword()

bool CFormatGuess::x_IsTruncatedJsonKeyword ( const string & testString) const
private

Definition at line 2695 of file format_guess.cpp.

Referenced by TestFormatJson().

◆ x_IsTruncatedJsonNumber()

bool CFormatGuess::x_IsTruncatedJsonNumber ( const string & testString) const
private

Definition at line 2662 of file format_guess.cpp.

References x_IsNumber().

Referenced by TestFormatJson().

◆ x_LooksLikeCLUSTALConservedInfo()

bool CFormatGuess::x_LooksLikeCLUSTALConservedInfo ( const string & line) const
private

Definition at line 1348 of file format_guess.cpp.

References isspace().

Referenced by TestFormatCLUSTAL().

◆ x_StripJsonKeywords()

void CFormatGuess::x_StripJsonKeywords ( string & testString) const
private

Definition at line 2779 of file format_guess.cpp.

References NStr::ReplaceInPlace().

Referenced by TestFormatJson().

◆ x_StripJsonPunctuation()

size_t CFormatGuess::x_StripJsonPunctuation ( string & testString) const
private

Definition at line 2762 of file format_guess.cpp.

References NStr::ReplaceInPlace().

Referenced by x_CheckStripJsonPunctuation().

◆ x_StripJsonStrings()

void CFormatGuess::x_StripJsonStrings ( string & testString) const
private

Definition at line 2521 of file format_guess.cpp.

References limits, and x_FindJsonStringLimits().

Referenced by TestFormatJson().

◆ x_TestFormat()

bool CFormatGuess::x_TestFormat ( EFormat  format,
EMode  mode 
)
private

◆ x_TestInput()

bool CFormatGuess::x_TestInput ( CNcbiIstream & input,
EOnError  onerror 
)
static private

Definition at line 752 of file format_guess.cpp.

References eThrowOnBadSource, input(), and NCBI_THROW.

Referenced by GuessFormat(), and TestFormat().

◆ x_TestTableDelimiter()

bool CFormatGuess::x_TestTableDelimiter ( const string & delims)
private

◆ x_TryProcessCLUSTALSeqData()

bool CFormatGuess::x_TryProcessCLUSTALSeqData ( const string & line,
string & id,
size_t &  seg_length 
) const
private

Member Data Documentation

◆ m_bOwnsStream

bool CFormatGuess::m_bOwnsStream
protected

Definition at line 385 of file format_guess.hpp.

Referenced by ~CFormatGuess().

◆ m_bSplitDone

bool CFormatGuess::m_bSplitDone
protected

Definition at line 391 of file format_guess.hpp.

Referenced by EnsureSplitLines(), Initialize(), and IsAllComment().

◆ m_bStatsAreValid

bool CFormatGuess::m_bStatsAreValid
protected

Definition at line 390 of file format_guess.hpp.

Referenced by EnsureStats(), and Initialize().

◆ m_Hints

CFormatHints CFormatGuess::m_Hints
protected

Definition at line 398 of file format_guess.hpp.

Referenced by GetFormatHints(), GuessFormat(), IsEnabled(), and x_TestFormat().

◆ m_iStatsCountAaChars

unsigned int CFormatGuess::m_iStatsCountAaChars
protected

Definition at line 395 of file format_guess.hpp.

Referenced by EnsureStats(), Initialize(), and TestFormatFasta().

◆ m_iStatsCountAlNumChars

unsigned int CFormatGuess::m_iStatsCountAlNumChars
protected

Definition at line 393 of file format_guess.hpp.

Referenced by EnsureStats(), Initialize(), TestFormatFasta(), and TestFormatTextAsn().

◆ m_iStatsCountBraces

unsigned int CFormatGuess::m_iStatsCountBraces
protected

Definition at line 396 of file format_guess.hpp.

Referenced by EnsureStats(), Initialize(), and TestFormatTextAsn().

◆ m_iStatsCountData

unsigned int CFormatGuess::m_iStatsCountData
protected

Definition at line 392 of file format_guess.hpp.

Referenced by EnsureStats(), Initialize(), and TestFormatFasta().

◆ m_iStatsCountDnaChars

unsigned int CFormatGuess::m_iStatsCountDnaChars
protected

Definition at line 394 of file format_guess.hpp.

Referenced by EnsureStats(), Initialize(), and TestFormatFasta().

◆ m_iTestBufferSize

streamsize CFormatGuess::m_iTestBufferSize
protected

◆ m_iTestDataSize

streamsize CFormatGuess::m_iTestDataSize
protected

◆ m_pTestBuffer

char* CFormatGuess::m_pTestBuffer
protected

◆ m_Stream

CNcbiIstream& CFormatGuess::m_Stream
protected

◆ m_TestLines

std::list<std::string> CFormatGuess::m_TestLines
protected

The documentation for this class was generated from the following files:
Modified on Mon Jul 22 05:03:05 2024 by modify_doxy.py rev. 669887