NCBI C++ ToolKit
Classes | Typedefs | Enumerations | Enumerator | Functions | Variables
Regular Expressions
+ Collaboration diagram for Regular Expressions:

Classes

class  CRegexp
 CRegexp –. More...
 
class  CRegexpUtil
 CRegexpUtil –. More...
 
class  CRegexpException
 CRegexpException –. More...
 
class  CRegexpTemplateTester
 CRegexpTemplateTester –. More...
 
class  CRegexpTemplateTesterException
 CRegexpTemplateTesterException –. More...
 

Typedefs

typedef size_t CRegexp::TOffset
 Element type for GetResults(). More...
 
typedef unsigned int CRegexp::TCompile
 Type definitions used for code clarity. More...
 
typedef unsigned int CRegexp::TMatch
 Match options. More...
 
typedef unsigned int CRegexpTemplateTester::TFlags
 Binary OR of "EFlags". More...
 
typedef map< string, string > CRegexpTemplateTester::TVarMap
 
typedef list< string > CRegexpTemplateTester::TVarList
 

Enumerations

enum  CRegexp::ECompile {
  CRegexp::fCompile_default = 0x80000000 , CRegexp::fCompile_ignore_case = 0x80000001 , CRegexp::fCompile_dotall = 0x80000002 , CRegexp::fCompile_newline = 0x80000004 ,
  CRegexp::fCompile_ungreedy = 0x80000008 , CRegexp::fCompile_extended = 0x80000010
}
 Flags for compiling regular expressions. More...
 
enum  CRegexp::ECompile_deprecated {
  CRegexp::eCompile_default = fCompile_default , CRegexp::eCompile_ignore_case = fCompile_ignore_case , CRegexp::eCompile_dotall = fCompile_dotall , CRegexp::eCompile_newline = fCompile_newline ,
  CRegexp::eCompile_ungreedy = fCompile_ungreedy
}
 
enum  CRegexp::EMatch { CRegexp::fMatch_default = 0x80000000 , CRegexp::fMatch_not_begin = 0x80000001 , CRegexp::fMatch_not_end = 0x80000002 , CRegexp::fMatch_not_both = fMatch_not_begin | fMatch_not_end }
 Flags for matching a string against a precompiled pattern. More...
 
enum  CRegexp::EMatch_deprecated { CRegexp::eMatch_default = fMatch_default , CRegexp::eMatch_not_begin = fMatch_not_begin , CRegexp::eMatch_not_end = fMatch_not_end , CRegexp::eMatch_not_both = fMatch_not_both }
 
enum  CRegexpUtil::ERange { CRegexpUtil::eInside , CRegexpUtil::eOutside }
 Range processing type. More...
 
enum  CRegexpException::EErrCode { CRegexpException::eCompile , CRegexpException::eBadFlags }
 
enum  CRegexpTemplateTester::EFlags { CRegexpTemplateTester::fSkipEmptySourceLines = (1 << 0) , CRegexpTemplateTester::fSkipEmptyTemplateLines = (1 << 1) , CRegexpTemplateTester::fSkipEmptyLines = fSkipEmptySourceLines | fSkipEmptyTemplateLines }
 
enum  CRegexpTemplateTester::ESource { CRegexpTemplateTester::eFile , CRegexpTemplateTester::eTemplate }
 Processing source. More...
 
enum  CRegexpTemplateTester::EResult { CRegexpTemplateTester::eTemplateEOF , CRegexpTemplateTester::eStop }
 The reason of stopping x_Compare(), if no error. More...
 
enum  CRegexpTemplateTesterException::EErrCode {
  CRegexpTemplateTesterException::eOpenFile , CRegexpTemplateTesterException::eMismatchLength , CRegexpTemplateTesterException::eMismatchContent , CRegexpTemplateTesterException::eVarNotFound ,
  CRegexpTemplateTesterException::eVarErr , CRegexpTemplateTesterException::eOpUnknown , CRegexpTemplateTesterException::eOpErr , CRegexpTemplateTesterException::eOpTest
}
 Error types that tester can generate. More...
 

Functions

string ConvertDateTo_iso8601 (string const &value)
 Convert dates from an arbitrary format to corresponding ISO 8601. More...
 
pair< string, string > ConvertDateTo_iso8601_and_annotate (string const &value)
 Convert dates from an arbitrary format to corresponding ISO 8601, with annotation. More...
 
 CRegexp::CRegexp (CTempStringEx pattern, TCompile flags=fCompile_default)
 Constructor. More...
 
virtual CRegexp::~CRegexp ()
 Destructor. More...
 
void CRegexp::Set (CTempStringEx pattern, TCompile flags=fCompile_default)
 Set and compile PCRE. More...
 
CTempString CRegexp::GetMatch (CTempString str, size_t offset=0, size_t idx=0, TMatch flags=fMatch_default, bool noreturn=false)
 Get matching pattern and subpatterns. More...
 
bool CRegexp::IsMatch (CTempString str, TMatch flags=fMatch_default)
 Check existence of a substring which matches a specified pattern. More...
 
CTempString CRegexp::GetSub (CTempString str, size_t idx=0) const
 Get pattern/subpattern from previous GetMatch(). More...
 
void CRegexp::GetSub (CTempString str, size_t idx, string &dst) const
 Get pattern/subpattern from previous GetMatch(). More...
 
int CRegexp::NumFound () const
 Get number of patterns + subpatterns. More...
 
const TOffset * CRegexp::GetResults (size_t idx) const
 Get location of pattern/subpattern for the last GetMatch(). More...
 
static string CRegexp::Escape (CTempString str)
 Escape all regular expression meta characters in the string. More...
 
static string CRegexp::WildcardToRegexp (CTempString mask)
 Convert wildcard mask to regular expression. More...
 
 CRegexp::CRegexp (const CRegexp &)
 
void CRegexp::operator= (const CRegexp &)
 
void CRegexp::x_Match (CTempString str, size_t offset, TMatch flags)
 
 CRegexpUtil::CRegexpUtil (CTempString str=kEmptyStr)
 Constructor. More...
 
void CRegexpUtil::Reset (CTempString str)
 Reset the content of the string to process. More...
 
void CRegexpUtil::operator= (CTempString str)
 Reset the content of the string to process. More...
 
string CRegexpUtil::GetResult (void)
 Get result string. More...
 
 CRegexpUtil::operator string (void)
 Get result string. More...
 
bool CRegexpUtil::Exists (CTempStringEx pattern, CRegexp::TCompile compile_flags=CRegexp::fCompile_default, CRegexp::TMatch match_flags=CRegexp::fMatch_default)
 Check existence of a substring which matches a specified pattern. More...
 
string CRegexpUtil::Extract (CTempStringEx pattern, CRegexp::TCompile compile_flags=CRegexp::fCompile_default, CRegexp::TMatch match_flags=CRegexp::fMatch_default, size_t pattern_idx=0)
 Get matching pattern/subpattern from string. More...
 
size_t CRegexpUtil::Replace (CTempStringEx search, CTempString replace, CRegexp::TCompile compile_flags=CRegexp::fCompile_default, CRegexp::TMatch match_flags=CRegexp::fMatch_default, size_t max_replace=0)
 Replace occurrences of a substring within a string by pattern. More...
 
void CRegexpUtil::SetRange (CTempStringEx addr_start=kEmptyStr, CTempStringEx addr_end=kEmptyStr, CTempString delimiter="\n")
 Set new range for range-dependent functions. More...
 
void CRegexpUtil::ClearRange (void)
 Clear range for range-dependent functions. More...
 
size_t CRegexpUtil::ReplaceRange (CTempStringEx search, CTempString replace, CRegexp::TCompile compile_flags=CRegexp::fCompile_default, CRegexp::TMatch match_flags=CRegexp::fMatch_default, CRegexpUtil::ERange process_within=eInside, size_t max_replace=0)
 Replace all occurrences of a substring within a string by pattern. More...
 
void CRegexpUtil::x_Divide (CTempString delimiter)
 Divide source string into substrings by delimiter for separate processing. More...
 
void CRegexpUtil::x_Divide (void)
 
void CRegexpUtil::x_Join (void)
 Join substrings back to entire string. More...
 
virtual const char * CRegexpException::GetErrCodeString (void) const override
 Get error code interpreted as text. More...
 
 CRegexpException::NCBI_EXCEPTION_DEFAULT (CRegexpException, CException)
 
 CRegexpTemplateTester::CRegexpTemplateTester (TFlags flags=0)
 Default constructor. More...
 
void CRegexpTemplateTester::Compare (const string &file_path, const string &template_path)
 Compare file against template (file version). More...
 
void CRegexpTemplateTester::Compare (istream &file_stream, istream &template_stream)
 Compare file against template (stream version). More...
 
void CRegexpTemplateTester::SetVarScope (string &start, string &end)
 Change strings defining start/end of variables. More...
 
void CRegexpTemplateTester::SetCommentStart (string &str)
 Change string defining start of comments line in templates. More...
 
void CRegexpTemplateTester::SetCommandStart (string &str)
 Change string defining start of template commands and operations. More...
 
void CRegexpTemplateTester::SetDelimiters (string &str)
 Change delimiters string, used for comparing data and templates. More...
 
void CRegexpTemplateTester::PrintVars (void) const
 
void CRegexpTemplateTester::PrintVar (const string &name) const
 
string CRegexpTemplateTester::GetVar (const string &name) const
 
const TVarMap & CRegexpTemplateTester::GetVars (void) const
 
void CRegexpTemplateTester::x_Op_Set (CTempString str)
 
void CRegexpTemplateTester::x_Op_Echo (CTempString str)
 
void CRegexpTemplateTester::x_Op_Test (CTempString str)
 
void CRegexpTemplateTester::x_Op_Include (CTempString str, istream &file_stm)
 
void CRegexpTemplateTester::x_Op_Skip (CTempString str, istream &file_stm)
 
void CRegexpTemplateTester::x_Reset (void)
 Reset object state. More...
 
EResult CRegexpTemplateTester::x_Compare (istream &file_stream, istream &template_stream)
 Main compare method, compare streams. More...
 
void CRegexpTemplateTester::x_CompareLines (CTempString file_line, CTempString template_line)
 Process/compare lines. More...
 
SIZE_TYPE CRegexpTemplateTester::x_ParseVar (CTempString str, SIZE_TYPE pos) const
 Parse variable from string, return its length. More...
 
SIZE_TYPE CRegexpTemplateTester::x_ParseVarName (CTempString str, SIZE_TYPE pos) const
 Parse/check variable name from string, return its length. More...
 
string CRegexpTemplateTester::x_SubstituteVars (CTempString str, TVarList *inline_vars) const
 Replace all variables in the string with corresponding values. More...
 
istream & CRegexpTemplateTester::x_GetLine (istream &is, ESource src)
 Get line from the stream 'is'. More...
 
virtual const char * CRegexpTemplateTesterException::GetErrCodeString (void) const override
 CRegexpTemplateTesterException –. More...
 
 CRegexpTemplateTesterException::NCBI_EXCEPTION_DEFAULT (CRegexpTemplateTesterException, CCoreException)
 

Variables

const size_t kRegexpMaxSubPatterns = 100
 Specifies the maximum number of subpatterns that can be found. More...
 
void * CRegexp::m_PReg
 
void * CRegexp::m_MatchData
 Pointer to compiled PCRE pattern. More...
 
TOffset * CRegexp::m_Results
 
int CRegexp::m_JITStatus
 
int CRegexp::m_NumFound
 The total number of pattern + subpatterns resulting from the last call to GetMatch. More...
 
string CRegexpUtil::m_Content
 Content string. More...
 
list< string > CRegexpUtil::m_ContentList
 Content list. More...
 
bool CRegexpUtil::m_IsDivided
 TRUE if m_ContentList is newer than m_Content, and FALSE otherwise. More...
 
CTempStringEx CRegexpUtil::m_RangeStart
 Regexp to determine start of range. More...
 
CTempStringEx CRegexpUtil::m_RangeEnd
 Regexp to determine end of range. More...
 
CTempString CRegexpUtil::m_Delimiter
 Delimiter used to split m_Content. More...
 
TFlags CRegexpTemplateTester::m_Flags
 Processing flags. More...
 
string CRegexpTemplateTester::m_VarStart
 Variable definition start. More...
 
string CRegexpTemplateTester::m_VarEnd
 Variable definition end. More...
 
string CRegexpTemplateTester::m_OpStart
 Start of the template command line. More...
 
string CRegexpTemplateTester::m_CommentStart
 Start of the comment line. More...
 
string CRegexpTemplateTester::m_EOLs
 Lines delimiters. More...
 
TVarMap CRegexpTemplateTester::m_Vars
 Map of variables/values. More...
 
string CRegexpTemplateTester::m_FileName
 Current file name (if any) More...
 
string CRegexpTemplateTester::m_FileLine
 Currently processing file line. More...
 
SIZE_TYPE CRegexpTemplateTester::m_FileLineNum
 Current file/stream line number. More...
 
string CRegexpTemplateTester::m_TemplateName
 Current template name. More...
 
string CRegexpTemplateTester::m_TemplateLine
 Currently processing template line. More...
 
SIZE_TYPE CRegexpTemplateTester::m_TemplateLineNum
 Current template line number. More...
 
bool CRegexpTemplateTester::m_ReprocessFileLine
 TRUE if m_FileLine should be reprocessed with next template data line. More...
 

Detailed Description

Typedef Documentation

◆ TCompile

typedef unsigned int CRegexp::TCompile

Type definitions used for code clarity.

Compilation options.

Definition at line 84 of file regexp.hpp.

◆ TFlags

Binary OR of "EFlags".

Definition at line 209 of file regexp_template_tester.hpp.

◆ TMatch

typedef unsigned int CRegexp::TMatch

Match options.

Definition at line 85 of file regexp.hpp.

◆ TOffset

typedef size_t CRegexp::TOffset

Element type for GetResults().

Definition at line 78 of file regexp.hpp.

◆ TVarList

typedef list<string> CRegexpTemplateTester::TVarList
private

Definition at line 300 of file regexp_template_tester.hpp.

◆ TVarMap

Definition at line 272 of file regexp_template_tester.hpp.

Enumeration Type Documentation

◆ ECompile

Flags for compiling regular expressions.

PCRE compiler flags used in the constructor and in Set(). If fCompile_ignore_case is set, matches are case insensitive. If fCompile_dotall is set, a dot meta-character in the pattern matches all characters, including newlines. Without it, newlines are excluded. If fCompile_newline is set then ^ matches the start of a line and $ matches the end of a line. If not set, ^ matches only the start of the entire string and $ matches only the end of the entire string. If fCompile_ungreedy is set, it inverts the "greediness" of the quantifiers so that they are not greedy by default, but become greedy if followed by "?". It is not compatible with Perl.

The settings can be changed from within the pattern by a sequence of Perl option letters enclosed between "(?" and ")". The option letters are: i for PCRE_CASELESS, m for PCRE_MULTILINE, s for PCRE_DOTALL, x for PCRE_EXTENDED, and U for PCRE_UNGREEDY.

Enumerator
fCompile_default 
fCompile_ignore_case 
fCompile_dotall 
fCompile_newline 
fCompile_ungreedy 
fCompile_extended 

Definition at line 109 of file regexp.hpp.

◆ ECompile_deprecated

Enumerator
eCompile_default 
eCompile_ignore_case 
eCompile_dotall 
eCompile_newline 
eCompile_ungreedy 

Definition at line 118 of file regexp.hpp.

◆ EErrCode [1/2]

Enumerator
eCompile 
eBadFlags 

Definition at line 555 of file regexp.hpp.

◆ EErrCode [2/2]

Error types that tester can generate.

Enumerator
eOpenFile 

file open error

eMismatchLength 

file/template line number mismatch

eMismatchContent 

file/template lines do not match

eVarNotFound 

variable not found

eVarErr 

variable definition error

eOpUnknown 

unknown operation

eOpErr 

operation definition error

eOpTest 

'test' operation returned FALSE

Definition at line 357 of file regexp_template_tester.hpp.

◆ EFlags

Enumerator
fSkipEmptySourceLines 

Skip empty lines in the source.

fSkipEmptyTemplateLines 

Skip empty lines in the template.

fSkipEmptyLines 

Definition at line 204 of file regexp_template_tester.hpp.

◆ EMatch

Flags for matching a string against a precompiled pattern.

Setting fMatch_not_begin causes ^ not to match before the first character of a line. Without setting fCompile_newline, ^ won't match anything if fMatch_not_begin is set. Setting fMatch_not_end causes $ not to match immediately before a new line. Without setting fCompile_newline, $ won't match anything if fMatch_not_end is set.

Enumerator
fMatch_default 
fMatch_not_begin 

^ won't match string begin.

fMatch_not_end 

$ won't match string end.

fMatch_not_both 

Definition at line 134 of file regexp.hpp.

◆ EMatch_deprecated

Enumerator
eMatch_default 
eMatch_not_begin 
eMatch_not_end 
eMatch_not_both 

Definition at line 141 of file regexp.hpp.

◆ ERange

Range processing type.

Defines which part of the specified range should be processed.

Enumerator
eInside 

Process substrings inside range.

eOutside 

Process substrings outside range.

Definition at line 447 of file regexp.hpp.

◆ EResult

The reason of stopping x_Compare(), if no error.

Enumerator
eTemplateEOF 
eStop 

Definition at line 296 of file regexp_template_tester.hpp.

◆ ESource

Processing source.

Enumerator
eFile 

source file/stream

eTemplate 

template

Definition at line 290 of file regexp_template_tester.hpp.

Function Documentation

◆ ClearRange()

void CRegexpUtil::ClearRange ( void  )
inline

Clear range for range-dependent functions.

Has the same effect as SetRange() without parameters.

See also
SetRange()

Definition at line 626 of file regexp.hpp.

References CRegexpUtil::SetRange().

◆ Compare() [1/2]

void CRegexpTemplateTester::Compare ( const string & file_path,
const string & template_path 
)

Compare file against template (file version).

Parameters
file_path: Path to the file being checked.
template_path: Path to the corresponding template.
Returns
Nothing on success. Throw CRegexpTemplateTesterException on error or mismatch.

Definition at line 159 of file regexp_template_tester.cpp.

References CRegexpTemplateTester::eFile, ERROR_FILE, ERROR_TEMPLATE, CRegexpTemplateTester::eStop, CRegexpTemplateTester::m_FileName, CRegexpTemplateTester::m_TemplateName, CRegexpTemplateTester::x_Compare(), CRegexpTemplateTester::x_GetLine(), and CRegexpTemplateTester::x_Reset().

◆ Compare() [2/2]

void CRegexpTemplateTester::Compare ( istream &  file_stream,
istream &  template_stream 
)

Compare file against template (stream version).

Parameters
file_stream: Input stream with the data being checked.
template_stream: Input stream with corresponding template data.
Returns
Nothing on success. Throw CRegexpTemplateTesterException on error or mismatch.
Note
Due to the file-oriented nature of the 'include' command in templates, it works a little differently than in the file-based version. It is better not to use 'include' with streams at all, but if you do, be aware that we don't have the path to the directory with the original template, so an included sub-template should be located in the current directory.

Definition at line 185 of file regexp_template_tester.cpp.

References CRegexpTemplateTester::eFile, ERROR_TEMPLATE, CRegexpTemplateTester::eStop, CRegexpTemplateTester::x_Compare(), CRegexpTemplateTester::x_GetLine(), and CRegexpTemplateTester::x_Reset().

◆ ConvertDateTo_iso8601()

string ConvertDateTo_iso8601 ( string const & value)

Convert dates from an arbitrary format to corresponding ISO 8601.

Parameters
value: String in some arbitrary date format.
Returns
Corresponding string in ISO 8601 format. It contains the same fields as the original string, but rearranged to conform to ISO 8601. If the original string has a time, it will be omitted. Returns an empty string on error, if we cannot locate a valid date, or if the date is ambiguous.

Definition at line 104 of file convert_dates_iso8601.cpp.

References extract_date_iso8601(), get_date_range_rule_collection(), get_date_rule_collection(), get_transform_for_ambiguous_date(), result, and rapidjson::value.

◆ ConvertDateTo_iso8601_and_annotate()

pair<string, string> ConvertDateTo_iso8601_and_annotate ( string const & value)

Convert dates from an arbitrary format to corresponding ISO 8601, with annotation.

Parameters
value: String in some arbitrary date/time format.
Returns
Return a pair of strings, where on success:
  • first string is a tag, marking the original format (MM/DD/YYYY, etc);
  • second string is a date in ISO 8601 format;
and on failure (we cannot locate a valid date, or the date is ambiguous):
  • first string is a tag: ["ambiguous", "miss"];
  • second string is empty.
See also
ConvertDateTo_iso8601

Definition at line 114 of file convert_dates_iso8601.cpp.

References extract_date_iso8601(), get_date_range_rule_collection(), get_date_rule_collection(), get_transform_for_ambiguous_date(), and rapidjson::value.

◆ CRegexp() [1/2]

CRegexp::CRegexp ( const CRegexp & )
private

◆ CRegexp() [2/2]

CRegexp::CRegexp ( CTempStringEx  pattern,
TCompile  flags = fCompile_default 
)

Constructor.

Set and compile the PCRE pattern specified by argument according to compile options. Also allocate memory for compiled PCRE.

Parameters
pattern: Perl regular expression to compile.
flags: Regular expression compilation flags.
See also
ECompile

Definition at line 111 of file regexp.cpp.

References flags, and CRegexp::Set().

◆ CRegexpTemplateTester()

CRegexpTemplateTester::CRegexpTemplateTester ( TFlags  flags = 0)

Default constructor.

CRegexpTemplateTester –.

Definition at line 74 of file regexp_template_tester.cpp.

References CRegexpTemplateTester::x_Reset().

◆ CRegexpUtil()

CRegexpUtil::CRegexpUtil ( CTempString  str = kEmptyStr)

Constructor.

Set string for processing.

Parameters
str: String to process.
See also
Exists(), Extract(), Replace(), ReplaceRange()

Definition at line 329 of file regexp.cpp.

References CRegexpUtil::Reset(), and str().

◆ Escape()

string CRegexp::Escape ( CTempString  str)
static

Escape all regular expression meta characters in the string.

Definition at line 260 of file regexp.cpp.

References NPOS, out(), prev(), s_Special, and str().

Referenced by CFindASN1Dlg::OnReplaceButton().

◆ Exists()

bool CRegexpUtil::Exists ( CTempStringEx  pattern,
CRegexp::TCompile  compile_flags = CRegexp::fCompile_default,
CRegexp::TMatch  match_flags = CRegexp::fMatch_default 
)
inline

Check existence of a substring which matches a specified pattern.

Parameters
pattern: Perl regular expression to search.
compile_flags: Regular expression compilation flags.
match_flags: Flags to match.
Returns
Return TRUE if a substring matching the pattern or subpattern exists.
See also
CRegexp, CRegexp::GetMatch()

Definition at line 632 of file regexp.hpp.

References CRegexp::GetMatch(), CRegexpUtil::m_Content, CRegexp::NumFound(), and CRegexpUtil::x_Join().

◆ Extract()

string CRegexpUtil::Extract ( CTempStringEx  pattern,
CRegexp::TCompile  compile_flags = CRegexp::fCompile_default,
CRegexp::TMatch  match_flags = CRegexp::fMatch_default,
size_t  pattern_idx = 0 
)
inline

Get matching pattern/subpattern from string.

Parameters
pattern: Perl regular expression to search.
compile_flags: Regular expression compilation flags.
match_flags: Flags to match.
pattern_idx: Index of pattern/subpattern to extract. Use pattern_idx = 0 for pattern, pattern_idx > 0 for sub patterns.
Returns
Return the substring at location of pattern/subpattern match with index pattern_idx. Return empty string when no match.
See also
CRegexp, CRegexp::GetMatch()

Definition at line 646 of file regexp.hpp.

References CRegexp::GetMatch(), CRegexpUtil::m_Content, and CRegexpUtil::x_Join().

Referenced by CPsgPerfApplication::ProcessLine().

◆ GetErrCodeString() [1/2]

const char * CRegexpException::GetErrCodeString ( void  ) const
overridevirtual

Get error code interpreted as text.

Reimplemented from CException.

Definition at line 548 of file regexp.cpp.

References CRegexpException::eBadFlags, CRegexpException::eCompile, CException::GetErrCode(), and CException::GetErrCodeString().

◆ GetErrCodeString() [2/2]

const char * CRegexpTemplateTesterException::GetErrCodeString ( void  ) const
overridevirtual

◆ GetMatch()

CTempString CRegexp::GetMatch ( CTempString  str,
size_t  offset = 0,
size_t  idx = 0,
TMatch  flags = fMatch_default,
bool  noreturn = false 
)

Get matching pattern and subpatterns.

Return a string corresponding to the match to pattern or subpattern. Set noreturn to true when GetSub() or GetResults() will be used to retrieve pattern and subpatterns. Calling GetMatch() causes the entire search to be performed again. If you want to retrieve a different pattern/subpattern from an already performed search, it is more efficient to use GetSub() or GetResults(). If you need to get the numeric offset of the found pattern or subpattern, then use the GetResults() method. Do not use functions like strstr() or string's find() method, etc., because in general they give you wrong results; this depends heavily on the regular expression used.

Parameters
str: String to search.
offset: Starting offset in str.
idx: (Sub) match to return. Use idx = 0 for complete pattern. Use idx > 0 for subpatterns.
flags: Flags to match.
noreturn: Return empty string if noreturn is true.
Returns
Return (sub) match with number idx or empty string when no match found or if noreturn is true.
See also
EMatch, GetSub(), GetResults()

Definition at line 242 of file regexp.cpp.

References flags, CRegexp::GetSub(), offset, str(), and CRegexp::x_Match().

Referenced by BrBookURLToCCddBookRef(), BrFcgiBookTermToEutilsTerm(), CapitalizeAfterApostrophe(), CPepXML::ConvertScanID(), DoesPatternMatchHighlightedResidues(), CRegexpUtil::Exists(), CRegexpUtil::Extract(), extract_date_iso8601(), CFindPattern::Find(), FixAffiliationShortWordsInElement(), FixOrdinalNumbers(), NMItemData::GetResolveFuncForPubQual(), CSpectrumSet::LoadMultDTA(), PortalBookURLToCCddBookRef(), CRegexpUtil::Replace(), CRegexpUtil::ReplaceRange(), NSearchFeatPanel::SplitPosOrRange(), transform_range(), CMsvcConfigure::WriteBuildVer(), CConvertAuthorToConsortiumWhereConstraint::x_ApplyToCAuth(), COrfSearchJob::x_DoSearch(), CSequenceSearchJob::x_GetMatches(), and CFeatureSearchJob::x_Match().

◆ GetResult()

string CRegexpUtil::GetResult ( void  )
inline

◆ GetResults()

const CRegexp::TOffset * CRegexp::GetResults ( size_t  idx) const
inline

Get location of pattern/subpattern for the last GetMatch().

Parameters
idx: Index of pattern/subpattern to obtain. Use idx = 0 for pattern, idx > 0 for sub patterns.
Returns
Return array where index 0 is location of first character in pattern/sub pattern and index 1 is 1 beyond last character in pattern/sub pattern. Throws if called with idx >= NumFound().
See also
GetMatch(), NumFound()

Definition at line 584 of file regexp.hpp.

References CRegexp::m_NumFound, and CRegexp::m_Results.

Referenced by CapitalizeAfterApostrophe(), CFindPattern::Find(), FixAffiliationShortWordsInElement(), FixOrdinalNumbers(), CNcbiApplogApp::GetRawAppName(), CRegexpUtil::Replace(), CNcbiApplogApp::Run(), and CSequenceSearchJob::x_GetMatches().

◆ GetSub() [1/2]

void CRegexp::GetSub ( CTempString  str,
size_t  idx,
string & dst 
) const

Get pattern/subpattern from previous GetMatch().

Deprecated:

Definition at line 189 of file regexp.cpp.

References CRegexp::GetSub(), and str().

◆ GetSub() [2/2]

CTempString CRegexp::GetSub ( CTempString  str,
size_t  idx = 0 
) const

Get pattern/subpattern from previous GetMatch().

Should only be called after GetMatch() has been called with the same string. GetMatch() internally stores locations on string where pattern and subpatterns were found.

Parameters
str: String to search.
idx: (Sub) match to return.
Returns
Return the substring at location of pattern match (idx 0) or subpattern match (idx > 0). Return empty string when no match.
See also
GetMatch(), GetResults()

Definition at line 200 of file regexp.cpp.

References CRegexp::m_NumFound, CRegexp::m_Results, PCRE2_SIZE, PCRE2_UNSET, and str().

Referenced by BrBookURLToCCddBookRef(), BrFcgiBookTermToEutilsTerm(), extract_date_iso8601(), CRegexp::GetMatch(), NMItemData::GetResolveFuncForPubQual(), CRegexp::GetSub(), PortalBookURLToCCddBookRef(), CReadBlastApp::ReadBlast(), s_ChrName(), CTabularFormatter::SetFormat(), sParseVersion(), NSearchFeatPanel::SplitPosOrRange(), transform_ambiguous_date(), transform_range(), CRegexpTemplateTester::x_CompareLines(), and CSeq_id_Resolver__LRG::x_Create().

◆ GetVar()

string CRegexpTemplateTester::GetVar ( const string & name) const

◆ GetVars()

const CRegexpTemplateTester::TVarMap & CRegexpTemplateTester::GetVars ( void  ) const

Definition at line 138 of file regexp_template_tester.cpp.

References CRegexpTemplateTester::m_Vars.

◆ IsMatch()

bool CRegexp::IsMatch ( CTempString  str,
TMatch  flags = fMatch_default 
)

◆ NCBI_EXCEPTION_DEFAULT() [1/2]

CRegexpException::NCBI_EXCEPTION_DEFAULT ( CRegexpException  ,
CException   
)

◆ NCBI_EXCEPTION_DEFAULT() [2/2]

CRegexpTemplateTesterException::NCBI_EXCEPTION_DEFAULT ( CRegexpTemplateTesterException  ,
CCoreException   
)

◆ NumFound()

int CRegexp::NumFound ( ) const
inline

◆ operator string()

CRegexpUtil::operator string ( void  )
inline

Get result string.

The same as GetResult().

See also
GetResult()

Definition at line 614 of file regexp.hpp.

◆ operator=() [1/2]

void CRegexp::operator= ( const CRegexp & )
private

◆ operator=() [2/2]

void CRegexpUtil::operator= ( CTempString  str)
inline

Reset the content of the string to process.

The same as Reset().

Parameters
str: String to process.
See also
Reset()

Definition at line 620 of file regexp.hpp.

References CRegexpUtil::Reset(), and str().

◆ PrintVar()

void CRegexpTemplateTester::PrintVar ( const string & name) const

◆ PrintVars()

void CRegexpTemplateTester::PrintVars ( void  ) const

◆ Replace()

size_t CRegexpUtil::Replace ( CTempStringEx  search,
CTempString  replace,
CRegexp::TCompile  compile_flags = CRegexp::fCompile_default,
CRegexp::TMatch  match_flags = CRegexp::fMatch_default,
size_t  max_replace = 0 
)

Replace occurrences of a substring within a string by pattern.

Parameters
search: Regular expression to match a substring value that is replaced.
replace: Replace "search" substring with this value. The matched subpatterns (if any) can be found and inserted into replace string using variables $1, $2, $3, and so forth. The variable can be enclosed in curly brackets {}, which will be deleted on substitution.
compile_flags: Regular expression compilation flags.
match_flags: Flags to match.
max_replace: Replace no more than "max_replace" occurrences of substring "search". If "max_replace" is zero (default), then replace all occurrences with "replace".
Returns
Return the count of replacements.
See also
CRegexp, ReplaceRange()

Definition at line 349 of file regexp.cpp.

References CTempString::assign(), count, CTempString::data(), CTempString::empty(), CRegexp::GetMatch(), CRegexp::GetResults(), int, kMax_Int, CTempString::length(), CRegexpUtil::m_Content, n, NPOS, CRegexp::NumFound(), result, rapidjson::value, and CRegexpUtil::x_Join().

Referenced by CapitalizeSAfterNumber(), CPhyObjectLoader::Execute(), FindReplaceString_CountryFixes(), FixAbbreviationsInElement(), FixAffiliationShortWordsInElement(), CFixProductNames::FixCaps(), FixCountryCapitalization(), FixKnownAbbreviationsInElement(), FixShortWordsInElement(), FixupMouseStrain(), InsertMissingSpacesAfterCommas(), InsertMissingSpacesAfterNo(), CRegexpUtil::ReplaceRange(), CFindASN1Dlg::ReplaceValue(), CMacroFunction_EditStringQual::s_EditText(), s_RegexpReplace(), and CMacroFunction_FixSpelling::s_SpellingFixes().

◆ ReplaceRange()

size_t CRegexpUtil::ReplaceRange ( CTempStringEx  search,
CTempString  replace,
CRegexp::TCompile  compile_flags = CRegexp::fCompile_default,
CRegexp::TMatch  match_flags = CRegexp::fMatch_default,
CRegexpUtil::ERange  process_within = eInside,
size_t  max_replace = 0 
)

Replace all occurrences of a substring within a string by pattern.

Uses the range specified by the SetRange() method. Works like the sed command s/.

Parameters
search: Regular expression to match a substring value that is replaced.
replace: Replace "search" substring with this value. The matched subpatterns (if any) can be found and inserted into replace string using variables $1, $2, $3, and so forth. The variable can be enclosed in curly brackets {}, which will be deleted on substitution.
compile_flags: Regular expression compilation flags.
match_flags: Flags to match.
process_within: Define which part of the range should be processed.
max_replace: Replace no more than "max_replace" occurrences of substring "search" in every substring. If "max_replace" is zero (default), then replace all occurrences with "replace".
Returns
Return the count of replacements.
See also
ERange, SetRange(), ClearRange()

Definition at line 444 of file regexp.cpp.

References CRegexpUtil::eInside, CTempString::empty(), CRegexpUtil::eOutside, CRegexp::fMatch_default, CRegexp::GetMatch(), i, CRegexpUtil::m_ContentList, CRegexpUtil::m_RangeEnd, CRegexpUtil::m_RangeStart, NON_CONST_ITERATE, CRegexp::NumFound(), CRegexpUtil::Replace(), and CRegexpUtil::x_Divide().

◆ Reset()

void CRegexpUtil::Reset ( CTempString  str)
inline

Reset the content of the string to process.

Parameters
str: String to process.
See also
operator =

Definition at line 606 of file regexp.hpp.

References CRegexpUtil::m_Content, CRegexpUtil::m_ContentList, CRegexpUtil::m_IsDivided, and str().

Referenced by CRegexpUtil::CRegexpUtil(), CRegexpUtil::operator=(), and CPsgPerfApplication::ProcessLine().

◆ Set()

void CRegexp::Set ( CTempStringEx  pattern,
TCompile  flags = fCompile_default 
)

Set and compile PCRE.

Set and compile the PCRE pattern specified by argument according to compile options. Also deallocate/allocate memory for compiled PCRE.

Parameters
pattern: Perl regular expression to compile.
flags: Regular expression compilation flags.
See also
ECompile

Definition at line 138 of file regexp.cpp.

References ArraySize(), CTempString::data(), flags, CTempStringEx::HasZeroAtEnd(), CRegexp::m_MatchData, CRegexp::m_PReg, NCBI_THROW, NULL, pcre2_code, pcre2_code_free(), pcre2_compile(), pcre2_get_error_message(), pcre2_match_data, pcre2_match_data_create_from_pattern(), pcre2_match_data_free(), PCRE2_SPTR, PCRE2_UCHAR, s_GetRealCompileFlags(), and CTempString::size().

Referenced by CRegexp::CRegexp(), and CRemoveDescDlg::x_ApplyToSeq_entry().

◆ SetCommandStart()

void CRegexpTemplateTester::SetCommandStart ( string str)

Change string defining start of template commands and operations.

Default value: "#"

Definition at line 116 of file regexp_template_tester.cpp.

References CRegexpTemplateTester::m_OpStart, and str().

◆ SetCommentStart()

void CRegexpTemplateTester::SetCommentStart ( string str)

Change the string defining the start of a comment line in templates.

Default value: "//"

Definition at line 110 of file regexp_template_tester.cpp.

References CRegexpTemplateTester::m_CommentStart, and str().

◆ SetDelimiters()

void CRegexpTemplateTester::SetDelimiters ( string str)

Change delimiters string, used for comparing data and templates.

Default value: "\r\n"

Definition at line 122 of file regexp_template_tester.cpp.

References CRegexpTemplateTester::m_EOLs, and str().

◆ SetRange()

void CRegexpUtil::SetRange ( CTempStringEx  addr_start = kEmptyStr,
CTempStringEx  addr_end = kEmptyStr,
CTempString  delimiter = "\n" 
)

Set new range for range-dependent functions.

The matched string will be split up by "delimiter". Then, in range-dependent functions, every part (substring) is checked to see whether it falls into the range specified by the start and end addresses.

The addresses work similarly to the Unix utility SED, except that the regular expressions are Perl-compatible:

  • an empty address in the range corresponds to any substring.
  • a command with one address corresponds to any substring that matches the address.
  • a command with two addresses corresponds to the inclusive range from the start address through the next pattern space that matches the end address.

The specified range has an effect only for range-dependent functions. Otherwise the range is ignored.

Parameters
addr_start: Regular expression which assigns the starting address of the range.
addr_end: Regular expression which assigns the ending address of the range. Should be empty if the start address is empty.
delimiter: Split a source string by "delimiter".
See also
ClearRange, ReplaceRange()

Definition at line 337 of file regexp.cpp.

References delimiter, CRegexpUtil::m_Delimiter, CRegexpUtil::m_RangeEnd, CRegexpUtil::m_RangeStart, and CRegexpUtil::x_Divide().

Referenced by CRegexpUtil::ClearRange().

◆ SetVarScope()

void CRegexpTemplateTester::SetVarScope ( string start,
string end 
)

Change strings defining start/end of variables.

By default the following syntax is used: ${var}

Definition at line 103 of file regexp_template_tester.cpp.

References CRegexpTemplateTester::m_VarEnd, and CRegexpTemplateTester::m_VarStart.

◆ WildcardToRegexp()

string CRegexp::WildcardToRegexp ( CTempString  mask)
static

Convert wildcard mask to regular expression.

Escapes all regular expression meta characters in the string, except '*' and '?'. They will be replaced with '.*' and '.' respectively.

Parameters
mask: Wildcard mask.
Returns
Regular expression.
See also
Escape, NStr::MatchesMask

Definition at line 288 of file regexp.cpp.

References mask, NPOS, out(), prev(), and s_Special.

Referenced by CSequenceSearchJob::x_DoSearch(), and CFeatureCheckPanel::x_InitTree().

◆ x_Compare()

CRegexpTemplateTester::EResult CRegexpTemplateTester::x_Compare ( istream &  file_stream,
istream &  template_stream 
)
private

◆ x_CompareLines()

void CRegexpTemplateTester::x_CompareLines ( CTempString  file_line,
CTempString  template_line 
)
private

◆ x_Divide() [1/2]

void CRegexpUtil::x_Divide ( CTempString  delimiter)
private

Divide source string to substrings by delimiter for separate processing.

Definition at line 510 of file regexp.cpp.

References delimiter, CRegexpUtil::m_Content, CRegexpUtil::m_ContentList, CRegexpUtil::m_Delimiter, CRegexpUtil::m_IsDivided, NPOS, and CRegexpUtil::x_Join().

◆ x_Divide() [2/2]

void CRegexpUtil::x_Divide ( void  )
inline private

◆ x_GetLine()

istream & CRegexpTemplateTester::x_GetLine ( istream &  is,
ESource  src 
)
private

◆ x_Join()

void CRegexpUtil::x_Join ( void  )
private

◆ x_Match()

void CRegexp::x_Match ( CTempString  str,
size_t  offset,
TMatch  flags 
)
private

◆ x_Op_Echo()

void CRegexpTemplateTester::x_Op_Echo ( CTempString  str)
private

◆ x_Op_Include()

void CRegexpTemplateTester::x_Op_Include ( CTempString  str,
istream &  file_stm 
)
private

◆ x_Op_Set()

void CRegexpTemplateTester::x_Op_Set ( CTempString  str)
private

◆ x_Op_Skip()

void CRegexpTemplateTester::x_Op_Skip ( CTempString  str,
istream &  file_stm 
)
private

◆ x_Op_Test()

void CRegexpTemplateTester::x_Op_Test ( CTempString  str)
private

◆ x_ParseVar()

SIZE_TYPE CRegexpTemplateTester::x_ParseVar ( CTempString  str,
SIZE_TYPE  pos 
) const
private

Parse variable from string, return its length.

Can process nested variables.

Definition at line 322 of file regexp_template_tester.cpp.

References NStr::CompareCase(), len, CRegexpTemplateTester::m_VarEnd, CRegexpTemplateTester::m_VarStart, NPOS, and str().

Referenced by CRegexpTemplateTester::x_SubstituteVars().

◆ x_ParseVarName()

SIZE_TYPE CRegexpTemplateTester::x_ParseVarName ( CTempString  str,
SIZE_TYPE  pos 
) const
private

Parse/check variable name from string, return its length.

Definition at line 347 of file regexp_template_tester.cpp.

References _ASSERT, ERROR_TEMPLATE, isalnum(), isalpha(), isspace(), len, NPOS, and str().

Referenced by CRegexpTemplateTester::x_Op_Set(), and CRegexpTemplateTester::x_SubstituteVars().

◆ x_Reset()

void CRegexpTemplateTester::x_Reset ( void  )
private

◆ x_SubstituteVars()

string CRegexpTemplateTester::x_SubstituteVars ( CTempString  str,
TVarList inline_vars 
) const
private

◆ ~CRegexp()

CRegexp::~CRegexp ( )
virtual

Destructor.

Deallocate compiled Perl-compatible regular expression.

Definition at line 126 of file regexp.cpp.

References CRegexp::m_MatchData, CRegexp::m_PReg, pcre2_code, pcre2_code_free(), pcre2_match_data, and pcre2_match_data_free().

Variable Documentation

◆ kRegexpMaxSubPatterns

const size_t kRegexpMaxSubPatterns = 100

Specifies the maximum number of subpatterns that can be found.

Definition at line 56 of file regexp.hpp.

Referenced by CRegexp::x_Match().

◆ m_CommentStart

string CRegexpTemplateTester::m_CommentStart
private

Start of the comment line.

Definition at line 329 of file regexp_template_tester.hpp.

Referenced by CRegexpTemplateTester::SetCommentStart(), and CRegexpTemplateTester::x_Compare().

◆ m_Content

string CRegexpUtil::m_Content
private

◆ m_ContentList

list<string> CRegexpUtil::m_ContentList
private

Content list.

Definition at line 537 of file regexp.hpp.

Referenced by CRegexpUtil::ReplaceRange(), CRegexpUtil::Reset(), CRegexpUtil::x_Divide(), and CRegexpUtil::x_Join().

◆ m_Delimiter

CTempString CRegexpUtil::m_Delimiter
private

Delimiter used to split m_Content.

Definition at line 542 of file regexp.hpp.

Referenced by CRegexpUtil::SetRange(), CRegexpUtil::x_Divide(), and CRegexpUtil::x_Join().

◆ m_EOLs

string CRegexpTemplateTester::m_EOLs
private

Line delimiters.

Definition at line 330 of file regexp_template_tester.hpp.

Referenced by CRegexpTemplateTester::SetDelimiters(), and CRegexpTemplateTester::x_GetLine().

◆ m_FileLine

string CRegexpTemplateTester::m_FileLine
private

◆ m_FileLineNum

SIZE_TYPE CRegexpTemplateTester::m_FileLineNum
private

Current file/stream line number.

Definition at line 336 of file regexp_template_tester.hpp.

Referenced by CRegexpTemplateTester::x_Compare(), CRegexpTemplateTester::x_GetLine(), and CRegexpTemplateTester::x_Reset().

◆ m_FileName

string CRegexpTemplateTester::m_FileName
private

Current file name (if any).

Definition at line 334 of file regexp_template_tester.hpp.

Referenced by CRegexpTemplateTester::Compare(), and CRegexpTemplateTester::x_Reset().

◆ m_Flags

TFlags CRegexpTemplateTester::m_Flags
private

Processing flags.

Definition at line 325 of file regexp_template_tester.hpp.

Referenced by CRegexpTemplateTester::x_GetLine().

◆ m_IsDivided

bool CRegexpUtil::m_IsDivided
private

TRUE if m_ContentList is newer than m_Content, and FALSE otherwise.

Definition at line 538 of file regexp.hpp.

Referenced by CRegexpUtil::GetResult(), CRegexpUtil::Reset(), CRegexpUtil::x_Divide(), and CRegexpUtil::x_Join().

◆ m_JITStatus

int CRegexp::m_JITStatus
private

Definition at line 302 of file regexp.hpp.

Referenced by CRegexp::x_Match().

◆ m_MatchData

void* CRegexp::m_MatchData
private

Pointer to PCRE2 match data.

Definition at line 300 of file regexp.hpp.

Referenced by CRegexp::Set(), CRegexp::x_Match(), and CRegexp::~CRegexp().

◆ m_NumFound

int CRegexp::m_NumFound
private

The total number of pattern + subpatterns resulting from the last call to GetMatch.

Definition at line 315 of file regexp.hpp.

Referenced by CRegexp::GetResults(), CRegexp::GetSub(), CRegexp::IsMatch(), CRegexp::NumFound(), and CRegexp::x_Match().

◆ m_OpStart

string CRegexpTemplateTester::m_OpStart
private

Start of the template command line.

Definition at line 328 of file regexp_template_tester.hpp.

Referenced by CRegexpTemplateTester::SetCommandStart(), and CRegexpTemplateTester::x_Compare().

◆ m_PReg

void* CRegexp::m_PReg
private

Definition at line 297 of file regexp.hpp.

Referenced by CRegexp::Set(), CRegexp::x_Match(), and CRegexp::~CRegexp().

◆ m_RangeEnd

CTempStringEx CRegexpUtil::m_RangeEnd
private

Regexp to determine end of range.

Definition at line 541 of file regexp.hpp.

Referenced by CRegexpUtil::ReplaceRange(), and CRegexpUtil::SetRange().

◆ m_RangeStart

CTempStringEx CRegexpUtil::m_RangeStart
private

Regexp to determine start of range.

Definition at line 540 of file regexp.hpp.

Referenced by CRegexpUtil::ReplaceRange(), and CRegexpUtil::SetRange().

◆ m_ReprocessFileLine

bool CRegexpTemplateTester::m_ReprocessFileLine
private

TRUE if m_FileLine should be reprocessed with next template data line.

Definition at line 341 of file regexp_template_tester.hpp.

Referenced by CRegexpTemplateTester::x_Compare(), CRegexpTemplateTester::x_Op_Skip(), and CRegexpTemplateTester::x_Reset().

◆ m_Results

TOffset* CRegexp::m_Results
private

Definition at line 301 of file regexp.hpp.

Referenced by CRegexp::GetResults(), CRegexp::GetSub(), and CRegexp::x_Match().

◆ m_TemplateLine

string CRegexpTemplateTester::m_TemplateLine
private

Currently processing template line.

Definition at line 338 of file regexp_template_tester.hpp.

Referenced by CRegexpTemplateTester::x_Compare(), and CRegexpTemplateTester::x_GetLine().

◆ m_TemplateLineNum

SIZE_TYPE CRegexpTemplateTester::m_TemplateLineNum
private

◆ m_TemplateName

string CRegexpTemplateTester::m_TemplateName
private

◆ m_VarEnd

string CRegexpTemplateTester::m_VarEnd
private

◆ m_Vars

TVarMap CRegexpTemplateTester::m_Vars
private

◆ m_VarStart

string CRegexpTemplateTester::m_VarStart
private
Modified on Fri Sep 20 14:57:48 2024 by modify_doxy.py rev. 669887