1 #ifndef UTIL___MULTIPATTERN_SEARCH_IMPL__HPP
2 #define UTIL___MULTIPATTERN_SEARCH_IMPL__HPP
/// Tells whether @p c is a "word" byte: a decimal digit ('0'..'9'), an
/// upper-case letter ('A'..'Z'), a lower-case letter ('a'..'z') or the
/// underscore.
///
/// The test compares raw byte values against character literals only; no
/// <cctype> classification function is involved.
///
/// @param c  Byte to classify.
/// @return   true if @p c falls into one of the ranges above, false otherwise.
static bool IsWordCharacter(unsigned char c)
{
    if (c == '_') {
        return true;
    }
    const bool is_digit = '0' <= c && c <= '9';
    const bool is_upper = 'A' <= c && c <= 'Z';
    const bool is_lower = 'a' <= c && c <= 'z';
    return is_digit || is_upper || is_lower;
}
74 virtual operator bool() {
return true; }
77 virtual bool IsAssert()
const {
return false; }
/// Pure-virtual printing hook: every concrete derived type must implement it.
/// @param out  Stream handed to the implementation.
/// @param off  NOTE(review): presumably an indentation/offset for the
///             output -- confirm against the implementations.
78 virtual void Print(ostream&
out,
size_t off)
const = 0;
86 operator bool() {
return false; }
/// Declaration only; the definition is not part of this excerpt.
/// @param out  Stream handed to the implementation.
/// @param off  NOTE(review): presumably an indentation/offset for the
///             output -- confirm against the definition.
99 void Print(ostream&
out,
size_t off)
const;
/// Declaration only; the definition is not part of this excerpt.
/// @param out  Stream handed to the implementation.
/// @param off  NOTE(review): presumably an indentation/offset for the
///             output -- confirm against the definition.
110 void Print(ostream&
out,
size_t off)
const;
/// Declaration only; the definition is not part of this excerpt.
/// @param out  Stream handed to the implementation.
/// @param off  NOTE(review): presumably an indentation/offset for the
///             output -- confirm against the definition.
158 void Print(ostream&
out,
size_t off)
const;
169 void Print(ostream&
out,
size_t off)
const override
172 {
throw string(
"back reference"); }
218 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
221 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
222 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
223 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
225 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
226 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
227 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
228 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
229 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
230 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
231 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
232 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
233 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/// Container of states: a vector whose elements are uniquely-owned
/// CRegExState objects (each slot is a unique_ptr).
246 typedef vector<unique_ptr<CRegExState>>
TStates;
/// A (size_t, CRegEx::EType) pair.
/// NOTE(review): presumably a state index paired with a type tag -- confirm
/// against the code that builds these pairs.
247 typedef pair<size_t, CRegEx::EType>
TNode;
255 void Trans(
size_t x,
unsigned char c,
size_t y) {
m_States[x]->Trans(c, y); };
/// Declaration only; the definition is not part of this excerpt.
/// @param v  Read-only list of uniquely-owned CRegEx objects. The vector is
///           taken by const reference, so ownership stays with the caller.
260 void Add(
const vector<unique_ptr<CRegEx>>& v);
/// Declaration only; the definition is not part of this excerpt.
/// @param fsa  Taken by value as a unique_ptr, so the caller hands over
///             ownership of the CRegExFSA (an lvalue must be std::move'd in).
261 void Merge(unique_ptr<CRegExFSA> fsa);
268 static void Push(
size_t x, vector<size_t>& v, vector<size_t>& h) {
270 for (
i = 0;
i < h.size(); ++
i) {
271 if (h[
i] == x)
return;
276 for (
size_t j = h.size() - 1; j >
i; --j) h[j] = h[j - 1];
279 static bool In(
size_t x, vector<size_t>& h) {
281 for (
i = 0;
i < h.size(); ++
i) {
282 if (h[
i] == x)
return true;
pair< size_t, CRegEx::EType > TNode
static void Push(size_t x, vector< size_t > &v, vector< size_t > &h)
void GenerateArrayMapData(ostream &out) const
void Trans(size_t x, unsigned char c, size_t y)
void Add(const CRegEx &rx)
void Emit(size_t x, size_t n)
void GenerateSourceCode(ostream &out) const
static bool In(size_t x, vector< size_t > &h)
void Create(const CRegEx &rx, size_t emit)
map< TNodeSet, size_t > TNodeSetMap
static size_t Collect(TScratch &VV, CRegEx::EType t, TStates &src, TStates &dest, TNodeSetMap &NM, TNodeSetList &NL, TNodeSet &NS, TScratch &HH)
void GenerateDotGraph(ostream &out) const
void Short(size_t x, size_t y)
static void Extend(size_t x, unsigned char c, TStates &src, TStates &dest, TNodeSetMap &NM, TNodeSetList &NL, TNodeSet &NS, TScratch &VV, TScratch &HH)
array< vector< size_t >, 4 > TScratch
size_t AddState(unsigned char t=CRegEx::eTypePass)
void Merge(unique_ptr< CRegExFSA > fsa)
vector< TNodeSet > TNodeSetList
vector< unique_ptr< CRegExState > > TStates
CRegEx(const string &s, CMultipatternSearch::TFlags f=0)
unique_ptr< CRegX > x_ParseAtom()
unique_ptr< CRegX > x_ParsePlain()
unique_ptr< CRegX > x_ParseConcat()
void x_ParseSquare(set< unsigned char > &t)
bool x_ParseRepeat(int &from, int &to, bool &lazy)
void x_Print(ostream &out) const
void x_ThrowError(const string msg, size_t pos, size_t len)
unsigned char x_ParseEscape()
CRegEx(const char *s, CMultipatternSearch::TFlags f=0)
unique_ptr< CRegX > x_ParseTerm()
int x_ParseHex(size_t len=0)
CMultipatternSearch::TFlags m_Flag
void x_ThrowUnexpectedCharacter()
int x_ParseDec(size_t len=0)
friend ostream & operator<<(ostream &, const CRegEx &)
void x_ThrowUnexpectedEndOfLine()
unique_ptr< CRegX > m_RegX
static bool IsWordCharacter(unsigned char c)
unique_ptr< CRegX > x_ParseSelect()
iterator_bool insert(const value_type &val)
std::ofstream out("events_result.xml")
main entry point for tests
#define HH(a, b, c, d, x, s)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Simultaneous search of multiple RegEx patterns in the input string.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
array< size_t, 256 > m_Trans
CRegExState(unsigned char t=CRegEx::eTypePass)
void Trans(unsigned char c, size_t n)
size_t operator()(const vector< size_t > &s)
virtual bool IsAssert() const
bool IsCaseInsensitive() const
void Print(ostream &out, size_t off) const
void SetCaseInsensitive()
CRegXAssert(EAssert a, unique_ptr< CRegX > &x)
void Render(CRegExFSA &fsa, size_t from, size_t to) const
unique_ptr< CRegX > m_RegX
CRegXBackRef(unsigned int n)
void Render(CRegExFSA &, size_t, size_t) const override
void Print(ostream &out, size_t off) const override
bool IsCaseInsensitive() const override
CRegXChar(char c, bool neg=false)
void Set(const set< unsigned char > &t)
CRegXChar(const set< unsigned char > &t, bool neg=false)
void Render(CRegExFSA &fsa, size_t from, size_t to) const
void Print(ostream &out, size_t off) const
set< unsigned char > m_Set
bool IsCaseInsensitive() const
void SetCaseInsensitive()
void Print(ostream &out, size_t off) const
void Render(CRegExFSA &fsa, size_t from, size_t to) const
bool IsCaseInsensitive() const
void SetCaseInsensitive()
vector< unique_ptr< CRegX > > m_Vec
CRegXConcat(vector< unique_ptr< CRegX > > &v)
bool IsCaseInsensitive() const
void Render(CRegExFSA &fsa, size_t from, size_t to) const
void Print(ostream &out, size_t off) const
void Print(ostream &out, size_t off) const
CRegXSelect(vector< unique_ptr< CRegX > > &v)
void Render(CRegExFSA &fsa, size_t from, size_t to) const
bool IsCaseInsensitive() const
vector< unique_ptr< CRegX > > m_Vec
void SetCaseInsensitive()
bool IsCaseInsensitive() const
unique_ptr< CRegX > m_RegX
void Print(ostream &out, size_t off) const
CRegXTerm(unique_ptr< CRegX > &x, unsigned int min, unsigned int max, bool lazy=false)
void SetCaseInsensitive()
void Render(CRegExFSA &fsa, size_t from, size_t to) const
static void DummyTrans(CRegExFSA &fsa, size_t x, unsigned char t)
virtual void SetCaseInsensitive()
static void PrintOffset(ostream &out, size_t off)
virtual bool IsCaseInsensitive() const =0
virtual void Render(CRegExFSA &fsa, size_t from, size_t to) const =0
virtual bool IsAssert() const
virtual void Print(ostream &out, size_t off) const =0