1 #ifndef UTIL___STRSEARCH__HPP
2 #define UTIL___STRSEARCH__HPP
76 ePrefixMatch = (1 << 0),
77 eSuffixMatch = (1 << 1),
78 eWholeWordMatch = (ePrefixMatch | eSuffixMatch)
92 unsigned int whole_word = eSubstrMatch);
109 const string& word_delimeters,
111 bool invert_delimiters =
false);
121 void SetWordDelimiters(
const string& word_delimeters,
122 bool invert_delimiters =
false);
129 void AddDelimiters(
const string& word_delimeters);
136 void AddDelimiters(
char ch);
140 void InitCommonDelimiters();
149 m_WholeWord = whole_word;
163 return Search(
text.c_str(), pos,
text.length());
190 bool IsWholeWord(
const char*
text,
194 void x_InitPattern(
void);
212 template <
typename MatchType>
333 template <
typename MatchType>
335 m_Primed(
false), m_CaseSensitive(case_sensitive)
342 template <
typename MatchType>
346 if ( !m_CaseSensitive ) {
352 word_len = (
int)temp.length();
355 for (
i = 0;
i < word_len; ++
i ) {
357 if (
next == eFailState )
break;
362 for ( ;
i < word_len; ++
i ) {
365 m_States.push_back(new_state);
375 template <
typename MatchType>
378 if ( m_Primed )
return;
386 template <
typename MatchType>
389 if (
size_t(
state) >= m_States.size() ) {
392 return &(m_States[
state]);
396 template <
typename MatchType>
403 template <
typename MatchType>
406 if (
size_t(
state) >= m_States.size() ) {
411 int initial = GetInitialState();
412 while ( (
next = GetNextState(m_States[
state],
letter)) == eFailState ) {
413 if (
state == initial ) {
424 template <
typename MatchType>
431 in_queue [qbeg] =
val;
433 for ( ; in_queue [q] != 0; q = in_queue [q])
continue;
440 template <
typename MatchType>
444 vector<int> state_queue(m_States.size());
453 m_States[GetInitialState()].GetTransitions() ) {
455 m_States[s].SetOnFailure(0);
456 QueueAdd(state_queue, qbeg, s);
459 while (state_queue [qbeg] != 0) {
460 r = state_queue [qbeg];
466 m_States[
r].GetTransitions() ) {
468 QueueAdd(state_queue, qbeg, s);
481 state = m_States[
r].GetOnFailure();
482 FindFail(
state, s, it->first);
488 template <
typename MatchType>
495 while ( (
next = GetNextState(
state, ch)) == eFailState) {
504 m_States[new_state].SetOnFailure(
next);
508 copy( m_States[
next].GetMatches().begin(),
509 m_States[
next].GetMatches().end(),
510 back_inserter(m_States[new_state].GetMatches()) );
514 template <
typename MatchType>
516 return m_States[
state].GetMatches();
520 template <
typename MatchType>
523 return m_States[
state].IsMatchFound();
527 template <
typename MatchType>
This implementation uses the Boyer-Moore algorithm.
container_type::const_iterator const_iterator
const_iterator end() const
const_iterator find(const key_type &key) const
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
void FindFail(int state, int new_state, char ch)
void AddWord(const string &word, const MatchType &match)
NStr::ECase m_CaseSensitive
vector< MatchType > & GetMatches(void)
vector< CState > m_States
void QueueAdd(vector< int > &in_queue, int qbeg, int val)
int GetNextState(int state, char letter) const
TMapCharInt m_Transitions
int GetOnFailure(void) const
const TMapCharInt & GetTransitions(void) const
void AddWord(const string &word)
const vector< MatchType > & GetMatches(void) const
void SetOnFailure(int state)
CTextFsa(bool case_sensitive=false)
const vector< MatchType > & GetMatches(int state) const
CTextFsm(bool case_sensitive=false)
void SetWordMatching(unsigned int whole_word=eWholeWordMatch)
Set word matching mode.
int GetNextState(char letter) const
bool IsMatchFound(void) const
bool IsMatchFound(int state) const
map< char, int > TMapCharInt
void AddMatch(const MatchType &match)
vector< MatchType > m_Matches
void AddTransition(char letter, int to)
CState * GetState(int state)
bool IsPrimed(void) const
int GetInitialState(void) const
vector< size_t > m_LastOccurrence
vector< unsigned char > m_WordDelimiters
static const int sm_AlphabetSize
int GetNextState(const CState &from, char letter) const
size_t Search(const string &text, size_t pos=0) const
Search for the pattern over text starting at position pos.
NCBI_NS_STD::string::size_type SIZE_TYPE
ECase
Which type of string comparison.
static string & ToUpper(string &str)
Convert string to upper case – string& version.
@ eNocase
Case insensitive compare.
unsigned int
A callback function used to compare two keys in a database.
static void text(MDB_val *v)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
NCBI_XUTIL_EXPORT
Parameter to control printing diagnostic message about conversion of static array data from a differe...
static Uint4 letter(char c)