43 : m_MetaphoneKeySize(meta_key_size)
50 : m_MetaphoneKeySize(meta_key_size)
59 : m_MetaphoneKeySize(meta_key_size)
71 string::size_type pos = line.find_first_of(
"|");
72 if (pos == string::npos) {
76 metaphone = line.substr(0, pos);
77 word = line.substr(pos + 1, line.length() - pos - 1);
91 ostr << iter->first <<
"|" << *word_iter << endl;
122 size_t max_alts)
const
126 list<TReverseDict::const_iterator> keys;
134 ITERATE (list<TReverseDict::const_iterator>, key_iter, keys) {
142 *set_iter, (*key_iter)->first);
143 if (alt.
score <= 0) {
148 << metaphone <<
" <-> " << (*key_iter)->first
149 <<
", " << word <<
" <-> " << *set_iter
150 <<
" (" << alt.
score <<
")");
155 count += used ? 1 : 0;
159 << keys.size() <<
" keys searched "
160 <<
count <<
" keys used");
162 if ( !words.empty() ) {
164 TAltSet::const_iterator iter = words.begin();
165 alts.push_back(*iter);
166 TAltSet::const_iterator
prev = iter;
168 iter != words.end() &&
169 (alts.size() < max_alts ||
prev->score == iter->score);
171 alts.push_back(*iter);
175 alternates.insert(alternates.end(), alts.begin(), alts.end());
181 list<TReverseDict::const_iterator>& keys)
const
183 if ( !metaphone.length() ) {
187 const size_t max_meta_edit_dist = 1;
191 string::const_iterator iter = metaphone.begin();
192 string::const_iterator end = iter + max_meta_edit_dist + 1;
195 _TRACE(
"meta key: " << metaphone);
196 for ( ; iter != end; ++iter) {
197 string seed(1, *iter);
207 if (dist > max_meta_edit_dist) {
211 keys.push_back(lower);
215 _TRACE(
"exmained " <<
count <<
" keys, returning " << keys.size());
246 if ( iter->dict->CheckWord(word) ) {
257 size_t max_alts)
const
261 iter->dict->SuggestAlternates(word, alts, max_alts);
265 if (alts.size() > max_alts) {
266 TAlternates::iterator
prev = alts.begin() + max_alts;
267 TAlternates::iterator iter =
prev;
269 for ( ; iter != alts.end() && iter->score ==
prev->score; ++iter) {
272 alts.erase(iter, alts.end());
275 alternates.swap(alts);
293 size_t max_alts)
const
297 alternates = iter->second;
bool CheckWord(const string &word) const
Virtual requirement: check a word for existence in the dictionary.
void SuggestAlternates(const string &word, TAlternates &alternates, size_t max_alternates=20) const
Scan for a list of words similar to the indicated word.
CCachedDictionary(IDictionary &dict)
CRef< IDictionary > m_Dict
static size_t GetEditDistance(const string &str1, const string &str2, EDistanceMethod method=eEditDistance_Exact)
static int Score(const string &word1, const string &word2, size_t max_metaphone=eMaxMetaphone)
Compute a nearness score for two different words or phrases.
EDistanceMethod
Return the Levenshtein edit distance between two words.
@ eEditDistance_Similar
This method performs a simpler search, looking for the distance between similar words.
static void GetMetaphone(const string &in, string *out, size_t max_chars=eMaxMetaphone)
Compute the Metaphone key for a given word. Metaphone is a more advanced algorithm than Soundex; inste...
TDictionaries m_Dictionaries
void RegisterDictionary(IDictionary &dict, int priority=ePriority_Default)
void SuggestAlternates(const string &word, TAlternates &alternates, size_t max_alternates=20) const
Scan for a list of words similar to the indicated word.
bool CheckWord(const string &word) const
Virtual requirement: check a word for existence in the dictionary.
vector< SDictionary > TDictionaries
void x_GetMetaphoneKeys(const string &metaphone, list< TReverseDict::const_iterator > &keys) const
void SuggestAlternates(const string &word, TAlternates &alternates, size_t max_alternates=20) const
Scan for a list of words similar to the indicated word.
void Read(CNcbiIstream &istr)
bool CheckWord(const string &word) const
Virtual requirement: check a word for existence in the dictionary.
CSimpleDictionary(size_t metaphone_key_size=5)
void Write(CNcbiOstream &ostr) const
void AddWord(const string &str)
Add a word to the dictionary.
TReverseDict m_ReverseDict
TForwardDict m_ForwardDict
const size_t m_MetaphoneKeySize
The size of our metaphone keys.
class IDictionary defines an abstract interface for dictionaries.
vector< SAlternate > TAlternates
virtual bool CheckWord(const string &word) const =0
Virtual requirement: check a word for existence in the dictionary.
virtual void SuggestAlternates(const string &word, TAlternates &alternates, size_t max_alternates=20) const =0
Scan for a list of words similar to the indicated word.
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator end() const
const_iterator lower_bound(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
parent_type::const_iterator const_iterator
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
constexpr auto sort(_Init &&init)
SAlternate wraps a word and its score.
Functor for sorting the alternates list by score.
bool operator()(const CMultiDictionary::SDictionary &d1, const CMultiDictionary::SDictionary &d2) const