NCBI C++ ToolKit
vector.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef ALGO_TEXT___VECTOR__HPP
2 #define ALGO_TEXT___VECTOR__HPP
3 
4 /* $Id: vector.hpp 79823 2017-10-16 18:33:56Z dicuccio $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Mike DiCuccio
30  *
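31  * File Description:
32  *   Score-vector containers (CRawScoreVector, CScoreVector) and declarations of the vector scoring functions.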
33  */
34 
35 #include <corelib/ncbiobj.hpp>
36 
38 
39 template <class Key, class Score>
40 class CScoreVector;
41 
42 template <class Key, class Score>
43 class CRawScoreVector;
44 
45 
46 /////////////////////////////////////////////////////////////////////////////
47 ///
48 /// class CRawScoreVector stores its data in a (sorted) STL vector;
49 /// this gives a better memory profile and is easier to serialize
50 ///
/// Entries are (Key, Score) pairs (TIdxScore) held in a vector (TVector).
/// Only declarations are given in this class body, apart from the inline
/// destructor and the raw Set()/Get() accessors.
///
/// NOTE(review): this numbered listing skips lines 60, 64, 67-68, 70-71,
/// 104, 124-128, 135 and 138 of the class.  The page's cross-reference
/// index names a value_type typedef (TIdxScore), operator=, operator+=,
/// operator-=, operator*=, operator/=, and the members m_Uid (key_type)
/// and m_Data (TVector); their declarations are not visible here --
/// confirm against the real header before relying on this text.
51 
52 template <class Key, class Score>
53 class CRawScoreVector : public CObject
54 {
55 public:
56  typedef Key key_type;                 ///< key (index) type
57  typedef Score score_type;             ///< score (weight) type
58  typedef pair<Key, Score> TIdxScore;   ///< one (key, score) entry
59  typedef vector<TIdxScore> TVector;    ///< underlying storage type
61  typedef typename TVector::iterator iterator;
62  typedef typename TVector::const_iterator const_iterator;
63 
65  virtual ~CRawScoreVector() {}         // virtual, empty body
66 
69 
72 
 /// Exchange with another raw score vector.
 /// NOTE(review): presumably swaps both id and data -- definition not shown.
73  virtual void Swap(CRawScoreVector<Key, Score>& other);
74 
75  /// @name STL-ish functions
 /// Container-style interface using this class's iterator typedefs.
 /// NOTE(review): value_type (used by insert) is not declared in the
 /// visible lines; the page index lists it as TIdxScore.
76  /// @{
77 
78  void clear();
79  bool empty() const;
80  size_t size() const;
81  void reserve(size_t size);
82  iterator begin();
83  iterator end();
84  const_iterator begin() const;
85  const_iterator end() const;
86  iterator find(const Key& key);
87  const_iterator find(const Key& key) const;
88  void insert(const value_type& val);
89  void insert(iterator ins_before,
90  const_iterator start,
91  const_iterator stop);
92  void erase(iterator it);
93 
94  /// @}
95 
96  ///
97  /// setup functions
98  ///
99  key_type GetId() const;               ///< read the id (a key_type value)
100  void SetId(key_type uid);            ///< set the id
101  Score Get(Key idx) const;            ///< look up a score by key
102  void Set(Key idx, Score weight);     ///< assign a score for a key
103  void Add(Key idx, Score weight = Score(1)); ///< weight defaults to Score(1)
105 
 // NOTE(review): the exact trimming criteria are defined with the
 // implementations, which are not visible in this listing.
106  void TrimLength(float trim_pct);
107  void TrimCount (size_t max_words);
108  void TrimThresh(Score min_score);
109 
110  /// force the vector to be sorted in order of descending score
111  void SortByScore();
112 
113  /// re-sort the vector by index.
114  /// This should normally never need to be done
115  void SortByIndex();
116 
117  ///
118  /// math functions
119  ///
 // Both length functions return float regardless of Score.
120  float Length2() const;
121  float Length() const;
122  void Normalize();
123  void ProbNormalize();
124 
129 
 /// Direct mutable access to the underlying container (m_Data).
130  TVector& Set() { return m_Data; }
 /// Read-only access to the underlying container (m_Data).
131  const TVector& Get() const { return m_Data; }
132 
133 protected:
134  /// UID for this set
 // NOTE(review): the member declaration (listing line 135) is absent here.
136 
137  /// the data for this document
 // NOTE(review): the member declaration (listing line 138) is absent here.
139 };
140 
141 
142 /////////////////////////////////////////////////////////////////////////////
///
/// class CScoreVector: a CObject-derived container of (Key, Score) entries
/// with STL-style access, trimming and normalization declarations.
///
/// NOTE(review): this numbered listing skips lines 151, 154, 158-161,
/// 215-218, 225 and 228 of the class.  The page's cross-reference index
/// gives TVector as map<Key, Score>, const_iterator as
/// TVector::const_iterator, and names operator=, operator+=, operator-=,
/// operator*=, operator/= plus the members m_Uid (key_type) and m_Data
/// (TVector); none of those declarations are visible here -- confirm
/// against the real header.
///
143 
144 
145 template <class Key, class Score>
146 class CScoreVector : public CObject
147 {
148 public:
149  typedef Key key_type;                ///< key (index) type
150  typedef Score score_type;            ///< score (weight) type
152  typedef typename TVector::value_type value_type;
153  typedef typename TVector::iterator iterator;
155 
156  CScoreVector();
157  virtual ~CScoreVector() {}           // virtual, empty body
162 
 /// Exchange with another score vector.
 /// NOTE(review): presumably swaps both id and data -- definition not shown.
163  virtual void Swap(CScoreVector<Key, Score>& other);
164 
165  /// @name STL-ish functions
 /// Container-style interface; the insert() overloads have map-like
 /// signatures (pair<iterator, bool> result, hinted insert, range insert).
166  /// @{
167 
168  void clear();
169  bool empty() const;
170  size_t size() const;
171  iterator begin();
172  iterator end();
173  const_iterator begin() const;
174  const_iterator end() const;
175  iterator find(const Key& key);
176  const_iterator find(const Key& key) const;
177  pair<iterator, bool> insert(const value_type& val);
178  iterator insert(iterator hint, const value_type& val);
179  void erase(iterator it);
180  void erase(const key_type& v);
181 
 /// Range insert: forwards [it_begin, it_end) directly to m_Data.insert().
182  template <typename OtherIterator>
183  void insert(OtherIterator it_begin, OtherIterator it_end)
184  {
185  m_Data.insert(it_begin, it_end);
186  }
187 
188  /// @}
189 
190  ///
191  /// setup functions
192  ///
193  key_type GetId() const;              ///< read the id (a key_type value)
194  void SetId(key_type uid);            ///< set the id
195  size_t GetSize() const { return m_Data.size(); } ///< m_Data.size()
196  Score Get(Key idx) const;            ///< look up a score by key
197  void Set(Key idx, Score weight);     ///< assign a score for a key
198  void Add(Key idx, Score weight = Score(1)); ///< weight defaults to Score(1)
199 
 // NOTE(review): the exact trimming criteria are defined with the
 // implementations, which are not visible in this listing.
200  void TrimLength(float trim_pct);
201  void TrimCount (size_t max_words);
202  void TrimThresh(Score min_score);
203 
 // Operations combining this vector with another CScoreVector of the
 // same Key/Score types; semantics are defined with the implementations.
204  void SubtractMissing(const CScoreVector<Key, Score>& other);
205  void AddScores (const CScoreVector<Key, Score>& other);
206 
207  ///
208  /// math functions
209  ///
 // Both length functions return float regardless of Score.
210  float Length2() const;
211  float Length() const;
212  void Normalize();
213  void ProbNormalize();
214 
219 
 /// Direct mutable access to the underlying container (m_Data).
220  TVector& Set() { return m_Data; }
 /// Read-only access to the underlying container (m_Data).
221  const TVector& Get() const { return m_Data; }
222 
223 protected:
224  /// UID for this set
 // NOTE(review): the member declaration (listing line 225) is absent here.
226 
227  /// the data for this document
 // NOTE(review): the member declaration (listing line 228) is absent here.
229 };
230 
231 
232 
233 /// @name Scoring Interface
/// Function templates that take a query vector and a second vector, each of
/// an independently deduced vector type, and return a float score.
/// Only declarations appear here; the formulas live with the definitions.
234 /// @{
235 
/// NOTE(review): presumably a blend of several of the measures below --
/// confirm against the definition.
236 template <class ScoreVectorA, class ScoreVectorB>
237 inline
238 float ScoreCombined(const ScoreVectorA& query, const ScoreVectorB& vec);
239 
/// NOTE(review): presumably cosine similarity -- confirm against the definition.
240 template <class ScoreVectorA, class ScoreVectorB>
241 inline
242 float ScoreCosine(const ScoreVectorA& query, const ScoreVectorB& vec);
243 
/// Dice coefficient of the two vectors (formula given with the definition).
244 template <class ScoreVectorA, class ScoreVectorB>
245 inline
246 float ScoreDice(const ScoreVectorA& query, const ScoreVectorB& vec);
247 
/// NOTE(review): presumably a distance between the two vectors -- confirm
/// the metric against the definition.
248 template <class ScoreVectorA, class ScoreVectorB>
249 inline
250 float ScoreDistance(const ScoreVectorA& query, const ScoreVectorB& vec);
251 
/// NOTE(review): presumably the dot product -- confirm against the definition.
252 template <class ScoreVectorA, class ScoreVectorB>
253 inline
254 float ScoreDot(const ScoreVectorA& query, const ScoreVectorB& vec);
255 
/// Jaccard coefficient of the two vectors (formula given with the definition).
256 template <class ScoreVectorA, class ScoreVectorB>
257 inline
258 float ScoreJaccard(const ScoreVectorA& query, const ScoreVectorB& vec);
259 
/// The overlap function is a dot product weighted by the *shortest* of
/// each term.
260 template <class ScoreVectorA, class ScoreVectorB>
261 inline
262 float ScoreOverlap(const ScoreVectorA& query, const ScoreVectorB& vec);
263 
264 /// @}
265 
266 
267 
269 
270 
271 #include <algo/text/vector_impl.hpp>
272 
273 #endif // ALGO_TEXT___VECTOR__HPP
CObject –.
Definition: ncbiobj.hpp:180
class CRawScoreVector stores its data in a (sorted) STL vector this gives a better memory profile and...
Definition: vector.hpp:54
void TrimThresh(Score min_score)
vector< TIdxScore > TVector
Definition: vector.hpp:59
iterator begin()
TIdxScore value_type
Definition: vector.hpp:60
CRawScoreVector< Key, Score > & operator+=(const CRawScoreVector< Key, Score > &other)
void TrimLength(float trim_pct)
size_t size() const
virtual void Swap(CRawScoreVector< Key, Score > &other)
void SortByIndex()
re-sort the vector by index.
bool empty() const
TVector::const_iterator const_iterator
Definition: vector.hpp:62
key_type GetId() const
setup functions
void TrimCount(size_t max_words)
pair< Key, Score > TIdxScore
Definition: vector.hpp:58
const TVector & Get() const
Definition: vector.hpp:131
void insert(const value_type &val)
void SortByScore()
force the vector to be sorted in order of descending score
virtual ~CRawScoreVector()
Definition: vector.hpp:65
iterator find(const Key &key)
void SetId(key_type uid)
key_type m_Uid
UID for this set.
Definition: vector.hpp:135
void reserve(size_t size)
iterator end()
void erase(iterator it)
CRawScoreVector & operator=(const CScoreVector< Key, Score > &)
float Length2() const
math functions
TVector m_Data
the data for this document
Definition: vector.hpp:138
CRawScoreVector< Key, Score > & operator/=(Score val)
float Length() const
Score score_type
Definition: vector.hpp:57
void Add(Key idx, Score weight=Score(1))
TVector & Set()
Definition: vector.hpp:130
TVector::iterator iterator
Definition: vector.hpp:61
CRawScoreVector< Key, Score > & operator*=(Score val)
CRawScoreVector< Key, Score > & operator-=(const CRawScoreVector< Key, Score > &other)
void insert(OtherIterator it_begin, OtherIterator it_end)
Definition: vector.hpp:183
void TrimThresh(Score min_score)
CScoreVector< Key, Score > & operator-=(const CScoreVector< Key, Score > &other)
TVector::const_iterator const_iterator
Definition: vector.hpp:154
const TVector & Get() const
Definition: vector.hpp:221
virtual ~CScoreVector()
Definition: vector.hpp:157
iterator find(const Key &key)
CScoreVector< Key, Score > & operator/=(Score val)
void ProbNormalize()
map< Key, Score > TVector
Definition: vector.hpp:151
bool empty() const
iterator begin()
size_t GetSize() const
Definition: vector.hpp:195
void TrimLength(float trim_pct)
iterator end()
CScoreVector & operator=(const CScoreVector< Key, Score > &other)
void SubtractMissing(const CScoreVector< Key, Score > &other)
pair< iterator, bool > insert(const value_type &val)
CScoreVector< Key, Score > & operator*=(Score val)
float Length() const
CScoreVector< Key, Score > & operator+=(const CScoreVector< Key, Score > &other)
TVector m_Data
the data for this document
Definition: vector.hpp:228
float Length2() const
math functions
TVector & Set()
Definition: vector.hpp:220
void erase(iterator it)
void Normalize()
key_type m_Uid
UID for this set.
Definition: vector.hpp:225
void Add(Key idx, Score weight=Score(1))
size_t size() const
TVector::value_type value_type
Definition: vector.hpp:152
TVector::iterator iterator
Definition: vector.hpp:153
key_type GetId() const
setup functions
virtual void Swap(CScoreVector< Key, Score > &other)
void TrimCount(size_t max_words)
void AddScores(const CScoreVector< Key, Score > &other)
Key key_type
Definition: vector.hpp:149
void SetId(key_type uid)
Score score_type
Definition: vector.hpp:150
size_type size() const
Definition: map.hpp:148
container_type::const_iterator const_iterator
Definition: map.hpp:53
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
container_type::value_type value_type
Definition: map.hpp:52
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
n font weight
const struct ncbi::grid::netcache::search::fields::KEY key
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
static string query
float ScoreDistance(const ScoreVectorA &query, const ScoreVectorB &vec)
float ScoreJaccard(const ScoreVectorA &query, const ScoreVectorB &vec)
The Jaccard coefficient is defined as.
float ScoreCombined(const ScoreVectorA &query, const ScoreVectorB &vec)
float ScoreCosine(const ScoreVectorA &query, const ScoreVectorB &vec)
float ScoreOverlap(const ScoreVectorA &query, const ScoreVectorB &vec)
The overlap function is a dot product weighted by the *shortest* of each term.
float ScoreDot(const ScoreVectorA &query, const ScoreVectorB &vec)
float ScoreDice(const ScoreVectorA &query, const ScoreVectorB &vec)
The dice coefficient is defined as.
Modified on Mon Dec 11 02:42:20 2023 by modify_doxy.py rev. 669887