NCBI C++ ToolKit
farmhash.h
Go to the documentation of this file.

Go to the SVN repository for this file.

1 // Copyright (c) 2014 Google, Inc.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 //
21 // FarmHash, by Geoff Pike
22 
23 //
24 // http://code.google.com/p/farmhash/
25 //
26 // This file provides a few functions for hashing strings and other
27 // data. All of them are high-quality functions in the sense that
28 // they do well on standard tests such as Austin Appleby's SMHasher.
29 // They're also fast. FarmHash is the successor to CityHash.
30 //
31 // Functions in the FarmHash family are not suitable for cryptography.
32 //
33 // WARNING: This code has been only lightly tested on big-endian platforms!
34 // It is known to work well on little-endian platforms that have a small penalty
35 // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
36 // It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
37 // bug reports are welcome.
38 //
39 // By the way, for some hash functions, given strings a and b, the hash
40 // of a+b is easily derived from the hashes of a and b. This property
41 // doesn't hold for any hash functions in this file.
42 
43 #ifndef FARM_HASH_H_
44 #define FARM_HASH_H_
45 
46 #include <assert.h>
47 #include <stdint.h>
48 #include <stdlib.h>
49 #include <string.h> // for memcpy and memset
50 #include <utility>
51 
52 #ifndef NAMESPACE_FOR_HASH_FUNCTIONS
53 #define NAMESPACE_FOR_HASH_FUNCTIONS util
54 #endif
55 
57 
58 #if defined(FARMHASH_UINT128_T_DEFINED)
59 #if defined(__clang__)
60 #if !defined(uint128_t)
61 #define uint128_t __uint128_t
62 #endif
63 #endif
64 inline uint64_t Uint128Low64(const uint128_t x) {
65  return static_cast<uint64_t>(x);
66 }
67 inline uint64_t Uint128High64(const uint128_t x) {
68  return static_cast<uint64_t>(x >> 64);
69 }
70 inline uint128_t Uint128(uint64_t lo, uint64_t hi) {
71  return lo + (((uint128_t)hi) << 64);
72 }
73 #else
74 typedef std::pair<uint64_t, uint64_t> uint128_t;
75 inline uint64_t Uint128Low64(const uint128_t x) { return x.first; }
76 inline uint64_t Uint128High64(const uint128_t x) { return x.second; }
77 inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); }
78 #endif
79 
80 
81 // BASIC STRING HASHING
82 
83 // Hash function for a byte array.
84 // May change from time to time, may differ on different platforms, may differ
85 // depending on NDEBUG.
86 size_t Hash(const char* s, size_t len);
87 
88 // Hash function for a byte array. Most useful in 32-bit binaries.
89 // May change from time to time, may differ on different platforms, may differ
90 // depending on NDEBUG.
91 uint32_t Hash32(const char* s, size_t len);
92 
93 // Hash function for a byte array. For convenience, a 32-bit seed is also
94 // hashed into the result.
95 // May change from time to time, may differ on different platforms, may differ
96 // depending on NDEBUG.
97 uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed);
98 
99 // Hash function for a byte array.
100 // May change from time to time, may differ on different platforms, may differ
101 // depending on NDEBUG.
102 uint64_t Hash64(const char* s, size_t len);
103 
104 // Hash function for a byte array. For convenience, a 64-bit seed is also
105 // hashed into the result.
106 // May change from time to time, may differ on different platforms, may differ
107 // depending on NDEBUG.
108 uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed);
109 
110 // Hash function for a byte array. For convenience, two seeds are also
111 // hashed into the result.
112 // May change from time to time, may differ on different platforms, may differ
113 // depending on NDEBUG.
114 uint64_t Hash64WithSeeds(const char* s, size_t len,
115  uint64_t seed0, uint64_t seed1);
116 
117 // Hash function for a byte array.
118 // May change from time to time, may differ on different platforms, may differ
119 // depending on NDEBUG.
120 uint128_t Hash128(const char* s, size_t len);
121 
122 // Hash function for a byte array. For convenience, a 128-bit seed is also
123 // hashed into the result.
124 // May change from time to time, may differ on different platforms, may differ
125 // depending on NDEBUG.
126 uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed);
127 
128 // BASIC NON-STRING HASHING
129 
130 // Hash 128 input bits down to 64 bits of output.
131 // This is intended to be a reasonably good hash function.
132 // May change from time to time, may differ on different platforms, may differ
133 // depending on NDEBUG.
135  // Murmur-inspired hashing.
136  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
137  uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
138  a ^= (a >> 47);
139  uint64_t b = (Uint128High64(x) ^ a) * kMul;
140  b ^= (b >> 47);
141  b *= kMul;
142  return b;
143 }
144 
145 // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
146 
147 // Fingerprint function for a byte array. Most useful in 32-bit binaries.
148 uint32_t Fingerprint32(const char* s, size_t len);
149 
150 // Fingerprint function for a byte array.
151 uint64_t Fingerprint64(const char* s, size_t len);
152 
153 // Fingerprint function for a byte array.
154 uint128_t Fingerprint128(const char* s, size_t len);
155 
156 // This is intended to be a good fingerprinting primitive.
157 // See below for more overloads.
159  // Murmur-inspired hashing.
160  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
161  uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
162  a ^= (a >> 47);
163  uint64_t b = (Uint128High64(x) ^ a) * kMul;
164  b ^= (b >> 44);
165  b *= kMul;
166  b ^= (b >> 41);
167  b *= kMul;
168  return b;
169 }
170 
171 // This is intended to be a good fingerprinting primitive.
173  // Murmur-inspired hashing.
174  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
175  uint64_t b = x * kMul;
176  b ^= (b >> 44);
177  b *= kMul;
178  b ^= (b >> 41);
179  b *= kMul;
180  return b;
181 }
182 
183 #ifndef FARMHASH_NO_CXX_STRING
184 
185 // Convenience functions to hash or fingerprint C++ strings.
186 // These require that Str::data() return a pointer to the first char
187 // (as a const char*) and that Str::length() return the string's length;
188 // they work with std::string, for example.
189 
190 // Hash function for a byte array.
191 // May change from time to time, may differ on different platforms, may differ
192 // depending on NDEBUG.
193 template <typename Str>
194 inline size_t Hash(const Str& s) {
195  assert(sizeof(s[0]) == 1);
196  return Hash(s.data(), s.length());
197 }
198 
199 // Hash function for a byte array. Most useful in 32-bit binaries.
200 // May change from time to time, may differ on different platforms, may differ
201 // depending on NDEBUG.
202 template <typename Str>
203 inline uint32_t Hash32(const Str& s) {
204  assert(sizeof(s[0]) == 1);
205  return Hash32(s.data(), s.length());
206 }
207 
208 // Hash function for a byte array. For convenience, a 32-bit seed is also
209 // hashed into the result.
210 // May change from time to time, may differ on different platforms, may differ
211 // depending on NDEBUG.
212 template <typename Str>
213 inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) {
214  assert(sizeof(s[0]) == 1);
215  return Hash32WithSeed(s.data(), s.length(), seed);
216 }
217 
218 // Hash 128 input bits down to 64 bits of output.
219 // Hash function for a byte array.
220 // May change from time to time, may differ on different platforms, may differ
221 // depending on NDEBUG.
222 template <typename Str>
223 inline uint64_t Hash64(const Str& s) {
224  assert(sizeof(s[0]) == 1);
225  return Hash64(s.data(), s.length());
226 }
227 
228 // Hash function for a byte array. For convenience, a 64-bit seed is also
229 // hashed into the result.
230 // May change from time to time, may differ on different platforms, may differ
231 // depending on NDEBUG.
232 template <typename Str>
233 inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) {
234  assert(sizeof(s[0]) == 1);
235  return Hash64WithSeed(s.data(), s.length(), seed);
236 }
237 
238 // Hash function for a byte array. For convenience, two seeds are also
239 // hashed into the result.
240 // May change from time to time, may differ on different platforms, may differ
241 // depending on NDEBUG.
242 template <typename Str>
243 inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) {
244  assert(sizeof(s[0]) == 1);
245  return Hash64WithSeeds(s.data(), s.length(), seed0, seed1);
246 }
247 
248 // Hash function for a byte array.
249 // May change from time to time, may differ on different platforms, may differ
250 // depending on NDEBUG.
251 template <typename Str>
252 inline uint128_t Hash128(const Str& s) {
253  assert(sizeof(s[0]) == 1);
254  return Hash128(s.data(), s.length());
255 }
256 
257 // Hash function for a byte array. For convenience, a 128-bit seed is also
258 // hashed into the result.
259 // May change from time to time, may differ on different platforms, may differ
260 // depending on NDEBUG.
261 template <typename Str>
262 inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) {
263  assert(sizeof(s[0]) == 1);
264  return Hash128(s.data(), s.length(), seed);
265 }
266 
267 // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
268 
269 // Fingerprint function for a byte array. Most useful in 32-bit binaries.
270 template <typename Str>
271 inline uint32_t Fingerprint32(const Str& s) {
272  assert(sizeof(s[0]) == 1);
273  return Fingerprint32(s.data(), s.length());
274 }
275 
276 // Fingerprint 128 input bits down to 64 bits of output.
277 // Fingerprint function for a byte array.
278 template <typename Str>
279 inline uint64_t Fingerprint64(const Str& s) {
280  assert(sizeof(s[0]) == 1);
281  return Fingerprint64(s.data(), s.length());
282 }
283 
284 // Fingerprint function for a byte array.
285 template <typename Str>
286 inline uint128_t Fingerprint128(const Str& s) {
287  assert(sizeof(s[0]) == 1);
288  return Fingerprint128(s.data(), s.length());
289 }
290 
291 #endif
292 
293 } // namespace NAMESPACE_FOR_HASH_FUNCTIONS
294 
295 /* gently define FARMHASH_BIG_ENDIAN when detected big-endian machine */
296 #if defined(__BIG_ENDIAN__)
297  #if !defined(FARMHASH_BIG_ENDIAN)
298  #define FARMHASH_BIG_ENDIAN
299  #endif
300 #elif defined(__LITTLE_ENDIAN__)
301  // nothing for little-endian
302 #elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER == __ORDER_LITTLE_ENDIAN__)
303  // nothing for little-endian
304 #elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER == __ORDER_BIG_ENDIAN__)
305  #if !defined(FARMHASH_BIG_ENDIAN)
306  #define FARMHASH_BIG_ENDIAN
307  #endif
308 #elif defined(__linux__) || defined(__CYGWIN__) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
309  #include <endian.h> // libc6-dev, GLIBC
310  #if BYTE_ORDER == BIG_ENDIAN
311  #if !defined(FARMHASH_BIG_ENDIAN)
312  #define FARMHASH_BIG_ENDIAN
313  #endif
314  #endif
315 #elif defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__s390x__)
316  #include <sys/endian.h>
317  #if BYTE_ORDER == BIG_ENDIAN
318  #if !defined(FARMHASH_BIG_ENDIAN)
319  #define FARMHASH_BIG_ENDIAN
320  #endif
321  #endif
322 #elif defined(_WIN32)
323  // Windows is (currently) little-endian
324 #else
325  #error "Unable to determine endianness!"
326 #endif /* __BIG_ENDIAN__ */
327 
328 #endif // FARM_HASH_H_
uint64 Uint128High64(const uint128 &x)
Definition: city.h:75
uint64 Uint128Low64(const uint128 &x)
Definition: city.h:74
uint64 Hash128to64(const uint128 &x)
Definition: city.h:101
#define NAMESPACE_FOR_HASH_FUNCTIONS
Definition: farmhash.h:53
int len
uint32_t Hash32(const Str &s)
Definition: farmhash.h:203
uint128_t Fingerprint128(const Str &s)
Definition: farmhash.h:286
uint64_t Fingerprint64(const Str &s)
Definition: farmhash.h:279
size_t Hash(const Str &s)
Definition: farmhash.h:194
uint64_t Hash64WithSeed(const Str &s, uint64_t seed)
Definition: farmhash.h:233
uint32_t Hash32WithSeed(const Str &s, uint32_t seed)
Definition: farmhash.h:213
uint128_t Hash128WithSeed(const Str &s, uint128_t seed)
Definition: farmhash.h:262
uint64_t Hash64WithSeeds(const Str &s, uint64_t seed0, uint64_t seed1)
Definition: farmhash.h:243
uint128_t Uint128(uint64_t lo, uint64_t hi)
Definition: farmhash.h:77
uint32_t Fingerprint32(const Str &s)
Definition: farmhash.h:271
uint64_t Fingerprint(uint64_t x)
Definition: farmhash.h:172
uint128_t Hash128(const Str &s)
Definition: farmhash.h:252
std::pair< uint64_t, uint64_t > uint128_t
Definition: farmhash.h:74
uint64_t Hash64(const Str &s)
Definition: farmhash.h:223
unsigned int a
Definition: ncbi_localip.c:102
#define assert(x)
Definition: srv_diag.hpp:58
unsigned int uint32_t
Definition: stdint.h:126
unsigned __int64 uint64_t
Definition: stdint.h:136
static int seed
Definition: test_table.cpp:132
Modified on Thu Nov 30 04:54:21 2023 by modify_doxy.py rev. 669887