NCBI C++ ToolKit
checksum.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: checksum.cpp 91523 2020-11-06 20:26:34Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Eugene Vasilchenko, Vladimir Ivanov
27  *
28  * File Description: Checksum and hash calculation classes.
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <corelib/ncbifile.hpp>
35 #include <util/checksum.hpp>
36 
37 // Use the built-in versions of the CityHash and FarmHash libraries,
38 // which are compiled as separate files to avoid name clashes.
39 #include "checksum/cityhash/city.h"
40 #include "checksum/farmhash/config.h" // need for farmhash.h
42 // And include MurmurHash directly
43 #include "checksum/murmurhash/MurmurHash2.cxx"
44 #include "checksum/murmurhash/MurmurHash3.cxx"
45 
46 
// Request the hardware CRC32C code path. The checks below keep the request
// only for the compiler/architecture combinations handled in this file, and
// additionally define HAVE_CRC32C_64 on 64-bit x86 targets.
#define USE_CRC32C_INTEL // try to use Intel CRC32C instructions

#if defined(USE_CRC32C_INTEL)
#  undef USE_CRC32C_INTEL // we'll define it again where available
#  if defined(NCBI_COMPILER_GCC) || defined(NCBI_COMPILER_ICC) \
      || defined(NCBI_COMPILER_ANY_CLANG)
#    if defined(__x86_64__) || defined(__i386__)
#      ifdef HAVE_CPUID_H
#        include <cpuid.h>
#      endif
#      define USE_CRC32C_INTEL
#    endif
#    if defined(__x86_64__)
#      define HAVE_CRC32C_64
#    endif
#  elif defined(NCBI_COMPILER_MSVC)
#    if defined(_M_X64) || defined(_M_IX86)
#      include <intrin.h>
#      define USE_CRC32C_INTEL
#    endif
#    if defined(_M_X64)
#      define HAVE_CRC32C_64
#    endif
#  endif
#endif
72 
73 
75 
76 
77 static const size_t kCRC32Size = 256;
79 
80 // Defines
81 
82 #define TABLES_COUNT 8
83 #define NCBI_USE_PRECOMPILED_CRC32_TABLES 1
84 
85 // sx_Start must begin with "/* O" (see ValidChecksumLine() in checksum.hpp)
86 static const char sx_Start[] = "/* Original file checksum: ";
87 static const char sx_End[] = " */";
88 static const char sx_LineCount[] = "lines: ";
89 static const char sx_CharCount[] = "chars: ";
90 
91 // Forward declarations
92 
93 #ifdef NCBI_USE_PRECOMPILED_CRC32_TABLES
94  static inline void s_InitTableCRC32Forward() {}
95  static inline void s_InitTableCRC32Reverse() {}
96  static inline void s_InitTableCRC32CReverse() {}
97 #else
98  static void s_InitTableCRC32Forward();
99  static void s_InitTableCRC32Reverse();
100  static void s_InitTableCRC32CReverse();
101 #endif //NCBI_USE_PRECOMPILED_CRC32_TABLES
102 
103 #ifdef USE_CRC32C_INTEL
104  static bool s_IsCRC32CIntelEnabled(void);
105 #endif
106 
107 
108 
110  : m_Method(eNone)
111 {
112  x_Reset(method);
113 }
114 
115 
117 {
118  x_Free();
119 }
120 
121 
123  : m_Method(other.m_Method),
124  m_CharCount(other.m_CharCount)
125 {
126  if ( m_Method == eMD5 ) {
127  m_Value.md5 = new CMD5(*other.m_Value.md5);
128  } else {
129  m_Value.v64 = other.m_Value.v64;
130  }
131 }
132 
133 
135 {
136  if (&other == this){
137  return *this;
138  }
139  x_Free();
140 
141  m_Method = other.m_Method;
142  m_CharCount = other.m_CharCount;
143 
144  if ( m_Method == eMD5 ) {
145  m_Value.md5 = new CMD5(*other.m_Value.md5);
146  } else {
147  m_Value.v64 = other.m_Value.v64;
148  }
149  return *this;
150 }
151 
152 
153 string CChecksumBase::GetResultHex(void) const
154 {
155  switch (m_Method ) {
156  case eMD5:
157  return m_Value.md5->GetHexSum();
158  default:
159  if (GetBits() == 64) {
160  return NStr::NumericToString(GetResult64(), 0, 16);
161  }
162  if (GetBits() == 32) {
163  return NStr::NumericToString(GetResult32(), 0, 16);
164  }
165  _ASSERT(0);
166  return kEmptyStr;
167  }
168 }
169 
170 
172 {
173  x_Free();
174 
175  m_Method = method;
176  m_Value.v64 = 0;
177  m_CharCount = 0;
178 
179  switch ( method ) {
180  case eCRC32:
181  case eCRC32CKSUM:
183  break;
184  case eCRC32ZIP:
185  case eCRC32INSD:
186  m_Value.v32 = ~0;
188  break;
189  case eCRC32C:
190  m_Value.v32 = ~0;
191 #ifdef USE_CRC32C_INTEL
192  if ( s_IsCRC32CIntelEnabled() ) {
193  break;
194  }
195 #endif
197  break;
198  case eAdler32:
199  m_Value.v32 = 1;
200  break;
201  case eMD5:
202  m_Value.md5 = new CMD5;
203  break;
204  case eCityHash32:
205  case eCityHash64:
206  case eFarmHash32:
207  case eFarmHash64:
208  case eMurmurHash2_32:
209  case eMurmurHash2_64:
210  case eMurmurHash3_32:
211  break;
212  default:
213  _ASSERT(0);
214  }
215 }
216 
217 
219  : CChecksumBase((EMethodDef)method)
220 {
221 }
222 
223 
224 CHash::CHash(const CHash& other)
225  : CChecksumBase(other)
226 {
227 }
228 
229 
231 {
233  return *this;
234 }
235 
236 
237 /// @sa CHash::SetSeed()
239 
240 
242 {
243  m_Seed = seed;
244 }
245 
246 
248 {
249  CHash h(method);
250  h.Calculate(str);
251  hash = h.GetResult32();
252 }
253 
254 
256 {
257  CHash h(method);
258  h.Calculate(str);
259  hash = h.GetResult64();
260 }
261 
262 
263 void CHash::Calculate(const char* str, size_t len, EMethod method, Uint4& hash)
264 {
265  CHash h(method);
266  h.Calculate(str, len);
267  hash = h.GetResult32();
268 }
269 
270 
271 void CHash::Calculate(const char* str, size_t len, EMethod method, Uint8& hash)
272 {
273  CHash h(method);
274  h.Calculate(str, len);
275  hash = h.GetResult64();
276 }
277 
278 
280  : CChecksumBase((EMethodDef)method),
281  m_LineCount(0)
282 {
283 }
284 
285 
287  : CChecksumBase(other),
288  m_LineCount(other.m_LineCount)
289 {
290 }
291 
292 
294 {
296  m_LineCount = other.m_LineCount;
297  return *this;
298 }
299 
300 
302 {
303  if (!out.good()) {
304  return out;
305  }
306  out << sx_Start
307  << sx_LineCount << m_LineCount << ", "
308  << sx_CharCount << m_CharCount << ", ";
310  return out << sx_End << '\n';
311 }
312 
313 
314 bool CChecksum::ValidChecksumLineLong(const char* line, size_t len) const
315 {
318  string buffer_str = CNcbiOstrstreamToString(buffer);
319  if ( buffer_str.size() != len + 1 ) { // account for '\n'
320  return false;
321  }
322  return memcmp(line, buffer_str.data(), len) == 0;
323 }
324 
325 
327 {
328  if ( GetMethod() == eMD5 ) {
329  out << m_Value.md5->GetHexSum();
330  } else {
331  IOS_BASE::fmtflags flags = out.setf(IOS_BASE::hex, IOS_BASE::basefield);
332  out << setprecision(8);
333  out << GetChecksum();
334  out.flags(flags);
335  }
336  return out;
337 }
338 
339 
341 {
342  switch ( GetMethod() ) {
343  case eMD5:
344  out << "MD5: ";
345  break;
346  case eAdler32:
347  out << "Adler32: ";
348  break;
349  case eCRC32:
350  case eCRC32ZIP:
351  case eCRC32INSD:
352  case eCRC32CKSUM:
353  case eCRC32C:
354  out << "CRC32: ";
355  break;
356  default:
357  _ASSERT(0);
358  return out;
359  }
360  WriteHexSum(out);
361  return out;
362 }
363 
364 
366 {
367  char eol = '\n';
368  x_Update(&eol, 1);
369  ++m_LineCount;
370 }
371 
372 
373 void CChecksum::AddFile(const string& file_path)
374 {
375  CFileIO f;
376  try {
377  f.Open(file_path, CFileIO::eOpen, CFileIO::eRead);
378  CChecksum tmp(*this);
379  size_t n;
380  char buf[1024 * 8];
381  while ((n = f.Read(buf, sizeof(buf))) > 0) {
382  tmp.AddChars(buf, n);
383  }
384  f.Close();
385  *this = tmp;
386  }
387  catch (CFileException& e) {
388  f.Close();
389  NCBI_RETHROW(e, CChecksumException, eFileIO, "Error add checksum for file: " + file_path);
390  throw;
391  }
392 }
393 
394 
396 {
397  if ( is.eof() ) {
398  return;
399  }
400  if ( !is.good() ) {
401  NCBI_THROW(CChecksumException, eStreamIO, "Input stream is not good()");
402  return;
403  }
404  CChecksum tmp(*this);
405 
406  while ( !is.eof() ) {
407  char buf[1024 * 8];
408  is.read(buf, sizeof(buf));
409  size_t n = (size_t)is.gcount();
410  if (n) {
411  tmp.AddChars(buf, n);
412  } else {
413  if (is.fail() && !is.eof()) {
414  NCBI_THROW(CChecksumException, eStreamIO, "Error reading from input stream");
415  return;
416  }
417  }
418  }
419  *this = tmp;
420 }
421 
422 
423 // @deprecated
424 CChecksum& ComputeFileChecksum_deprecated(const string& path, CChecksum& checksum)
425 {
426  CNcbiIfstream is(path.c_str(), IOS_BASE::in | IOS_BASE::binary);
427  if ( !is.is_open() ) {
428  return checksum;
429  }
430  while ( !is.eof() ) {
431  char buf[1024*8];
432  is.read(buf, sizeof(buf));
433  size_t count = (size_t)is.gcount();
434  if ( count ) {
435  checksum.AddChars(buf, count);
436  }
437  }
438  is.close();
439  return checksum;
440 }
441 
442 // @deprecated
444 {
445  CChecksum checksum(method);
446  return ComputeFileChecksum_deprecated(path, checksum);
447 }
448 
449 // @deprecated
450 CChecksum& ComputeFileChecksum(const string& path, CChecksum& checksum)
451 {
452  return ComputeFileChecksum_deprecated(path, checksum);
453 }
454 
455 // @deprecated
456 Uint4 ComputeFileCRC32(const string& path)
457 {
458  CChecksum checksum(CChecksum::eCRC32);
459  return ComputeFileChecksum_deprecated(path, checksum).GetChecksum();
460 }
461 
462 
464 {
468 }
469 
470 
471 template<size_t kCRC32Tables>
472 static inline
473 void s_PrintTable(CNcbiOstream& out, const char* name,
474  const TCRC32Table (&table)[kCRC32Tables])
475 {
476  const size_t kLineSize = 4;
477  out << "static const TCRC32Table " << name << "["<<kCRC32Tables<<"] = {";
478  for ( size_t k = 0; k < kCRC32Tables; ++k ) {
479  if ( k ) {
480  out << ',';
481  }
482  out << "\n {";
483  for ( size_t i = 0; i < kCRC32Size; ++i ) {
484  if ( i != 0 ) {
485  out << ',';
486  }
487  if ( i % kLineSize == 0 ) {
488  out << "\n ";
489  } else {
490  out << ' ';
491  }
492  out << "0x" << hex << setw(8) << setfill('0') << table[k][i];
493  }
494  out << "\n }";
495  }
496  out << dec << "\n};\n" << endl;
497 }
498 
499 
500 #ifdef NCBI_USE_PRECOMPILED_CRC32_TABLES
501 
502 # include "crc32tables.c"
503 
504 #else
505 
509 
510 /////////////////////////////////////////////////////////////////////////////
511 // Implementation of CRC32 algorithm.
512 /////////////////////////////////////////////////////////////////////////////
513 //
514 // This code assumes that an unsigned is at least 32 bits wide and
515 // that the predefined type char occupies one 8-bit byte of storage.
516 
517 // The polynomial used is
518 // x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0
519 #define CRC32_POLYNOMIAL 0x04c11db7
520 // CRC32C (Castagnoli) polynomial is
521 // x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+x^11+x^10+x^9+x^8+x^6+x^0
522 #define CRC32C_POLYNOMIAL 0x1edc6f41
523 
524 // CRC32 is linear, meaning that for any texts t1 and t2:
525 // CRC32[t1 XOR t2] = CRC32[t1] XOR CRC32[t2].
526 // This makes it possible to speed up the calculation of CRC32 tables by
527 // first calculating the CRC32 of bytes with only one bit set,
528 // and then XORing the CRC32 of the lowest set bit with the CRC32 of the
529 // remaining bits to get the CRC32 of the whole number.
530 // The first part is done by calling s_CalcByteCRC32Forward or
531 // s_CalcByteCRC32Reverse for each bit.
532 // The second pass is the same for any CRC32 and is performed by the function
533 // s_FillMultiBitsCRC().
534 
535 
536 static inline
537 Uint4 s_CalcByteCRC32Forward(size_t byte, Uint4 polynomial)
538 {
539  Uint4 byteCRC = byte << 24;
540  for ( int j = 0; j < 8; ++j ) {
541  if ( byteCRC & 0x80000000U )
542  byteCRC = (byteCRC << 1) ^ polynomial;
543  else
544  byteCRC = (byteCRC << 1);
545  }
546  return byteCRC;
547 }
548 
549 
550 static inline
551 Uint4 s_CalcByteCRC32Reverse(size_t byte, Uint4 reversed_polynomial)
552 {
553  Uint4 byteCRC = byte;
554  for ( int j = 0; j < 8; ++j ) {
555  if ( byteCRC & 1 )
556  byteCRC = (byteCRC >> 1) ^ reversed_polynomial;
557  else
558  byteCRC = (byteCRC >> 1);
559  }
560  return byteCRC;
561 }
562 
563 
564 static inline
565 void s_FillMultiBitsCRC(Uint4* table, size_t size)
566 {
567  // Preconditions:
568  // Entries at one-bit indexes (1<<k), are calculated.
569  for ( size_t i = 1; i < size; ++i ) { // order is significant
570  // Split bits of i into two parts:
571  // lobit contains lowest bit set, or zero if no bits are set,
572  // hibits contains all other bits.
573  size_t hibits = i & (i-1);
574  size_t lobit = i & ~(i-1);
575  // Because of:
576  // 1. i = lobit ^ hibits
577  // 2. lobit <= i
578  // 3. hibits <= i
579  // we can calculate entry at i by xoring entries at lobit and hibits
580  // There are 3 possible cases:
581  // A. i = 0
582  // In this case lobit = 0 and hibits = 0.
583  // As a result table[0] will become 0, which is correct for CRC.
584  // B. i = 1<<k
585  // In this case lobit = i, and hibits = 0.
586  // table[i] will become table[i] ^ table[0].
587  // Because table[0] is 0 (see case A above),
588  // table[i] will not change and will preserve precalculated value
589  // (see Preconditions above).
590  // C. all other i
591  // In this case lobit < i, and hibits < i
592  // It means the entries at lobit and hibits are calculated already
593  // because of the order of iteration by i.
594  table[i] = table[lobit] ^ table[hibits];
595  }
596 }
597 
598 
599 template<size_t kCRC32Tables>
600 static inline
601 void s_InitTableCRC32Forward(TCRC32Table (&table)[kCRC32Tables],
602  Uint4 polynomial)
603 {
604  // check the last element to make sure we minimize chances of races
605  // in MT programs.
606  if ( table[kCRC32Tables-1][kCRC32Size-1] ) {
607  return;
608  }
609  // Initialize CRC32 for bytes with only one bit set
610  for ( size_t i = 1; i < kCRC32Size; i <<= 1 ) {
611  table[0][i] = s_CalcByteCRC32Forward(i, polynomial);
612  }
613  // Fill the rest of the main table
614  s_FillMultiBitsCRC(table[0], kCRC32Size);
615  // Fill secondary tables
616  for ( size_t k = 1; k < kCRC32Tables; ++k ) {
617  for ( size_t i = 0; i < kCRC32Size; ++i ) {
618  Uint4 checksum = table[k-1][i];
619  checksum = (checksum << 8) ^ table[0][checksum >> 24];
620  table[k][i] = checksum;
621  }
622  }
623 }
624 
625 
626 template<size_t kCRC32Tables>
627 static inline
628 void s_InitTableCRC32Reverse(TCRC32Table (&table)[kCRC32Tables],
629  Uint4 polynomial)
630 {
631  Uint4 reversed_polynomial = 0;
632  for ( size_t i = 0; i < 32; ++i ) {
633  reversed_polynomial = (reversed_polynomial << 1)|(polynomial & 1);
634  polynomial >>= 1;
635  }
636  // check the last element to make sure we minimize chances of races
637  // in MT programs.
638  if ( table[kCRC32Tables-1][kCRC32Size-1] ) {
639  return;
640  }
641  // Initialize CRC32 for bytes with only one bit set
642  for ( size_t i = 1; i < kCRC32Size; i <<= 1 ) {
643  table[0][i] = s_CalcByteCRC32Reverse(i, reversed_polynomial);
644  }
645  // Fill the rest of the table
646  s_FillMultiBitsCRC(table[0], kCRC32Size);
647  // Fill secondary tables
648  for ( size_t k = 1; k < kCRC32Tables; ++k ) {
649  for ( size_t i = 0; i < kCRC32Size; ++i ) {
650  Uint4 checksum = table[k-1][i];
651  checksum = (checksum >> 8) ^ table[0][checksum & 0xff];
652  table[k][i] = checksum;
653  }
654  }
655 }
656 
657 
658 void s_InitTableCRC32Forward(void)
659 {
660  s_InitTableCRC32Forward(s_CRC32TableForward, CRC32_POLYNOMIAL);
661 }
662 
663 
664 void s_InitTableCRC32Reverse(void)
665 {
666  s_InitTableCRC32Reverse(s_CRC32TableReverse, CRC32_POLYNOMIAL);
667 }
668 
669 
670 void s_InitTableCRC32CReverse(void)
671 {
672  s_InitTableCRC32Reverse(s_CRC32CTableReverse, CRC32C_POLYNOMIAL);
673 }
674 
675 
676 #endif //NCBI_USE_PRECOMPILED_CRC32_TABLES
677 
678 
// Forward CRC32: consume one byte at 'str' and update 'crc' in place.
#define s_UpdateCRC32Forward_1(crc, str, table)                         \
    do {                                                                \
        const Uint4 idx_ = ((crc) >> 24) ^ *(const Uint1*)(str);        \
        (crc) = (table)[0][idx_] ^ ((crc) << 8);                        \
    } while(0)
684 
// Forward CRC32: consume two bytes at 'str' (read as one Uint2) and update
// 'crc' in place. Uses tables 0 and 1.
#define s_UpdateCRC32Forward_2(crc, str, table)                         \
    do {                                                                \
        const Uint4 w_ = *(const Uint2*)(str);                          \
        /* index bytes are in wrong order */                            \
        const Uint4 i0_ = (((crc) >> 16) ^ (w_ >> 8)) & 0xff;           \
        const Uint4 i1_ = (((crc) >> 24) ^ (w_     )) & 0xff;           \
        (crc) = ((crc) << 16) ^ (table)[0][i0_] ^ (table)[1][i1_];      \
    } while(0)
693 
// Forward CRC32: consume four bytes at 'str' (read as one Uint4) and update
// 'crc' in place. Uses tables 0..3.
#define s_UpdateCRC32Forward_4(crc, str, table)                         \
    do {                                                                \
        const Uint4 w_ = *(const Uint4*)(str);                          \
        /* index bytes are in wrong order */                            \
        const Uint4 i0_ = (((crc)      ) ^ (w_ >> 24)) & 0xff;          \
        const Uint4 i1_ = (((crc) >>  8) ^ (w_ >> 16)) & 0xff;          \
        const Uint4 i2_ = (((crc) >> 16) ^ (w_ >>  8)) & 0xff;          \
        const Uint4 i3_ = (((crc) >> 24) ^ (w_      )) & 0xff;          \
        (crc) = (table)[0][i0_] ^ (table)[1][i1_] ^                     \
                (table)[2][i2_] ^ (table)[3][i3_];                      \
    } while(0)
704 
// Forward CRC32: consume eight bytes at 'str' (read as two Uint4 words) and
// update 'crc' in place. The first word is combined with the current 'crc'
// (tables 4..7); the second word is looked up directly (tables 0..3).
#define s_UpdateCRC32Forward_8(crc, str, table)                         \
    do {                                                                \
        const Uint4 w0_ = ((const Uint4*)(str))[0];                     \
        const Uint4 w1_ = ((const Uint4*)(str))[1];                     \
        /* index bytes are in wrong order */                            \
        (crc) =                                                         \
            (table)[7][(((crc) >> 24) ^ (w0_      )) & 0xff] ^          \
            (table)[6][(((crc) >> 16) ^ (w0_ >>  8)) & 0xff] ^          \
            (table)[5][(((crc) >>  8) ^ (w0_ >> 16)) & 0xff] ^          \
            (table)[4][(((crc)      ) ^ (w0_ >> 24)) & 0xff] ^          \
            (table)[3][(w1_      ) & 0xff] ^                            \
            (table)[2][(w1_ >>  8) & 0xff] ^                            \
            (table)[1][(w1_ >> 16) & 0xff] ^                            \
            (table)[0][(w1_ >> 24)       ];                             \
    } while(0)
720 
721 
// Reverse CRC32: consume one byte at 'str' and update 'crc' in place.
#define s_UpdateCRC32Reverse_1(crc, str, table)                         \
    do {                                                                \
        const Uint4 x_ = *(const Uint1*)(str) ^ (crc);                  \
        (crc) = (table)[0][x_ & 0xff] ^ ((crc) >> 8);                   \
    } while(0)
729 
// Reverse CRC32: consume two bytes at 'str' (read as one Uint2) and update
// 'crc' in place. Uses tables 0 and 1.
#define s_UpdateCRC32Reverse_2(crc, str, table)                         \
    do {                                                                \
        const Uint4 x_ = *(const Uint2*)(str) ^ (crc);                  \
        (crc) = ((crc) >> 16) ^                                         \
            (table)[0][(x_ >> 8) & 0xff] ^                              \
            (table)[1][(x_     ) & 0xff];                               \
    } while(0)
738 
// Reverse CRC32: consume four bytes at 'str' (read as one Uint4) and update
// 'crc' in place. Uses tables 0..3.
#define s_UpdateCRC32Reverse_4(crc, str, table)                         \
    do {                                                                \
        const Uint4 x_ = *(const Uint4*)(str) ^ (crc);                  \
        (crc) =                                                         \
            (table)[0][(x_ >> 24)       ] ^                             \
            (table)[1][(x_ >> 16) & 0xff] ^                             \
            (table)[2][(x_ >>  8) & 0xff] ^                             \
            (table)[3][(x_      ) & 0xff];                              \
    } while(0)
749 
// Reverse CRC32: consume eight bytes at 'str' (read as two Uint4 words) and
// update 'crc' in place. The first word is XORed with the current 'crc'
// (tables 4..7); the second word is looked up directly (tables 0..3).
#define s_UpdateCRC32Reverse_8(crc, str, table)                         \
    do {                                                                \
        const Uint4 x0_ = ((const Uint4*)(str))[0] ^ (crc);             \
        const Uint4 x1_ = ((const Uint4*)(str))[1];                     \
        (crc) =                                                         \
            (table)[0][(x1_ >> 24)       ] ^                            \
            (table)[1][(x1_ >> 16) & 0xff] ^                            \
            (table)[2][(x1_ >>  8) & 0xff] ^                            \
            (table)[3][(x1_      ) & 0xff] ^                            \
            (table)[4][(x0_ >> 24)       ] ^                            \
            (table)[5][(x0_ >> 16) & 0xff] ^                            \
            (table)[6][(x0_ >>  8) & 0xff] ^                            \
            (table)[7][(x0_      ) & 0xff];                             \
    } while(0)
765 
766 
767 template<size_t kCRC32Tables>
768 static inline
769 Uint4 s_UpdateCRC32Forward(Uint4 checksum, const char *str, size_t count,
770  const TCRC32Table (&table)[kCRC32Tables])
771 {
772 #if TABLES_COUNT >= 2
773  if ( (uintptr_t(str)&1) && count >= 1 ) {
774  s_UpdateCRC32Forward_1(checksum, str, table);
775  count -= 1;
776  str += 1;
777  }
778 # if TABLES_COUNT >= 4
779  if ( (uintptr_t(str)&2) && count >= 2 ) {
780  s_UpdateCRC32Forward_2(checksum, str, table);
781  count -= 2;
782  str += 2;
783  }
784 # if TABLES_COUNT >= 8
785  while ( count >= 8 ) {
786  s_UpdateCRC32Forward_8(checksum, str, table);
787  count -= 8;
788  str += 8;
789  }
790  if ( count >= 4 ) {
791  s_UpdateCRC32Forward_4(checksum, str, table);
792  count -= 4;
793  str += 4;
794  }
795 # else // < 8
796  while ( count >= 4 ) {
797  s_UpdateCRC32Forward_4(checksum, str, table);
798  count -= 4;
799  str += 4;
800  }
801 # endif // done 4
802  if ( count >= 2 ) {
803  s_UpdateCRC32Forward_2(checksum, str, table);
804  count -= 2;
805  str += 2;
806  }
807 # else // < 4
808  while ( count >= 2 ) {
809  s_UpdateCRC32Forward_2(checksum, str, table);
810  count -= 2;
811  str += 2;
812  }
813 # endif // done 2
814  if ( count ) {
815  s_UpdateCRC32Forward_1(checksum, str, table);
816  }
817 #else // < 2
818  while ( count ) {
819  s_UpdateCRC32Forward_1(checksum, str, table);
820  count -= 1;
821  str += 1;
822  }
823 #endif // done 1
824  return checksum;
825 }
826 
827 
828 template<size_t kCRC32Tables>
829 static inline
830 Uint4 s_UpdateCRC32Reverse(Uint4 checksum, const char *str, size_t count,
831  const TCRC32Table (&table)[kCRC32Tables])
832 {
833 #if TABLES_COUNT >= 2
834  if ( (uintptr_t(str)&1) && count >= 1 ) {
835  s_UpdateCRC32Reverse_1(checksum, str, table);
836  count -= 1;
837  str += 1;
838  }
839 # if TABLES_COUNT >= 4
840  if ( (uintptr_t(str)&2) && count >= 2 ) {
841  s_UpdateCRC32Reverse_2(checksum, str, table);
842  count -= 2;
843  str += 2;
844  }
845 # if TABLES_COUNT >= 8
846  while ( count >= 8 ) {
847  s_UpdateCRC32Reverse_8(checksum, str, table);
848  count -= 8;
849  str += 8;
850  }
851  if ( count >= 4 ) {
852  s_UpdateCRC32Reverse_4(checksum, str, table);
853  count -= 4;
854  str += 4;
855  }
856 # else // < 8
857  while ( count >= 4 ) {
858  s_UpdateCRC32Reverse_4(checksum, str, table);
859  count -= 4;
860  str += 4;
861  }
862 # endif // done 4
863  if ( count >= 2 ) {
864  s_UpdateCRC32Reverse_2(checksum, str, table);
865  count -= 2;
866  str += 2;
867  }
868 # else // < 4
869  while ( count >= 2 ) {
870  s_UpdateCRC32Reverse_2(checksum, str, table);
871  count -= 2;
872  str += 2;
873  }
874 # endif // done 2
875  if ( count ) {
876  s_UpdateCRC32Reverse_1(checksum, str, table);
877  }
878 #else // < 2
879  while ( count ) {
880  s_UpdateCRC32Reverse_1(checksum, str, table);
881  count -= 1;
882  str += 1;
883  }
884 #endif // done 1
885  return checksum;
886 }
887 
888 
889 #ifdef USE_CRC32C_INTEL
890 
891 #if !defined(NCBI_COMPILER_MSVC) && !defined(bit_SSE4_2)
892 // our Darwin GCC doesn't have cpuid.h :(
893 // we have to reimplement cpuid functionality, luckily it's not too big
// Execute the CPUID instruction for the given level.
// The resulting EAX, EBX, ECX and EDX values are stored through a, b, c, d.
// In position-independent builds EBX/RBX may be the PIC register, so it is
// swapped with a scratch register around the instruction instead of being
// named as an output.
static inline
void call_cpuid(unsigned int level,
                unsigned int* a, unsigned int* b,
                unsigned int* c, unsigned int* d)
{
#if defined(__i386__) && defined(__PIC__)
    // ebx may be the PIC register and some old GCC versions fail to take care
    __asm__("xchgl %%ebx, %k1;"
            "cpuid;"
            "xchgl %%ebx, %k1;"
            : "=a" (*a), "=&r" (*b), "=c" (*c), "=d" (*d)
            : "0" (level));
#elif defined(__x86_64__) && defined(__PIC__)
    // rbx may be the PIC register and some old GCC versions fail to take care
    __asm__("xchgq %%rbx, %q1;"
            "cpuid;"
            "xchgq %%rbx, %q1;"
            : "=a" (*a), "=&r" (*b), "=c" (*c), "=d" (*d)
            : "0" (level));
#else
    // EBX is free to use: name all four registers directly
    __asm__("cpuid"
            : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
            : "0" (level));
#endif
}
918 static inline
919 unsigned get_cpuid_max(unsigned extended)
920 {
921  unsigned a, b, c, d;
922 #ifdef __i386__
923  // on 32-bit processors we test special flag to check CPUID support
924  const unsigned HAS_CPUID_FLAG = 0x00200000;
925  __asm__(
926  "pushfl;"
927  "pushfl;"
928  "popl %0;"
929  "movl %0, %1;"
930  "xorl %2, %0;"
931  "pushl %0;"
932  "popfl;"
933  "pushfl;"
934  "popl %0;"
935  "popfl;"
936  : "=&r" (a), "=&r" (b)
937  : "i" (HAS_CPUID_FLAG));
938  if ( !((a ^ b) & HAS_CPUID_FLAG) )
939  return 0;
940 #endif
941  call_cpuid(extended, &a, &b, &c, &d);
942  return a;
943 }
944 static inline
945 bool get_cpuid(unsigned level,
946  unsigned *a, unsigned *b, unsigned *c, unsigned *d)
947 {
948  if ( get_cpuid_max(level & 0x80000000U) < level) {
949  return false;
950  }
951  call_cpuid (level, a, b, c, d);
952  return true;
953 }
954 # define __get_cpuid get_cpuid
955 # define bit_SSE4_2 (1<<20)
956 #endif
957 
958 bool s_IsCRC32CIntelEnabled(void)
959 {
960  static volatile bool enabled, initialized;
961  if ( !initialized ) {
962 #ifdef NCBI_COMPILER_MSVC
963  int a[4];
964  __cpuid(a, 0);
965  if ( a[0] >= 1 ) {
966  __cpuid(a, 1);
967  enabled = (a[2] & (1<<20)) != 0;
968  }
969 #else
970  unsigned a, b, c, d;
971  enabled = __get_cpuid(1, &a, &b, &c, &d) && (c & bit_SSE4_2);
972 #endif
973  initialized = true;
974  }
975  return enabled;
976 }
977 
978 static inline
979 Uint4 s_CRC32C(Uint4 checksum, const char* data)
980 {
981 #ifdef NCBI_COMPILER_MSVC
982  return _mm_crc32_u8(checksum, *data);
983 #else
984  // We cannot rely on _mm_crc32_u8() as it's available only when
985  // -msse4_2 compiler option is specified.
986  __asm__("crc32b %1, %0"
987  : "+r" (checksum)
988  : "m" (*data));
989  return checksum;
990 #endif
991 }
992 
993 static inline
994 Uint4 s_CRC32C(Uint4 checksum, const Uint2* data)
995 {
996 #ifdef NCBI_COMPILER_MSVC
997  return _mm_crc32_u16(checksum, *data);
998 #else
999  // We cannot rely on _mm_crc32_u16() as it's available only when
1000  // -msse4_2 compiler option is specified.
1001  __asm__("crc32w %1, %0"
1002  : "+r" (checksum)
1003  : "m" (*data));
1004  return checksum;
1005 #endif
1006 }
1007 
1008 static inline
1009 Uint4 s_CRC32C(Uint4 checksum, const Uint4* data)
1010 {
1011 #ifdef NCBI_COMPILER_MSVC
1012  return _mm_crc32_u32(checksum, *data);
1013 #else
1014  // We cannot rely on _mm_crc32_u32() as it's available only when
1015  // -msse4_2 compiler option is specified.
1016  __asm__("crc32l %1, %0"
1017  : "+r" (checksum)
1018  : "m" (*data));
1019  return checksum;
1020 #endif
1021 }
1022 
#ifdef HAVE_CRC32C_64
// Feed the 8-byte word at 'data' into the CRC32C value using the CPU
// instruction (only compiled when HAVE_CRC32C_64 is defined).
// The running value is carried in a 64-bit variable.
static inline
Uint8 s_CRC32C(Uint8 checksum, const Uint8* data)
{
#  ifdef NCBI_COMPILER_MSVC
    checksum = _mm_crc32_u64(checksum, *data);
#  else
    // We cannot rely on _mm_crc32_u64() as it's available only when
    // -msse4_2 compiler option is specified.
    __asm__("crc32q %1, %0"
            : "+r" (checksum)
            : "m" (*data));
#  endif
    return checksum;
}
#endif // HAVE_CRC32C_64
1039 
1040 static inline
1041 Uint4 s_UpdateCRC32CIntel(Uint4 checksum, const char *str, size_t count)
1042 {
1043  // Newer Intel CPUs with SSE 4.2 have instructions for CRC32C polynomial.
1044  // Since byte order is little-endian on Intel there is no need to bswap.
1045 
1046  // Align buffer
1047  if ( (uintptr_t(str)&1) && count >= 1 ) {
1048  checksum = s_CRC32C(checksum, str);
1049  count -= 1;
1050  str += 1;
1051  }
1052  if ( (uintptr_t(str)&2) && count >= 2 ) {
1053  checksum = s_CRC32C(checksum, (const Uint2*)str);
1054  count -= 2;
1055  str += 2;
1056  }
1057 #ifdef HAVE_CRC32C_64
1058  // Main loop processes by 8 bytes
1059  if ( count >= 4 ) {
1060  if ( (uintptr_t(str)&4) ) {
1061  checksum = s_CRC32C(checksum, (const Uint4*)str);
1062  count -= 4;
1063  str += 4;
1064  }
1065  Uint8 crc = checksum;
1066  while ( count >= 8 ) {
1067  crc = s_CRC32C(crc, (const Uint8*)str);
1068  count -= 8;
1069  str += 8;
1070  }
1071  checksum = Uint4(crc);
1072  if ( count >= 4 ) {
1073  checksum = s_CRC32C(checksum, (const Uint4*)str);
1074  count -= 4;
1075  str += 4;
1076  }
1077  }
1078 #else
1079  // Main loop processes by 4 bytes
1080  while ( count >= 4 ) {
1081  checksum = s_CRC32C(checksum, (const Uint4*)str);
1082  count -= 4;
1083  str += 4;
1084  }
1085 #endif
1086  // Process remainder smaller than 8 bytes
1087  if ( count >= 2 ) {
1088  checksum = s_CRC32C(checksum, (const Uint2*)str);
1089  count -= 2;
1090  str += 2;
1091  }
1092  if ( count >= 1 ) {
1093  checksum = s_CRC32C(checksum, str);
1094  //last count and str updates aren't necessary
1095  //count -= 1;
1096  //str += 1;
1097  }
1098  return checksum;
1099 }
1100 
1101 #endif //USE_CRC32C_INTEL
1102 
1103 
1104 static inline
1105 Uint4 s_UpdateAdler32(Uint4 sum, const char* data, size_t len)
1106 {
1107  const Uint4 MOD_ADLER = 65521;
1108 
1109 #define ADJUST_ADLER(a) a = (a & 0xffff) + (a >> 16) * (0x10000-MOD_ADLER)
1110 #define FINALIZE_ADLER(a) if (a >= MOD_ADLER) a -= MOD_ADLER
1111 
1112  Uint4 a = sum & 0xffff, b = sum >> 16;
1113 
1114  const size_t kMaxLen = 5548u;
1115  while (len) {
1116  if ( len >= kMaxLen ) {
1117  len -= kMaxLen;
1118  for ( size_t i = 0; i < kMaxLen/4; ++i ) {
1119  b += a += Uint1(data[0]);
1120  b += a += Uint1(data[1]);
1121  b += a += Uint1(data[2]);
1122  b += a += Uint1(data[3]);
1123  data += 4;
1124  }
1125  } else {
1126  for ( size_t i = len >> 2; i; --i ) {
1127  b += a += Uint1(data[0]);
1128  b += a += Uint1(data[1]);
1129  b += a += Uint1(data[2]);
1130  b += a += Uint1(data[3]);
1131  data += 4;
1132  }
1133  for ( len &= 3; len; --len ) {
1134  b += a += Uint1(data[0]);
1135  data += 1;
1136  }
1137  }
1138  ADJUST_ADLER(a);
1139  ADJUST_ADLER(b);
1140  }
1141  // It can be shown that a <= 0x1013a here, so a single subtract will do.
1142  FINALIZE_ADLER(a);
1143  // It can be shown that b can reach 0xffef1 here.
1144  ADJUST_ADLER(b);
1145  FINALIZE_ADLER(b);
1146  return (b << 16) | a;
1147 }
1148 
1149 
1151 {
1152  InitTables();
1153  s_PrintTable(out, "s_CRC32TableForward", s_CRC32TableForward);
1154  s_PrintTable(out, "s_CRC32TableReverse", s_CRC32TableReverse);
1155  s_PrintTable(out, "s_CRC32CTableReverse", s_CRC32CTableReverse);
1156 }
1157 
1158 
1159 void CChecksumBase::x_Update(const char* str, size_t count)
1160 {
1161  switch ( m_Method ) {
1162  case eCRC32:
1163  case eCRC32CKSUM:
1165  break;
1166  case eCRC32ZIP:
1167  case eCRC32INSD:
1169  break;
1170  case eCRC32C:
1171 #ifdef USE_CRC32C_INTEL
1172  if ( s_IsCRC32CIntelEnabled() ) {
1173  m_Value.v32 = s_UpdateCRC32CIntel(m_Value.v32, str, count);
1174  break;
1175  }
1176 #endif
1178  break;
1179  case eAdler32:
1180  m_Value.v32 = s_UpdateAdler32(m_Value.v32, str, count);
1181  break;
1182  case eMD5:
1183  m_Value.md5->Update(str, count);
1184  break;
1185  case eCityHash32:
1186  _ASSERT(!m_CharCount);
1187  m_Value.v32 = CityHash32(str, count);
1188  break;
1189  case eCityHash64:
1190  _ASSERT(!m_CharCount);
1191  m_Value.v64 = CityHash64(str, count);
1192  break;
1193  case eFarmHash32:
1194  _ASSERT(!m_CharCount);
1195  m_Value.v32 = farmhash::Hash32(str, count);
1196  break;
1197  case eFarmHash64:
1198  _ASSERT(!m_CharCount);
1199  m_Value.v64 = farmhash::Hash64(str, count);
1200  break;
1201  case eMurmurHash2_32:
1202  {{
1203  _ASSERT(!m_CharCount);
1204  int n = count > kMax_Int ? kMax_Int : (int)count;
1205  m_Value.v32 = MurmurHash2(str, n, (uint32_t)m_Seed);
1206  }}
1207  break;
1208  case eMurmurHash2_64:
1209  {{
1210  _ASSERT(!m_CharCount);
1211  int n = count > kMax_Int ? kMax_Int : (int)count;
1212  m_Value.v64 = MurmurHash64A(str, n, m_Seed);
1213  }}
1214  break;
1215  case eMurmurHash3_32:
1216  {{
1217  _ASSERT(!m_CharCount);
1218  int n = count > kMax_Int ? kMax_Int : (int)count;
1220  }}
1221  break;
1222  default:
1223  _ASSERT(0);
1224  break;
1225  }
1226 }
1227 
1228 
1229 //////////////////////////////////////////////////////////////////////////////
1230 //
1231 // NHash
1232 //
1233 
1235 {
1236  return ::CityHash32(str.data(), str.length());
1237 }
1238 
1239 Uint4 NHash::CityHash32(const char* str, size_t len)
1240 {
1242 }
1243 
1245 {
1246  return ::CityHash64(str.data(), str.length());
1247 }
1248 
1249 Uint8 NHash::CityHash64(const char* str, size_t len)
1250 {
1252 }
1253 
1255 {
1256  return farmhash::Hash32(str.data(), str.length());
1257 
1258 }
1259 
1260 Uint4 NHash::FarmHash32(const char* str, size_t len)
1261 {
1262  return farmhash::Hash32(str, len);
1263 }
1264 
1266 {
1267  return farmhash::Hash64(str.data(), str.length());
1268 }
1269 
1270 Uint8 NHash::FarmHash64(const char* str, size_t len)
1271 {
1272  return farmhash::Hash64(str, len);
1273 }
1274 
1276 {
1277  _ASSERT(str.length() <= kMax_Int);
1278  return ::MurmurHash2(str.data(), (int)str.length(), seed);
1279 }
1280 
1281 Uint4 NHash::MurmurHash2(const char* str, size_t len, Uint4 seed)
1282 {
1283  _ASSERT(len <= kMax_Int);
1284  return ::MurmurHash2(str, (int)len, seed);
1285 }
1286 
1288 {
1289  _ASSERT(str.length() <= kMax_Int);
1290  return ::MurmurHash64A(str.data(), (int)str.length(), seed);
1291 }
1292 
1294 {
1295  _ASSERT(len <= kMax_Int);
1297 }
1298 
1300 {
1301  _ASSERT(str.length() <= kMax_Int);
1302  Uint4 result;
1303  ::MurmurHash3_x86_32(str.data(), (int)str.length(), seed, &result);
1304  return result;
1305 }
1306 
1308 {
1309  _ASSERT(len <= kMax_Int);
1310  Uint4 result;
1312  return result;
1313 }
1314 
1315 
1316 
1317 //////////////////////////////////////////////////////////////////////////////
1318 //
1319 // CChecksumException
1320 //
1321 
1323 {
1324  switch (GetErrCode()) {
1325  case eStreamIO: return "eStreamError";
1326  case eFileIO: return "eFileError";
1327  default: return CException::GetErrCodeString();
1328  }
1329 }
1330 
1331 
1332 /////////////////////////////////////////////////////////////////////////////
1333 //
1334 // CChecksumStreamWriter
1335 //
1336 
1338  : m_Checksum(method)
1339 {
1340 }
1341 
1342 
1344 {
1345 }
1346 
1347 
1348 ERW_Result CChecksumStreamWriter::Write(const void* buf, size_t count,
1349  size_t* bytes_written)
1350 {
1351  m_Checksum.AddChars((const char*)buf, count);
1352  if (bytes_written) {
1353  *bytes_written = count;
1354  }
1355  return eRW_Success;
1356 }
1357 
1358 
1360 {
1361  return eRW_Success;
1362 }
1363 
1364 
uint32_t MurmurHash2(const void *key, int len, uint32_t seed)
uint64_t MurmurHash64A(const void *key, int len, uint64_t seed)
void MurmurHash3_x86_32(const void *key, int len, uint32_t seed, void *out)
@ eNone
None specified.
Definition: blast_def.h:326
static void s_PrintTable(CNcbiOstream &out, const char *name, const TCRC32Table(&table)[kCRC32Tables])
Definition: checksum.cpp:473
static const char sx_CharCount[]
Definition: checksum.cpp:89
#define s_UpdateCRC32Reverse_4(crc, str, table)
Definition: checksum.cpp:739
#define s_UpdateCRC32Reverse_2(crc, str, table)
Definition: checksum.cpp:730
CChecksum & ComputeFileChecksum_deprecated(const string &path, CChecksum &checksum)
Definition: checksum.cpp:424
static Uint4 s_UpdateAdler32(Uint4 sum, const char *data, size_t len)
Definition: checksum.cpp:1105
static void s_InitTableCRC32Reverse()
Definition: checksum.cpp:95
#define s_UpdateCRC32Forward_1(crc, str, table)
Definition: checksum.cpp:679
static const size_t kCRC32Size
Definition: checksum.cpp:77
static const char sx_End[]
Definition: checksum.cpp:87
#define s_UpdateCRC32Forward_4(crc, str, table)
Definition: checksum.cpp:694
#define s_UpdateCRC32Forward_2(crc, str, table)
Definition: checksum.cpp:685
static const char sx_LineCount[]
Definition: checksum.cpp:88
#define FINALIZE_ADLER(a)
#define ADJUST_ADLER(a)
static Uint4 s_UpdateCRC32Forward(Uint4 checksum, const char *str, size_t count, const TCRC32Table(&table)[kCRC32Tables])
Definition: checksum.cpp:769
#define TABLES_COUNT
Definition: checksum.cpp:82
static void s_InitTableCRC32Forward()
Definition: checksum.cpp:94
Uint4 TCRC32Table[kCRC32Size]
Definition: checksum.cpp:78
#define s_UpdateCRC32Reverse_1(crc, str, table)
Definition: checksum.cpp:722
static const char sx_Start[]
Definition: checksum.cpp:86
#define s_UpdateCRC32Forward_8(crc, str, table)
Definition: checksum.cpp:705
static Uint4 s_UpdateCRC32Reverse(Uint4 checksum, const char *str, size_t count, const TCRC32Table(&table)[kCRC32Tables])
Definition: checksum.cpp:830
#define s_UpdateCRC32Reverse_8(crc, str, table)
Definition: checksum.cpp:750
static void s_InitTableCRC32CReverse()
Definition: checksum.cpp:96
Checksum and hash calculation classes.
uint32 CityHash32(const char *buf, size_t len)
uint64 CityHash64(const char *buf, size_t len)
CChecksumBase – Base class with auxiliary methods for CHash and CChecksum.
Definition: checksum.hpp:55
CChecksumException –.
Definition: checksum.hpp:430
CChecksum – Checksum calculator.
Definition: checksum.hpp:302
CFileException –.
Definition: ncbifile.hpp:136
Class supporting low-level input/output for files.
Definition: ncbifile.hpp:3475
CHash – Hash calculator.
Definition: checksum.hpp:195
Definition: md5.hpp:46
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
Definition: ncbistre.hpp:802
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
Include a standard set of the NCBI C++ Toolkit most basic headers.
static const TCRC32Table s_CRC32CTableReverse[TABLES_COUNT]
Definition: crc32tables.c:1067
static const TCRC32Table s_CRC32TableReverse[TABLES_COUNT]
Definition: crc32tables.c:534
static const TCRC32Table s_CRC32TableForward[TABLES_COUNT]
Definition: crc32tables.c:1
static uch flags
std::ofstream out("events_result.xml")
main entry point for tests
static const char * str(char *buf, int n)
Definition: stats.c:84
static char tmp[3200]
Definition: utf8.c:42
char data[12]
Definition: iconv.c:80
Uint4 uint32_t
static Uint4 MurmurHash3_x86_32(const CTempString str, Uint4 seed=0)
MurmurHash3 (32-bit version only)
Definition: checksum.cpp:1299
size_t GetBits(void) const
Return size of checksum/hash in bits (32, 64).
Definition: checksum.hpp:496
size_t m_CharCount
Number of processed chars.
Definition: checksum.hpp:145
CNcbiOstream & WriteChecksum(CNcbiOstream &out) const
Write checksum calculation results into output stream.
Definition: checksum.cpp:301
CHash(EMethod method=eDefault)
Default constructor.
Definition: checksum.cpp:218
EMethodDef m_Method
Current method.
Definition: checksum.hpp:141
Uint8 v64
Used to store 64-bit results.
Definition: checksum.hpp:155
void AddFile(const string &file_path)
Update checksum with the file data.
Definition: checksum.cpp:373
CChecksum(EMethod method=eDefault)
Default constructor.
Definition: checksum.cpp:279
union CChecksumBase::@930 m_Value
Checksum/Hash computation result.
static void InitTables(void)
Initialize static tables used in CRC32 calculation.
Definition: checksum.cpp:463
void x_Free(void)
Cleanup (used in destructor and assignment operator).
Definition: checksum.hpp:561
virtual const char * GetErrCodeString(void) const override
Translate from an error code value to its string representation.
Definition: checksum.cpp:1322
static Uint4 MurmurHash2(const CTempString str, Uint4 seed=0)
MurmurHash2.
Definition: checksum.cpp:1275
static void SetSeed(Uint8 seed)
Unique seed used by some hash methods.
Definition: checksum.cpp:241
CMD5 * md5
Used for MD5 calculation.
Definition: checksum.hpp:156
virtual ERW_Result Flush(void)
Flush pending data (if any) down to the output device.
Definition: checksum.cpp:1359
string GetResultHex(void) const
Return string with checksum/hash in hexadecimal form.
Definition: checksum.cpp:153
CNcbiOstream & WriteHexSum(CNcbiOstream &out) const
Definition: checksum.cpp:326
CChecksumStreamWriter(CChecksum::EMethod method)
Construct object to compute checksum for written data.
Definition: checksum.cpp:1337
EMethod
Method used to compute hash.
Definition: checksum.hpp:199
static void PrintTables(CNcbiOstream &out)
Print C++ code for CRC32 tables for direct inclusion into library.
Definition: checksum.cpp:1150
bool ValidChecksumLineLong(const char *line, size_t len) const
Check for checksum line.
Definition: checksum.cpp:314
static Uint4 CityHash32(const CTempString str)
CityHash.
Definition: checksum.cpp:1234
static Uint8 CityHash64(const CTempString str)
Definition: checksum.cpp:1244
virtual ERW_Result Write(const void *buf, size_t count, size_t *bytes_written=0)
Virtual methods from IWriter.
Definition: checksum.cpp:1348
virtual ~CChecksumStreamWriter(void)
Definition: checksum.cpp:1343
Uint4 GetChecksum(void) const
Return calculated checksum.
Definition: checksum.hpp:341
EMethodDef
All supported methods for CHash and CChecksum.
Definition: checksum.hpp:58
CChecksum m_Checksum
Checksum calculator.
Definition: checksum.hpp:468
void x_Update(const char *str, size_t len)
Update current control sum with data provided.
Definition: checksum.cpp:1159
static Uint8 FarmHash64(const CTempString str)
Definition: checksum.cpp:1265
CChecksumBase & operator=(const CChecksumBase &other)
Assignment operator.
Definition: checksum.cpp:134
void NextLine(void)
Definition: checksum.cpp:365
CChecksum ComputeFileChecksum(const string &path, CChecksum::EMethod method)
Compute checksum for the given file.
Definition: checksum.cpp:443
static Uint8 MurmurHash64A(const CTempString str, Uint8 seed=0)
Definition: checksum.cpp:1287
CNcbiOstream & WriteChecksumData(CNcbiOstream &out) const
Definition: checksum.cpp:340
Uint8 GetResult64(void) const
Return calculated result.
Definition: checksum.hpp:547
size_t m_LineCount
Number of processed lines.
Definition: checksum.hpp:390
CChecksum & operator=(const CChecksum &other)
Assignment operator.
Definition: checksum.cpp:293
CChecksumBase(EMethodDef method)
Default constructor.
Definition: checksum.cpp:109
void AddChars(const char *str, size_t len)
Update current control sum with data provided.
Definition: checksum.hpp:602
Uint4 GetResult32(void) const
Return calculated result.
Definition: checksum.hpp:514
~CChecksumBase()
Destructor.
Definition: checksum.cpp:116
EMethod GetMethod(void) const
Get current method used to compute checksum.
Definition: checksum.hpp:508
Uint4 ComputeFileCRC32(const string &path)
Compute CRC32 checksum for the given file.
Definition: checksum.cpp:456
void x_Reset(EMethodDef method)
Reset the object to prepare it for the next computation using the selected method.
Definition: checksum.cpp:171
static Uint4 FarmHash32(const CTempString str)
FarmHash.
Definition: checksum.cpp:1254
EMethod
Method used to compute control sum.
Definition: checksum.hpp:306
void AddStream(CNcbiIstream &is)
Update checksum with the stream data.
Definition: checksum.cpp:395
static Uint8 m_Seed
Unique seed used by some hash methods.
Definition: checksum.hpp:150
CHash & operator=(const CHash &other)
Assignment operator.
Definition: checksum.cpp:230
void Calculate(const CTempString str)
Calculate hash.
Definition: checksum.hpp:570
@ eCityHash32
CityHash, 32-bit result.
Definition: checksum.hpp:80
@ eFarmHash32
FarmHash, 32-bit result.
Definition: checksum.hpp:87
@ eCRC32INSD
Inverted CRC32ZIP.
Definition: checksum.hpp:65
@ eMurmurHash2_64
MurmurHash2 for x64, 64-bit result.
Definition: checksum.hpp:96
@ eAdler32
A bit faster than CRC32ZIP, not recommended for small data sizes.
Definition: checksum.hpp:74
@ eCRC32
32-bit Cyclic Redundancy Check.
Definition: checksum.hpp:60
@ eMurmurHash3_32
MurmurHash3 for x86, 32-bit result.
Definition: checksum.hpp:97
@ eCityHash64
CityHash, 64-bit result.
Definition: checksum.hpp:81
@ eMurmurHash2_32
MurmurHash2 for x86, 32-bit result.
Definition: checksum.hpp:95
@ eCRC32ZIP
Exact zip CRC32.
Definition: checksum.hpp:62
@ eCRC32CKSUM
CRC32 implemented by cksum utility.
Definition: checksum.hpp:67
@ eFarmHash64
FarmHash, 64-bit result.
Definition: checksum.hpp:88
@ eMD5
Message Digest version 5.
Definition: checksum.hpp:75
@ eCRC32C
CRC32C (Castagnoli).
Definition: checksum.hpp:70
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
TErrCode GetErrCode(void) const
Definition: ncbiexpt.hpp:1493
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
#define NCBI_RETHROW(prev_exception, exception_class, err_code, message)
Generic macro to re-throw an exception.
Definition: ncbiexpt.hpp:737
@ eRead
File can be read.
Definition: ncbifile.hpp:3435
@ eOpen
Open an existing file, or create a new one.
Definition: ncbifile.hpp:3425
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
unsigned int uintptr_t
Definition: ncbitype.h:197
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define kMax_Int
Definition: ncbi_limits.h:184
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
ERW_Result
Result codes for I/O operations.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
@ eRW_Success
Everything is okay, I/O completed.
#define kEmptyStr
Definition: ncbistr.hpp:123
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
char * buf
int i
yy_size_t n
int len
static void hex(unsigned char c)
Definition: mdb_dump.c:56
static void byte(MDB_val *v)
Definition: mdb_dump.c:81
const struct ncbi::grid::netcache::search::fields::SIZE size
uint32_t Hash32(const char *s, size_t len)
uint64_t Hash64(const char *s, size_t len)
unsigned int a
Definition: ncbi_localip.c:102
#define MOD_ADLER
Definition: ncbi_util.c:1169
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
std::istream & in(std::istream &in_, double &x_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static pcre_uint8 * buffer
Definition: pcretest.c:1051
static uint32_t _mm_crc32_u8(uint32_t, uint8_t)
Definition: sse2neon.h:8398
static uint64_t _mm_crc32_u64(uint64_t crc, uint64_t v)
Definition: sse2neon.h:8382
static uint32_t _mm_crc32_u32(uint32_t crc, uint32_t v)
Definition: sse2neon.h:8366
static uint32_t _mm_crc32_u16(uint32_t crc, uint16_t v)
Definition: sse2neon.h:8350
Definition: _hash_fun.h:40
#define _ASSERT
static int seed
Definition: test_table.cpp:132
else result
Definition: token2.c:20
Modified on Sun Apr 14 05:24:57 2024 by modify_doxy.py rev. 669887