NCBI C++ ToolKit
LargeInt.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*****************************************************************************
2  * GATB : Genome Assembly Tool Box
3  * Copyright (C) 2014 INRIA
4  * Authors: R.Chikhi, G.Rizk, E.Drezen
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Affero General Public License as
8  * published by the Free Software Foundation, either version 3 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Affero General Public License for more details.
15  *
16  * You should have received a copy of the GNU Affero General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *****************************************************************************/
19 
20 /** \file LargeInt.hpp
21  * \date 01/03/2013
22  * \author edrezen
23  * \brief Class that manages large integers
24  *
25  * arbitrary-precision integer library
26  * very limited: only does what minia needs (but not what minia deserves)
27  * This file holds interfaces related to the Design Pattern Observer.
28  */
29 
30 #ifndef _GATB_CORE_TOOLS_MATH_LARGEINT_HPP_
31 #define _GATB_CORE_TOOLS_MATH_LARGEINT_HPP_
32 
33 /********************************************************************************/
34 
35 #include <stdint.h>
36 #include <algorithm>
37 #include <iostream>
38 #include <array>
39 #include <algorithm>
40 
41 #include "config.hpp"
42 
43 extern std::array<const unsigned char, 256> revcomp_4NT;
44 extern std::array<const char, 4> bin2NT;
45 
46 /********************************************************************************/
47 namespace DeBruijn {
48 /********************************************************************************/
49 
50 inline static u_int64_t oahash64 (u_int64_t elem)
51 {
52  u_int64_t code = elem;
53  code = code ^ (code >> 14); //supp
54  code = (~code) + (code << 18);
55  code = code ^ (code >> 31);
56  code = code * 21;
57  code = code ^ (code >> 11);
58  code = code + (code << 6);
59  code = code ^ (code >> 22);
60 
61  return code;
62 }
63 
64 
65 /** \brief Large integer class
66  *
67  * The LargeInt class provides methods for integer calculus. It has a template parameter
68  * 'precision' giving the number of bits used the integer representation. For instance:
69  * - LargeInt<1> : representation of integers up to 2^64
70  * - LargeInt<2> : representation of integers up to 2^128
71  * - etc
72  *
73  * This template class has a specialization for precision=1. In this case, native 64 bits
74  * integers are used.
75  *
76  * This template class may have a specialization for precision=2. If the used operating
77  * system allows it, native 128 bits integers are used.
78  *
79  * In the other cases, the LargeInt provides a generic integer calculus class. Note that
80  * such an implementation could be optimized in several ways, including direct assembly
81  * code for maximum speed.
82  *
83  * The LargeInt class is hugely used throughout the GATB project since it encodes kmers values.
84  *
85  * The LargeInt class is mainly used with the IntegerTemplate class, where 4 specializations
86  * of LargeInt are used as template types of IntegerTemplate.
87  *
88  * \see IntegerTemplate
89  */
90 template<int precision> class LargeInt {
91 public:
92 
93  /** Get the name of the class used by the variant (ie. one of the Ti template class parameters)
94  * \return the class name.
95  */
96  static const char* getName ()
97  {
98  static char buffer[256];
99  static bool first = true;
100  if (first) { first = false; snprintf (buffer, sizeof(buffer), "LargeInt<%d>", precision); }
101  return buffer;
102  }
103 
104  /** Get the 64 less significant bits of the LargeInt object as a native integer type.
105  * \return (part of) the LargeInt object as a native integer type.
106  */
107  u_int64_t getVal() const { return this->value[0]; }
108 
109  /** Get the size of an instance of the class
110  * \return the size of an object (in bits).
111  */
112  static const size_t getSize () { return 8*sizeof(u_int64_t)*precision; }
113 
114  /********************************************************************************/
115  /** Constructor.
116  * \param[in] val : initial value of the large integer. */
117  LargeInt(const u_int64_t& val = 0) {
118  value[0] = val;
119  for (int i = 1; i < precision; i++)
120  value[i] = 0;
121  }
122 
123  LargeInt(const std::string& kmer) : LargeInt(0) {
124  int sizeKmer = kmer.size();
125  for (int i = 0; i < sizeKmer; i++) {
126  operator<<=(2);
127  value[0] += std::find(bin2NT.begin(), bin2NT.end(), kmer[i]) - bin2NT.begin();
128  }
129  }
130 
131  template <typename T>
132  LargeInt(const T& a, const T& b) : LargeInt(0) {
133  for(T i = a; i < b; ++i) {
134  operator<<=(2);
135  value[0] += std::find(bin2NT.begin(), bin2NT.end(), *i) - bin2NT.begin();
136  }
137  }
138 
139  /********************************************************************************/
140  /** Operator +
141  * \param[in] other : operand
142  * \return sum of object and the operand.
143  */
144  LargeInt operator+ (const LargeInt& other) const
145  {
147  int carry = 0;
148  for (int i = 0 ; i < precision ; i++)
149  {
150  result.value[i] = this->value[i] + other.value[i] + carry;
151  carry = (result.value[i] < this->value[i]) ? 1 : 0;
152  }
153 
154  return result;
155  }
156 
157  /********************************************************************************/
158  /** Operator -
159  * \param[in] other : operand
160  * \return subtraction of object and the operand.
161  */
162  LargeInt operator- (const LargeInt& other) const
163  {
165  int carry = 0;
166  for (int i = 0 ; i < precision ; i++)
167  {
168  result.value[i] = this->value[i] - other.value[i] - carry;
169  carry = (result.value[i] > this->value[i]) ? 1 : 0;
170  }
171 
172  return result;
173  }
174 
175  /********************************************************************************/
176  /** Operator *
177  * \param[in] coeff : operand
178  * \return multiplication of the object by the coeff.
179  */
180  LargeInt operator*(const int& coeff) const
181  {
182  LargeInt result (*this);
183  // minia doesn't have that many multiplications cases
184 
185  if (coeff == 2 || coeff == 4)
186  {
187  result = result << (coeff / 2);
188  }
189  else
190  {
191  if (coeff == 21)
192  {
193  result = (result << 4) + (result << 2) + result;
194  }
195  else
196  {
197  printf("unsupported LargeInt multiplication: %d\n",coeff);
198  exit(1);
199  }
200  }
201 
202  return result;
203  }
204 
205  /********************************************************************************/
206  /** Operator /
207  * \param[in] divisor : operand
208  * \return division of the object by the divisor.
209  */
210  LargeInt operator/(const uint32_t& divisor) const
211  {
213  std::fill( result.value, result.value + precision, 0 );
214 
215  // inspired by Divide32() from http://subversion.assembla.com/svn/pxcode/RakNet/Source/BigInt.cpp
216 
217  u_int64_t r = 0;
218  uint32_t mask32bits = ~0;
219  for (int i = precision-1; i >= 0; --i)
220  {
221  for (int j = 1; j >= 0; --j) // [j=1: high-32 bits, j=0: low-32 bits] of array[i]
222  {
223  u_int64_t n = (r << 32) | ((this->value[i] >> (32*j)) & mask32bits );
224  result.value[i] = result.value[i] | (((n / divisor) & mask32bits) << (32*j));
225  r = n % divisor;
226  }
227  }
228 
229  return result;
230  }
231 
232  /********************************************************************************/
233  /** Operator %
234  * \param[in] divisor: operand
235  * \return modulo of the object by the operand.
236  */
237  uint32_t operator%(const uint32_t& divisor) const
238  {
239  u_int64_t r = 0;
240  uint32_t mask32bits = ~0;
241  for (int i = precision-1; i >= 0; --i)
242  {
243  for (int j = 1; j >= 0; --j) // [j=1: high-32 bits, j=0: low-32 bits] of array[i]
244  {
245  u_int64_t n = (r << 32) | ((this->value[i] >> (32*j)) & mask32bits );
246  r = n % divisor;
247  }
248  }
249 
250  return (uint32_t)r;
251  }
252 
253  /********************************************************************************/
254  /** Operator ^
255  * \param[in] other : operand
256  * \return operator^ of the object by the operand.
257  */
258  LargeInt operator^(const LargeInt& other) const
259  {
261  for (int i=0 ; i < precision ; i++)
262  result.value[i] = this->value[i] ^ other.value[i];
263 
264  return result;
265  }
266 
267  /********************************************************************************/
268  /** Operator |
269  * \param[in] other : operand
270  * \return operator| of the object by the operand.
271  */
272  LargeInt operator|(const LargeInt& other) const
273  {
275  for (int i=0 ; i < precision ; i++)
276  result.value[i] = this->value[i] | other.value[i];
277 
278  return result;
279  }
280 
281  /********************************************************************************/
282  /** Operator &
283  * \param[in] other : operand
284  * \return operator& of the object by the operand.
285  */
286  LargeInt operator&(const LargeInt& other) const
287  {
289  for (int i=0 ; i < precision ; i++)
290  result.value[i] = this->value[i] & other.value[i];
291 
292  return result;
293  }
294 
295  /********************************************************************************/
296  /** Operator &
297  * \param[in] other : operand
298  * \return operator& of the object by the operand.
299  */
300  LargeInt operator&(const char& other) const
301  {
303  result.value[0] = this->value[0] & other;
304  return result;
305  }
306 
307  /********************************************************************************/
308  /** Operator ~
309  * \return negation of the object
310  */
312  {
314  for (int i=0 ; i < precision ; i++)
315  result.value[i] = ~this->value[i];
316 
317  return result;
318  }
319 
320  /********************************************************************************/
321  /** Operator <<. Note: this method is likely to be hugely used when we want to get
322  * neighbors of a given kmer encoded as a LargeInt object.
323  * \param[in] coeff : operand
324  * \return left shift of the object
325  */
326  LargeInt operator<<(const int& coeff) const
327  {
328  LargeInt result (0);
329 
330  int large_shift = coeff / 64;
331  int small_shift = coeff % 64;
332 
333  for (int i = large_shift ; i < precision-1; i++)
334  {
335  result.value[i] = result.value[i] | (this->value[i-large_shift] << small_shift);
336 
337  if (small_shift == 0) // gcc "bug".. u_int64_t x; x>>64 == 1<<63, x<<64 == 1
338  {
339  result.value[i+1] = 0;
340  }
341  else
342  {
343  result.value[i+1] = this->value[i-large_shift] >> (64 - small_shift);
344  }
345 
346  }
347  result.value[precision-1] = result.value[precision-1] | (this->value[precision-1-large_shift] << small_shift);
348 
349  return result;
350  }
351 
352  /********************************************************************************/
353  /** Operator >>. Note: this method is likely to be hugely used when we want to get
354  * neighbors of a given kmer encoded as a LargeInt object.
355  * \param[in] coeff : operand
356  * \return right shift of the object
357  */
358  LargeInt operator>>(const int& coeff) const
359  {
360  LargeInt result (0);
361 
362  int large_shift = coeff / 64;
363  int small_shift = coeff % 64;
364 
365  result.value[0] = (this->value[large_shift] >> small_shift);
366 
367  for (int i = 1 ; i < precision - large_shift ; i++)
368  {
369  result.value[i] = (this->value[i+large_shift] >> small_shift);
370  if (small_shift == 0) // gcc "bug".. u_int64_t x; x>>64 == 1<<63, x<<64 == 1
371  {
372  result.value[i-1] = result.value[i-1];
373  }
374  else
375  {
376  result.value[i-1] = result.value[i-1] | (this->value[i+large_shift] << (64 - small_shift));
377  }
378  }
379 
380  return result;
381  }
382 
383  /********************************************************************************/
384  /** Operator !=
385  * \param[in] c: operand
386  * \return inequality
387  */
388  bool operator!=(const LargeInt& c) const
389  {
390  for (int i = 0 ; i < precision ; i++)
391  if( this->value[i] != c.value[i] )
392  return true;
393  return false;
394  }
395 
396  /********************************************************************************/
397  /** Operator ==
398  * \param[in] c: operand
399  * \return equality
400  */
401  bool operator==(const LargeInt& c) const
402  {
403  for (int i = 0 ; i < precision ; i++)
404  if( this->value[i] != c.value[i] )
405  return false;
406  return true;
407  }
408 
409  /********************************************************************************/
410  /** Operator <
411  * \param[in] c: operand
412  */
413  bool operator<(const LargeInt& c) const
414  {
415  for (int i = precision-1 ; i>=0 ; --i)
416  if( this->value[i] != c.value[i] )
417  return this->value[i] < c.value[i];
418 
419  return false;
420  }
421 
422  /********************************************************************************/
423  /** Operator <=
424  * \param[in] c: operand
425  */
426  bool operator<=(const LargeInt& c) const
427  {
428  return operator==(c) || operator<(c);
429  }
430 
431  /********************************************************************************/
432  /** Operator +=
433  * \param[in] other : operand
434  * \return addition and affectation
435  */
437  {
438  // NOT so easy to optimize because of the carry
439  *this = *this + other;
440  return *this;
441  }
442 
443  /********************************************************************************/
444  /** Operator ^=
445  * \param[in] other : operand
446  * \return xor and affectation
447  */
449  {
450  for (int i=0 ; i < precision ; i++) { this->value[i] ^= other.value[i]; }
451  return *this;
452  }
453 
454  /********************************************************************************/
455  /** Operator &=
456  * \param[in] other : operand
457  * \return and and affectation
458  */
460  {
461  for (int i=0 ; i < precision ; i++) { this->value[i] &= other.value[i]; }
462  return *this;
463  }
464 
465  /********************************************************************************/
466  /** Operator |=
467  * \param[in] other : operand
468  * \return or and affectation
469  */
471  {
472  for (int i=0 ; i < precision ; i++) { this->value[i] |= other.value[i]; }
473  return *this;
474  }
475 
476  /********************************************************************************/
477  /** Operator <<=
478  * \param[in] coeff : operand
479  * \return left shift and affectation
480  */
481  LargeInt& operator<<= (const int& coeff)
482  {
483  *(this) = (*this) << coeff; return *this;
484  }
485 
486  /********************************************************************************/
487  /** Operator >>=
488  * \param[in] coeff : operand
489  * \return right shift and affectation
490  */
491  LargeInt& operator>>= (const int& coeff)
492  {
493  *(this) = (*this) >> coeff; return *this;
494  }
495 
496  /********************************************************************************/
497  /** Output stream operator for the IntegerTemplate class
498  * \param[in] s : the output stream to be used.
499  * \param[in] l : the object to output
500  * \return the modified output stream.
501  */
502  friend std::ostream & operator<<(std::ostream & s, const LargeInt<precision> & l)
503  {
504  int i=0;
505 
506  /** We want to display the number in hexa (easier to do...) */
507  s << std::hex;
508 
509  /** We skip leading 0. */
510  for (i=precision-1; i>=0 && l.value[i]==0; i--) {}
511 
512  /** We dump the different parts of the large integer. */
513  for ( ; i>=0 ; i--) { s << l.value[i]; if (i>=1) { s << "."; } }
514 
515  /** We go back to decimal format. */
516  s << std::dec;
517 
518  /** We return the output stream. */
519  return s;
520  }
521 
522  /********************************************************************************/
523  /** Computes a kmer value as polynom. We may have conversion from the data buffer to
524  * a nucleotide code. This is done through the provided functor.
525  * \param[in] data : kmer given as a buffer of nucleotides
526  * \param[in] size : size of the kmer
527  * \param[in] fct : convert the ith entry in the buffer into a nucleotide code (A=0, C=1, T=2 and G=3)
528  */
529  template<typename Map>
530  static LargeInt polynom (const char* data, size_t size, Map fct)
531  {
532  LargeInt res (0);
533  for (size_t i=0; i<size; ++i) { res = res * 4 + fct(data[i]); }
534  return res;
535  }
536 
537  /********************************************************************************/
538  /** Print corresponding kmer in ASCII
539  * \param[in] sizeKmer : kmer size.
540  * \return the ASCII string
541  */
542  std::string toString (size_t sizeKmer) const
543  {
544  std::string seq(sizeKmer,'A');
545  for (size_t i=0; i<sizeKmer; i++) { seq[sizeKmer-i-1] = bin2NT [(*this)[i]]; }
546 
547  return seq;
548  }
549 
550  /********************************************************************************/
551  /** Operator[] access the ith nucleotide in the given integer. For instance a[4] get the 5th nucleotide of
552  * a kmer encoded as an Integer object.
553  * \param[in] idx : index of the nucleotide to be retrieved
554  * \return the nucleotide value as follow: A=0, C=1, T=2 and G=3
555  */
556  u_int8_t operator[] (size_t idx) const { return (this->value[idx/32] >> (2*idx%64)) & 3; }
557 
558  u_int64_t oahash() const {
559  // hash = XOR_of_series[hash(i-th chunk iof 64 bits)]
560  u_int64_t result = 0, chunk, mask = ~0;
561  LargeInt intermediate = *this;
562  for (size_t i=0;i<precision;i++) {
563  chunk = (intermediate & mask).value[0];
564  intermediate = intermediate >> 64;
565  result ^= oahash64 (chunk);
566  }
567  return result;
568  }
569  u_int64_t* GetGuts() { return value; }
570 
571 private:
572  template<int T> friend LargeInt<T> revcomp (const LargeInt<T>& i, size_t sizeKmer);
573 
574  u_int64_t value[precision];
575 };
576 
577 /********************************************************************************/
578 template<int precision> inline LargeInt<precision> revcomp (const LargeInt<precision>& x, size_t sizeKmer)
579 {
580  const LargeInt<precision> res = x;
581 
582  unsigned char* kmerrev = (unsigned char *) (&(res.value[0]));
583  unsigned char* kmer = (unsigned char *) (&(x.value[0]));
584 
585  for (size_t i=0; i<8*precision; ++i)
586  {
587  kmerrev[8*precision-1-i] = revcomp_4NT [kmer[i]];
588  }
589 
590  return (res >> (2*( 32*precision - sizeKmer)) ) ;
591 }
592 
593 /********************************************************************************/
594 /******************** SPECIALIZATION FOR precision=1 ********************/
595 /********************************************************************************/
596 #include "LargeInt1.hpp"
597 
598 /********************************************************************************/
599 /******************** SPECIALIZATION FOR precision=2 ********************/
600 /********************************************************************************/
601 #include "LargeInt2.hpp"
602 
603 /********************************************************************************/
604 } /* end of namespace */
605 /********************************************************************************/
606 
607 #endif /* _GATB_CORE_TOOLS_MATH_LARGEINT_HPP_ */
std::array< const unsigned char, 256 > revcomp_4NT
Definition: Model.hpp:28
std::array< const char, 4 > bin2NT
Definition: Model.hpp:24
ncbi::TMaskedQueryRegions mask
Large integer class.
Definition: LargeInt.hpp:90
LargeInt & operator+=(const LargeInt &other)
Operator +=.
Definition: LargeInt.hpp:436
LargeInt operator-(const LargeInt &other) const
Operator -.
Definition: LargeInt.hpp:162
LargeInt operator|(const LargeInt &other) const
Operator |.
Definition: LargeInt.hpp:272
LargeInt(const std::string &kmer)
Definition: LargeInt.hpp:123
LargeInt operator/(const uint32_t &divisor) const
Operator /.
Definition: LargeInt.hpp:210
LargeInt & operator>>=(const int &coeff)
Operator >>=.
Definition: LargeInt.hpp:491
friend std::ostream & operator<<(std::ostream &s, const LargeInt< precision > &l)
Output stream operator for the IntegerTemplate class.
Definition: LargeInt.hpp:502
LargeInt operator&(const char &other) const
Operator &.
Definition: LargeInt.hpp:300
LargeInt & operator|=(const LargeInt &other)
Operator |=.
Definition: LargeInt.hpp:470
bool operator<(const LargeInt &c) const
Operator <.
Definition: LargeInt.hpp:413
u_int64_t value[precision]
Definition: LargeInt.hpp:574
bool operator==(const LargeInt &c) const
Operator ==.
Definition: LargeInt.hpp:401
LargeInt & operator&=(const LargeInt &other)
Operator &=.
Definition: LargeInt.hpp:459
LargeInt(const T &a, const T &b)
Definition: LargeInt.hpp:132
LargeInt operator&(const LargeInt &other) const
Operator &.
Definition: LargeInt.hpp:286
u_int64_t getVal() const
Get the 64 less significant bits of the LargeInt object as a native integer type.
Definition: LargeInt.hpp:107
std::string toString(size_t sizeKmer) const
Print corresponding kmer in ASCII.
Definition: LargeInt.hpp:542
bool operator<=(const LargeInt &c) const
Operator <=.
Definition: LargeInt.hpp:426
static const size_t getSize()
Get the size of an instance of the class.
Definition: LargeInt.hpp:112
LargeInt(const u_int64_t &val=0)
Constructor.
Definition: LargeInt.hpp:117
LargeInt operator>>(const int &coeff) const
Operator >>.
Definition: LargeInt.hpp:358
bool operator!=(const LargeInt &c) const
Operator !=.
Definition: LargeInt.hpp:388
LargeInt operator~() const
Operator ~.
Definition: LargeInt.hpp:311
static const char * getName()
Get the name of the class used by the variant (ie.
Definition: LargeInt.hpp:96
LargeInt & operator<<=(const int &coeff)
Operator <<=.
Definition: LargeInt.hpp:481
LargeInt & operator^=(const LargeInt &other)
Operator ^=.
Definition: LargeInt.hpp:448
uint32_t operator%(const uint32_t &divisor) const
Operator %.
Definition: LargeInt.hpp:237
u_int64_t * GetGuts()
Definition: LargeInt.hpp:569
LargeInt operator^(const LargeInt &other) const
Operator ^.
Definition: LargeInt.hpp:258
u_int8_t operator[](size_t idx) const
Operator[] access the ith nucleotide in the given integer.
Definition: LargeInt.hpp:556
friend LargeInt< T > revcomp(const LargeInt< T > &i, size_t sizeKmer)
static LargeInt polynom(const char *data, size_t size, Map fct)
Computes a kmer value as polynom.
Definition: LargeInt.hpp:530
u_int64_t oahash() const
Definition: LargeInt.hpp:558
LargeInt operator<<(const int &coeff) const
Operator <<.
Definition: LargeInt.hpp:326
LargeInt operator+(const LargeInt &other) const
Operator +.
Definition: LargeInt.hpp:144
LargeInt operator*(const int &coeff) const
Operator *.
Definition: LargeInt.hpp:180
#define T(s)
Definition: common.h:230
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static char precision
Definition: genparams.c:28
char data[12]
Definition: iconv.c:80
Uint4 uint32_t
CRange< Position > Map(const CRange< Position > &target, const CRange< Position > &range)
Definition: blast_aux.cpp:826
string
Definition: cgiapp.hpp:687
exit(2)
int i
yy_size_t n
static void hex(unsigned char c)
Definition: mdb_dump.c:56
static u_int64_t oahash64(u_int64_t elem)
Definition: LargeInt.hpp:50
LargeInt< precision > revcomp(const LargeInt< precision > &x, size_t sizeKmer)
Definition: LargeInt.hpp:578
const struct ncbi::grid::netcache::search::fields::SIZE size
unsigned int a
Definition: ncbi_localip.c:102
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static pcre_uint8 * buffer
Definition: pcretest.c:1051
Definition: inftrees.h:24
else result
Definition: token2.c:20
Modified on Sat May 25 14:16:41 2024 by modify_doxy.py rev. 669887