1 #ifndef DATA_HISTOGRAM__HPP
2 #define DATA_HISTOGRAM__HPP
39 #define _USE_MATH_DEFINES
113 template <
typename TValue =
int,
typename TScale = TValue,
typename TCounter = U
int8>
210 template <
typename V,
typename S =
TSum,
211 std::enable_if_t<g_HistogramValueTypeHavePlus<S>(),
int> = 0>
219 template <
typename V,
typename S =
TSum,
220 std::enable_if_t<!g_HistogramValueTypeHavePlus<S>(),
int> = 0>
228 #define RETURN_MT_SAFE(member) \
463 unsigned pos,
unsigned n,
469 TScale start_value, TScale
end_value, TScale step,
470 TScale*
arr,
unsigned pos,
unsigned n,
476 TScale*
arr,
unsigned pos,
unsigned n,
541 template <
typename TValue,
typename TScale,
typename TCounter>
601 void x_Shift(
size_t index,
typename TTimeBins::iterator current_it);
618 template <
typename TValue,
typename TScale,
typename TCounter>
625 EScaleView scale_view
627 : m_Min(
min_value), m_Max(max_value), m_NumBins(n_bins), m_IsMT(
false)
629 if ( m_Min > m_Max ) {
636 m_Starts.reset(
new TScale[m_NumBins]);
637 m_Counters.reset(
new TCounter[m_NumBins]);
639 x_CalculateBins(m_Min, m_Max, 0, m_NumBins, scale_type, scale_view);
645 template <
typename TValue,
typename TScale,
typename TCounter>
669 std::unique_ptr<TScale[]> tmp_starts(
new TScale[
m_NumBins]);
670 memcpy(tmp_starts.get() + n_bins,
m_Starts.get(),
sizeof(TScale) * n_prev);
683 template <
typename TValue,
typename TScale,
typename TCounter>
686 TValue max_value,
unsigned n_bins,
EScaleType scale_type)
690 if ( max_value <=
m_Max ) {
707 std::unique_ptr<TScale[]> tmp_starts(
new TScale[
m_NumBins]);
708 memcpy(tmp_starts.get(),
m_Starts.get(),
sizeof(TScale) * n_prev);
721 template <
typename TValue,
typename TScale,
typename TCounter>
727 return (
unsigned) ceil(sqrt(
n));
730 if (
n < 20)
return 5;
731 if (
n <= 50)
return 6;
732 if (
n <= 100)
return 7;
733 if (
n <= 200)
return 8;
734 if (
n <= 500)
return 9;
735 if (
n <= 1000)
return 10;
738 return 1 + (unsigned) ceil(
log2(
n));
740 return (
unsigned) ceil(pow(
n,
double(1)/3));
748 template <
typename TValue,
typename TScale,
typename TCounter>
758 template <
typename TValue,
typename TScale,
typename TCounter>
771 template <
typename TValue,
typename TScale,
typename TCounter>
791 template <
typename TValue,
typename TScale,
typename TCounter>
808 template <
typename TValue,
typename TScale,
typename TCounter>
817 while (d = (right - left), d > 1) {
834 template <
typename TValue,
typename TScale,
typename TCounter>
843 EScaleView scale_view
846 const char* errmsg_step =
"Impossible to calculate scale step, please change TScale type, range, or number of bins";
847 const char* errmsg_dup =
"Impossible to calculate scales bin starting position, please change TScale type, range, or number of bins";
864 scale_type ==
eLog2 ||
883 template <
typename TValue,
typename TScale,
typename TCounter>
893 EScaleView scale_view
907 TScale median = start_value + (
end_value - start_value)/2;
913 arr[pos] = start_value;
926 for (
unsigned i = 0;
i <
n;
i++) {
927 arr[pos+
i] = start_value + step*
i;
932 for (
unsigned i = 1;
i <=
n;
i++) {
933 arr[pos+1-
i] = start_value - step*
i;
939 template <
typename TValue,
typename TScale,
typename TCounter>
949 EScaleView scale_view
961 TScale median = start_value + (
end_value - start_value)/2;
963 arr[pos] = start_value;
982 std::unique_ptr<TScale[]>
tmp(
new TScale[
n*2]);
983 TScale* tmp_arr =
tmp.get();
989 arr[pos + n2] = tmp_arr[
n - 1];
990 arr[pos + n2 + 1] = tmp_arr[
n + 1];
993 for (
unsigned i = 0;
i < n2;
i++) {
994 arr[pos +
i] = tmp_arr[
i*2];
996 for (
unsigned i=2, j=
n+3;
i <= n2;
i++, j+=2) {
997 arr[pos + n2 +
i] = tmp_arr[j];
1017 for (
unsigned i = 1;
i <
n;
i++) {
1023 arr[pos] = start_value;
1028 for (
unsigned i = 1;
i <
n;
i++) {
1039 template <
typename TValue,
typename TScale,
typename TCounter>
1043 switch (scale_type) {
1060 template <
typename TValue,
typename TScale,
typename TCounter>
1064 switch (scale_type) {
1066 return (TScale)pow(M_E, scale_value);
1068 return (TScale)pow(2, scale_value);
1070 return (TScale)pow(10, scale_value);
1083 template <
typename TValue,
typename TScale,
typename TCounter>
1085 : m_NumBins(0), m_IsMT(
false)
1092 template <
typename TValue,
typename TScale,
typename TCounter>
1095 if (
this == &other)
return;
1102 template <
typename TValue,
typename TScale,
typename TCounter>
1106 if (
this == &other)
return *
this;
1114 template <
typename TValue,
typename TScale,
typename TCounter>
1156 template <
typename TValue,
typename TScale,
typename TCounter>
1178 template <
typename TValue,
typename TScale,
typename TCounter>
1182 if (std::numeric_limits<TScale>::is_integer) {
1190 template <
typename TValue,
typename TScale,
typename TCounter>
1194 if (
this == &other)
return;
1211 template <
typename TValue,
typename TScale,
typename TCounter>
1215 if (
this == &other)
return;
1233 template <
typename TValue,
typename TScale,
typename TCounter>
1245 TScale* starts_cur =
m_Starts.get();
1246 TScale* starts_other = other.
m_Starts.get();
1254 TCounter* counters_other = other.
m_Counters.get();
1256 counters_cur[
i] += counters_other[
i];
1272 template <
typename TValue,
typename TScale,
typename TCounter>
1281 template <
typename TValue,
typename TScale,
typename TCounter>
1286 m_TimeBins.front().histogram.Add(
value);
1290 template <
typename TValue,
typename TScale,
typename TCounter>
1297 x_Shift(0, m_TimeBins.begin());
1302 template <
typename TValue,
typename TScale,
typename TCounter>
1308 while (m_TimeBins.size() > 1)
1309 m_TimeBins.pop_back();
1310 m_TimeBins.front().histogram.Reset();
1311 m_TimeBins.front().n_ticks = 1;
1316 template <
typename TValue,
typename TScale,
typename TCounter>
1319 typename ncbi::CHistogramTimeSeries<TValue, TScale, TCounter>::TTimeBins::iterator current_it)
1321 if (m_TimeBins.size() <= index + 1) {
1323 x_AppendBin(m_TimeBins.front().histogram,
TTicks(1) << index);
1326 auto next_it = current_it;
1328 next_it->histogram.StealCountersFrom(current_it->histogram);
1331 current_it->n_ticks /= 2;
1335 auto next_it = current_it;
1338 if (next_it->n_ticks ==
TTicks(1) << index) {
1340 next_it->n_ticks *= 2;
1341 next_it->histogram.StealCountersFrom(current_it->histogram);
1343 current_it->n_ticks /= 2;
1348 x_Shift(index + 1, next_it);
1350 next_it->histogram.StealCountersFrom(current_it->histogram);
1353 current_it->n_ticks /= 2;
1357 template <
typename TValue,
typename TScale,
typename TCounter>
1368 template <
typename TValue,
typename TScale,
typename TCounter>
1373 m_TimeBins.push_back(
1374 STimeBin{model_histogram.
Clone(THistogram::eCloneStructureOnly),
A series of same-structured histograms covering logarithmically (base 2) increasing time periods....
CHistogram – collect the distribution of the numerical data samples.
Include a standard set of the NCBI C++ Toolkit most basic headers.
static vector< string > arr
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
unsigned GetNumberOfBins() const
Return the number of bins on the combined scale.
void x_AddCountersFrom(CHistogram &other)
Add counters from 'other' histogram.
TCounter m_UpperAnomalyCount
Number of anomaly values > m_Max.
EEstimateNumberOfBinsRule
Rules to calculate an estimated number of bins based on the expected number of observations.
std::unique_ptr< TCounter[]> m_Counters
Combined scale: counters - the number of measurements for each bin.
unsigned int TTicks
Type of the unit of time.
typename std::conditional< std::is_floating_point< TValue >::value, double, TIntegral >::type TArithmetic
void x_AddBisection(TValue value)
Add value to the data distribution using a bisection search method.
void x_AppendBin(const THistogram &model_histogram, TTicks n_ticks)
CHistogram(CHistogram &&other)
void MT_Lock() const
MT locking.
void x_CalculateBinsLog(TScale start_value, TScale end_value, TScale *arr, unsigned pos, unsigned n, EScaleType scale_type, EScaleView scale_view)
Calculate bins starting positions for a logarithmic scale.
TCounter m_LowerAnomalyCount
Number of anomaly values < m_Min.
CHistogram Clone(EClone how=eCloneAll) const
Clone histogram structure.
void x_Add(TValue value)
Add value to the data distribution (internal version without locking).
TSum m_Sum
Sum of all added values (if applicable for TValue)
THistogram histogram
Histogram for the ticks.
TTicks GetCurrentTick(void) const
Number of ticks the histogram series has handled.
list< STimeBin > TTimeBins
Type of the series of histograms.
TSum GetSum(void) const
Return the sum of all added values.
CHistogram & operator=(const CHistogram &)
TCounter m_Count
Number of counted values (sum of all m_Counters[])
void x_CalculateBinsLinear(TScale start_value, TScale end_value, TScale step, TScale *arr, unsigned pos, unsigned n, EScaleView scale_view)
Calculate bins starting positions for a linear scale.
size_t GetLowerAnomalyCount() const
Get number of hits whose values were less than GetMin().
TValue GetMin() const
Get the lower bound of the combined scale.
typename std::conditional< std::is_arithmetic< TValue >::value, TArithmetic, TValue >::type TSum
std::mutex m_Mutex
MT protection mutex.
TScale x_FuncInverse(EScaleType scale_type, TScale scale_value)
Inverse scale function.
void AddCountersFrom(const CHistogram &other)
Add counters from 'other' histogram to this histogram; 'other' is not changed.
void GetBinCounters(vector< TCounter > &counters)
Get counters for the combined scale's bins.
void x_CalculateBins(TScale start_value, TScale end_value, unsigned pos, unsigned n, EScaleType scale_type, EScaleView scale_view)
Calculate bins starting positions.
void EnableMT(void)
Add MT protection to histogram.
TValue m_Max
Maximum value (the upper bound of combined scale)
const TScale * GetBinStartsPtr() const
Get starting positions for bins on the combined scale (not MT safe).
bool x_IsEqual(TScale a, TScale b)
Check that 'a' and 'b' scale values are equal (or almost equal for floating scales).
TValue GetMax() const
Get the upper bound of the combined scale.
TValue m_Min
Minimum value (the lower bound of combined scale)
CHistogram & operator=(CHistogram &&other)
TScale x_Func(EScaleType scale_type, TScale value)
Scale function.
void x_AddLinear(TValue value)
Add value to the data distribution using a linear search method.
STimeBin & operator=(const STimeBin &other)
constexpr bool g_HistogramValueTypeHavePlus()
unsigned m_NumBins
Number of bins (m_Starts[]/m_Counters[] length)
CHistogram(const CHistogram &)
Prevent copying.
void Add(TValue value)
Add value to the data distribution.
STimeBin(THistogram &&h, TTicks t)
void x_MoveFrom(CHistogram &other)
Move data from 'other' histogram. 'other' becomes invalid.
void AddLeftScale(TValue min_value, unsigned n_bins, EScaleType scale_type)
Add auxiliary left/right scales.
typename std::conditional< std::numeric_limits< TValue >::is_signed, int64_t, uint64_t >::type TIntegral
Sum type: double for all floating points TValue types, int64_t/uint64_t for integral,...
const TCounter * GetBinCountersPtr() const
Get counters for the combined scale's bins (not MT safe).
void Add(const V &v)
Add value to the data distribution.
std::unique_ptr< TScale[]> m_Starts
Combined scale: starting bins positions.
TCounter GetCount() const
Get total number of hits whose value fell between GetMin() and GetMax().
CHistogram(void)
Default constructor.
size_t GetUpperAnomalyCount() const
Get number of hits whose values were greater than GetMax().
void Rotate()
Merge the most recent (now active) histogram data into the time series.
STimeBin(const STimeBin &other)
CHistogram< TValue, TScale, TCounter > THistogram
void x_Shift(size_t index, typename TTimeBins::iterator current_it)
CHistogram(TValue min_value, TValue max_value, unsigned n_bins, EScaleType scale_type=eLinear, EScaleView scale_view=eMonotonic)
Constructor.
static unsigned EstimateNumberOfBins(size_t n, EEstimateNumberOfBinsRule rule=0)
Estimate the number of bins based on the expected number of observations 'n'.
EScaleView
Methods to build bins for a specified scale.
#define RETURN_MT_SAFE(member)
void Reset()
Reset to the initial state.
CHistogramTimeSeries(THistogram &model_histogram)
void Reset()
Reset all data counters.
bool m_IsMT
MT protection flag.
decltype((T &)(std::declval< T >().operator+=(std::declval< const T & >()))) T_HistogramValueTypeHavePlus
Helper types for CHistogram<>::GetSum() support.
void GetBinStarts(vector< TScale > &positions)
Get starting positions for bins on the combined scale.
void StealCountersFrom(CHistogram &other)
Add counters from 'other' histogram to this histogram, then reset the counters of 'other' histogram.
void x_Reset()
Reset all data counters (internal version without locking).
TTicks n_ticks
Number of ticks in this histogram.
TTimeBins GetHistograms() const
Histograms – in the order from the most recent to the least recent.
void AddRightScale(TValue max_value, unsigned n_bins, EScaleType scale_type)
@ eJuran
Juran's "Quality Control Handbook" that provides guidelines to select the number of bins for histogram...
@ eRice
Rice's rule. Presented as a simple alternative to Sturges' rule.
@ eSquareRoot
Square root rule.
@ eSturge
Herbert Sturges' rule.
@ eLog2
Binary logarithmic scale with a base 2.
@ eLog10
Common logarithmic scale with a base 10.
@ eLog
Natural logarithmic scale with a base e ~ 2.72.
@ eLinear
Arithmetic or linear scale.
@ eCloneStructureOnly
Clone structure only (the counters will be zeroed)
@ eCloneAll
Clone whole histogram, with scale and counters.
@ eMonotonic
Use specified scale method to calculate bins sizes from a minimum to a maximum value.
@ eSymmetrical
Determine a mean for a specified value range and calculates bins sizes using specified scale to both ...
unsigned int
A callback function used to compare two keys in a database.
const GenericPointer< typename T::ValueType > T2 value
The NCBI C++/STL use hints.
A histogram which covers a certain number of ticks.
Helper template to check that type Type have some method declared using TypeChecker<Type>.