33 #ifndef C_SEQ_MASKER_ISTAT_H
34 #define C_SEQ_MASKER_ISTAT_H
68 : divisor_( divisor/(8*sizeof(
Uint4 )) ), cba_( cba )
95 Uint4 arg_use_max_count,
97 Uint4 arg_use_min_count )
98 : threshold( arg_threshold ),
99 textend( arg_textend ),
100 max_count( arg_max_count ),
101 use_max_count( arg_use_max_count ),
102 min_count( arg_min_count ),
103 use_min_count( arg_use_min_count ),
136 {
return ambig_unit; }
156 {
return opt_data_.
cba_ == 0 ? 0 : &opt_data_; }
160 return fmt_gen_algo_ver;
165 fmt_gen_algo_ver = v;
202 { threshold = arg_threshold; }
209 { textend = arg_textend; }
226 { max_count = arg_max_count; }
244 { use_max_count = arg_use_max_count; }
262 if( min_count != 0 && min_count < arg_min_count ) {
265 <<
") is less than the one stored with the "
266 <<
"N-mer counts (" << arg_min_count <<
")."
267 <<
"The value " << arg_min_count
268 <<
" will be used." );
269 min_count = arg_min_count;
271 else if( min_count == 0 ) {
272 min_count = arg_min_count;
292 return fmt_version.get();
303 { use_min_count = arg_use_min_count; }
310 { unit_size = arg_unit_size; }
318 { ambig_unit = arg_ambig_unit; }
328 { opt_data_ = opt_data; }
341 std::vector< double >
const &
GetCountMap()
const {
return count_map; }
350 int major,
int minor,
int patch ) {
Defines an interface for accessing the unit counts information.
virtual ~CSeqMaskerIstat()
Object destructor.
void SetStatAlgoVersion(CSeqMaskerVersion const &v)
Set the version of the algorithm used to generate counts.
virtual Uint4 trueat(Uint4 unit) const =0
Get the true count for an n-mer.
void SetFmtEncoding(string const &e)
Set the statistics file format encoding.
void SetCountMap(std::vector< double > const &cm)
Uint4 get_min_count() const
Get the value of T_low.
void set_use_min_count(Uint4 arg_use_min_count)
Set the count value for units with actual counts below T_low.
std::unique_ptr< CComponentVersionInfo > fmt_version
Format version of the statistics file from which the data was read.
std::vector< double > const & GetCountMap() const
CSeqMaskerIstat(Uint4 arg_threshold, Uint4 arg_textend, Uint4 arg_max_count, Uint4 arg_use_max_count, Uint4 arg_min_count, Uint4 arg_use_min_count)
Object constructor.
void set_textend(Uint4 arg_textend)
Set the value of T_extend.
void set_threshold(Uint4 arg_threshold)
Set the value of T_threshold.
CSeqMaskerWindow::TUnit ambig_unit
virtual Uint4 at(Uint4 unit) const =0
Get the unit count of a given unit.
CSeqMaskerVersion fmt_gen_algo_ver
Version of the algorithm used to generate counts.
string const & GetMetaData() const
Return the metadata string.
void SetMetaData(string const &md)
Set metadata string.
Uint4 GetMaxCount() const
void set_use_max_count(Uint4 arg_use_max_count)
Set the count value for units with actual counts above T_high.
CSeqMaskerIstat(const CSeqMaskerIstat &)
Uint4 get_use_max_count() const
Get the count value for units with actual counts above T_high.
void SetMaxCount(Uint4 mc)
CSeqMaskerWindow::TUnit AmbigUnit() const
Get the value of the unit used to represent an ambiguity.
CSeqMaskerIstat & operator=(const CSeqMaskerIstat &)
std::vector< double > count_map
void set_optimization_data(const optimization_data &opt_data)
Set optimization parameters.
Uint4 operator[](Uint4 unit) const
Look up the count value of a given unit.
void SetFmtVersion(string const &name, int major, int minor, int patch)
Set the statistics file format version.
Uint4 get_textend() const
Get the value of T_extend.
void set_ambig_unit(const CSeqMaskerWindow::TUnit &arg_ambig_unit)
Set the ambiguity unit value.
Uint4 get_max_count() const
Get the current value of T_high.
CSeqMaskerVersion const & GetStatAlgoVersion() const
Return the version of the algorithm used to generate counts.
optimization_data opt_data_
virtual Uint1 UnitSize() const =0
Get the unit size.
Uint4 get_use_min_count() const
Get the count value for units with actual counts below T_low.
CComponentVersionInfo const * GetFmtVersion() const
Return the format version of the source statistics file.
const optimization_data * get_optimization_data() const
Get the data structure optimization parameters.
string const & GetFmtEncoding() const
Return the encoding of the source statistics file.
void set_unit_size(Uint1 arg_unit_size)
Set the unit size.
void set_min_count(Uint4 arg_min_count)
Set the value of T_low.
Uint4 get_threshold() const
Get the value of T_threshold.
void set_max_count(Uint4 arg_max_count)
Set the value of T_high.
Base class for computing and saving unit counts data.
Uint4 TUnit
Integer type used to represent units within a window.
The NCBI C++ standard methods for dealing with std::string.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Warning(CExceptionArgs_Base &args)
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define CComponentVersionInfo
#define NCBI_XALGOWINMASK_EXPORT
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
Defines Limits for the types used in NCBI C/C++ toolkit.
Structure containing information about optimization parameters used.
optimization_data(Uint4 divisor, Uint4 *cba)
Object constructor.
Uint4 divisor_
How many units are represented by one 4-byte word in cba_ array.
Uint4 * cba_
Bit array with zeroes where all corresponding units have counts below t_extend.