NCBI C++ ToolKit
win_mask_config.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: win_mask_config.hpp 98008 2022-09-19 12:38:44Z morgulis $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Aleksandr Morgulis
27  *
28  * File Description:
29  * Header file for CWinMaskConfig class.
30  *
31  */
32 
33 #ifndef C_WIN_MASK_CONFIG_H
34 #define C_WIN_MASK_CONFIG_H
35 
36 #include <string>
37 #include <set>
38 #include <sstream>
39 
40 #include <corelib/ncbistre.hpp>
41 #include <corelib/ncbistr.hpp>
42 #include <corelib/ncbiargs.hpp>
43 
47 
49 
50 class CMaskReader;
51 class CMaskWriter;
52 
53 /**
54 **\brief Winmasker configuration errors.
55 **
56 ** This class encapsulates information about different kinds of
57 ** exceptions that may occur in winmasker configuration.
58 **/
60 {
61 public:
62 
63  /**
64  **\brief Error codes.
65  **
66  **/
67  enum EErrCode
68  {
69  eInputOpenFail, /**< Can not open input file. */
70  eReaderAllocFail, /**< Memory allocation for input
71  reader object failed. */
72  eInconsistentOptions /**< Option validation failure. */
73  };
74 
75  /**
76  **\brief Get the description of an error.
77  **
78  ** Returns the string corresponding to the error code of
79  ** this exception object.
80  **
81  **\return error description.
82  **
83  **/
84  virtual const char * GetErrCodeString() const override;
85 
87 };
88 
89 
90 /**
91  **\brief Objects of this class contain winmasker configuration data.
92  **
93  ** The class is also responsible for validation of command line arguments.
94  **/
96 {
97 public:
98 
102 
103  enum EAppType
104  {
109  eGenerateMasksWithDuster
110  };
111 
112  /**
113  **\brief Object constructor.
114  **
115  **\param args C++ toolkit style command line arguments.
116  **\param type type of application; eAny means application needs to be decided based on args
117  **\param determine_input when false, CWinMaskConfig is not concerned with the input files
118  **
119  **/
120  CWinMaskConfig( const CArgs & args, EAppType type = eAny, bool determine_input = true );
121 
122  /** Destructor */
123  ~CWinMaskConfig();
124 
125  /**
126  **\brief Get the input reader object.
127  **
128  **\return the current input reader.
129  **
130  **/
131  CMaskReader & Reader();
132 
133  /**
134  **\brief Get the output writer object.
135  **
136  **\return the current output writer.
137  **
138  **/
139  CMaskWriter & Writer() { return *writer; }
140 
141  /**
142  **\brief Get the t_extend value.
143  **
144  **\return the current t_extend value.
145  **/
146  Uint4 Textend() const { return textend; }
147 
148  /**
149  **\brief Get the average unit score threshold.
150  **
151  **\return the current value of average unit score
152  ** that triggers masking.
153  **
154  **/
155  Uint4 CutoffScore() const { return cutoff_score; }
156 
157  /**
158  **\brief Get the maximum unit score.
159  **
160  **\return the current value of the maximum unit score.
161  ** Any ecode with larger score will be assigned
162  ** the score value specified by -setmaxscore
163  ** command line option.
164  **
165  **/
166  Uint4 MaxScore() const { return max_score; }
167 
168  /**
169  **\brief Get the minimum unit score.
170  **
171  **\return the current value of the minimum unit score.
172  ** Any ecode with smaller score will be assigned
173  ** the score value specified by -setminscore
174  ** command line option.
175  **
176  **/
177  Uint4 MinScore() const { return min_score; }
178 
179  double MinScorePct() const { return t_low_pct; } /**< Get t_low_pct: minimum allowed unit score as percentage of units with lower count. */
180  double ExtendScorePct() const { return t_extend_pct; } /**< Get t_extend_pct: minimum score for interval extension as percentage of units with lower count. */
181  double ThresScorePct() const { return t_thres_pct; } /**< Get t_thres_pct: threshold score for starting masking as percentage of units with lower count. */
182  double MaxScorePct() const { return t_high_pct; } /**< Get t_high_pct: highest allowed unit score as percentage of units with lower count. */
183 
184  /**
185  **\brief Get the alternative score for high scoring units.
186  **
187  **\return the score value that is assigned to units that
188  ** have original score larger than the value
189  ** specified by -maxscore command line option.
190  **
191  **/
192  Uint4 SetMaxScore() const { return set_max_score; }
193 
194  /**
195  **\brief Get the alternative score for low scoring units.
196  **
197  **\return the score value that is assigned to units that
198  ** have original score smaller than the value
199  ** specified by -minscore command line option.
200  **
201  **/
202  Uint4 SetMinScore() const { return set_min_score; }
203 
204  /**
205  **\brief Get the window size.
206  **
207  **\return the current window size.
208  **/
209  Uint1 WindowSize() const { return window_size; }
210 
211  /**
212  **\brief Get the name of the length statistics file.
213  **
214  **\return the name of the file containing the unit length
215  ** statistics.
216  **
217  **/
218  const string LStatName() const { return lstat_name; }
219 
220  /**
221  **\brief Flag to run the interval merging passes.
222  **
223  **\return true if interval merging is requested, false
224  ** otherwise.
225  **
226  **/
227  bool MergePass() const { return merge_pass; }
228 
229  /**
230  **\brief Average unit score triggering the interval merging.
231  **
232  ** For each pair of consecutive mask intervals
233  ** that are candidates for merging (see description of
234  ** CWinMaskConfig::MeanMergeCutoffDist()) winmasker evaluates
235  ** the mean unit score of all units in the interval starting
236  ** at the start of the first interval and ending at the end
237  ** of the second interval. If the result is greater than or equal
238  ** to the value returned by this function the intervals are
239  ** merged.
240  **
241  **\return the value of the mean unit score triggering
242  ** merging of masked intervals which is the value
243  ** of -mscore command line option to winmasker.
244  **
245  **/
246  Uint4 MergeCutoffScore() const { return merge_cutoff_score; }
247 
248  /**
249  **\brief Distance at which intervals are merged unconditionally.
250  **
251  **\return The distance in base pairs such that if two consecutive
252  ** masked intervals are closer to each other than that
253  ** distance then they are merged unconditionally. This is
254  ** the value of -mabs command line option to winmasker.
255  **
256  **/
257  Uint4 AbsMergeCutoffDist() const { return abs_merge_cutoff_dist; }
258 
259  /**
260  **\brief Distance at which intervals are considered candidates for
261  ** merging.
262  **
263  **\return The distance in base pairs such that if two consecutive
264  ** masked intervals are closer to each other than that
265  ** distance then they are considered candidates for
266  ** merging. They have to pass mean average unit score
267  ** test to be merged (see description of
268  ** CWinMaskConfig::MergeCutoffScore()). This is the
269  ** value of -mmean command line option to winmasker.
270  **
271  **/
272  Uint4 MeanMergeCutoffDist() const { return mean_merge_cutoff_dist; }
273 
274  /**
275  **\brief Type of the event triggering the masking.
276  **
277  **\return string describing the type of the event that would trigger
278  ** masking of a window. The allowed values are:\n
279  ** \b mean - average unit score exceeds the threshold;\n
280  ** \b min - minimum unit score exceeds the threshold.
281  **
282  **/
283  const string Trigger() const { return trigger; }
284 
285  /**
286  **\brief Number of units to count.
287  **
288  ** If "-trigger min" was specified on the command line, then
289  ** this parameter is the number of units that have to be
290  ** above threshold to trigger masking.
291  **
292  **\return number of units to count.
293  **
294  **/
295  Uint1 TMin_Count() const { return tmin_count; }
296 
297  /**
298  **\brief Whether discontiguous units are used.
299  **
300  **\return true if discontiguous units should be used;
301  ** false otherwise
302  **
303  **/
304  bool Discontig() const { return discontig; }
305 
306  /**
307  **\brief Pattern to form discontiguous units.
308  **
309  ** Pattern is a 4-byte long bit mask. Bit n is set
310  ** to 1 iff the n-th base in a window should not be
311  ** used in a pattern.
312  **
313  **\return the base pattern to form discontiguous units
314  **
315  **/
316  Uint4 Pattern() const { return pattern; }
317 
318  /**
319  **\brief Window step.
320  **
321  **\return the number of bases between two consecutive
322  ** windows
323  **
324  **/
325  Uint4 WindowStep() const { return window_step; }
326 
327  /**
328  **\brief Unit step.
329  **
330  **\return the distance between consecutive units within
331  ** a window
332  **
333  **/
334  Uint1 UnitStep() const { return unit_step; }
335 
336  /**
337  **\brief Unit step to use for interval merging.
338  **
339  **\return the distance between units used to estimate
340  ** average unit score of the span of two
341  ** intervals that are candidates for merging.
342  **
343  **/
344  Uint1 MergeUnitStep() const { return merge_unit_step; }
345 
346  /**
347  **\brief Type of application to run
348  **
349  **\return eComputeCounts to compute unit counts
350  ** eConvertCounts to convert unit counts from one format to another
351  ** eGenerateMasks to generate masks using only WindowMasker
352  ** eGenerateMasksWithDuster to generate masks using both WindowMasker and DustMasker
353  **
354  **/
355  EAppType AppType() const { return app_type; }
356 
357  /**
358  **\brief Use a list of fasta files.
359  **
360  **\return true indicates that -input parameter specifies a file
361  ** containing a list of input fasta files;
362  ** false indicates that -input parameter specifies a
363  ** single input fasta file
364  **
365  **/
366  bool FaList() const { return fa_list; }
367 
368  /**
369  **\brief Memory available for n-mer frequency counting.
370  **
371  **\return memory in megabytes
372  **
373  **/
374  Uint4 Mem() const { return mem; }
375 
376  /**
377  **\brief n-mer size used for n-mer frequency counting.
378  **
379  **\return n-mer size in base pairs
380  **
381  **/
382  Uint1 UnitSize() const { return unit_size; }
383 
384  /**\brief Total genome length
385  **
386  **\return genome length as supplied on command line
387  **/
388  Uint8 GenomeSize() const { return genome_size; }
389 
390  /**
391  **\brief Value of the -input parameter.
392  **
393  **\return value of the -input parameter.
394  **
395  **/
396  string Input() const { return input; }
397 
398  /**
399  **\brief Value of the -output parameter.
400  **
401  **\return value of the -output parameter.
402  **
403  **/
404  string Output() const { return output; }
405 
406  /**
407  **\brief Percentage thresholds.
408  **
409  ** Comma separated list of floating point numbers
410  ** between 0.0 and 100.0 used to compute winmask
411  ** score thresholds. The corresponding score
412  ** thresholds will be added as comments to the
413  ** end of the output. For each number the program
414  ** finds the score such that the corresponding
415  ** fraction of different n-mers has the lower score.
416  **
417  **\return comma separated list of values
418  **
419  **/
420  string Th() const { return th; }
421 
422  /**
423  **\brief Dust window.
424  **
425  **\return dust window
426  **
427  **/
428  Uint4 DustWindow() const { return dust_window; }
429 
430  /**
431  **\brief Dust level.
432  **
433  **\return dust level
434  **
435  **/
436  Uint4 DustLevel() const { return dust_level; }
437 
438  /**
439  **\brief Dust linker (in bps).
440  **
441  **\return dust linker
442  **
443  **/
444  Uint4 DustLinker() const { return dust_linker; }
445 
446  /**
447  **\brief Check for possibly duplicate sequences in the input.
448  **
449  **\return true to check for duplicates;
450  ** false otherwise
451  **
452  **/
453  bool CheckDup() const { return checkdup; }
454 
455  /**
456  **\brief Format in which the unit counts generator should
457  ** generate its output.
458  **
459  **\return unit counts file format string (sformat immediately followed by the value of smem)
460  **
461  **/
462  const string SFormat() const
463  {
464  ostringstream r;
465  r << sformat << smem;
466  return r.str();
467  }
468 
469  /**
470  ** \brief Input file format.
471  **
472  ** \return string indicating input file format.
473  **/
474  const string InFmt() const
475  {
476  return iformatstr;
477  }
478 
479  /**
480  **\brief The set of query ids to process.
481  **
482  ** Only the sequences from the input file that match
483  ** one of the ids in this list will be processed.
484  **
485  **\return the set of query ids to process
486  **/
487  const CIdSet * Ids() const { return ids; }
488 
489  /**
490  **\brief The set of query ids to exclude from processing.
491  **
492  ** The sequences from the input file that match
493  ** one of the ids in this list will be excluded from
494  ** processing.
495  **
496  **\return the set of query ids to exclude from processing
497  **/
498  const CIdSet * ExcludeIds() const { return exclude_ids; }
499 
500  /**\brief Whether to use bit array optimization for
501  ** optimized binary counts format.
502  **\return true if optimization should be used; false otherwise
503  **/
504  bool UseBA() const { return use_ba; }
505 
506  /**\brief Use CSeq_id objects to match/print sequence ids.
507  **
508  **\return true if CSeq_id objects should be used;
509  ** false if strings should be used
510  **/
511  bool MatchId() const { return !text_match; }
512 
513  /**\brief Get metadata string to be added to the counts file.
514  */
515  string const GetMetaData() const { return metadata; }
516 
517  static void AddWinMaskArgs(CArgDescriptions &arg_desc,
518  EAppType type = eAny,
519  bool determine_input = true);
520 
521 private:
522 
523  /// Prohibit copy constructor
524  /// @param rhs object to copy from [in]
526 
527  /// Prohibit assignment operator
528  /// @param rhs object to copy from [in]
530 
531  /**\internal
532  **\brief This class is the resource allocator/initializer for
533  ** winmasker input streams (used for safe exception
534  ** handling).
535  **
536  **/
538  {
539  public:
540 
541  /**\internal
542  **\brief Object constructor.
543  **
544  ** Objects are usually constructed at the point of
545  ** dynamic allocation of corresponding resource.
546  **
547  **\param newResource points to the istream resource.
548  **
549  **/
550  CIstreamProxy( CNcbiIstream * newResource = NULL )
551  : resource( newResource ) {}
552 
553  /**\internal
554  **\brief Object destructor.
555  **
556  ** Frees the resource unless it points to the standard input.
557  **
558  **/
560  { if( resource && resource != &NcbiCin ) delete resource; }
561 
562  /**\internal
563  **\brief Cast to bool operator.
564  **
565  **\return true if the resource is non NULL, false otherwise.
566  **
567  **/
568  operator bool() const { return resource != NULL; }
569 
570  //@{
571  /**\internal
572  **\brief Dereference operator.
573  **
574  **\return resource object pointed to by the internal pointer.
575  **
576  **/
577  CNcbiIstream & operator*() { return *resource; }
578  const CNcbiIstream & operator*() const { return *resource; }
579  //@}
580 
581  //@{
582  /**\internal
583  **\brief Field access operator.
584  **
585  **\return this operator returns the internal pointer stored in
586  ** the object.
587  **
588  **/
589  CNcbiIstream * operator->() { return resource; }
590  const CNcbiIstream * operator->() const { return resource; }
591  //@}
592 
593  private:
594 
595  /**\internal
596  **\brief Pointer to the resource that is managed by this
597  ** object.
598  **
599  **/
601  };
602 
603  /**
604  **\brief Read the list of sequence ids from a given file.
605  **
606  **\param file_name file to read the ids from
607  **\param id_list where to store the ids
608  **/
609  static void FillIdList( const string & file_name,
610  CIdSet & id_list );
611 
612  /**
613  * @brief Create the CMaskWriter instance for this class
614  *
615  * @param args command line arguments
616  * (NOTE(review): there is no separate format parameter; presumably the output format is read from args - confirm)
617  *
618  * @return writer based on the format requested
619  * @throws runtime_error if the output format is not recognized
620  */
621  CMaskWriter* x_GetWriter(const CArgs& args);
622 
623  static EAppType s_DetermineAppType( const CArgs & args, EAppType user_specified_type );
624 
625  /**\name Window based masker configuration.
626  **/
627  //@{
628  EAppType app_type; /**< type of application to run */
629  CIstreamProxy is; /**< input file resource manager */
630  CMaskReader * reader; /**< input reader object */
631  CMaskWriter * writer; /**< output writer object */
632  string lstat_name; /**< name of the file containing unit length statistics */
633  double t_low_pct, /**< minimum allowed unit score as percentage of units with lower count */
634  t_extend_pct, /**< minimum score for interval extension as percentage of units with lower count */
635  t_thres_pct, /**< threshold score for starting masking as percentage of units with lower count */
636  t_high_pct; /**< highest allowed unit score as percentage of units with lower count */
637  Uint4 textend; /**< t_extend value for extension of masked intervals */
638  Uint4 cutoff_score; /**< window score that triggers masking */
639  Uint4 max_score; /**< maximum allowed unit score */
640  Uint4 min_score; /**< minimum allowed unit score */
641  Uint4 set_max_score; /**< score to use for high scoring units */
642  Uint4 set_min_score; /**< score to use for low scoring units */
643  Uint1 window_size; /**< length of a window in base pairs */
644  bool merge_pass; /**< perform extra interval merging passes or not */
645  Uint4 merge_cutoff_score; /**< average unit score triggering interval merging */
646  Uint4 abs_merge_cutoff_dist; /**< distance triggering unconditional interval merging */
647  Uint4 mean_merge_cutoff_dist; /**< distance at which intervals are considered for merging */
648  string trigger; /**< type of the event that triggers masking */
649  Uint1 tmin_count; /**< number of units to count for min trigger */
650  bool discontig; /**< true, if using discontiguous units */
651  Uint4 pattern; /**< base pattern to use for discontiguous units */
652  Uint4 window_step; /**< window step */
653  Uint1 unit_step; /**< unit step */
654  Uint1 merge_unit_step; /**< unit step to use when merging intervals */
655  bool fa_list; /**< indicates whether input is a list of fasta file names */
656  Uint4 mem; /**< memory available for unit counts generator */
657  Uint1 unit_size; /**< unit size (used in unit counts generator) */
658  Uint8 genome_size; /**< total size of the genome in bases */
659  string input; /**< input file name */
660  string output; /**< output file name (may be empty to indicate stdout) */
661  string th; /**< percentages to compute winmask thresholds */
662  Uint4 dust_window; /**< window size for dusting */
663  Uint4 dust_level; /**< level value for dusting */
664  Uint4 dust_linker; /**< number of bases to use for linking */
665  bool checkdup; /**< check for duplicate contigs */
666  string iformatstr; /**< input format */
667  string sformat; /**< unit counts format for counts generator */
668  Uint4 smem; /**< memory (in megabytes) available for masking stage */
669  CIdSet * ids; /**< set of ids to process */
670  CIdSet * exclude_ids; /**< set of ids to exclude from processing */
671  bool use_ba; /**< use bit array based optimization */
672  bool text_match; /**< identify seq ids by string matching */
673  string metadata; /**< metadata associated with counts file */
674  //@}
675 };
676 
678 
679 #endif
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
Virtual base class for all input readers.
Definition: mask_reader.hpp:50
A base class for winmasker output writers.
Definition: mask_writer.hpp:52
Winmasker configuration errors.
virtual const char * GetErrCodeString() const override
Get the description of an error.
NCBI_EXCEPTION_DEFAULT(CWinMaskConfigException, CException)
@ eInconsistentOptions
Option validation failure.
@ eInputOpenFail
Can not open input file.
@ eReaderAllocFail
Memory allocation for input reader object failed.
const CNcbiIstream * operator->() const
CIstreamProxy(CNcbiIstream *newResource=NULL)
const CNcbiIstream & operator*() const
Objects of this class contain winmasker configuration data.
string trigger
type of the event that triggers masking
Uint4 merge_cutoff_score
average unit score triggering interval merging
string iformatstr
input format
Uint4 dust_level
level value for dusting
string Th() const
Percentage thresholds.
bool merge_pass
perform extra interval merging passes or not
double MinScorePct() const
double t_low_pct
minimum allowed unit score as percentage of units with lower count
Uint4 MeanMergeCutoffDist() const
Distance at which intervals are considered candidates for merging.
const CIdSet * ExcludeIds() const
The set of query ids to exclude from processing.
CMaskWriter * writer
output writer object
CMaskReader * reader
input reader object
Uint4 smem
memory (in megabytes) available for masking stage
Uint1 TMin_Count() const
Number of units to count.
bool MergePass() const
Flag to run the interval merging passes.
bool FaList() const
Use a list of fasta files.
CWinMaskUtil::CIdSet CIdSet
Uint4 pattern
base pattern to use for discontiguous units
Uint4 min_score
minimum allowed unit score
double MaxScorePct() const
const CIdSet * Ids() const
The set of query ids to process.
Uint1 merge_unit_step
unit step to use when merging intervals
bool CheckDup() const
Check for possibly duplicate sequences in the input.
bool use_ba
use bit array based optimization
Uint1 UnitStep() const
Unit step.
Uint4 SetMinScore() const
Get the alternative score for low scoring units.
bool UseBA() const
Whether to use bit array optimization for optimized binary counts format.
CWinMaskConfig(const CWinMaskConfig &rhs)
Prohibit copy constructor.
string Input() const
Value of the -input parameter.
const string InFmt() const
Input file format.
bool fa_list
indicates whether input is a list of fasta file names
double t_extend_pct
minimum score for interval extension as percentage of units with lower count
Uint1 tmin_count
number of units to count for min trigger
bool discontig
true, if using discontiguous units
Uint1 UnitSize() const
n-mer size used for n-mer frequency counting.
Uint4 DustLinker() const
Dust linker (in bps).
double ExtendScorePct() const
const string Trigger() const
Type of the event triggering the masking.
Uint4 mem
memory available for unit counts generator
Uint8 genome_size
total size of the genome in bases
EAppType AppType() const
Type of application to run.
CWinMaskConfig & operator=(const CWinMaskConfig &rhs)
Prohibit assignment operator.
CIstreamProxy is
input file resource manager
Uint4 WindowStep() const
Window step.
Uint4 mean_merge_cutoff_dist
distance at which intervals are considered for merging
string Output() const
Value of the -output parameter.
string output
output file name (may be empty to indicate stdout)
CIdSet * exclude_ids
set of ids to exclude from processing
string sformat
unit counts format for counts generator
CWinMaskUtil::CIdSet_TextMatch CIdSet_TextMatch
Uint4 cutoff_score
window score that triggers masking
bool MatchId() const
Use CSeq_id objects to match/print sequence ids.
Uint4 window_step
window step
EAppType app_type
type of application to run
Uint4 DustWindow() const
Dust window.
const string LStatName() const
Get the name of the length statistics file.
string const GetMetaData() const
Get metadata string to be added to the counts file.
Uint4 set_max_score
score to use for high scoring units
const string SFormat() const
Format in which the unit counts generator should generate its output.
Uint4 textend
t_extend value for extension of masked intervals
Uint4 Mem() const
Memory available for n-mer frequency counting.
Uint4 Textend() const
Get the t_extend value.
string metadata
metadata associated with counts file
Uint4 max_score
maximum allowed unit score
string input
input file name
bool Discontig() const
Whether discontiguous units are used.
double t_thres_pct
threshold score for starting masking as percentage of units with lower count
Uint1 unit_size
unit size (used in unit counts generator)
Uint4 DustLevel() const
Dust level.
CIdSet * ids
set of ids to process
Uint4 MaxScore() const
Get the maximum unit score.
Uint4 SetMaxScore() const
Get the alternative score for high scoring units.
Uint4 Pattern() const
Pattern to form discontiguous units.
Uint4 dust_linker
number of bases to use for linking
Uint8 GenomeSize() const
Total genome length.
Uint4 MergeCutoffScore() const
Average unit score triggering the interval merging.
Uint1 unit_step
unit step
Uint4 dust_window
window size for dusting
Uint4 abs_merge_cutoff_dist
distance triggering unconditional interval merging
bool text_match
identify seq ids by string matching
Uint4 set_min_score
score to use for low scoring units
string lstat_name
name of the file containing unit length statistics
Uint4 MinScore() const
Get the minimum unit score.
double ThresScorePct() const
Uint4 AbsMergeCutoffDist() const
Distance at which intervals are merged unconditionally.
string th
percentages to compute winmask thresholds
CWinMaskUtil::CIdSet_SeqId CIdSet_SeqId
Uint4 CutoffScore() const
Get the average unit score threshold.
Uint1 window_size
length of a window in base pairs
CMaskWriter & Writer()
Get the output writer object.
Uint1 MergeUnitStep() const
Unit step to use for interval merging.
bool checkdup
check for duplicate contigs
Uint1 WindowSize() const
Get the window size.
double t_high_pct
highest allowed unit score as percentage of units with lower count
Implementation of CIdSet that compares CSeq_id handles.
Implementation of CIdSet that does substring matching.
Base class for sets of seq_id representations used with -ids and -exclude-ids options.
The NCBI C++ standard methods for dealing with std::string.
static ulg window_size
const char * file_name[]
#define bool
Definition: bool.h:34
static SQLCHAR output[256]
Definition: print.c:5
#define NULL
Definition: ncbistd.hpp:225
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define NcbiCin
Definition: ncbistre.hpp:542
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define NCBI_XALGOWINMASK_EXPORT
Definition: ncbi_export.h:1033
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n th
static int input()
GenericReader< UTF8< char >, UTF8< char >, CrtAllocator > Reader
Reader with UTF8 encoding and default allocator.
Definition: fwd.h:88
Defines command line argument related classes.
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
Definition: type.c:6
Modified on Fri Apr 12 17:23:06 2024 by modify_doxy.py rev. 669887