70 #define WIN_MASK_APP_VER_MAJOR 1
71 #define WIN_MASK_APP_VER_MINOR 0
72 #define WIN_MASK_APP_VER_PATCH 0
106 arg_desc->SetUsageContext(
GetArguments().GetProgramBasename(),
126 if( aConfig.
Output() ==
"-" ) {
145 if(aConfig.
InFmt() ==
"seqids")
152 if( aConfig.
Output() ==
"-" ) {
202 if(aConfig.
InFmt() ==
"seqids"){
203 ERR_POST(
Error <<
"windowmasker with seqids input not implemented yet");
234 Uint4 total = 0, total_masked = 0;
251 for ( ; bs_iter; ++bs_iter) {
264 unique_ptr< CSeqMasker::TMaskList > mask_info( theMasker(
data ) );
269 unique_ptr< CSeqMasker::TMaskList > dust_info(
270 (*duster)(
data, *mask_info.get() ) );
275 theWriter.
Print( bsh, *mask_info,
GetArgs()[
"parse_seqids"] );
277 for( CSeqMasker::TMaskList::const_iterator
i = mask_info->begin();
278 i != mask_info->end(); ++
i )
279 masked +=
i->second -
i->first + 1;
281 total_masked += masked;
282 _TRACE(
"Number of positions masked: " << masked );
287 _TRACE(
"Total number of positions: " << total );
288 _TRACE(
"Total number of positions masked: " << total_masked );
User-defined methods of the data storage class.
#define WIN_MASK_APP_VER_MAJOR
#define WIN_MASK_APP_VER_PATCH
#define WIN_MASK_APP_VER_MINOR
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Virtual base class for all input readers.
virtual CRef< objects::CSeq_entry > GetNextSequence()=0
Read the next sequence from the source stream.
A base class for winmasker output writers.
virtual void Print(objects::CBioseq_Handle &bsh, const TMaskList &mask, bool parsed_id=false)=0
Output masked sequence data.
This class encapsulates the dusting functionality of winmask.
static CSeqMaskerVersion FormatVersion
Format version.
static CSeqMaskerVersion FormatVersion
Format version.
static CSeqMaskerVersion FormatVersion
Format version.
static CSeqMaskerVersion FormatVersion
Format version.
static CSeqMaskerVersion StatAlgoVersion
Version of the statistics generation algorithm.
Main interface to window based masker functionality.
static void MergeMaskInfo(TMaskList *dest, const TMaskList *src)
Merge together two result lists.
vector< TMaskedInterval > TMaskList
A type representing the total of masking information about a sequence.
static CSeqMaskerVersion AlgoVersion
Version of window masking algorithm.
static const char *const USAGE_LINE
Short description of the program.
CWinMaskApplication()
Application constructor.
virtual void Init(void)
Initialization.
virtual int Run(void)
Main routine of the window based masker.
Objects of this class contain winmasker configuration data.
string Th() const
Percentage thresholds.
double MinScorePct() const
Uint4 MeanMergeCutoffDist() const
Distance at which intervals are considered candidates for merging.
const CIdSet * ExcludeIds() const
The set of query ids to exclude from processing.
Uint1 TMin_Count() const
Number of units to count.
bool MergePass() const
Flag to run the interval merging passes.
bool FaList() const
Use a list of fasta files.
@ eGenerateMasksWithDuster
double MaxScorePct() const
const CIdSet * Ids() const
The set of query ids to process.
bool CheckDup() const
Check for possibly duplicate sequences in the input.
Uint1 UnitStep() const
Unit step.
Uint4 SetMinScore() const
Get the alternative score for low scoring units.
bool UseBA() const
Whether to use bit array optimization for optimized binary counts format.
string Input() const
Value of the -input parameter.
const string InFmt() const
Input file format.
Uint1 UnitSize() const
n-mer size used for n-mer frequency counting.
Uint4 DustLinker() const
Dust linker (in bps).
double ExtendScorePct() const
const string Trigger() const
Type of the event triggering the masking.
EAppType AppType() const
Type of application to run.
Uint4 WindowStep() const
Window step.
string Output() const
Value of the -output parameter.
CMaskReader & Reader()
Get the input reader object.
Uint4 DustWindow() const
Dust window.
const string LStatName() const
Get the name of the length statistics file.
string const GetMetaData() const
Get metadata string to be added to the counts file.
const string SFormat() const
Format in which the unit counts generator should generate its output.
Uint4 Mem() const
Memory available for n-mer frequency counting.
Uint4 Textend() const
Get the t_extend value.
bool Discontig() const
Whether discontiguous units are used.
Uint4 DustLevel() const
Dust level.
Uint4 MaxScore() const
Get the maximum unit score.
Uint4 SetMaxScore() const
Get the alternative score for high scoring units.
Uint4 Pattern() const
Pattern to form discontiguous units.
Uint8 GenomeSize() const
Total genome length.
Uint4 MergeCutoffScore() const
Average unit score triggering the interval merging.
Uint4 MinScore() const
Get the minimum unit score.
static void AddWinMaskArgs(CArgDescriptions &arg_desc, EAppType type=eAny, bool determine_input=true)
double ThresScorePct() const
Uint4 AbsMergeCutoffDist() const
Distance at which intervals are merged unconditionally.
Uint4 CutoffScore() const
Get the average unit score threshold.
CMaskWriter & Writer()
Get the output writer object.
Uint1 MergeUnitStep() const
Unit step to use for interval merging.
Uint1 WindowSize() const
Get the window size.
Class responsible for converting unit counts between different formats.
This class encapsulates the n-mer frequency counts generation functionality of winmasker.
Base class for sets of seq_id representations used with -ids and -exclude-ids options.
static bool consider(const objects::CBioseq_Handle &bsh, const CIdSet *ids, const CIdSet *exclude_ids)
Check if the given bioseq should be considered for processing.
Operators to edit gaps in sequences.
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideLogfile
Hide log file description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
@ fHideVersion
Hide version description.
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
@ eDiag_Warning
Warning message.
void Error(CExceptionArgs_Base &args)
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
TSeqPos GetBioseqLength(void) const
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if coding is eCoding_Iupac.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
E_Choice Which(void) const
Which variant is currently selected.
@ e_not_set
No variant selected.
@ eMol_na
just a nucleic acid
Contains the command line options common to filtering algorithms.
const string version
version string
NCBI C++ auxiliary debug macros.
CRef< objects::CObjectManager > om