NCBI C++ ToolKit
Classes | Functions | Variables
win_mask_dup_table.cpp File Reference
#include <ncbi_pch.hpp>
#include <vector>
#include <string>
#include <map>
#include <corelib/ncbitype.h>
#include <corelib/ncbistre.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seqloc/Seq_id.hpp>
#include <objmgr/object_manager.hpp>
#include <objmgr/scope.hpp>
#include <objmgr/seq_entry_handle.hpp>
#include <objmgr/bioseq_ci.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/util/sequence.hpp>
#include <algo/winmask/win_mask_dup_table.hpp>
#include <algo/winmask/win_mask_util.hpp>
+ Include dependency graph for win_mask_dup_table.cpp:

Go to the source code of this file.

Go to the SVN repository for this file.

Classes

class  dup_lookup_table
 
struct  dup_lookup_table::sample_loc
 
class  dup_lookup_table::sample
 
class  tracker
 
struct  tracker::result
 

Functions

 USING_SCOPE (objects)
 
bool operator< (const dup_lookup_table::sample_loc &lhs, const dup_lookup_table::sample_loc &rhs)
 
bool operator> (const dup_lookup_table::sample_loc &lhs, const dup_lookup_table::sample_loc &rhs)
 
bool operator== (const dup_lookup_table::sample_loc &lhs, const dup_lookup_table::sample_loc &rhs)
 
void CheckDuplicates (const vector< string > &input, const string &infmt, const CWinMaskUtil::CIdSet *ids, const CWinMaskUtil::CIdSet *exclude_ids)
 Check for possibly duplicate sequences in the input. More...
 

Variables

const Uint4 SAMPLE_LENGTH = 100
 
const Uint4 SAMPLE_SKIP = 10000
 
const Uint4 MIN_SEQ_LENGTH = 50000
 
const Uint4 MAX_OFFSET_ERROR = 5
 
const Uint4 MIN_MATCH_COUNT = 4
 

Function Documentation

◆ CheckDuplicates()

void CheckDuplicates ( const vector< string > &  input,
const string &  infmt,
const CWinMaskUtil::CIdSet *  ids,
const CWinMaskUtil::CIdSet *  exclude_ids 
)

Check for possibly duplicate sequences in the input.

input contains the list of input file names. The files should be in FASTA format. The function checks the input sequences for duplication and reports possible duplicates to standard error.

Parameters
input: list of input file names
infmt: input format
ids: set of ids to check
exclude_ids: set of ids to ignore

Definition at line 535 of file win_mask_dup_table.cpp.

References dup_lookup_table::sample::begin(), CWinMaskUtil::consider(), CBioseq_Handle::eCoding_Iupac, eGetId_Best, dup_lookup_table::sample::end(), CBioseq_Handle::GetBioseqLength(), GetId(), CObjectManager::GetInstance(), CSeq_id::GetLabel(), CSeqVector::GetSeqData(), CBioseq_Handle::GetSeqVector(), i, input(), MIN_SEQ_LENGTH, om, SAMPLE_LENGTH, SAMPLE_SKIP, and table.

Referenced by CWinMaskCountsGenerator::operator()().

◆ operator<()

◆ operator==()

bool operator== ( const dup_lookup_table::sample_loc &  lhs,
const dup_lookup_table::sample_loc &  rhs 
)
inline

Definition at line 242 of file win_mask_dup_table.cpp.

◆ operator>()

Definition at line 228 of file win_mask_dup_table.cpp.

◆ USING_SCOPE()

USING_SCOPE ( objects  )

Variable Documentation

◆ MAX_OFFSET_ERROR

const Uint4 MAX_OFFSET_ERROR = 5

Definition at line 62 of file win_mask_dup_table.cpp.

Referenced by tracker::operator()().

◆ MIN_MATCH_COUNT

const Uint4 MIN_MATCH_COUNT = 4

Definition at line 63 of file win_mask_dup_table.cpp.

Referenced by tracker::operator()(), and tracker::~tracker().

◆ MIN_SEQ_LENGTH

const Uint4 MIN_SEQ_LENGTH = 50000

Definition at line 60 of file win_mask_dup_table.cpp.

Referenced by CheckDuplicates().

◆ SAMPLE_LENGTH

const Uint4 SAMPLE_LENGTH = 100

Definition at line 58 of file win_mask_dup_table.cpp.

Referenced by dup_lookup_table::add_seq_info(), and CheckDuplicates().

◆ SAMPLE_SKIP

const Uint4 SAMPLE_SKIP = 10000
Modified on Sat Feb 24 07:48:41 2024 by modify_doxy.py rev. 669887