NCBI C++ ToolKit
|
#include <ncbi_pch.hpp>
#include <vector>
#include <string>
#include <map>
#include <corelib/ncbitype.h>
#include <corelib/ncbistre.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seqloc/Seq_id.hpp>
#include <objmgr/object_manager.hpp>
#include <objmgr/scope.hpp>
#include <objmgr/seq_entry_handle.hpp>
#include <objmgr/bioseq_ci.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/util/sequence.hpp>
#include <algo/winmask/win_mask_dup_table.hpp>
#include <algo/winmask/win_mask_util.hpp>
Go to the source code of this file.
Go to the SVN repository for this file.
Classes | |
class | dup_lookup_table |
struct | dup_lookup_table::sample_loc |
class | dup_lookup_table::sample |
class | tracker |
struct | tracker::result |
Functions | |
USING_SCOPE (objects) | |
bool | operator< (const dup_lookup_table::sample_loc &lhs, const dup_lookup_table::sample_loc &rhs) |
bool | operator> (const dup_lookup_table::sample_loc &lhs, const dup_lookup_table::sample_loc &rhs) |
bool | operator== (const dup_lookup_table::sample_loc &lhs, const dup_lookup_table::sample_loc &rhs) |
void | CheckDuplicates (const vector< string > &input, const string &infmt, const CWinMaskUtil::CIdSet *ids, const CWinMaskUtil::CIdSet *exclude_ids) |
Check for possibly duplicate sequences in the input. More... | |
Variables | |
const Uint4 | SAMPLE_LENGTH = 100 |
const Uint4 | SAMPLE_SKIP = 10000 |
const Uint4 | MIN_SEQ_LENGTH = 50000 |
const Uint4 | MAX_OFFSET_ERROR = 5 |
const Uint4 | MIN_MATCH_COUNT = 4 |
void CheckDuplicates (const vector< string > &input, const string &infmt, const CWinMaskUtil::CIdSet *ids, const CWinMaskUtil::CIdSet *exclude_ids)
Check for possibly duplicate sequences in the input.
The input argument contains the list of input file names. The files should be in FASTA format. The function checks the input sequences for duplication and reports possible duplicates to standard error.
input | list of input file names |
infmt | input format |
ids | set of ids to check |
exclude_ids | set of ids to ignore |
Definition at line 535 of file win_mask_dup_table.cpp.
References dup_lookup_table::sample::begin(), CWinMaskUtil::consider(), data, CBioseq_Handle::eCoding_Iupac, eGetId_Best, dup_lookup_table::sample::end(), CBioseq_Handle::GetBioseqLength(), GetId(), CObjectManager::GetInstance(), CSeq_id::GetLabel(), CBioseq_Handle::GetSeqVector(), i, input(), MIN_SEQ_LENGTH, om, SAMPLE_LENGTH, SAMPLE_SKIP, and table.
Referenced by CWinMaskCountsGenerator::operator()().
bool operator< (const dup_lookup_table::sample_loc &lhs, const dup_lookup_table::sample_loc &rhs) | inline |
Definition at line 210 of file win_mask_dup_table.cpp.
References false, dup_lookup_table::sample_loc::offset, dup_lookup_table::sample_loc::seqnum, and true.
bool operator== (const dup_lookup_table::sample_loc &lhs, const dup_lookup_table::sample_loc &rhs) | inline |
Definition at line 242 of file win_mask_dup_table.cpp.
bool operator> (const dup_lookup_table::sample_loc &lhs, const dup_lookup_table::sample_loc &rhs) | inline |
Definition at line 228 of file win_mask_dup_table.cpp.
USING_SCOPE (objects)
const Uint4 MAX_OFFSET_ERROR = 5
Definition at line 62 of file win_mask_dup_table.cpp.
Referenced by tracker::operator()().
const Uint4 MIN_MATCH_COUNT = 4
Definition at line 63 of file win_mask_dup_table.cpp.
Referenced by tracker::operator()(), and tracker::~tracker().
const Uint4 MIN_SEQ_LENGTH = 50000
Definition at line 60 of file win_mask_dup_table.cpp.
Referenced by CheckDuplicates().
const Uint4 SAMPLE_LENGTH = 100
Definition at line 58 of file win_mask_dup_table.cpp.
Referenced by dup_lookup_table::add_seq_info(), and CheckDuplicates().
const Uint4 SAMPLE_SKIP = 10000
Definition at line 59 of file win_mask_dup_table.cpp.
Referenced by dup_lookup_table::add_seq_info(), CheckDuplicates(), tracker::operator()(), tracker::report_match(), and tracker::~tracker().