53 #define NCBI_BOOST_NO_AUTO_TEST_MAIN
59 #ifndef SKIP_DOXYGEN_PROCESSING
73 data.resize(sequence.length());
74 for (
size_t i=0;
i < sequence.length();
i++) {
77 bioseq->
SetInst().SetLength((
unsigned int)sequence.length());
80 bioseq->
SetId().clear();
86 template <
class TKmerCounts>
97 counts.Reset(seqloc, *scope);
105 const string seq1 =
"AAAAAAAAA";
106 const string seq2 =
"BBBBBBBBBB";
109 for (
int i=3;
i < 6;
i++) {
110 for (
int j=15;j <= 20;j++) {
138 const string seq_x =
"AXAXAXAXAXAXAX";
139 const string seq_xonly =
"XXXXXXXXXXXXX";
143 for (
int i=3;
i < 6;
i++) {
154 const string seq_short =
"ABC";
193 const int kKmerLen = 4;
198 vector< CRef<CSeq_loc> > seqs;
199 vector<CSparseKmerCounts> counts_vect;
200 TKMethods::TDistMatrix dmat;
203 BOOST_REQUIRE_EQUAL(status, 0);
204 BOOST_REQUIRE(seqs.size() > 0);
205 BOOST_REQUIRE(!scope.
Empty());
208 BOOST_CHECK_THROW(TKMethods::ComputeCounts(vector<
CRef<CSeq_loc> >(),
209 *scope, counts_vect),
219 TKMethods::ComputeCounts(seqs, *scope, counts_vect);
220 BOOST_REQUIRE_EQUAL(counts_vect.size(), seqs.size());
222 TKMethods::ComputeDistMatrix(counts_vect,
223 TKMethods::eFractionCommonKmersGlobal, dmat);
225 BOOST_REQUIRE_EQUAL(dmat.GetRows(), seqs.size());
226 BOOST_CHECK_CLOSE(dmat(0, 1), 0.0, 1e-6);
228 TKMethods::ComputeDistMatrix(counts_vect,
229 TKMethods::eFractionCommonKmersLocal, dmat);
231 BOOST_REQUIRE_EQUAL(dmat.GetRows(), seqs.size());
232 BOOST_CHECK_CLOSE(dmat(0, 1), 0.0, 1e-6);
238 TKMethods::ComputeCounts(seqs, *scope, counts_vect);
239 BOOST_REQUIRE_EQUAL(counts_vect.size(), seqs.size());
241 TKMethods::ComputeDistMatrix(counts_vect,
242 TKMethods::eFractionCommonKmersGlobal, dmat);
244 BOOST_REQUIRE_EQUAL(dmat.GetRows(), seqs.size());
245 BOOST_CHECK_CLOSE(dmat(0, 1), 0.0, 1e-6);
247 TKMethods::ComputeDistMatrix(counts_vect,
248 TKMethods::eFractionCommonKmersLocal, dmat);
250 BOOST_REQUIRE_EQUAL(dmat.GetRows(), seqs.size());
251 BOOST_CHECK_CLOSE(dmat(0, 1), 0.0, 1e-6);
256 TKMethods::ComputeCounts(seqs, *scope, counts_vect);
257 BOOST_REQUIRE_EQUAL(counts_vect.size(), seqs.size());
259 TKMethods::ComputeDistMatrix(counts_vect,
260 TKMethods::eFractionCommonKmersGlobal, dmat);
262 BOOST_REQUIRE_EQUAL(dmat.GetRows(), seqs.size());
263 BOOST_CHECK_CLOSE(dmat(0, 1), 0.0, 1e-6);
265 TKMethods::ComputeDistMatrix(counts_vect,
266 TKMethods::eFractionCommonKmersLocal, dmat);
268 BOOST_REQUIRE_EQUAL(dmat.GetRows(), seqs.size());
269 BOOST_CHECK_CLOSE(dmat(0, 1), 0.0, 1e-6);
static const int kAlphabetSize
The aligner internally works only with the ncbistdaa alphabet.
Exception class for Kmer counts.
Kmer counts for alignment free sequence similarity computation implemented as a sparse vector.
static unsigned int GetKmerLength(void)
Get default kmer length.
static void SetUseCompressed(bool use_comp)
Set default option for using compressed alphabet.
static void SetAlphabetSize(unsigned size)
Set default alphabet size.
unsigned int GetNumCounts(void) const
Get number of all k-mers found in the sequence.
static unsigned int CountCommonKmers(const CSparseKmerCounts &v1, const CSparseKmerCounts &v2, bool repetitions=true)
Compute number of common kmers between two count vectors.
static void SetKmerLength(unsigned len)
Set default k-mer length.
static vector< Uint1 > & SetTransTable(void)
Set default compressed alphabet letter translation table.
Interface for computing and manipulating k-mer counts vectors that allows for different implementations...
int ReadFastaQueries(const string &filename, vector< CRef< objects::CSeq_loc > > &seqs, CRef< objects::CScope > &scope, bool parse_deflines, objects::CSeqIdGenerator *id_generator)
const Uint1 AMINOACID_TO_NCBISTDAA[]
Translates between ncbieaa and ncbistdaa.
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle. Throws an exception if none is available.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
virtual void SetParams()
Called at the beginning of Run, before creating thread pool.
TId & SetId(void)
Assign a value to Id data member.
void SetInst(TInst &value)
Assign a value to Inst data member.
TNcbistdaa & SetNcbistdaa(void)
Select the variant.
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
BOOST_AUTO_TEST_CASE(TestSparseKmerCounts)
static CRef< CBioseq > s_CreateBioseq(const string &sequence, int id)
static void s_CreateKmerCounts(const string &seq, TKmerCounts &counts)
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
Utility stuff for more convenient using of Boost.Test library.