33 #ifndef C_SR_SEARCH_IMPL_HPP
34 #define C_SR_SEARCH_IMPL_HPP
46 template< typename index_t >
55 index_impl_( dynamic_cast<
TIndex & >(*index) )
60 typedef typename TIndex::TOffsetIterator
TIter;
70 Uint4 idx,
bool fw_strand,
71 CResCache & res_cache, vector< TIter > & single_res_cache,
72 const vector< TSeqPos > & positions );
76 const vector< TSeqPos > & positions,
80 vector< TIter > & cache,
81 vector< Uint1 > & cache_set );
83 bool searchOneMismatch(
85 const vector< TSeqPos > & positions,
89 vector< TIter > & scache,
90 const vector< Uint1 > & scache_set,
94 vector< TIter > single_res_cache[2];
95 vector< Uint1 > single_res_cache_set[2];
96 vector< TIter > scache1[2];
97 vector< TIter > scache2[2];
98 vector< Uint1 > scache_set1[2];
99 vector< Uint1 > scache_set2[2];
103 template<
typename index_t >
106 vector< TIter > & single_res_cache,
107 const vector< TSeqPos > & positions )
109 res_cache.
set( idx, fw_strand );
112 if( idx != single_res_cache.size() ) {
113 for(
Uint4 i = 0;
i < single_res_cache.size(); ++
i )
116 TIter & s = single_res_cache[
i];
119 if(
r.empty() ) copyOffsets(
r, s );
121 Uint4 p = (idx == 0) ? pos - hkey_width_ : pos;
122 mergeOffsets(
r, s, p );
123 if(
r.empty() )
break;
126 else {
r.clear();
break; }
130 for(
Uint4 i = 0, pos = 0;
131 i < single_res_cache.size() - 2; ++
i, pos += hkey_width_ ) {
132 TIter & s = single_res_cache[
i];
135 if(
r.empty() ) copyOffsets(
r, s );
137 mergeOffsets(
r, s, pos );
138 if(
r.empty() )
break;
141 else {
r.clear();
break; }
147 template<
typename index_t >
154 index_impl_.getLOff(
offset ) };
162 template<
typename index_t >
172 for( TSRResults::iterator ires =
results.begin();
173 ires !=
results.end(); ++ires ) {
178 offres.
seqnum = index_impl_.getLId( o );
179 offres.
soff = index_impl_.getLOff( o );
181 if( offres.
seqnum > ires->seqnum ||
182 ( offres.
seqnum == ires->seqnum &&
183 offres.
soff >= ires->soff + step ) )
185 else if( !
offsets.Advance() )
break;
190 if( offres.
seqnum == ires->seqnum &&
191 offres.
soff == ires->soff + step )
203 template<
typename index_t >
206 const vector< TSeqPos > & positions,
208 vector< TIter > & cache, vector< Uint1 > & cache_set )
214 if(
ambig )
return true;
215 cache_set[cache_idx] =
true;
217 offsets = index_impl_.OffsetIterator(
key, hkey_width_ );
223 while( (++
i) != positions.size() ) {
225 key = getNMer( seq, pos, fw_strand,
ambig );
226 if(
ambig )
return true;
228 offsets = index_impl_.OffsetIterator(
key, hkey_width_ );
230 cache_set[cache_idx] =
true;
231 if(
offsets.end() ) max_pos = pos + hkey_width_;
235 else max_pos = hkey_width_;
241 template<
typename index_t >
243 const CSeqVector & seq,
const vector< TSeqPos > & positions,
245 vector< TIter > & scache,
const vector< Uint1 > & scache_set,
249 results_info.
clear();
250 static const Uint1 letters[4] = {
'A',
'C',
'G',
'T' };
252 for(
TSeqPos i = 0;
i < positions.size(); ++
i )
253 if( !scache_set[
i] ) {
255 if(
ambig )
return true;
256 scache[
i] = index_impl_.OffsetIterator(
key, hkey_width_ );
261 pair< TSeqPos, TSeqPos >
range =
262 Pos2Index(
i, seq.
size(), mismatch_info );
264 if( !cache.
is_set( mismatch_info.
idx, fw_strand ) ) {
266 mismatch_info.
idx, fw_strand,
267 cache, scache, positions );
271 if(
r.empty() ) {
i =
range.second;
continue; }
275 Uint1 orig_letter = seq[p];
277 for(
Uint4 j = 0; j < 4; ++j )
278 if( letters[j] != orig_letter ) {
279 Uint1 subst_letter = letters[j];
282 fw_strand,
ambig,
i, subst_letter );
283 if(
ambig )
return true;
285 index_impl_.OffsetIterator(
key, hkey_width_ );
290 r_entry.
init(
i, subst_letter, mismatch_info.
key_pos[0] );
298 fw_strand,
ambig,
i, subst_letter );
299 if(
ambig )
return true;
301 index_impl_.OffsetIterator(
309 if( mismatch_info.
idx == 0 )
310 mergeResults(
results,
r, hkey_width_ );
326 template<
typename index_t >
330 bool paired = (sdata.
seq_2 != 0);
340 vector< TSeqPos > positions1 = GetQNmerPositions( sz1 );
341 vector< TSeqPos > positions2 = GetQNmerPositions( sz2 );
343 if( positions1.empty() || positions2.empty() )
return;
345 TSeqPos maxpos1[2] = { sz1, sz1 };
346 TSeqPos maxpos2[2] = { sz2, sz2 };
356 for(
Uint1 strand = 0; strand < 2; ++strand ) {
357 scache1[strand].clear();
358 scache_set1[strand].clear();
359 scache1[strand].resize( positions1.size() );
360 scache_set1[strand].resize( positions1.size(), 0 );
361 hk_data_.exact_1[strand].clear();
363 seq1, positions1, strand,
364 hk_data_.exact_1[strand], maxpos1[strand],
365 scache1[strand], scache_set1[strand] );
367 scache2[strand].clear();
368 scache_set2[strand].clear();
369 scache2[strand].resize( positions2.size() );
370 scache_set2[strand].resize( positions2.size(), 0 );
371 hk_data_.exact_2[strand].clear();
373 seq2, positions2, strand,
374 hk_data_.exact_2[strand], maxpos2[strand],
375 scache2[strand], scache_set2[strand] );
381 bool matches_found =
false;
384 if( !hk_data_.exact_1[
i].empty() )
385 for(
Uint1 j = 0; j < 2; ++j )
386 if( !hk_data_.exact_2[j].empty() ) {
390 hk_data_.exact_1[
i], hk_data_.exact_2[j],
394 matches_found =
true;
406 const bool & mismatch = sdata.
mismatch;
408 if( mismatch && !matches_found ) {
409 matches_found =
false;
410 res_cache1.
init( positions1.size() + 1 );
411 res_cache2.
init( positions2.size() + 1 );
421 for(
Uint1 strand = 0; strand < 2; ++strand ) {
422 ambig = searchOneMismatch(
423 seq1, positions1, maxpos1[strand], strand,
424 hk_data_.mm_1[strand],
425 scache1[strand], scache_set1[strand],
428 ambig = searchOneMismatch(
429 seq2, positions2, maxpos2[strand], strand,
430 hk_data_.mm_2[strand],
431 scache2[strand], scache_set2[strand],
439 if( !hk_data_.exact_1[
i].empty() ) {
440 for(
Uint1 j = 0; j < 2; ++j ) {
441 if( !hk_data_.mm_2[j].empty() ) {
443 for(
Uint4 ind = 0; ind < hk_data_.mm_2[j].size(); ++ind ) {
452 matches_found =
true;
470 if( !hk_data_.mm_1[
i].empty() ) {
471 for(
Uint1 j = 0; j < 2; ++j ) {
472 if( !hk_data_.exact_2[j].empty() ) {
474 for(
Uint4 ind = 0; ind < hk_data_.mm_1[
i].size(); ++ind ) {
479 it.
results, hk_data_.exact_2[j],
483 matches_found =
true;
499 if( !hk_data_.mm_2[j].empty() ) {
501 for(
Uint4 iind = 0; iind < hk_data_.mm_1[
i].size(); ++iind ) {
502 for(
Uint4 jind = 0; jind < hk_data_.mm_2[j].size(); ++jind ) {
513 matches_found =
true;
537 if( !matches_found ) {
538 matches_found =
false;
550 if( !hk_data_.exact_1[
i].empty() ) {
551 matches_found =
true;
555 sz1, hk_data_.exact_1[
i],
i,
556 false, 0, (
Uint1)
'-', 0, 1 ) ) {
561 if( !matches_found ) {
570 if( !hk_data_.mm_1[
i].empty() ) {
571 matches_found =
true;
573 for(
Uint4 ind = 0; ind < hk_data_.mm_1[
i].size(); ++ind ) {
591 if( !matches_found ) outres.
level_1 = EM;
593 matches_found =
false;
600 nr += outres.
res.size();
603 if( !hk_data_.exact_2[
i].empty() ) {
606 sz2, hk_data_.exact_2[
i],
i,
607 false, 0, (
Uint1)
'-', 0, 2 ) ) {
611 matches_found =
true;
614 if( !matches_found ) {
620 nr += outres.
res.size();
623 if( !hk_data_.mm_2[
i].empty() )
624 for(
Uint4 ind = 0; ind < hk_data_.mm_2[
i].size(); ++ind ) {
645 const bool & mismatch = sdata.
mismatch;
648 vector< TSeqPos > positions = GetQNmerPositions( sz );
649 bool exact_matches_found =
false;
650 TSeqPos max_mismatch_pos[2] = { sz, sz };
660 if( !positions.empty() ) {
661 bool fw_strand =
true;
662 res_cache.
init( positions.size() + 1 );
666 single_res_cache[fw_strand].clear();
667 single_res_cache_set[fw_strand].clear();
668 single_res_cache[fw_strand].resize( positions.size() );
669 single_res_cache_set[fw_strand].resize(
670 positions.size(),
false );
675 seq, positions, fw_strand,
results,
676 max_mismatch_pos[fw_strand],
677 single_res_cache[fw_strand],
678 single_res_cache_set[fw_strand] );
681 if( reportResults( outres,
nr, sz,
results, fw_strand ) ) {
686 fw_strand = !fw_strand;
687 if( !
results.empty() ) exact_matches_found =
true;
688 }
while( !fw_strand );
693 if( mismatch && !exact_matches_found ) {
694 bool fw_strand =
true;
705 fw_strand = !fw_strand;
707 ambig = searchOneMismatch(
709 max_mismatch_pos[fw_strand], fw_strand,
711 single_res_cache[fw_strand],
712 single_res_cache_set[fw_strand],
716 for(
Uint4 ind = 0; ind < results_info.size(); ++ind ) {
720 sz, it.
results, fw_strand, mismatch,
727 }
while( !fw_strand );
TSRResults & at(Uint4 idx, bool fw_strand)
bool is_set(Uint4 idx, bool fw_strand) const
void set(Uint4 idx, bool fw_strand)
bool searchOneMismatch(const CSeqVector &seq, const vector< TSeqPos > &positions, Uint4 max_pos, bool fw_strand, CMismatchResultsInfo &results_info, vector< TIter > &scache, const vector< Uint1 > &scache_set, CResCache &cache)
void mergeOffsets(TSRResults &resutls, TIter &offsets, Uint4 step)
CSRSearch_Impl(CRef< CDbIndex > index, TSeqPos d, TSeqPos dfuzz)
void copyOffsets(TSRResults &resutls, TIter &offsets)
virtual void search(const SSearchData &sdata, TResults &results)
bool searchExact(const CSeqVector &seq, const vector< TSeqPos > &positions, bool fw_strand, TSRResults &results, TSeqPos &max_pos, vector< TIter > &cache, vector< Uint1 > &cache_set)
TIndex::TOffsetIterator TIter
void setResults4Idx(Uint4 idx, bool fw_strand, CResCache &res_cache, vector< TIter > &single_res_cache, const vector< TSeqPos > &positions)
vector< SSRResult > TSRResults
vector< TSRPairedResult > TSRPairedResults
unsigned int TSeqPos
Type for sequence locations and lengths.
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::KEY key
Magic spell ;-) needed for some weird compilers... very empiric.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static PCRE2_SIZE * offsets
static SLJIT_INLINE sljit_ins nr(sljit_gpr dst, sljit_gpr src)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
USING_SCOPE(ncbi::objects)
void init(TSeqPos pos, Uint4 letter, Uint1 adj)
TSeqPos mismatch_position
vector< SResultData > res
static bool ambig(char c)