32 #ifndef ALGO_SEQUENCE___POLYA__HPP
33 #define ALGO_SEQUENCE___POLYA__HPP
51 template <
typename Iterator>
60 template <
typename Iterator>
70 template <
typename Iterator>
80 template <
typename Iterator>
85 TSeqPos max_following_bases = 0);
91 template<
typename Iterator>
102 case 'A':
return 'T';
103 case 'T':
return 'A';
104 case 'C':
return 'G';
105 case 'G':
return 'C';
106 default:
return *
tmp;
177 template <
typename ForwardIterator1,
typename ForwardIterator2>
178 ForwardIterator1
ItrSearch(ForwardIterator1 first1, ForwardIterator1 last1,
179 ForwardIterator2 first2, ForwardIterator2 last2)
181 ptrdiff_t d1 = last1 - first1;
182 ptrdiff_t d2 = last2 - first2;
187 ForwardIterator1 current1 = first1;
188 ForwardIterator2 current2 = first2;
190 while (current2 != last2) {
191 if (!(*current1 == *current2)) {
203 return (current2 == last2) ? first1 : last1;
210 template <
typename Iterator>
221 template <
typename Iterator>
224 string motif1(
"AATAAA");
225 string motif2(
"ATTAAA");
227 Iterator pos = begin;
229 Iterator uStrmMotif = pos;
230 while (uStrmMotif != end) {
232 uStrmMotif =
ItrSearch(pos, end, motif1.begin(), motif1.end());
233 if (uStrmMotif == end) {
234 uStrmMotif =
ItrSearch(pos, end, motif2.begin(), motif2.end());
237 if (uStrmMotif != end) {
238 if (end - uStrmMotif < 16) {
241 pos = uStrmMotif + 15;
244 Iterator maxCleavage = (end - pos < 21) ? end : pos + 21;
246 while (pos < maxCleavage) {
247 unsigned int aRun = 0;
248 for (++pos; pos < maxCleavage && aRun < 3; ++pos) {
256 Iterator cleavageSite = pos - aRun;
259 unsigned int numA = 0, numOther = 0;
260 for (Iterator p = cleavageSite; p < end; ++p) {
268 for(Iterator p = end - 1;
269 p >= cleavageSite &&
TSeqPos(end - p) <= max_following_bases+1;
271 if (numOther + numA > 0 &&
272 ((
double) numA / (numA+numOther)) > 0.95) {
275 return TSeqRange(cleavageSite - begin, p - begin);
295 template<
typename Iterator>
306 cleavageSite = tail.
GetTo();
315 template <
typename Iterator>
329 max_following_bases);
332 int seqLen = end - begin;
333 tail_result.
Set(seqLen - 1 - tail.
GetTo(),
Implementation [in header because of templates].
bool operator!=(const CRevComp_It &it) const
bool operator<(const CRevComp_It &it) const
CRevComp_It & operator-=(int i)
bool operator==(const CRevComp_It &it) const
CRevComp_It operator++(int)
bool operator>=(const CRevComp_It &it) const
CRevComp_It operator--(int)
CRevComp_It(const Iterator &it)
CRevComp_It operator+(int i) const
bool operator>(const CRevComp_It &it) const
CRevComp_It & operator+=(int i)
CRevComp_It & operator--(void)
CRevComp_It operator-(int i) const
char operator*(void) const
bool operator<=(const CRevComp_It &it) const
CRevComp_It & operator++(void)
Include a standard set of the NCBI C++ Toolkit most basic headers.
unsigned int TSeqPos
Type for sequence locations and lengths.
int TSignedSeqPos
Type for signed sequence position.
position_type GetLength(void) const
TThisType & Set(position_type from, position_type to)
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
TSeqRange FindPolyARange(Iterator begin, Iterator end, TSeqPos max_following_bases)
PRE : two random access iterators pointing to sequence data [begin, end); maximum number of non-A bas...
TSignedSeqPos FindPolyA(Iterator begin, Iterator end)
PRE : two random access iterators pointing to sequence data [begin, end) POST: poly-A tail cleavage s...
EPolyTail FindPolyTail(Iterator begin, Iterator end, TSignedSeqPos &cleavageSite, TSeqPos min_length=1)
PRE : two random access iterators pointing to sequence data [begin, end); minimum length for tail POS...
ForwardIterator1 ItrSearch(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, ForwardIterator2 last2)