NCBI C++ ToolKit
primercheck.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 
2 /* $Id: primercheck.cpp 101947 2024-03-07 19:10:58Z jianye $
3  * ===========================================================================
4  *
5  * PUBLIC DOMAIN NOTICE
6  * National Center for Biotechnology Information
7  *
8  * This software/database is a "United States Government Work" under the
9  * terms of the United States Copyright Act. It was written as part of
10  * the author's official duties as a United States Government employee and
11  * thus cannot be copyrighted. This software/database is freely available
12  * to the public for use. The National Library of Medicine and the U.S.
13  * Government have not placed any restriction on its use or reproduction.
14  *
15  * Although all reasonable efforts have been taken to ensure the accuracy
16  * and reliability of the software and data, the NLM and the U.S.
17  * Government do not and cannot warrant the performance or results that
18  * may be obtained by using this software or data. The NLM and the U.S.
19  * Government disclaim all warranties, express or implied, including
20  * warranties of performance, merchantability or fitness for any particular
21  * purpose.
22  *
23  * Please cite the author in any work or product based on this material.
24  *
25  * ===========================================================================
26  *
27  * Author: Jian Ye
28  */
29 
30 /** @file primercheck.cpp
31  * primer specificity checking tool
32  *
33  */
34 
35 #include <ncbi_pch.hpp>
36 #include <corelib/ncbiapp.hpp>
37 #include <corelib/ncbienv.hpp>
38 #include <corelib/ncbiargs.hpp>
39 #include <corelib/ncbistre.hpp>
40 #include <serial/objistr.hpp>
41 #include <serial/objostr.hpp>
42 #include <serial/serial.hpp>
46 #include <objmgr/seq_vector.hpp>
50 
51 
52 
58 
59 
63 
64 USING_SCOPE (sequence);
65 
// Minimum fraction of the primer window length that must overlap an HSP's
// master range before x_FillGlobalAlignInfo tries to align against that HSP.
66 static const double k_MinOverlapLenFactor = 0.45;
// A candidate primer alignment is kept only when its fraction of identical
// columns is strictly greater than this value.
67 static const double k_Min_Percent_Identity = 0.64999;
// Once an alignment holds this many gaps (or more) it is flagged through the
// end_gap output as not reliable.
68 static const int k_MaxReliableGapNum = 3;
69 
// NOTE(review): the opening line(s) of this constructor (class-qualified name
// and the first parameter, used below as template_handle) are missing from
// this listing.
//
// The visible parameters are copied into the like-named members; the rest of
// the member-initializer list sets fixed defaults (no interval tree, a 3' end
// window of 10, no HSP size limit, 20 non-specific targets, 100 targets per
// sequence).
71  CSeq_align_set& input_seqalign,
72  CScope &scope,
73  int word_size,
74  TSeqPos allowed_total_mismatch,
75  TSeqPos allowed_3end_mismatch,
76  TSeqPos max_mismatch)
77  : m_TemplateHandle(template_handle),
78  m_Id(template_handle.GetSeqId()),
79  m_WordSize(word_size),
80  m_AllowedTotalMismatch(allowed_total_mismatch),
81  m_Allowed3EndMismatch(allowed_3end_mismatch),
82  m_MaxMismatch(max_mismatch),
83  m_UseITree(false),
84  m_MismatchRegionLength3End(10),
85  m_MaxHSPSize(0),
86  m_NumNonSpecificTarget(20),
87  m_MaxTargetPerSequence(100)
88 {
// x_SortHit is not shown in this listing; it receives the full input set.
89  x_SortHit(input_seqalign);
90  if(!input_seqalign.Get().empty()){
// NOTE(review): listing line 91 (the start of this statement) is missing. The
// visible part reads the length of the bioseq named by row 0 of the first
// input alignment.
92  scope.GetBioseqHandle(input_seqalign.Get().front()
93  ->GetSeq_id(0)).GetBioseqLength());
94  }
95  const CRef<CSeq_id> wid
96  = FindBestChoice(template_handle.GetBioseqCore()->GetId(),
// NOTE(review): listing lines 97-100 (rest of this call and whatever uses wid)
// are missing.
101 }
102 
// NOTE(review): the signature line for this body is missing from the listing;
// the body only releases resources, so it is presumably a destructor.
104 {
// Free every pointer held in both vectors (first / second) of each m_SortHit entry.
105  for (TSeqPos i = 0; i < m_SortHit.size(); i ++) {
106  for (TSeqPos j = 0; j < m_SortHit[i].first.size(); j ++) {
107  delete m_SortHit[i].first[j];
108  }
109  for (TSeqPos j = 0; j < m_SortHit[i].second.size(); j ++) {
110  delete m_SortHit[i].second[j];
111  }
112  }
113 
// NOTE(review): the bodies of the two loops below (listing lines 115 and 119)
// are missing; presumably they release the per-strand range trees - confirm
// against the full source.
114  for (int i = 0; i < (int)m_RangeTreeListPlusStrand.size(); i ++) {
116  };
117 
118  for (int i = 0; i < (int)m_RangeTreeListMinusStrand.size(); i ++) {
120  };
121 
122 }
123 
// NOTE(review): the first signature line (class-qualified name and the
// parameter used below as hits) is missing from this listing.
//
// Stores the hit container and the address of the caller's scope;
// m_FeatureScope starts out NULL.
125  CScope & scope)
126  : m_Hits(hits),
127  m_Scope(&scope),
128  m_FeatureScope(NULL)
129 {
// One cache slot per entry of the hit list.
130  m_SlaveRangeCache.resize(m_Hits->m_SortHit.size());
131  if (m_Hits->m_MaxHSPSize > 0) {
// NOTE(review): listing lines 132-135 are missing; whatever is set up here
// only happens when a positive maximum HSP size is configured.
136  }
137 }
138 
// NOTE(review): the signature line for this body is missing from the listing,
// as are listing lines 142-145 inside the condition; presumably this is the
// destructor - confirm against the full source.
140 {
141  if (m_Hits->m_MaxHSPSize > 0) {
146  };
147 }
148 
149 static void s_CountGaps(const string& xcript,
150  TSeqPos& master_start_gap,
151  TSeqPos& master_end_gap,
152  TSeqPos& slave_start_gap,
153  TSeqPos& slave_end_gap,
154  char master_gap_char,
155  char slave_gap_char) {
156 
157  for(int i = 0; i < (int)xcript.size(); i ++){
158  if (xcript[i] == master_gap_char) {
159  master_start_gap ++;
160  } else {
161  break;
162  }
163  }
164 
165  for(int i = (int)xcript.size() - 1; i >= 0; i--){
166  if (xcript[i] == master_gap_char) {
167  master_end_gap ++;
168  } else {
169  break;
170  }
171  }
172 
173  if (master_start_gap == 0) {//can only have gap on either master or slave, not both
174  for(SIZE_TYPE i = 0; i < xcript.size(); i ++){
175  if (xcript[i] == slave_gap_char) {
176  slave_start_gap ++;
177  } else {
178  break;
179  }
180  }
181  }
182  if (master_end_gap == 0){
183  for(int i = (int)xcript.size() - 1; i >= 0; i--){
184  if (xcript[i] == slave_gap_char) {
185  slave_end_gap ++;
186  } else {
187  break;
188  }
189  }
190  }
191 }
192 
// NOTE(review): the first signature line (listing line 193: return type,
// qualified name and the first parameter, used below as primer_denseg) is
// missing from this listing.
//
// Walks the two-row alignment built from primer_denseg column by column and
// reports through the reference parameters:
//   num_total_mismatch / num_total_gap - counts over the whole alignment
//   num_3end_mismatch / num_3end_gap   - counts that fall inside the window of
//                                        m_Hits->m_MismatchRegionLength3End
//                                        master bases at the 3' end
//   max_num_continuous_match           - longest run of identical columns
//   aln_range                          - set to [0, GetAlnStop()]
//   end_gap                            - set to true (never cleared here) when
//                                        either row starts or ends with a gap
//                                        or the gap total reaches
//                                        k_MaxReliableGapNum
// is_left_primer picks the end of the master row treated as the 3' end: the
// last bases when true, the first bases when false.
194  bool& end_gap,
195  TSeqPos& num_total_mismatch,
196  TSeqPos& num_3end_mismatch,
197  TSeqPos& num_total_gap,
198  TSeqPos& num_3end_gap,
199  bool is_left_primer,
200  int& max_num_continuous_match,
201  CRange<TSignedSeqPos>& aln_range) {
202 
203 
204  int num_continuous_match = 0;
205  num_total_mismatch = 0;
206  num_3end_mismatch = 0;
207  num_total_gap = 0;
208  num_3end_gap = 0;
209  max_num_continuous_match = 0;
210 
211  CRef<CAlnVec> av(new CAlnVec(primer_denseg, *m_Scope));
212  //need to calculate the mismatch and gaps.
213  aln_range.SetFrom (0);
214  aln_range.SetTo(av->GetAlnStop());
// Render gaps and unaligned ends with the same '-' so one comparison against
// gap_char covers both.
215  av->SetGapChar('-');
216  av->SetEndChar('-');
217  string master_string;
218  av->GetAlnSeqString(master_string, 0, aln_range);
219  string slave_string;
220  av->GetAlnSeqString(slave_string, 1, aln_range);
221  char gap_char = av->GetGapChar(0);
// num_bp counts the master bases consumed so far; master_letter_len is the
// number of non-gap columns on the master row.
222  int num_bp = 0;
223  int master_letter_len = 0;
224  for(int i=0; i< (int)master_string.size(); i++){
225  if (master_string[i] != gap_char) {
226  master_letter_len ++;
227  }
228  }
229  for(int i=0; i < (int)master_string.size(); i++){
230 
// Gap on the master row: num_bp is not advanced.
231  if (master_string[i] == gap_char) {
232  if (is_left_primer) {
233  if (num_bp > (int)(master_letter_len - m_Hits->m_MismatchRegionLength3End)){
234  num_3end_gap ++;
235  }
236  } else {
237  if (num_bp < (int)(m_Hits->m_MismatchRegionLength3End - 1)){
238  num_3end_gap ++;
239  }
240  }
241  num_total_gap ++;
// Gap on the slave row: a master base is consumed, so num_bp advances.
242  } else if (slave_string[i] == gap_char) {
243  if (is_left_primer) {
244  if (num_bp > (int)(master_letter_len - m_Hits->m_MismatchRegionLength3End)){
245  num_3end_gap ++;
246  }
247  } else {
248  if (num_bp < (int)(m_Hits->m_MismatchRegionLength3End - 1)){
249  num_3end_gap ++;
250  }
251  }
252  num_bp ++;
253  num_total_gap ++;
// Both rows carry a base and the bases differ. Note the window test here is
// one position wider than the gap tests above (>= / no "- 1").
254  } else if(master_string[i]!=slave_string[i]){
255  if (is_left_primer) {
256  if (num_bp >= (int)(master_letter_len - m_Hits->m_MismatchRegionLength3End)){
257  num_3end_mismatch ++;
258  }
259  } else {
260  if (num_bp < (int)m_Hits->m_MismatchRegionLength3End){
261  num_3end_mismatch ++;
262  }
263  }
264  num_total_mismatch ++;
265  num_bp ++;
266  } else {
267  num_bp ++;
268  }
// Track the longest run of identical columns.
// NOTE(review): a column where both rows show gap_char also compares equal
// and would extend the run - confirm such columns cannot occur.
269  if(master_string[i]== slave_string[i]){
270  num_continuous_match ++;
271  if (max_num_continuous_match < num_continuous_match) {
272  max_num_continuous_match = num_continuous_match;
273  }
274  } else {
275  //reset
276  num_continuous_match = 0;
277  }
278  }
279  // cerr<< "master = " << master_string << endl;
280  // cerr<< "slave = " << slave_string << endl;
// NOTE(review): index 0 and size()-1 are read without an emptiness check;
// this assumes the alignment strings are never empty - confirm.
281  if (master_string[0] == gap_char ||
282  master_string[(int)master_string.size() - 1] == gap_char ||
283  slave_string[0] == gap_char ||
284  slave_string[(int)slave_string.size() - 1] == gap_char ||
285  (int)num_total_gap >= k_MaxReliableGapNum) {
286  //blast local alignment ends with gaps may not be an accurate alignment
287  end_gap = true;
288  }
289 }
290 
// Align master_seq against the hit region [hit_full_start, hit_full_stop]
// with CNWAligner and return the result as a Dense-seg.
//
//   desired_align_range  master coordinates of the primer window; its start
//                        is used as the master start of the Dense-seg
//   master_seq           master bases for that window
//   av                   source alignment; row 0 supplies the master id,
//                        row 1 the hit id, handle and strand
//   hit_full_start/stop  hit region to align (taken by value; adjusted
//                        locally when the hit row is extended)
//   hit_strand           strand of the hit row
//   xcript               out: aligner transcript, possibly edited below
//   nw_align_modified    out: true when the Dense-seg was rebuilt from an
//                        edited transcript
//
// NOTE(review): several lines of this function are missing from the listing
// (304-306, 312-313, 328, 387, 425); they are marked where they fall.
291 CRef<CDense_seg> s_DoNWalign (const CRange<TSeqPos>& desired_align_range,
292  string& master_seq,
293  const CAlnVec& av,
294  TSeqPos hit_full_start,
295  TSeqPos hit_full_stop,
296  ENa_strand hit_strand,
297  string& xcript,
298  bool& nw_align_modified) {
299  nw_align_modified = false;
300  CRef<CDense_seg> den_ref (NULL);
301  string hit_seq;
302  const CBioseq_Handle& hit_handle = av.GetBioseqHandle(1);
303  if (hit_strand == eNa_strand_minus) {
// NOTE(review): listing lines 304-306 (start of the minus-strand sequence
// fetch) are missing; the visible arguments are coordinates counted from the
// other end of the hit sequence.
307  hit_full_stop - 1,
308  (int)av.GetBioseqHandle(1).GetBioseqLength() -
309  hit_full_start, hit_seq);
310  // cerr << "strand minus" << endl;
311  } else {
// NOTE(review): listing lines 312-313 (object on which GetSeqData is called)
// are missing.
314  GetSeqData(hit_full_start, hit_full_stop + 1, hit_seq);
315  }
316  //cerr << "global master=" << master_seq << " hit=" << hit_seq << endl;
317 
// Fixed scoring, no score matrix. Presumably Wm/Wms/Wg/Ws are the match,
// mismatch, gap-open and gap-extension scores - confirm against CNWAligner.
318  CRef<CNWAligner> aligner (new CNWAligner(master_seq, hit_seq));
319  aligner->SetWm(1);
320  aligner->SetWms(-1);
321  aligner->SetWg(-1);
322  aligner->SetWs(-4);
323  aligner->SetScoreMatrix(NULL);
324  aligner->Run();
325  xcript = aligner->GetTranscriptString();
326  //cerr << "original script=" << xcript << endl;
// NOTE(review): listing line 328 (one argument of this call) is missing.
// The hit start passed here is the stop coordinate for a minus-strand hit.
327  den_ref = aligner->GetDense_seg(desired_align_range.GetFrom(),
329  av.GetSeqId(0),
330  (hit_strand ==
331  eNa_strand_minus ? hit_full_stop :
332  hit_full_start),
333  hit_strand,
334  av.GetSeqId(1));
335 
336  /* unique_ptr<CObjectOStream> out(CObjectOStream::Open(eSerial_AsnText, cerr));
337  cerr << "original denseg:" << endl;
338  *out << *den_ref;
339  cerr << endl;*/
// Count end gaps in the transcript: 'I' is passed as the master gap symbol
// and 'D' as the slave gap symbol.
340  TSeqPos master_start_gap = 0;
341  TSeqPos master_end_gap = 0;
342  TSeqPos slave_start_gap = 0;
343  TSeqPos slave_end_gap = 0;
344  s_CountGaps(xcript, master_start_gap, master_end_gap, slave_start_gap, slave_end_gap, 'I', 'D');
345  if (slave_start_gap > 0 || slave_end_gap > 0) {
346 
347  //extending slave row
// Grow the hit region by the end-gap lengths, clamped to [0, length - 1].
// On the minus strand the transcript start maps to the hit stop, so the two
// gap counts swap roles.
348  TSeqPos new_hit_full_start;
349  TSeqPos new_hit_full_stop;
350  if (av.IsPositiveStrand(1)) {
351  new_hit_full_start = max((int)(hit_full_start - slave_start_gap), 0);
352  new_hit_full_stop = min(hit_full_stop + slave_end_gap, av.GetBioseqHandle(1).
353  GetBioseqLength() - 1);
354  } else {
355  new_hit_full_start = max((int)(hit_full_start - slave_end_gap), 0);
356  new_hit_full_stop = min(hit_full_stop + slave_start_gap, av.GetBioseqHandle(1).
357  GetBioseqLength() - 1);
358  }
359 
360  /* cerr << "in hit_full_start =" << hit_full_start << endl;
361  cerr << "in hit_full_stop =" << hit_full_stop << endl;
362  cerr << "in new_hit_full_start =" << new_hit_full_start << endl;
363  cerr << "in new_hit_full_stop =" << new_hit_full_stop << endl;*/
364  //realign again only if hit seq can extend (i.e., not at seq end or start)
365  if (!(new_hit_full_start == hit_full_start &&
366  new_hit_full_stop == hit_full_stop)) {
// Overwrite as many end symbols as the hit actually grew by with 'M', so the
// extended hit bases pair with the master bases that faced a slave gap.
// NOTE(review): the second loop in each branch starts one position before
// the last (stop delta) symbols and so rewrites (stop delta + 1) symbols -
// confirm this is intended.
367  if (av.IsPositiveStrand(1)) {
368  for(int i = hit_full_start - new_hit_full_start - 1; i >= 0 ; i --){
369  xcript[i] = 'M';
370  }
371  for(int i = (int)xcript.size() - 1 - (new_hit_full_stop - hit_full_stop); i < (int)xcript.size(); i ++) {
372  xcript[i] = 'M';
373  }
374  } else {
375  for(int i = new_hit_full_stop - hit_full_stop - 1; i >= 0 ; i --){
376  xcript[i] = 'M';
377  }
378  for(int i = (int)xcript.size() - 1 - (hit_full_start - new_hit_full_start); i < (int)xcript.size(); i ++) {
379  xcript[i] = 'M';
380  }
381 
382  }
383  hit_full_start = new_hit_full_start;
384  hit_full_stop = new_hit_full_stop;
// Rebuild the Dense-seg from the edited transcript and attach copies of both
// row ids.
// NOTE(review): listing line 387 (one argument of FromTranscript) is missing.
385  den_ref = new CDense_seg;
386  den_ref->FromTranscript(desired_align_range.GetFrom(),
388  (hit_strand ==
389  eNa_strand_minus ? new_hit_full_stop :
390  new_hit_full_start),
391  hit_strand,
392  xcript);
393  CRef<CSeq_id> master_id (new CSeq_id);
394  CRef<CSeq_id> slave_id (new CSeq_id);
395  master_id->Assign(av.GetSeqId(0));
396  slave_id->Assign(av.GetSeqId(1));
397  den_ref->SetIds().push_back(master_id);
398  den_ref->SetIds().push_back(slave_id);
399  nw_align_modified = true;
400  }
401  }
402 
403 
404  if (master_start_gap > 0 || master_end_gap > 0) {
405  //deleting the master gaps
// Drop the master end gaps from the transcript and shrink the hit region by
// the same amounts (gap counts swap roles on the minus strand).
406  xcript = xcript.substr(master_start_gap);
407  xcript = xcript.substr(0, xcript.size() - master_end_gap);
408 
409  TSeqPos new_hit_full_start;
410  TSeqPos new_hit_full_stop;
411  if (av.IsPositiveStrand(1)) {
412  new_hit_full_start = hit_full_start + master_start_gap;
413  new_hit_full_stop = hit_full_stop - master_end_gap;
414  } else {
415  new_hit_full_start = hit_full_start + master_end_gap;
416  new_hit_full_stop = hit_full_stop - master_start_gap;
417  }
418  /*
419  cerr << "in hit_full_start =" << hit_full_start << endl;
420  cerr << "in hit_full_stop =" << hit_full_stop << endl;
421  cerr << "in new_hit_full_start =" << new_hit_full_start << endl;
422  cerr << "in new_hit_full_stop =" << new_hit_full_stop << endl;*/
// NOTE(review): listing line 425 (one argument of FromTranscript) is missing.
423  den_ref = new CDense_seg;
424  den_ref->FromTranscript(desired_align_range.GetFrom(),
426  (hit_strand ==
427  eNa_strand_minus ? new_hit_full_stop :
428  new_hit_full_start),
429  hit_strand,
430  xcript);
431  CRef<CSeq_id> master_id (new CSeq_id);
432  CRef<CSeq_id> slave_id (new CSeq_id);
433  master_id->Assign(av.GetSeqId(0));
434  slave_id->Assign(av.GetSeqId(1));
435  den_ref->SetIds().push_back(master_id);
436  den_ref->SetIds().push_back(slave_id);
437  nw_align_modified = true;
438  }
439 
440 
441  /* CNWFormatter fmt (*aligner);
442 
443  string text;
444  fmt.AsText(&text, CNWFormatter::eFormatType2);
445  cerr << text << endl;
446 
447  unique_ptr<CObjectOStream> out(CObjectOStream::Open(eSerial_AsnText, cerr));
448  cerr << "final denseg:" << endl;
449  *out << *den_ref;
450  cerr << endl;*/
451  return den_ref;
452 }
453 
454 
// NOTE(review): the first signature line (listing line 455: return type,
// qualified name and the first parameter, used below as desired_align_range)
// is missing from this listing, as are listing lines 474, 497 and 499.
//
// Builds an alignment of the primer window (desired_align_range on the
// master) against the matching region of the hit in input_hit:
//   1. picks the longest non-gap chunk of the existing alignment inside the
//      window and projects the window ends onto the hit from that chunk;
//   2. calls s_DoNWalign on the resulting hit region;
//   3. when s_DoNWalign did not edit the transcript, derives the mismatch and
//      gap statistics from the transcript here. When it did
//      (nw_align_modified is true) all statistic outputs are returned as zero
//      and align_length is left untouched.
// Returns the Dense-seg produced by s_DoNWalign.
456  const CSeq_align& input_hit,
457  TSeqPos& num_total_mismatch,
458  TSeqPos& num_3end_mismatch,
459  TSeqPos& num_total_gap,
460  TSeqPos& num_3end_gap,
461  bool is_left_primer,
462  int& max_num_continuous_match,
463  int& align_length,
464  TSeqPos master_local_start,
465  TSeqPos master_local_stop,
466  ENa_strand hit_strand,
467  bool& nw_align_modified) {
468  /* unique_ptr<CObjectOStream> out(CObjectOStream::Open(eSerial_AsnText, cerr));
469  cerr << endl << "input_hit:" << endl;
470  *out << input_hit;
471  cerr << endl;*/
472 
473  string master_seq;
// NOTE(review): listing line 474 (object on which GetSeqData is called) is
// missing; the call fills master_seq for the primer window.
475  GetSeqData(desired_align_range.GetFrom(),
476  desired_align_range.GetTo() + 1, master_seq);
477  // cerr << "desired_align_range.GetFrom=" << desired_align_range.GetFrom() << endl;
478  //cerr << "desired_align_range.Getto=" << desired_align_range.GetTo() << endl;
479  string hit_seq;
480  //global hit start
481  TSeqPos hit_full_start;
482  TSeqPos hit_full_stop;
483 
484 
// Part of the primer window that the existing local alignment covers.
485  TSeqPos full_master_start = max(desired_align_range.GetFrom(),
486  master_local_start);
487 
488  TSeqPos full_master_stop = min(desired_align_range.GetTo(), master_local_stop);
489 
490  CRef<CAlnVec> av(new CAlnVec(input_hit.GetSegs().GetDenseg(), *m_Scope));
491 
492  CRange<int> full_master_range(full_master_start, full_master_stop);
493 
494 
// NOTE(review): listing lines 497 and 499 (remaining GetSeqChunks arguments
// and the declaration of chunk_ref) are missing.
495  CRef<CAlnMap::CAlnChunkVec> master_chunk =
496  av->GetSeqChunks(0, full_master_range,
498 
500  int longest_chunk_index = 0;
501  int longest_chunk_size = 0;
502  //find the chunk that is the longest aligned region. We will use that as
503  //basis for finding the subject region for global alignment
504  for (int chunk_index = 0; chunk_index < master_chunk->size(); chunk_index ++) {
505 
506  chunk_ref = (*master_chunk)[chunk_index];
507  if (!chunk_ref->IsGap()) {
508  if (chunk_ref->GetAlnRange().GetLength() > longest_chunk_size) {
509  longest_chunk_size = chunk_ref->GetAlnRange().GetLength();
510  longest_chunk_index = chunk_index;
511 
512  }
513  }
514  /* cerr << "longest size=" << longest_chunk_size
515  << " longest index =" << longest_chunk_index
516  << " start = " << chunk_ref->GetAlnRange().GetFrom()
517  << " stop =" << chunk_ref->GetAlnRange().GetTo() << endl;*/
518  }
// NOTE(review): index 0 is used when no non-gap chunk was found, and the
// vector is indexed without an emptiness check - confirm it cannot be empty.
519  CRange<int> longest_chunk_range = (*master_chunk)[longest_chunk_index]->GetRange();
520 
// Distance from each end of the primer window to the chosen chunk, in master
// coordinates; the hit region is widened by the same amounts.
521  int hit_start_adjust = longest_chunk_range.GetFrom() -
522  desired_align_range.GetFrom();
523  int hit_stop_adjust = desired_align_range.GetTo() -
524  longest_chunk_range.GetTo();
525 
// Project the chunk ends onto the hit row and widen, clamped to
// [0, hit length - 1]. On the minus strand the master start maps to the hit
// stop, so the two adjustments swap.
526  if(av->IsPositiveStrand(1)) {
527  hit_full_start =
528  max((int)(av->GetSeqPosFromSeqPos(1, 0, longest_chunk_range.GetFrom(),
529  CAlnMap::eBackwards, true) -
530  hit_start_adjust), 0);
531 
532  hit_full_stop =
533  min(int(av->GetSeqPosFromSeqPos(1, 0, longest_chunk_range.GetTo(),
534  CAlnMap::eBackwards, true) +
535  hit_stop_adjust), (int)av->GetBioseqHandle(1).
536  GetBioseqLength() - 1);
537  } else {
538  hit_full_start =
539  max((int)(av->GetSeqPosFromSeqPos(1, 0, longest_chunk_range.GetTo(),
540  CAlnMap::eBackwards, true) -
541  hit_stop_adjust), 0);
542 
543  hit_full_stop =
544  min(int(av->GetSeqPosFromSeqPos(1, 0, longest_chunk_range.GetFrom(),
545  CAlnMap::eBackwards, true) +
546  hit_start_adjust),
547  (int)av->GetBioseqHandle(1).GetBioseqLength() - 1);
548 
549  }
550  /*cerr << "longest_chunk_range.GetFrom=" << longest_chunk_range.GetFrom() << endl;
551  cerr << "longest_chunk_range.GetTo=" << longest_chunk_range.GetTo() << endl;
552  cerr << "hit_start_adjust="<< hit_start_adjust << endl;
553  cerr << "hit_stop_adjust="<< hit_stop_adjust << endl;
554  cerr << "full_master_start =" << full_master_start << endl;
555  cerr << "full_master_stop =" << full_master_stop << endl;
556  cerr << "hit_full_start =" << hit_full_start << endl;
557  cerr << "hit_full_stop =" << hit_full_stop << endl;
558  */
559  string xcript;
560  nw_align_modified = false;
561  CRef<objects::CDense_seg> den_ref = s_DoNWalign(desired_align_range,
562  master_seq, *av, hit_full_start,
563  hit_full_stop,
564  hit_strand, xcript, nw_align_modified);
565 
566 
567  //sometimes master seq end aligns to a gap which should be deleted on master row
568  //but should be extended on slave row
569 
// "match" counts matches inside the 3' end window; it is not reported to the
// caller.
570  TSeqPos match = 0;
571  TSeqPos total_mismatch = 0;
572  TSeqPos total_insertion = 0;
573  TSeqPos total_deletion = 0;
574  TSeqPos mismatch_3end = 0;
575  TSeqPos insertion_3end = 0;
576  TSeqPos deletion_3end = 0;
577  TSeqPos num_master_gap = 0;
578  int num_continuous_match = 0;
579  max_num_continuous_match = 0;
580 
// The transcript is only analysed when it still describes den_ref, i.e. when
// s_DoNWalign did not rebuild the Dense-seg.
581  if (!nw_align_modified) {
582 
583 
584  align_length = (int)xcript.size();
585 
// First pass: count 'I' symbols so the number of master bases is known.
586  ITERATE(string, iter, xcript) {
587  switch(*iter) {
588  case 'I':
589  ++ num_master_gap;
590  break;
591  default:
592  break;
593  }
594  }
595 
596  TSeqPos master_letter_len = (TSeqPos)xcript.size() - num_master_gap;
// Second pass: classify each symbol. num_bp is the position compared against
// the 3' end window (last bases when is_left_primer, first bases otherwise).
// NOTE(review): 'I' was excluded from master_letter_len above as a master
// gap, yet the 'I' case below advances num_bp while the 'D' case does not -
// confirm which symbol is meant to consume a master base.
597  int num_bp = 0;
598  ITERATE(string, the_iter, xcript) {
599  switch(*the_iter) {
600 
601  case 'R':
602  if (is_left_primer) {
603  if (num_bp >= (int)(master_letter_len - m_Hits->m_MismatchRegionLength3End)){
604  mismatch_3end ++;
605  }
606  } else {
607  if (num_bp < (int)m_Hits->m_MismatchRegionLength3End){
608  mismatch_3end ++;
609  }
610  }
611  total_mismatch ++;
612  num_bp ++;
613  num_continuous_match = 0;
614  break;
615 
616  case 'M':
617  if (is_left_primer) {
618  if (num_bp >=(int)(master_letter_len - m_Hits->m_MismatchRegionLength3End)){
619  match ++;
620  }
621  } else {
622  if (num_bp < (int)m_Hits->m_MismatchRegionLength3End){
623  match ++;
624  }
625  }
626  num_bp ++;
627  num_continuous_match ++;
628  if (max_num_continuous_match < num_continuous_match) {
629  max_num_continuous_match = num_continuous_match;
630  }
631  break;
632 
633  case 'I':
634  if (is_left_primer) {
635  if (num_bp > (int)(master_letter_len - m_Hits->m_MismatchRegionLength3End)){
636  insertion_3end ++;
637  }
638  } else {
639  if (num_bp < (int)(m_Hits->m_MismatchRegionLength3End - 1)){
640  insertion_3end ++;
641  }
642  }
643  total_insertion ++;
644  num_bp ++;
645  num_continuous_match = 0;
646  break;
647 
648  case 'D':
649  if (is_left_primer) {
650  if (num_bp > (int)(master_letter_len - m_Hits->m_MismatchRegionLength3End)){
651  deletion_3end ++;
652  }
653  } else {
654  if (num_bp < (int)(m_Hits->m_MismatchRegionLength3End - 1)){
655  deletion_3end ++;
656  }
657  }
658  total_deletion ++;
659  num_continuous_match = 0;
660  break;
661  }
662  }
663 
664  }
665  /* cout << "length =" << xcript.size() << endl;
666  cout << "mis = " << mismatch << endl;
667  cout << "match = " << match << endl;
668  cout << "insert = " << insertion << endl;
669  cout << "delete = " << deletion << endl;*/
// Insertions and deletions are reported together as gaps.
670  num_total_mismatch = total_mismatch;
671  num_total_gap = total_insertion + total_deletion;
672  num_3end_mismatch = mismatch_3end;
673  num_3end_gap = insertion_3end + deletion_3end;
674 
675  return den_ref;
676 }
677 
678 
// Produce (or fetch from m_Cache) the alignment of one primer window against
// one HSP, together with its mismatch and gap statistics.
//
//   desired_align_range  primer window on the master
//   input_hsp_info       HSP to align against (alignment plus master and
//                        slave ranges)
//   num_total_mismatch, num_3end_mismatch, num_total_gap, num_3end_gap
//                        out: statistics; zero when the window does not
//                        overlap the HSP enough to be considered
//   is_left_primer       see the note below
//   index                hit index, part of the cache key
//   hit_strand           strand of the hit, part of the cache key
//
// Returns the accepted alignment, or a null reference when the window
// overlaps the HSP by less than k_MinOverlapLenFactor of its length or the
// alignment fails the acceptance thresholds.
//
// NOTE(review): the return-type line (listing line 684) and listing lines
// 730, 765 and 817 are missing from this listing.
679 /*is_left_primer controls whether mismatch is at the 5' (pass false) or 3' end (pass true).
680 pass true for left or false for right primer if both primers are on the same strand.
681 For self primers...for left primer window, pass true for both left and right primers.
682 For right primers, pass false for both
683 */
685 x_FillGlobalAlignInfo(const CRange<TSeqPos>& desired_align_range,
686  SHspInfo* input_hsp_info,
687  TSeqPos& num_total_mismatch,
688  TSeqPos& num_3end_mismatch,
689  TSeqPos& num_total_gap,
690  TSeqPos& num_3end_gap,
691  bool is_left_primer,
692  TSeqPos index,
693  ENa_strand hit_strand)
694 {
695 
696  num_total_mismatch = 0;
697  num_3end_mismatch = 0;
698  num_total_gap = 0;
699  num_3end_gap = 0;
700  int max_num_continuous_match = 0;
701  CRef<CSeq_align> global_align(NULL);
702  CConstRef<CSeq_align> input_hit = input_hsp_info->hsp;
703  CRange<TSeqPos> master_range = input_hsp_info->master_range;
704  CRange<TSeqPos> hit_range = input_hsp_info->slave_range;
705 
706  CRange<TSeqPos> primer_master_overlap = desired_align_range.
707  IntersectionWith(master_range);
708  //check if primer loc overlaps with hits
709  if (primer_master_overlap.GetLength() >=
710  k_MinOverlapLenFactor*desired_align_range.GetLength()) {
// Cache key: hit index, primer window, HSP start on both rows, strand and
// primer side.
711  SAlnCache cache_id;
712 
713  cache_id.hit_id = index;
714  cache_id.primer_start = desired_align_range.GetFrom();
715  cache_id.primer_stop = desired_align_range.GetTo();
716  cache_id.master_start= master_range.GetFrom();
717  cache_id.hit_start = hit_range.GetFrom();
718  if (hit_strand == eNa_strand_minus) {
719  cache_id.is_positive_strand = false;
720  } else {
721  cache_id.is_positive_strand = true;
722  }
723 
724  if(is_left_primer) {
725  cache_id.is_left_primer = true;
726  } else {
727  cache_id.is_left_primer = false;
728  }
729 
// NOTE(review): listing line 730 (the declaration of ii, presumably a lookup
// of cache_id in m_Cache) is missing.
731 
732  if(ii != m_Cache.end() ) {
733  //already cached
734  SPrimerMatch tmp = (*ii).second;
735  global_align = tmp.aln;
736  num_total_mismatch = tmp.num_total_mismatch;
737  num_3end_mismatch = tmp.num_3end_mismatch;
738  num_total_gap = tmp.num_total_gap;
739  num_3end_gap = tmp.num_3end_gap;
740  } else {//new range and hit
741  TSeqPos master_local_start = input_hsp_info->master_range.GetFrom();
742  TSeqPos master_local_stop = input_hsp_info->master_range.GetTo();
743  bool do_global_alignment = true;
// Fast path: the HSP already covers the whole primer window, so the window is
// sliced out of the existing alignment instead of realigning.
744  if (desired_align_range.GetFrom() >= master_local_start &&
745  desired_align_range.GetTo() <= master_local_stop) {
746  do_global_alignment = false;
747 
748  CRef<CDense_seg> primer_denseg =
749  input_hit->GetSegs().GetDenseg().ExtractSlice(0, desired_align_range.GetFrom(),
750  desired_align_range.GetTo());
751  CRange<TSignedSeqPos> aln_range;
// do_global_alignment is passed as the end_gap output: the callee turns it
// back to true when the slice ends in a gap or holds too many gaps, which
// sends control to the realignment branch further down.
752  x_FindMatchInfoForAlignment(*primer_denseg, do_global_alignment, num_total_mismatch,
753  num_3end_mismatch, num_total_gap,num_3end_gap,
754  is_left_primer,
755  max_num_continuous_match, aln_range);
756 
757  if (!do_global_alignment) {
758  double percent_ident = 1 - ((double)(num_total_mismatch + num_total_gap))/aln_range.GetLength();
// Acceptance test: a long enough exact run, identity above the floor, fewer
// than m_MaxMismatch differences, and either the total or the 3' end
// difference count within its allowed limit.
759  if (max_num_continuous_match >= m_Hits->m_WordSize &&
760  percent_ident > k_Min_Percent_Identity &&
761  num_total_mismatch + num_total_gap < m_Hits->m_MaxMismatch &&
762  (num_total_mismatch + num_total_gap <= m_Hits->m_AllowedTotalMismatch ||
763  num_3end_mismatch + num_3end_gap <= m_Hits->m_Allowed3EndMismatch)) {
764  CRef<CSeq_align> aln_ref(new CSeq_align());
// NOTE(review): listing line 765 is missing.
766 
767  aln_ref->SetSegs().SetDenseg(*primer_denseg);
768 
769  global_align = aln_ref;
770  }
771  }
772  }
773  //only extend if the hit alignment does not completely covers the primer window
774  if (do_global_alignment) {
775  int align_length = 1;
776  bool nw_align_modified = false;
777  num_total_mismatch = 0;
778  num_3end_mismatch = 0;
779  num_total_gap = 0;
780  num_3end_gap = 0;
781  max_num_continuous_match = 0;
782  double percent_ident;
783  CRef<CDense_seg> den_ref = x_NW_alignment(desired_align_range, *input_hit,
784  num_total_mismatch, num_3end_mismatch,
785  num_total_gap, num_3end_gap,
786  is_left_primer, max_num_continuous_match,
787  align_length,
788  master_local_start,
789  master_local_stop, hit_strand,
790  nw_align_modified);
// When the realignment edited its transcript, x_NW_alignment returns no
// statistics, so they are recomputed from the final Dense-seg. The end_gap
// result written into do_global_alignment is not consulted afterwards.
791  if (nw_align_modified) {
792  align_length = 1;
793  num_total_mismatch = 0;
794  num_3end_mismatch = 0;
795  num_total_gap = 0;
796  num_3end_gap = 0;
797  max_num_continuous_match = 0;
798  CRange<TSignedSeqPos> aln_range;
799  x_FindMatchInfoForAlignment(*den_ref, do_global_alignment,
800  num_total_mismatch,
801  num_3end_mismatch, num_total_gap,num_3end_gap,
802  is_left_primer,
803  max_num_continuous_match, aln_range);
804 
805  percent_ident = 1 - ((double)(num_total_mismatch + num_total_gap))/aln_range.GetLength();
806 
807  } else {
808  percent_ident = 1 - ((double)(num_total_mismatch + num_total_gap))/align_length;
809  }
// Same acceptance test as on the fast path.
810  if (max_num_continuous_match >= m_Hits->m_WordSize &&
811  percent_ident > k_Min_Percent_Identity &&
812  num_total_mismatch + num_total_gap < m_Hits->m_MaxMismatch &&
813  (num_total_mismatch + num_total_gap <= m_Hits->m_AllowedTotalMismatch ||
814  num_3end_mismatch + num_3end_gap <= m_Hits->m_Allowed3EndMismatch)) {
815 
816  CRef<CSeq_align> aln_ref(new CSeq_align());
// NOTE(review): listing line 817 is missing.
818 
819  aln_ref->SetSegs().SetDenseg(*den_ref);
820 
821  // unique_ptr<CObjectOStream> out(CObjectOStream::Open(eSerial_AsnText, cout));
822  // *out << *aln_ref;
823 
824  global_align = aln_ref;
825  }
826  }
827  }
// Store the outcome, including a rejected (null) alignment. This also runs
// after a cache hit, rewriting the entry with the values just read from it.
828  SPrimerMatch temp_match;
829  temp_match.num_total_mismatch = num_total_mismatch;
830  temp_match.num_3end_mismatch = num_3end_mismatch;
831  temp_match.num_total_gap = num_total_gap;
832  temp_match.num_3end_gap = num_3end_gap;
833  temp_match.aln = global_align;
834 
835  m_Cache[cache_id] = temp_match;
836  /* m_Cache.insert(map<string, SPrimerMatch >::
837  value_type(cache_id, temp_match));
838  */
839  //m_AlignCache[cache_id.c_str()] = temp_match;
840 
841  }
842 
843  return global_align;
844 }
845 
846 //determine if template and the hits map to the same chromosome location. Note this only
847 //does the check if template or hits are chromosome
//
// One of the two sequences is treated as the "backbone" and the other as the
// "component". The backbone location is mapped down through its sequence map
// and compared with the component location; the result is true when one
// contains the other or they are the same.
//
// NOTE(review): the first signature line (listing line 848, which declares
// the hit_type parameter used below) and listing lines 853-855, 857, 866,
// 869-870, 884-885, 896-897, 900, 903-904 and 922 are missing from this
// listing. The conditions of the first and last branches and the template
// coordinates passed to CSeq_loc are therefore not visible.
849  const CSeq_align& left_align,
850  const CSeq_align& right_align) {
851  bool same_target = false;
// m_FeatureScope is created on first use.
// NOTE(review): the construction itself is on the missing lines. The cerr
// line below writes a diagnostic to standard error every time that happens.
852  if(!m_FeatureScope) {
856  string name = CGBDataLoader::GetLoaderNameFromArgs();
858  cerr << "mapping triggered" << endl;
859  }
860 
861  //the backbone such as chromosome
862  CRef<CSeq_loc> backbone_loc (0);
863  CRef<CSeq_loc> component_loc (0);
864  //try backbone and component on template or hit as we don't know which is which
865  //at least hit or template needs to be chr or contig
// In every branch the hit location spans from the smaller start to the larger
// stop of the left and right alignments on row 1.
867  //template as backbone
868  backbone_loc = new CSeq_loc((CSeq_loc::TId &) *(m_Hits->m_Id),
871  component_loc = new CSeq_loc((CSeq_loc::TId &) left_align.GetSeq_id(1),
872  (CSeq_loc::TPoint) min(left_align.GetSeqRange(1).GetFrom(),
873  right_align.GetSeqRange(1).GetFrom()),
874  (CSeq_loc::TPoint) max(left_align.GetSeqRange(1).GetTo(),
875  right_align.GetSeqRange(1).GetTo()));
876  } else if ((hit_type & CSeq_id::eAcc_division_mask) == CSeq_id::eAcc_chromosome) {
877  //hit as backbone
878  backbone_loc = new CSeq_loc((CSeq_loc::TId &)left_align.GetSeq_id(1),
879  (CSeq_loc::TPoint) min(left_align.GetSeqRange(1).GetFrom(),
880  right_align.GetSeqRange(1).GetFrom()),
881  (CSeq_loc::TPoint) max(left_align.GetSeqRange(1).GetTo(),
882  right_align.GetSeqRange(1).GetTo()));
883  component_loc = new CSeq_loc((CSeq_loc::TId &) *(m_Hits->m_Id),
886 
887 
888  } else if ((hit_type & CSeq_id::eAcc_division_mask) == CSeq_id::eAcc_con) {
889  //hit as backbone
890  backbone_loc = new CSeq_loc((CSeq_loc::TId &)left_align.GetSeq_id(1),
891  (CSeq_loc::TPoint) min(left_align.GetSeqRange(1).GetFrom(),
892  right_align.GetSeqRange(1).GetFrom()),
893  (CSeq_loc::TPoint) max(left_align.GetSeqRange(1).GetTo(),
894  right_align.GetSeqRange(1).GetTo()));
895  component_loc = new CSeq_loc((CSeq_loc::TId &) *(m_Hits->m_Id),
898 
899 
901  //template as backbone
902  backbone_loc = new CSeq_loc((CSeq_loc::TId &) *(m_Hits->m_Id),
905  component_loc = new CSeq_loc((CSeq_loc::TId &) left_align.GetSeq_id(1),
906  (CSeq_loc::TPoint) min(left_align.GetSeqRange(1).GetFrom(),
907  right_align.GetSeqRange(1).GetFrom()),
908  (CSeq_loc::TPoint) max(left_align.GetSeqRange(1).GetTo(),
909  right_align.GetSeqRange(1).GetTo()));
910  }
911 
// Both locations stay null when no branch above applied; the function then
// reports false.
912  if (backbone_loc && component_loc) {
913  CSeq_id_Handle backbone_idh = sequence::GetIdHandle(*backbone_loc, m_FeatureScope);
914  CBioseq_Handle backbone_handle = m_FeatureScope->GetBioseqHandle(backbone_idh);
// Map the backbone location down onto the sequences it is built from.
915  CSeq_loc_Mapper mapper(1, backbone_handle, CSeq_loc_Mapper::eSeqMap_Down);
916  mapper.KeepNonmappingRanges();
917  CRef<CSeq_loc> backbone_component = mapper.Map(*backbone_loc);
918 
919  if (backbone_component) {
// NOTE(review): listing line 922 (remaining Compare arguments) is missing.
920  sequence::ECompare compare_result =
921  sequence::Compare(*backbone_component, *component_loc,
923  if ( compare_result == sequence::eContains ||
924  compare_result == sequence::eContained ||
925  compare_result == sequence::eSame) {
926 
927  same_target = true;
928  }
929  }
930  }
931 
932  return same_target;
933 }
934 
935 
936 static bool SeqLocAllowed(const list<CRef<CSeq_loc> >& allowed_seq,
937  const CSeq_id& hit_id,
938  const CRange<TSeqPos>& hit_range,
939  CScope& scope) {
940  bool allowed = false;
941  ITERATE(list<CRef<CSeq_loc> >, iter, allowed_seq) {
942  if(IsSameBioseq(*((*iter)->GetId()), hit_id, &scope) &&
943  (*iter)->GetTotalRange().IntersectionWith(hit_range).GetLength() >= hit_range.GetLength()*0.95){
944  allowed = true;
945  break;
946  }
947  }
948 
949  return allowed;
950 }
951 
// NOTE(review): the first signature line (listing line 952: return type,
// qualified name and the first parameter, used below as left_align) is
// missing from this listing, as are listing lines 968-969, 994, 996, 1007,
// 1013-1021, 1034, 1043, 1046 and 1057-1058.
//
// Packs the left/right primer alignments and their mismatch and gap counts
// into an info record and files that record according to what the hit is.
// The visible branches store it in m_SelfHit[m_CurrentPrimerIndex]; the
// stores for allowed, splice-variant and non-specific hits are on the
// missing lines.
953  CSeq_align& right_align,
954  TSeqPos left_total_mismatch,
955  TSeqPos left_3end_mismatch,
956  TSeqPos left_total_gap,
957  TSeqPos left_3end_gap,
958  TSeqPos right_total_mismatch,
959  TSeqPos right_3end_mismatch,
960  TSeqPos right_total_gap,
961  TSeqPos right_3end_gap,
962  int product_len,
963  TSeqPos index,
964  bool is_self_forward_primer,
965  bool is_self_reverse_primer)
966 {
967 
// NOTE(review): the declaration of info (listing lines 968-969) is missing.
// The local "right" below is not referenced in the visible code.
970  CConstRef<CSeq_align> right;
971 
972  info.product_len = product_len;
973  info.left_total_mismatch = left_total_mismatch;
974  info.left_total_gap = left_total_gap;
975  info.left_3end_mismatch = left_3end_mismatch;
976  info.left_3end_gap = left_3end_gap;
977 
978  info.right_total_mismatch = right_total_mismatch;
979  info.right_total_gap = right_total_gap;
980  info.right_3end_mismatch = right_3end_mismatch;
981  info.right_3end_gap = right_3end_gap;
982  info.aln.first = &left_align;
983  info.aln.second = &right_align;
984  info.index = index;
985  info.self_forward_primer = is_self_forward_primer;
986  info.self_reverse_primer = is_self_reverse_primer;
987 
988 
// Does each primer alignment land inside the template's own range on the hit?
989  bool left_template_aln_overlap = m_Hits->m_TemplateRange.IntersectingWith(left_align.GetSeqRange(1));
990  bool right_template_aln_overlap = m_Hits->m_TemplateRange.IntersectingWith(right_align.GetSeqRange(1));
991 
992  bool template_hit_same_id = IsSameBioseq(*(m_Hits->m_TemplateHandle.GetSeqId()), left_align.GetSeq_id(1), m_Scope);
// NOTE(review): most of the expression that initialises hit_wid is on the
// missing lines.
993  const CRef<CSeq_id> hit_wid
995  GetBioseqCore()->GetId(),
997  CSeq_id::EAccessionInfo hit_type = hit_wid->IdentifyAccession();
// Span on the hit from the smaller start to the larger stop of both primer
// alignments.
998  CRange<TSeqPos> hit_range (min(left_align.GetSeqRange(1).GetFrom(),
999  right_align.GetSeqRange(1).GetFrom()),
1000  max(left_align.GetSeqRange(1).GetTo(),
1001  right_align.GetSeqRange(1).GetTo()));
1002 
1003  //self hits
1004  if (template_hit_same_id && left_template_aln_overlap && right_template_aln_overlap) {
1005 
1006  m_SelfHit[m_CurrentPrimerIndex].push_back(info);
// NOTE(review): the start of the next branch condition (listing line 1007)
// is missing; its visible tail passes hit_wid, hit_range and the scope to a
// call.
1008  *hit_wid,
1009  hit_range, *m_Scope)){
1010  m_SelfHit[m_CurrentPrimerIndex].push_back(info);
1011 
// NOTE(review): listing lines 1013-1021 of this condition are missing.
// x_SequencesMappedToSameTarget is the last operand of the && chain, so it is
// only called when the operands before it hold.
1012  } else if (m_Hits->m_Id->Which() != CSeq_id::e_Local &&
1022  hit_type != m_Hits->m_TemplateType &&
1023  !template_hit_same_id &&
1024  x_SequencesMappedToSameTarget(hit_type, left_align, right_align)) {
1025  //try mapping the template
1026  m_SelfHit[m_CurrentPrimerIndex].push_back(info);
1027  cerr << "self hit by mapping" << endl;
1028 
1029  } else {
1030  bool hit_assigned = false;
1031  //allowed hits
1032  ITERATE(vector<TSeqPos>, iter, m_Hits->m_AllowedSeqidIndex) {
1033  if (index == *iter) {
// NOTE(review): the store for an allowed hit (listing line 1034) is missing.
1035  hit_assigned = true;
1036  break;
1037  }
1038  }
1039 
1040 
1041  //transcript variants
1042  if (m_Hits->m_Allowed_Splice_Variants && !hit_assigned) {
// NOTE(review): the loop header (listing line 1043) and the store (listing
// line 1046) are missing.
1044  if(IsSameBioseq(**iter, left_align.GetSeq_id(1), m_Scope)) {
1045 
1047  hit_assigned = true;
1048  break;
1049  }
1050  }
1051  }
1052 
1053 
1054  //non specific hit
1055  if (!hit_assigned) {
1056 
// NOTE(review): the store for a non-specific hit (listing lines 1057-1058)
// is missing.
1059  }
1060  }
1061 
1062 }
1063 
// Ordering predicate over two HSP records: compares the position at which
// each record's master_range begins (GetFrom()).
// NOTE(review): the result is computed with "<=", so two records with the
// same start each compare as "not after" the other. Standard sorting
// algorithms expect a strict ordering ("<"); confirm how this predicate is
// used before relying on it with std::sort / std::stable_sort.
1064 static bool
1066  const SHspInfo* info2)
1067 {
1068  int start1 = 0, start2 = 0;
1069 
1070  start1 = info1->master_range.GetFrom();
1071  start2 = info2->master_range.GetFrom();
1072 
1073  return start1 <= start2;
1074 
1075 }
1076 
1077 
1078 
// Ordering predicate: true when info1's bit_score is strictly greater than
// info2's, i.e. entries with the higher score are ranked first.
1079 static bool
1082 {
1083 
1084  return info1.bit_score > info2.bit_score;
1085 
1086 }
1087 
1088 
// Collects the HSPs of one hit that cover the left and the right primer
// windows.
//
// An HSP qualifies for a window when the intersection of its master_range
// with the desired window range is at least k_MinOverlapLenFactor times the
// window length. For every qualifying HSP its index into hsp_list and its
// bit_score are stored in the matching output array.
//
// left_window_index_list / right_window_index_list:
//     caller-provided output arrays, filled from position 0.
//     NOTE(review): no capacity is checked here - presumably the caller sizes
//     these arrays for the largest possible number of HSPs; confirm.
// left_window_index_list_size / right_window_index_list_size:
//     reset to 0 on entry, set to the number of entries written.
// left_window_desired_range / right_window_desired_range:
//     the two window ranges, in master coordinates.
// hit_strand, hit_index:
//     only read when m_Hits->m_UseITree is set; they select which interval
//     tree is queried.
// hsp_list:
//     the HSPs the stored indices refer to.
//
// Before returning, each non-empty output list is sorted with
// SortIndexListByScoreDescending.
1090 x_FindOverlappingHSP(SHspIndexInfo* left_window_index_list,
1091  int& left_window_index_list_size,
1092  SHspIndexInfo* right_window_index_list,
1093  int& right_window_index_list_size,
1094  const CRange<TSeqPos>& left_window_desired_range,
1095  const CRange<TSeqPos>& right_window_desired_range,
1096  ENa_strand hit_strand,
1097  TSeqPos hit_index,
1098  const vector<SHspInfo*>& hsp_list) {
1099  left_window_index_list_size = 0;
1100  right_window_index_list_size = 0;
1101  if (m_Hits->m_UseITree) {
1102 
 // the interval tree is queried with CRange<int>, so copy both window
 // ranges into int ranges first
1103  CRange<int> left_window_desired_range_int;
1104 
1105  left_window_desired_range_int.SetFrom(left_window_desired_range.GetFrom());
1106  left_window_desired_range_int.SetTo(left_window_desired_range.GetTo());
1107  CRange<int> right_window_desired_range_int;
1108 
1109  right_window_desired_range_int.SetFrom(right_window_desired_range.GetFrom());
1110  right_window_desired_range_int.SetTo(right_window_desired_range.GetTo());
1111  CIntervalTree::const_iterator left_window_tree_it;
1112  CIntervalTree::const_iterator right_window_tree_it;
 // one tree per hit and per strand: pick the list that matches hit_strand
1113  if (hit_strand == eNa_strand_minus) {
1114  left_window_tree_it = m_Hits->m_RangeTreeListMinusStrand[hit_index]->IntervalsOverlapping(left_window_desired_range_int);
1115  right_window_tree_it = m_Hits->m_RangeTreeListMinusStrand[hit_index]->IntervalsOverlapping(right_window_desired_range_int);
1116 
1117  } else {
1118  left_window_tree_it = m_Hits->m_RangeTreeListPlusStrand[hit_index]->IntervalsOverlapping(left_window_desired_range_int);
1119  right_window_tree_it = m_Hits->m_RangeTreeListPlusStrand[hit_index]->IntervalsOverlapping(right_window_desired_range_int);
1120  }
1121 
 // the tree only reports "some overlap"; the minimum-overlap rule is
 // applied here using the HSP the tree value points at (temp->index)
1122  for (; left_window_tree_it; ++ left_window_tree_it) {
1123  CConstRef<SHspIndex> temp (static_cast<const SHspIndex*> (&*left_window_tree_it.GetValue()));
1124  if (hsp_list[temp->index]->master_range.IntersectionWith(left_window_desired_range).GetLength() >=
1125  k_MinOverlapLenFactor*left_window_desired_range.GetLength()) {
1126  left_window_index_list[left_window_index_list_size].index = temp->index;
1127  left_window_index_list[left_window_index_list_size].bit_score = hsp_list[temp->index]->bit_score;
1128  left_window_index_list_size ++;
1129  }
1130  }
 // same filtering for the right window
1131  for (; right_window_tree_it; ++ right_window_tree_it) {
1132  CConstRef<SHspIndex> temp (static_cast<const SHspIndex*> (&*right_window_tree_it.GetValue()));
1133  if (hsp_list[temp->index]->master_range.IntersectionWith(right_window_desired_range).GetLength() >=
1134  k_MinOverlapLenFactor*right_window_desired_range.GetLength()) {
1135  right_window_index_list[right_window_index_list_size].index = temp->index;
1136  right_window_index_list[right_window_index_list_size].bit_score = hsp_list[temp->index]->bit_score;
1137  right_window_index_list_size ++;
1138  }
1139  }
1140 
1141  } else {
1142 
 // no interval tree: scan hsp_list linearly and test both windows per HSP
1143  for (int i = 0; i <(int) hsp_list.size(); i ++ ) {
1144  //quit if master range is beyond the desired range already as
1145  //hsp is already sorted according to master start.
 // NOTE(review): only the end of the right window is tested, which
 // presumably relies on the right window lying after the left one - confirm
1146  if (hsp_list[i]->master_range.GetFrom() >= right_window_desired_range.GetTo()) {
1147  break;
1148  }
1149  if (hsp_list[i]->master_range.IntersectionWith(left_window_desired_range).
1150  GetLength() >= k_MinOverlapLenFactor*left_window_desired_range.GetLength()) {
1151  left_window_index_list[left_window_index_list_size].index = i;
1152  left_window_index_list[left_window_index_list_size].bit_score = hsp_list[i]->bit_score;
1153  left_window_index_list_size ++;
1154  }
1155 
1156  if (hsp_list[i]->master_range.IntersectionWith(right_window_desired_range).
1157  GetLength() >= k_MinOverlapLenFactor*right_window_desired_range.GetLength()) {
1158  right_window_index_list[right_window_index_list_size].index = i;
1159  right_window_index_list[right_window_index_list_size].bit_score = hsp_list[i]->bit_score;
1160  right_window_index_list_size ++;
1161  }
1162  }
1163  }
1164 
1165  //sort the index within the list such that hsp with higher score comes first
1166  //to avoid potential loss of more significant matches
1167 
1168  if (left_window_index_list_size > 0) {
1169  stable_sort(left_window_index_list, left_window_index_list + left_window_index_list_size, SortIndexListByScoreDescending);
1170  }
1171 
1172  if (right_window_index_list_size > 0) {
1173  stable_sort(right_window_index_list, right_window_index_list + right_window_index_list_size, SortIndexListByScoreDescending);
1174  }
1175 
1176 }
1177 
// Analyzes one hit (hit_index) for primer matches on both strands.
//
// sorted_hsp.first is handled as the plus-strand HSP list and
// sorted_hsp.second as the minus-strand HSP list (see the eNa_strand_plus /
// eNa_strand_minus arguments below). The strand whose first HSP has the
// clearly higher bit_score is passed to x_AnalyzeLeftAndRightPrimer first,
// then the other strand; finally x_AnalyzeOnePrimer is called once with
// both lists.
1179  TSeqPos hit_index)
1180 {
1181  //our primer input uses locations on the same strand notation so a valid pcr product
1182  //can only have primers on the same strand for left and right primers
1183  //because we are not using primers as is as query. we use template or for primer only case
1184  //we complement right primer and connect it with left primer (as is ) to generate an artificial query
1185  // bool hsp_on_minus_strand = (hit_strand == eNa_strand_minus ? true : false);
1186 
 // number of entries in each of the four "HSP overlapping with primer" lists
1187  int HspOverlappingWithLeftPrimer_size;
1188  int HspOverlappingWithRightPrimer_size;
1189  int HspOverlappingWithLeftPrimerMinusStrand_size;
1190  int HspOverlappingWithRightPrimerMinusStrand_size;
1191 
1192 
1194  HspOverlappingWithLeftPrimer_size,
1196  HspOverlappingWithRightPrimer_size,
1197  m_PrimerInfo->left,
1199  eNa_strand_plus, hit_index, sorted_hsp.first);
1200 
1201 
1203  HspOverlappingWithLeftPrimerMinusStrand_size,
1205  HspOverlappingWithRightPrimerMinusStrand_size,
1206  m_PrimerInfo->left,
1208  eNa_strand_minus, hit_index,
1209  sorted_hsp.second);
1210 
1211 
1212  bool analyze_plus_strand_first = true;
1213  //analyze the strand that have better hits first or we may miss more significant primer matches
1214  //because we limit the total number of non-specific matches
1215 
1216  //the -1 below is a tolerance because the bit scores are doubles
1217 
 // plus strand goes first unless it has no HSP at all, or the first
 // minus-strand HSP scores more than 1 bit higher than the first plus one
1218  if (sorted_hsp.first.size() > 0) {
1219  if (sorted_hsp.second.size() > 0 &&
1220  sorted_hsp.first[0]->bit_score < sorted_hsp.second[0]->bit_score - 1) {
1221  analyze_plus_strand_first = false;
1222  }
1223  } else {
1224  analyze_plus_strand_first = false;
1225  }
1226 
1227 
1228  if (analyze_plus_strand_first) {
1229  //analyze plus strand
1230  x_AnalyzeLeftAndRightPrimer(sorted_hsp.first,
1232  HspOverlappingWithLeftPrimer_size,
1233  HspOverlappingWithRightPrimer_size,
1234  hit_index);
1235 
1236  //analyze minus strand
1237  x_AnalyzeLeftAndRightPrimer(sorted_hsp.second,
1239  HspOverlappingWithLeftPrimerMinusStrand_size,
1240  HspOverlappingWithRightPrimerMinusStrand_size,
1241  hit_index);
1242 
1243  } else {
1244 
1245  //analyze minus strand
1246  x_AnalyzeLeftAndRightPrimer(sorted_hsp.second,
1248  HspOverlappingWithLeftPrimerMinusStrand_size,
1249  HspOverlappingWithRightPrimerMinusStrand_size,
1250  hit_index);
1251  //analyze plus strand
1252  x_AnalyzeLeftAndRightPrimer(sorted_hsp.first,
1254  HspOverlappingWithLeftPrimer_size,
1255  HspOverlappingWithRightPrimer_size,
1256  hit_index);
1257  }
1258 
1259  //only need to check the plus strand case as one primer cases are palindromic
1260  x_AnalyzeOnePrimer(sorted_hsp.first, sorted_hsp.second,
1261  HspOverlappingWithLeftPrimer_size,
1262  HspOverlappingWithRightPrimer_size,
1263  HspOverlappingWithLeftPrimerMinusStrand_size,
1264  HspOverlappingWithRightPrimerMinusStrand_size,
1265  hit_index);
1266 
1267 }
1268 
1269 
// Returns the range on row 1 of input_align that corresponds to master_range
// on row 0, using a per-hit cache.
//
// input_align:  alignment to slice; its address is part of the cache key.
// master_range: range on row 0; its From/To complete the cache key.
// index:        selects the cache (m_SlaveRangeCache[index]).
//
// On a cache miss the dense-seg of input_align is sliced with ExtractSlice
// and the row-1 range of the slice is stored in the cache. If ExtractSlice
// throws CSeqalignException the message is written to cerr and slave_range
// is cached and returned as it stands at that point.
// NOTE(review): presumably slave_range is default-constructed (empty) in that
// case, since callers test the result with Empty() - confirm.
1271 x_GetSlaveRangeGivenMasterRange(const CSeq_align& input_align,
1272  CRange<TSeqPos>& master_range,
1273  int index) {
1274 
1276  SSlaveRange cache_id;
1277  cache_id.align_index = &input_align;
1278  cache_id.master_start = master_range.GetFrom();
1279  cache_id.master_stop = master_range.GetTo();
1280  map <SSlaveRange, CRange<TSeqPos>, slave_range_sort_order>::iterator ij = m_SlaveRangeCache[index].find(cache_id);
1281 
1282  if(ij != m_SlaveRangeCache[index].end() ){
1283  // cache hit
1284  slave_range = (*ij).second;
1285  //cerr << "cached range" << endl;
1286  } else {
1287  // cerr << "no cached range" << endl;
1288  CRef<CDense_seg> denseg (NULL);
1289  try {
 // cut the alignment down to master_range on row 0, then read the
 // matching range on row 1
1290  denseg = input_align.GetSegs().GetDenseg().
1291  ExtractSlice(0, master_range.GetFrom(), master_range.GetTo());
1292  slave_range = denseg->GetSeqRange(1);
1293 
1294  } catch (CSeqalignException& e) {
1295  // if (e.GetErrCode() == CSeqalignException::eInvalidAlignment) {
1296  cerr << "ExtractSlice error = " << e.what() << endl;
1297 
1298  // }
1299  }
 // the result is cached even after a failed slice, so the same key is
 // not sliced again
1300  m_SlaveRangeCache[index][cache_id] = slave_range;
1301  }
1302 
1303  return slave_range;
1304 }
1305 
1306 //check if the left primer has a valid right primer on hits based on the distance and orientation
// Pairs every HSP that overlaps the left primer with every HSP that overlaps
// the right primer on one strand of a hit, and records the pairs that give
// an acceptable product length.
//
// hsp_list:   HSPs of the hit on the strand being analyzed.
// hit_strand: eNa_strand_minus selects the *MinusStrand index lists,
//             anything else the plus-strand lists.
// HspOverlappingWithLeftPrimer_size / HspOverlappingWithRightPrimer_size:
//             number of valid entries in the selected left / right index
//             list.
// hit_index:  index of the hit; forwarded to the helpers and to
//             x_SavePrimerInfo.
//
// A pair is first screened with a product length estimated from the
// subject (slave) ranges; only pairs passing that screen get their global
// primer alignments computed. The final decision is made by
// x_IsPcrLengthInRange and accepted pairs are stored with x_SavePrimerInfo.
1308 x_AnalyzeLeftAndRightPrimer(const vector<SHspInfo*>& hsp_list,
1309  ENa_strand hit_strand,
1310  int HspOverlappingWithLeftPrimer_size,
1311  int HspOverlappingWithRightPrimer_size,
1312  TSeqPos hit_index)
1313 {
1314  //save right primer slave range to avoid repeated map lookup which is slow
1315 
 // nothing is computed here when there is no left HSP at all
1316  vector<CRange<TSeqPos> > right_slave_range_array(HspOverlappingWithRightPrimer_size);
1317  for (int j = 0; HspOverlappingWithLeftPrimer_size > 0 && j < HspOverlappingWithRightPrimer_size; j ++) {
1318  int right_hsp_index = m_HspOverlappingWithRightPrimer[j].index;
1319  if (hit_strand == eNa_strand_minus) {
1320  right_hsp_index = m_HspOverlappingWithRightPrimerMinusStrand[j].index;
1321  }
1322  CRange<TSeqPos> right_primer_master_overlap =
1323  hsp_list[right_hsp_index]->master_range.IntersectionWith(m_PrimerInfo->right);
1324 
1325  right_slave_range_array[j] =
1326  x_GetSlaveRangeGivenMasterRange(*(hsp_list[right_hsp_index]->hsp),
1327  right_primer_master_overlap,
1328  hit_index);
1329  }
1330 
1331  int left_primer_hsp_index = 0;
1332  for (int i = 0; i < HspOverlappingWithLeftPrimer_size; i ++) {
1333  //each left window
1334  left_primer_hsp_index = m_HspOverlappingWithLeftPrimer[i].index;
1335  if (hit_strand == eNa_strand_minus) {
1336  left_primer_hsp_index = m_HspOverlappingWithLeftPrimerMinusStrand[i].index;
1337  }
1340  break;
1341  }
1342  CRange<TSeqPos> left_primer_master_overlap =
1343  hsp_list[left_primer_hsp_index]->master_range.IntersectionWith(m_PrimerInfo->left);
1344 
1345  //check right primer
 // the two flags make sure the left slave range and the left global
 // alignment are each computed at most once per left HSP, and only when
 // a right HSP actually needs them
1346  bool left_slave_range_filled = false;
1347  bool left_global_align_filled = false;
1348  CRange<TSeqPos> left_primer_hit_range;
1349  CRef<CSeq_align> left_primer_hit_global_align(NULL);
1350 
1351  TSeqPos left_total_mismatch = 0;
1352  TSeqPos left_total_gap = 0;
1353  TSeqPos left_3end_mismatch = 0;
1354  TSeqPos left_3end_gap = 0;
1355  //check the current left window against all right windows
1356  for (int j = 0; j < HspOverlappingWithRightPrimer_size; j ++) {
1357 
1358  int right_hsp_index = m_HspOverlappingWithRightPrimer[j].index;
1359  if (hit_strand == eNa_strand_minus) {
1360  right_hsp_index = m_HspOverlappingWithRightPrimerMinusStrand[j].index;
1361  }
1362 
1363  if (!left_slave_range_filled) { // only check left primer once
1364  left_primer_hit_range =
1365  x_GetSlaveRangeGivenMasterRange(*hsp_list[left_primer_hsp_index]->hsp,
1366  left_primer_master_overlap,
1367  hit_index);
1368  left_slave_range_filled = true;
 // an empty left range cannot pair with anything: give up on this
 // left HSP
1369  if (left_primer_hit_range.Empty()) {
1370  break;
1371  }
1372  }
1373 
1374  TSeqPos left_primer_hit_stop = left_primer_hit_range.GetTo();
1375  TSeqPos left_primer_hit_start = left_primer_hit_range.GetFrom();
1376 
1377 
1378  CRange<TSeqPos> right_primer_hit_range = right_slave_range_array[j];
1379  if (right_primer_hit_range.Empty()) {
1380  continue;
1381  }
1382  TSeqPos right_primer_hit_stop = right_primer_hit_range.GetTo();
1383  TSeqPos right_primer_hit_start = right_primer_hit_range.GetFrom();
1384 
 // estimated product length: distance between the two subject ranges
 // plus (length - 1) of each primer; on the minus strand the left
 // primer lies after the right primer on the subject, so the operands
 // of the subtraction are swapped
1385  int product_len;
1386  if (hit_strand == eNa_strand_minus) {
1387 
1388  product_len =
1389  (left_primer_hit_start - right_primer_hit_stop + 1) +
1390  (m_PrimerInfo->right.GetLength() -1) +
1391  (m_PrimerInfo->left.GetLength() -1);
1392  } else {
1393  product_len =
1394  (right_primer_hit_start - left_primer_hit_stop + 1) +
1395  (m_PrimerInfo->right.GetLength() -1) +
1396  (m_PrimerInfo->left.GetLength() -1);
1397  }
1398 
1399  if (product_len > 0 &&
1400  product_len <=
1401  (int)(m_Hits->m_TargetSizeMax)){
1402 
1403  if (!left_global_align_filled) {
1404 
1405  left_primer_hit_global_align
1407  hsp_list[left_primer_hsp_index],
1408  left_total_mismatch,
1409  left_3end_mismatch,
1410  left_total_gap,
1411  left_3end_gap,
1412  true, hit_index, hit_strand);
1413  left_global_align_filled = true;
 // no usable left alignment: no right HSP can be paired with it
1414  if (!left_primer_hit_global_align) {
1415  break;
1416  }
1417  }
1418 
1419  TSeqPos right_total_mismatch = 0;
1420  TSeqPos right_total_gap = 0;
1421  TSeqPos right_3end_mismatch = 0;
1422  TSeqPos right_3end_gap = 0;
1423  CRef<CSeq_align> right_primer_hit_global_align =
1425  hsp_list[right_hsp_index],
1426  right_total_mismatch, right_3end_mismatch,
1427  right_total_gap, right_3end_gap, false,
1428  hit_index, hit_strand);
1429  if (right_primer_hit_global_align) {
1430 
 // final length check on the global alignments; the length it
 // computes is the one that gets saved
1431  int pcr_product_len = 0;
1432  if (x_IsPcrLengthInRange(*left_primer_hit_global_align,
1433  *right_primer_hit_global_align,
1434  false,
1435  hit_strand,
1436  pcr_product_len)){
1437 
1438  x_SavePrimerInfo(*left_primer_hit_global_align,
1439  *right_primer_hit_global_align,
1440  left_total_mismatch,
1441  left_3end_mismatch, left_total_gap, left_3end_gap,
1442  right_total_mismatch, right_3end_mismatch,
1443  right_total_gap,
1444  right_3end_gap, pcr_product_len,
1445  hit_index, false, false);
1446  }
1447  }
1448 
1449  }
1450  }
1451  }
1452 }
1453 
1454 
// Computes the product length implied by two primer alignments and reports
// whether it is acceptable.
//
// left_primer_hit_align / right_primer_hit_align:
//     primer alignments; row 1 supplies the subject positions, row 0 the
//     aligned primer lengths used for the lower bound.
// primers_on_different_strand:
//     true for the single-primer ("self primer") case.
// hit_strand:
//     only consulted when primers_on_different_strand is false.
// product_len:
//     output; always set (reset to 0 first), even when false is returned.
//
// Returns true when product_len is at least the shorter of the two row-0
// alignment lengths and at most m_Hits->m_TargetSizeMax.
1456 x_IsPcrLengthInRange(const CSeq_align& left_primer_hit_align,
1457  const CSeq_align& right_primer_hit_align,
1458  bool primers_on_different_strand,
1459  ENa_strand hit_strand,
1460  int& product_len)
1461 {
1462  bool result = false;
1463  TSeqPos left_primer_hit_stop = left_primer_hit_align.GetSeqStop(1);
1464  TSeqPos left_primer_hit_start = left_primer_hit_align.GetSeqStart(1);
1465  TSeqPos right_primer_hit_start = right_primer_hit_align.GetSeqStart(1);
1466  TSeqPos right_primer_hit_stop = right_primer_hit_align.GetSeqStop(1);
1467  product_len = 0;
1468 
1469  //self primer case
1470  if (primers_on_different_strand) {
1471 
 // span from the start of the "left" alignment to the stop of the
 // "right" alignment
1472  product_len = right_primer_hit_stop - left_primer_hit_start + 1;
1473 
1474  } else if (hit_strand == eNa_strand_minus) {
1475 
 // minus strand: span from the right alignment start to the left
 // alignment stop
1476  product_len = (left_primer_hit_stop - right_primer_hit_start + 1);
1477  } else {
 // plus strand: gap between the alignments plus (length - 1) of each
 // primer as given in m_PrimerInfo
1478  product_len = (right_primer_hit_start - left_primer_hit_stop + 1) +
1479  (m_PrimerInfo->right.GetLength() -1) +
1480  (m_PrimerInfo->left.GetLength() -1);
1481  }
1482 
1483  if (product_len >= min((int)left_primer_hit_align.GetSeqRange(0).GetLength(),
1484  (int)right_primer_hit_align.GetSeqRange(0).GetLength()) &&
1485  product_len <= (int)m_Hits->m_TargetSizeMax) {
1486  result = true;
1487  }
1488 
1489 
1490  return result;
1491 }
1492 
1493 
// Looks for products that a single primer can produce on its own, i.e. the
// same primer window being covered by a plus-strand HSP and by a
// minus-strand HSP of the same hit.
//
// plus_strand_hsp_list / minus_strand_hsp_list:
//     HSPs of the hit on each strand.
// HspOverlappingWith*_size:
//     number of valid entries in the corresponding m_HspOverlappingWith*
//     index list.
// hit_index:
//     index of the hit; forwarded to the helpers and to x_SavePrimerInfo.
//
// The first half pairs the left primer's plus-strand HSPs with its
// minus-strand HSPs and saves matches flagged as self forward primer; the
// second half does the same for the right primer and saves matches flagged
// as self reverse primer.
1495 x_AnalyzeOnePrimer(const vector<SHspInfo*>& plus_strand_hsp_list,
1496  const vector<SHspInfo*>& minus_strand_hsp_list,
1497  int HspOverlappingWithLeftPrimer_size,
1498  int HspOverlappingWithRightPrimer_size,
1499  int HspOverlappingWithLeftPrimerMinusStrand_size,
1500  int HspOverlappingWithRightPrimerMinusStrand_size,
1501  TSeqPos hit_index) {
1502  //save right slave range to avoid repeated tree query
 // "right" here means the minus-strand copy of the left primer; skipped
 // entirely when no plus-strand HSP overlaps the left primer
1503  vector<CRange<TSeqPos> > right_primer_hit_range_array(HspOverlappingWithLeftPrimerMinusStrand_size);
1504  for (int j = 0; HspOverlappingWithLeftPrimer_size > 0 && j < HspOverlappingWithLeftPrimerMinusStrand_size; j ++) {
1505  int right_hsp_index = m_HspOverlappingWithLeftPrimerMinusStrand[j].index;
1506  CRange<TSeqPos> right_master_range = minus_strand_hsp_list[right_hsp_index]->master_range;
1507 
1508  CRange<TSeqPos> left_primer_window_right_align_overlap =
1509  m_PrimerInfo->left.IntersectionWith(right_master_range);
1510 
1511  right_primer_hit_range_array[j] =
1512  x_GetSlaveRangeGivenMasterRange(*(minus_strand_hsp_list[right_hsp_index]->hsp),
1513  left_primer_window_right_align_overlap,
1514  hit_index);
1515  }
1516 
1517  for (int i = 0; i < HspOverlappingWithLeftPrimer_size; i ++) {
1518 
1519  int left_hsp_index = m_HspOverlappingWithLeftPrimer[i].index;
1522  break;
1523  }
1524 
1525  CRange<TSeqPos> left_primer_master_overlap =
1526  plus_strand_hsp_list[left_hsp_index]->master_range.IntersectionWith(m_PrimerInfo->left);
1527 
1528 
 // the two flags make sure the plus-strand slave range and global
 // alignment are each computed at most once per plus-strand HSP
1529  CRange<TSeqPos> left_primer_hit_range;
1530  bool left_slave_range_filled = false;
1531  bool left_global_align_filled = false;
1532  TSeqPos left_total_mismatch = 0;
1533  TSeqPos left_3end_mismatch = 0;
1534  TSeqPos left_total_gap = 0;
1535  TSeqPos left_3end_gap = 0;
1536 
1537  CRef<CSeq_align> left_primer_hit_global_align(NULL);
1538  for (int j = 0; j < HspOverlappingWithLeftPrimerMinusStrand_size; j ++) {
1539  int right_hsp_index = m_HspOverlappingWithLeftPrimerMinusStrand[j].index;
1540 
1541  if (!left_slave_range_filled) {
1542  left_primer_hit_range =
1543  x_GetSlaveRangeGivenMasterRange(*(plus_strand_hsp_list[left_hsp_index]->hsp),
1544  left_primer_master_overlap,
1545  hit_index);
1546  left_slave_range_filled = true;
1547  if (left_primer_hit_range.Empty()) {
1548  break;
1549  }
1550  }
1551 
1552  TSeqPos left_primer_hit_stop = left_primer_hit_range.GetTo();
1553 
1554 
1555  CRange<TSeqPos> right_primer_hit_range = right_primer_hit_range_array[j];
1556  if (right_primer_hit_range.Empty()) {
1557  continue;
1558  }
1559  TSeqPos right_primer_hit_start = right_primer_hit_range.GetFrom();
1560 
1561  //now check the distance of two primer windows
1562 
 // NOTE(review): this estimate adds the right primer's length although
 // both ends are the left primer here; it looks like a coarse screen
 // only (x_IsPcrLengthInRange decides below) - confirm
1563  int product_len = right_primer_hit_start - left_primer_hit_stop + 1 +
1564  + (m_PrimerInfo->right.GetLength() -1) +
1565  (m_PrimerInfo->left.GetLength() -1);
1566 
1567  if (!(product_len > 0 &&
1568  product_len <=
1569  (int)m_Hits->m_TargetSizeMax)) {
1570  continue;
1571  }
1572 
1573  TSeqPos right_total_mismatch = 0;
1574  TSeqPos right_total_gap = 0;
1575  TSeqPos right_3end_mismatch = 0;
1576  TSeqPos right_3end_gap = 0;
1577 
1578  if (!left_global_align_filled) {
1579  left_primer_hit_global_align =
1581  plus_strand_hsp_list[left_hsp_index],
1582  left_total_mismatch,
1583  left_3end_mismatch,
1584  left_total_gap, left_3end_gap,
1585  true,
1586  hit_index, eNa_strand_plus);
1587  left_global_align_filled = true;
1588  if(!left_primer_hit_global_align) {
1589  break;
1590  }
1591  }
1592  CRef<CSeq_align> right_primer_hit_global_align =
1594  minus_strand_hsp_list[right_hsp_index],
1595  right_total_mismatch,
1596  right_3end_mismatch,
1597  right_total_gap, right_3end_gap,
1598  true,
1599  hit_index, eNa_strand_minus);
1600  if (right_primer_hit_global_align) {
1601 
1602  //primer overlaps on hit should be within pcr product range
1603  int pcr_product_len = 0;
1604  bool valid_pcr_length;
1605 
1606  valid_pcr_length =
1607  x_IsPcrLengthInRange(*left_primer_hit_global_align,
1608  *right_primer_hit_global_align,
1609  true,
1611  pcr_product_len);
1612 
1613  if (valid_pcr_length) {
 // NOTE(review): the two CSeq_align objects created here are
 // replaced by the assignments right below, so the freshly
 // allocated objects are never used
1614  CRef<CSeq_align> new_align_left(new CSeq_align);
1615  CRef<CSeq_align> new_align_right(new CSeq_align);
1616 
1617  new_align_left = left_primer_hit_global_align;
1618  new_align_right = right_primer_hit_global_align;
1619 
1620 
 // saved with is_self_forward_primer = true
1621  x_SavePrimerInfo(*new_align_left, *new_align_right,
1622  left_total_mismatch,
1623  left_3end_mismatch, left_total_gap, left_3end_gap,
1624  right_total_mismatch, right_3end_mismatch,
1625  right_total_gap,
1626  right_3end_gap, pcr_product_len, hit_index, true, false);
1627 
1628  }
1629  }
1630  }
1631  }
1632 
1633  //check right primer
1634 
1635  // save right slave range once to avoid repeated slow tree query
1636  vector<CRange<TSeqPos> > right_primer_hit_range_array2(HspOverlappingWithRightPrimerMinusStrand_size);
1637  for (int j = 0; HspOverlappingWithRightPrimer_size > 0 && j < HspOverlappingWithRightPrimerMinusStrand_size; j ++) {
1638  int right_hsp_index = m_HspOverlappingWithRightPrimerMinusStrand[j].index;
1639 
1640  CRange<TSeqPos> right_master_range = minus_strand_hsp_list[right_hsp_index]->master_range;
1641 
1642  CRange<TSeqPos> right_primer_as_3_master_overlap =
1643  m_PrimerInfo->right.IntersectionWith(right_master_range);
1644 
1645  right_primer_hit_range_array2[j] =
1646  x_GetSlaveRangeGivenMasterRange(*(minus_strand_hsp_list[right_hsp_index]->hsp),
1647  right_primer_as_3_master_overlap,
1648  hit_index);
1649  }
1650 
1651  for (int i = 0; i < HspOverlappingWithRightPrimer_size; i ++) {
1652  int left_hsp_index = m_HspOverlappingWithRightPrimer[i].index;
1655  break;
1656  }
1657 
1658  CRange<TSeqPos> right_primer_as_5_master_overlap =
1659  plus_strand_hsp_list[left_hsp_index]->master_range.IntersectionWith(m_PrimerInfo->right);
1660  //so right primer has alignment in plus strand alignment
1661  //This means the right primer has to be on
1662  //minus strand of this alignment to work as any pcr can only go 5'->3'
1663  //so this is actually left primer
1664 
1665 
1666  CRange<TSeqPos> right_primer_as_5_hit_range;
1667  bool right_primer_as_5_slave_range_filled = false;
1668  bool left_global_align_filled = false;
1669 
1670  CRef<CSeq_align> left_primer_hit_global_align(NULL);
1671  TSeqPos left_total_mismatch = 0;
1672  TSeqPos left_3end_mismatch = 0;
1673  TSeqPos left_total_gap = 0;
1674  TSeqPos left_3end_gap = 0;
1675 
1676  //test if right window also have alignment on plus strand of any minus strand
1677  //alignment.
1678 
1679  for (int j = 0; j < HspOverlappingWithRightPrimerMinusStrand_size; j ++) {
1680  int right_hsp_index = m_HspOverlappingWithRightPrimerMinusStrand[j].index;
1681 
1682  if (!right_primer_as_5_slave_range_filled ) {
1683  right_primer_as_5_hit_range =
1684  x_GetSlaveRangeGivenMasterRange(*plus_strand_hsp_list[left_hsp_index]->hsp,
1685  right_primer_as_5_master_overlap,
1686  hit_index);
1687  right_primer_as_5_slave_range_filled = true;
1688  if (right_primer_as_5_hit_range.Empty()) {
1689  break;
1690  }
1691 
1692  }
1693 
1694 
1695  TSeqPos right_primer_as_5_hit_start = right_primer_as_5_hit_range.GetFrom();
1696 
1697  CRange<TSeqPos> right_primer_as_3_hit_range = right_primer_hit_range_array2[j];
1698 
1699  if (right_primer_as_3_hit_range.Empty()) {
1700  continue;
1701  }
1702  TSeqPos right_primer_as_3_hit_stop = right_primer_as_3_hit_range.GetTo();
1703 
1704 
1705  //now check the distance of two primer windows
1706 
 // here the plus-strand range must lie after the minus-strand range,
 // the mirror image of the left-primer case above
1707  int product_len = right_primer_as_5_hit_start
1708  - right_primer_as_3_hit_stop + 1 +
1709  + (m_PrimerInfo->right.GetLength() -1) +
1710  (m_PrimerInfo->left.GetLength() -1);
1711 
1712  if (!(product_len > 0 &&
1713  product_len <=
1714  (int)(m_Hits->m_TargetSizeMax))) {
1715  continue;
1716  }
1717 
1718  TSeqPos right_total_mismatch = 0;
1719  TSeqPos right_total_gap = 0;
1720  TSeqPos right_3end_mismatch = 0;
1721  TSeqPos right_3end_gap = 0;
1722 
1723  if (!left_global_align_filled) {
1724  left_primer_hit_global_align =
1726  plus_strand_hsp_list[left_hsp_index],
1727  left_total_mismatch,
1728  left_3end_mismatch,
1729  left_total_gap, left_3end_gap,
1730  false, hit_index, eNa_strand_plus);
1731  left_global_align_filled = true;
1732  if(!left_primer_hit_global_align) {
1733  break;
1734  }
1735  }
1736  CRef<CSeq_align> right_primer_hit_global_align =
1737  x_FillGlobalAlignInfo(m_PrimerInfo->right, minus_strand_hsp_list[right_hsp_index],
1738  right_total_mismatch,
1739  right_3end_mismatch,
1740  right_total_gap, right_3end_gap,
1741  false,
1742  hit_index, eNa_strand_minus);
1743  if (right_primer_hit_global_align) {
1744 
1745  //primer overlaps on hit should be within pcr product range
1746  int pcr_product_len = 0;
1747  bool valid_pcr_length;
1748 
 // note the argument order: the minus-strand alignment is passed in
 // the "left" position and the plus-strand alignment in the "right"
 // position, so the length runs from the minus-strand start to the
 // plus-strand stop
1749  valid_pcr_length =
1750  x_IsPcrLengthInRange(*right_primer_hit_global_align,
1751  *left_primer_hit_global_align,
1752  true,
1754  pcr_product_len);
1755 
1756  if (valid_pcr_length) {
 // work on copies: Reverse() modifies the alignment, and the
 // plus-strand alignment is reused for the remaining j
1757  CRef<CSeq_align> new_align_left(new CSeq_align);
1758  CRef<CSeq_align> new_align_right(new CSeq_align);
1759 
1760  new_align_left->Assign(*left_primer_hit_global_align);
1761  //we deal with plus strand on master all the time
1762  new_align_left->Reverse();
1763 
1764  new_align_right->Assign(*right_primer_hit_global_align);
1765  //we deal with plus strand on master all the time
1766  new_align_right->Reverse();
1767 
1768 
 // saved with is_self_reverse_primer = true
1769  x_SavePrimerInfo(*new_align_left, *new_align_right,
1770  left_total_mismatch,
1771  left_3end_mismatch, left_total_gap, left_3end_gap,
1772  right_total_mismatch, right_3end_mismatch,
1773  right_total_gap,
1774  right_3end_gap, pcr_product_len, hit_index, false, true);
1775 
1776  }
1777  }
1778 
1779  }
1780  }
1781 }
1782 
1783 
1784 
1785 
// Visits every combination of a sorted hit (outer loop over
// m_Hits->m_SortHit) and a registered primer (inner loop over
// m_PrimerInfoList).
1787 {
1788 
1789  for (TSeqPos i = 0; i < m_Hits->m_SortHit.size(); i ++) {
1790 
1791  for (int j = 0; j < (int)m_PrimerInfoList.size(); j ++) {
1794 
1798 
1799  }
1800  }
1801 }
1802 
// Ordering predicate for two primer hits: ranks the hit with fewer total
// mismatches first.
// The total is left + right mismatches plus left + right gaps, capped at 20,
// so all hits with 20 or more are treated as equal.
1804  SPrimerHitInfo* info1,
1805  const COligoSpecificityCheck::
1806  SPrimerHitInfo* info2) {
 // 20 is the cap applied through min() below
1807  int mismatch1 = 20;
1808  int mismatch2 = 20;
1809 
1810  mismatch1 = min(mismatch1,
1811  (int)info1->right_total_mismatch +
1812  (int)info1->right_total_gap +
1813  (int)info1->left_total_mismatch +
1814  (int)info1->left_total_gap);
1815 
1816  mismatch2 = min(mismatch2,
1817  (int)info2->right_total_mismatch +
1818  (int)info2->right_total_gap +
1819  (int)info2->left_total_mismatch +
1820  (int)info2->left_total_gap);
1821 
1822  return mismatch1 < mismatch2;
1823 }
1824 
// Ordering predicate for two groups of primer hits: ranks the group whose
// best hit has fewer total mismatches first.
// A group's value is the smallest (mismatches + gaps, left and right) found
// among its hits, capped at 20; an empty group therefore counts as 20.
1826  SPrimerHitInfo*>* info1,
1827  const vector<COligoSpecificityCheck::
1828  SPrimerHitInfo*>* info2) {
1829  int mismatch1 = 20;
1830  int mismatch2 = 20;
 // lowest total over the first group
1831  ITERATE(vector<COligoSpecificityCheck::SPrimerHitInfo*>, iter, *info1) {
1832 
1833  mismatch1 = min(mismatch1,
1834  (int)(*iter)->right_total_mismatch +
1835  (int)(*iter)->right_total_gap +
1836  (int)(*iter)->left_total_mismatch +
1837  (int)(*iter)->left_total_gap);
1838  }
1839 
1840 
 // lowest total over the second group
1841  ITERATE(vector<COligoSpecificityCheck::SPrimerHitInfo*>, iter, *info2) {
1842 
1843  mismatch2 = min(mismatch2,
1844  (int)(*iter)->right_total_mismatch +
1845  (int)(*iter)->right_total_gap +
1846  (int)(*iter)->left_total_mismatch +
1847  (int)(*iter)->left_total_gap);
1848  }
1849  return mismatch1 < mismatch2;
1850 }
1851 
1852 void COligoSpecificityCheck::x_SortPrimerHit(vector<vector<SPrimerHitInfo> >& primer_hit_list_list){
1853  for (int i = 0; i < (int) primer_hit_list_list.size(); i ++){
1854  vector<COligoSpecificityCheck::SPrimerHitInfo>* primer_hit_list = &primer_hit_list_list[i];
1855  //group hit with the same subject sequence together
1856  vector<vector<SPrimerHitInfo*>* > result;
1857  CConstRef<CSeq_id> previous_id;
1858  vector<SPrimerHitInfo*>* temp;
1859 
1860  NON_CONST_ITERATE(vector<COligoSpecificityCheck::SPrimerHitInfo>, iter, *primer_hit_list) {
1861  const CSeq_id& cur_id = iter->aln.first->GetSeq_id(1);
1862 
1863  if(previous_id.Empty()) {
1864  temp = new vector<SPrimerHitInfo*>;
1865  temp->push_back(&(*iter));
1866  result.push_back(temp);
1867  } else if (cur_id.Match(*previous_id)){
1868  temp->push_back(&(*iter));
1869 
1870  } else {
1871  temp = new vector<SPrimerHitInfo*>;
1872  temp->push_back(&(*iter));
1873  result.push_back(temp);
1874  }
1875  previous_id = &cur_id;
1876  }
1877  //sort the group based on lowest mismatches in a group
1878  stable_sort(result.begin(), result.end(), SortPrimerHitByMismatchAscending);
1879 
1880  //restore the hit structure to a plain list
1881  vector<SPrimerHitInfo> temp2;
1882  NON_CONST_ITERATE(vector<vector<SPrimerHitInfo*>* >, iter, result) {
1883  //sort within the group based on lowest mismatches
1884 
1885  stable_sort((**iter).begin(), (**iter).end(), SortPrimerHitInGroupByMismatchAscending);
1886  ITERATE(vector<SPrimerHitInfo*>, iter2, **iter){
1887  temp2.push_back(**iter2);
1888  }
1889  (**iter).clear();
1890  delete *iter;
1891  }
1892  primer_hit_list->clear();
1893  *primer_hit_list = temp2;
1894  }
1895 }
1896 
// Registers the primers primer_info_list[from .. to-1] for checking and
// creates one empty result slot per primer in m_PrimerHit, m_SelfHit,
// m_VariantHit, m_AllowedHit and m_NumTargetFromSameSequence.
//
// primer_info_list: source of the primers. Only the addresses of its
//     elements are stored in m_PrimerInfoList, so the vector has to stay
//     alive and unmodified for as long as those pointers are used.
// from: index of the first primer; nothing is done when it is at or past
//     the end of the list.
//     NOTE(review): a negative value is not rejected here - confirm that
//     callers never pass one.
// to:   one past the last primer; a negative value or a value past the end
//     means "up to the end of the list".
1897 void COligoSpecificityCheck::CheckSpecificity(const vector<SPrimerInfo>& primer_info_list,
1898  int from, int to)
1899 {
1900  int end = primer_info_list.size();
1901  if (from >= end) return;
1902  if (to > end || to < 0) to = end;
1903  for (int i=from; i<to; ++i) {
1904  m_PrimerInfoList.push_back(&(primer_info_list[i]));
 // one empty hit list per category for this primer
1905  vector<SPrimerHitInfo> temp;
1906  m_PrimerHit.push_back(temp);
1907  m_SelfHit.push_back(temp);
1908  m_VariantHit.push_back(temp);
1909  m_AllowedHit.push_back(temp);
1910  m_NumTargetFromSameSequence.push_back(0);
1911  }
1912 
1915 }
1916 
1917 static bool SortHitByTopHspScores(TSortedHsp const& info1,
1918  TSortedHsp const& info2) {
1919  vector<double> score1;
1920  static const int num_hsp = 2;
1921  if (info1.first.size() > 0) {
1922  CRange<TSeqPos> previous_range;
1923  for (int i = 0; i < (int)info1.first.size() && i < num_hsp; i ++) {
1924  if (i == 0 || (i > 0 && !(info1.first[i]->master_range.IntersectingWith(previous_range)))) {
1925  score1.push_back(info1.first[i]->bit_score);
1926  }
1927  if (i == 0) {
1928  previous_range = info1.first[i]->master_range;
1929  }
1930  }
1931  }
1932  if (info1.second.size() > 0 && (score1.empty() || !(score1[0] > info1.second[0]->bit_score))) {
1933  CRange<TSeqPos> previous_range;
1934  for (int i = 0; i < (int)info1.second.size() && i < num_hsp; i ++) {
1935  if (i == 0 || (i > 0 && !(info1.second[i]->master_range.IntersectingWith(previous_range)))) {
1936  score1.push_back(info1.second[i]->bit_score);
1937  }
1938  if (i == 0) {
1939  previous_range = info1.second[i]->master_range;
1940  }
1941  }
1942  }
1943 
1944  vector<double> score2;
1945 
1946  if (info2.first.size() > 0) {
1947  CRange<TSeqPos> previous_range;
1948  for (int i = 0; i < (int)info2.first.size() && i < num_hsp; i ++) {
1949  if (i == 0 || (i > 0 && !(info2.first[i]->master_range.IntersectingWith(previous_range)))) {
1950  score2.push_back(info2.first[i]->bit_score);
1951  }
1952  if (i == 0) {
1953  previous_range = info2.first[i]->master_range;
1954  }
1955  }
1956  }
1957  if (info2.second.size() > 0 && (score2.empty() || !(score2[0] > info2.second[0]->bit_score))) {
1958  CRange<TSeqPos> previous_range;
1959  for (int i = 0; i < (int)info2.second.size() && i < num_hsp; i ++) {
1960  if (i == 0 || (i > 0 && !(info2.second[i]->master_range.IntersectingWith(previous_range)))) {
1961  score2.push_back(info2.second[i]->bit_score);
1962  }
1963  if (i == 0) {
1964  previous_range = info2.second[i]->master_range;
1965  }
1966  }
1967  }
1968 
1969 
1970  stable_sort(score1.begin(), score1.end(), greater<double>());
1971 
1972  stable_sort(score2.begin(), score2.end(), greater<double>());
1973 
1974  if (score1[0] > score2[0]) {
1975  return true;
1976  } else if (score1[0] < score2[0]) {
1977  return false;
1978  } else if (score1.size() > 1 && score2.size() > 1) {
1979 
1980  return (score1[1] > score2[1]);
1981  } else if (score1.size() > 1) {
1982  return true;
1983  } else if (score2.size() > 1) {
1984  return false;
1985  }
1986 
1987  return true;
1988 
1989 }
1990 
1991 
1992 ///Place alignments that share the same id into one holder. Split the alignments in each holder
1993 /// into plus or minus strand and sort them by alignment start in ascending order
///
/// What the visible body does, in order:
///  - returns at once when input_hits holds no alignments;
///  - walks input_hits and groups consecutive alignments whose row-1 seq-id matches into one
///    TSortedHsp (first = plus strand, second = minus strand, chosen by the strand of row 0);
///    an alignment whose row 0 is on the minus strand is Reverse()d in place before being stored;
///  - appends every finished group to m_SortHit and stable-sorts the leading `count` groups
///    with SortHitByTopHspScores;
///  - prints the hit/hsp counts to cerr, then either builds one CIntervalTree per hit and strand
///    (when the integer hsp/hit ratio exceeds 100) or stable-sorts each strand's HSP list;
///  - raises m_MaxHSPSize to the largest per-strand HSP list size encountered.
///
/// NOTE(review): listing line 1994 (the function signature) is absent from this listing; the body
/// refers to its alignment-set parameter as input_hits.
1995 {
1996  if(input_hits.Get().empty()) {
1997  return; // nothing to group or sort
1998  }
1999 
2000  CConstRef<CSeq_id> previous_id, subid; // row-1 id of the previous / current alignment
2001  bool is_first_aln = true;
2002  double highest_hit_score = 0; // best leading-HSP bit score among the groups finished so far
2003  bool last_highest_hit_score_index_found = false; // set once a new id starts with a score below highest_hit_score
2004  int count = 0; // number of leading m_SortHit entries that get re-sorted after the loop
2005  TSortedHsp each_hit; //first element for plus strand, second element for minus strand
2006 
2007  NON_CONST_ITERATE(CSeq_align_set::Tdata, iter, input_hits.Set()) {
2008  subid = &((*iter)->GetSeq_id(1)); // id of row 1 of this alignment
2009  double cur_bit_score = 0;
2010  (*iter)->GetNamedScore(CSeq_align::eScore_BitScore, cur_bit_score);
2011  if (cur_bit_score == 0) { // still 0 after the named-score lookup: fall back to the dense-seg score list
2012  ITERATE(CDense_seg::TScores, iter_score, (*iter)->GetSegs().GetDenseg().GetScores()) { // NOTE(review): assumes the segs are a dense-seg - confirm
2013  const CObject_id& id=(*iter_score)->GetId();
2014  if (id.IsStr() && id.GetStr() == "bit_score") {
2015  cur_bit_score = (*iter_score)->GetValue().GetReal();
2016  break; // first score labelled "bit_score" wins
2017  }
2018  }
2019  }
2020 
2021  if (!is_first_aln && !subid->Match(*previous_id)) {
2022  //this aln has a new id, compare to previous hit
2023  if (!last_highest_hit_score_index_found) {
2024 
2025  if (cur_bit_score < highest_hit_score) { // compared before highest_hit_score absorbs the group just finished
2026  last_highest_hit_score_index_found = true;
2027  }
2028 
2029  if (!(each_hit.first.empty())) {
2030  highest_hit_score = max(highest_hit_score, each_hit.first[0]->bit_score); // only the first plus-strand HSP is consulted
2031  }
2032  if (!(each_hit.second.empty())) {
2033  highest_hit_score = max(highest_hit_score, each_hit.second[0]->bit_score); // only the first minus-strand HSP is consulted
2034  }
2035 
2036  count ++; // also counted on the iteration that sets the flag above
2037  }
2038 
2039  //this aln has a new id, save the previous hit
2040 
2041  m_SortHit.push_back(each_hit); // stores a copy of the two pointer lists
2042 
2043  //reset
2044 
2045  each_hit.first.clear();
2046  each_hit.second.clear();
2047  }
2048  SHspInfo* temp = new SHspInfo; // raw allocation; no matching delete in this function - presumably released elsewhere (verify)
2049  if ((*iter)->GetSeqStrand(0) == eNa_strand_minus) {
2050 
2051  (*iter)->Reverse(); // modifies the caller's alignment in place
2052  temp ->hsp = *iter;
2053  each_hit.second.push_back(temp);
2054  } else {
2055 
2056  temp ->hsp = *iter;
2057  each_hit.first.push_back(temp);
2058  }
2059  temp->master_range = temp->hsp->GetSeqRange(0); // range on row 0, read after any Reverse() above
2060  temp->slave_range = temp->hsp->GetSeqRange(1); // range on row 1
2061  temp->bit_score = cur_bit_score;
2062  is_first_aln = false;
2063  previous_id = subid;
2064 
2065  }
2066 
2067  //save the last ones with the same id
2068  if(!(each_hit.first.empty() && each_hit.second.empty())) {
2069  m_SortHit.push_back(each_hit);
2070  }
2071 
2072  //sort hit based on the top two hsp
2073  stable_sort(m_SortHit.begin(), m_SortHit.begin() + count, SortHitByTopHspScores); // only the first `count` hits are reordered; later ones keep input order
2074 
2075  int num_hits = (int)m_SortHit.size();
2076  int num_hsp = (int)input_hits.Get().size();
2077  int hsp_hit_ratio = 0;
2078 
2079  if (num_hits > 0) {
2080  hsp_hit_ratio = num_hsp/num_hits; // integer division: the fractional part is discarded
2081 
2082  }
2083  cerr << "hit = " << num_hits << " hsp = " << num_hsp
2084  << " hsp/hit ratio = " << hsp_hit_ratio << endl;
2085 
2086 
2087  if (hsp_hit_ratio > 100) {//use itree for many hsp case
2088 
2089  m_UseITree = true;
2090  for (int i = 0; i < (int)m_SortHit.size(); i ++) {
2091 
2092  CIntervalTree* RangeTreeForEachHitPlusStrand = new CIntervalTree; // raw allocation stored in m_RangeTreeListPlusStrand below; not freed here
2093  if ((int)m_SortHit[i].first.size() > m_MaxHSPSize) {
2094  m_MaxHSPSize = (int)m_SortHit[i].first.size();
2095  }
2096  for (int j = 0; j < (int)m_SortHit[i].first.size(); j ++) {
2097  CRef<SHspIndex> index_holder(new SHspIndex);
2098  index_holder->index = j; // position of this HSP inside the (unsorted) plus-strand list
2099  CRange<int> temp_master_range(m_SortHit[i].first[j]->master_range.GetFrom(), // NOTE(review): TSeqPos bounds are converted to int here - confirm they always fit
2100  m_SortHit[i].first[j]->master_range.GetTo());
2101  RangeTreeForEachHitPlusStrand->Insert(temp_master_range,
2102  static_cast<CConstRef<CObject> > (index_holder));
2103 
2104  }
2105  m_RangeTreeListPlusStrand.push_back(RangeTreeForEachHitPlusStrand); // one tree per hit, appended in m_SortHit order
2106 
2107  CIntervalTree* RangeTreeForEachHitMinusStrand = new CIntervalTree; // raw allocation stored in m_RangeTreeListMinusStrand below; not freed here
2108 
2109  if ((int)m_SortHit[i].second.size() > m_MaxHSPSize) {
2110  m_MaxHSPSize = (int)m_SortHit[i].second.size();
2111  }
2112  for (int j = 0; j < (int)m_SortHit[i].second.size(); j ++) {
2113  CRef<SHspIndex> index_holder(new SHspIndex);
2114  index_holder->index = j; // position of this HSP inside the (unsorted) minus-strand list
2115  CRange<int> temp_master_range(m_SortHit[i].second[j]->master_range.GetFrom(), // NOTE(review): same TSeqPos-to-int conversion as for the plus strand
2116  m_SortHit[i].second[j]->master_range.GetTo());
2117  RangeTreeForEachHitMinusStrand->Insert(temp_master_range,
2118  static_cast<CConstRef<CObject> > (index_holder));
2119 
2120  }
2121  m_RangeTreeListMinusStrand.push_back(RangeTreeForEachHitMinusStrand); // one tree per hit, appended in m_SortHit order
2122  }
2123  } else {
2124  //sort hsp to speed up comparison later
2125  for (int i = 0; i < (int)m_SortHit.size(); i ++) {
2126  stable_sort(m_SortHit[i].first.begin(), m_SortHit[i].first.end(), // NOTE(review): comparator argument (listing line 2127) is absent from this listing
2128  stable_sort(m_SortHit[i].second.begin(), m_SortHit[i].second.end(), // NOTE(review): comparator argument (listing line 2129) is absent from this listing
2130  if ((int)m_SortHit[i].first.size() > m_MaxHSPSize) {
2131  m_MaxHSPSize = (int)m_SortHit[i].first.size();
2132  }
2133 
2134  if ((int)m_SortHit[i].second.size() > m_MaxHSPSize) {
2135  m_MaxHSPSize = (int)m_SortHit[i].second.size();
2136  }
2137 
2138  }
2139  }
2140 }
2141 
2142 
2143 
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
bool GetSeqData(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq, Int4 nodetype, unsigned char *seqconv, Uint1 seq_data_type)
Definition: asci_blk.cpp:1674
CRef< CAlnChunkVec > GetSeqChunks(TNumrow row, const TSignedRange &range, TGetChunkFlags flags=fAlnSegsOnly) const
Definition: alnmap.cpp:1039
const CSeq_id & GetSeqId(TNumrow row) const
Definition: alnmap.hpp:645
bool IsPositiveStrand(TNumrow row) const
Definition: alnmap.hpp:600
TSeqPos GetAlnStop(TNumseg seg) const
Definition: alnmap.hpp:488
@ fChunkSameAsSeg
Definition: alnmap.hpp:89
TSignedSeqPos GetSeqPosFromSeqPos(TNumrow for_row, TNumrow row, TSeqPos seq_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
Definition: alnmap.cpp:688
const CBioseq_Handle & GetBioseqHandle(TNumrow row) const
Definition: alnvec.cpp:86
TResidue GetGapChar(TNumrow row) const
Definition: alnvec.hpp:358
void SetEndChar(TResidue gap_char)
Definition: alnvec.hpp:368
void SetGapChar(TResidue gap_char)
Definition: alnvec.hpp:339
string & GetAlnSeqString(string &buffer, TNumrow row, const CAlnMap::TSignedRange &aln_rng) const
Definition: alnvec.cpp:145
CBioseq_Handle –.
CConstRef –.
Definition: ncbiobj.hpp:1266
void FromTranscript(TSeqPos query_start, ENa_strand query_strand, TSeqPos subj_start, ENa_strand subj_strand, const string &transcript)
Initialize from pairwise alignment transcript (a string representation produced by CNWAligner)
Definition: Dense_seg.cpp:1273
CRef< CDense_seg > ExtractSlice(TDim row, TSeqPos from, TSeqPos to) const
Extract a slice of the alignment that includes the specified range.
Definition: Dense_seg.cpp:747
CRange< TSeqPos > GetSeqRange(TDim row) const
Definition: Dense_seg.hpp:234
static string GetLoaderNameFromArgs(CReader *reader=0)
Definition: gbloader.cpp:377
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
const SPrimerInfo * m_PrimerInfo
bool x_IsPcrLengthInRange(const CSeq_align &left_primer_hit_align, const CSeq_align &right_primer_hit_align, bool primers_on_different_strand, ENa_strand hit_strand, int &product_len)
Test if the primer pair generates the pcr product in specified length range and fill the actual lengt...
const COligoSpecificityTemplate * m_Hits
the information about the blast results
SHspIndexInfo * m_HspOverlappingWithLeftPrimer
CRef< CScope > m_Scope
scope to fetch sequence
void CheckSpecificity(const vector< SPrimerInfo > &primer_info_list, int from=0, int to=-1)
check the specificity of the primer pairs.
void x_FindOverlappingHSP(SHspIndexInfo *left_window_index_list, int &left_window_index_list_size, SHspIndexInfo *right_window_index_list, int &right_window_index_list_size, const CRange< TSeqPos > &left_window_desired_range, const CRange< TSeqPos > &right_window_desired_range, ENa_strand hit_strand, TSeqPos hit_index, const vector< SHspInfo * > &hsp_list)
SHspIndexInfo * m_HspOverlappingWithLeftPrimerMinusStrand
void x_AnalyzeLeftAndRightPrimer(const vector< SHspInfo * > &hsp_list, ENa_strand hit_strand, int HspOverlappingWithLeftPrimer_size, int HspOverlappingWithRightPrimer_size, TSeqPos hit_index)
void x_AnalyzeTwoPrimers(const TSortedHsp &sorted_hsp, TSeqPos index)
Analyze the primer pair specificity using both the left and right primers at the ends.
COligoSpecificityCheck(const COligoSpecificityTemplate *temp, CScope &scope)
vector< vector< SPrimerHitInfo > > m_SelfHit
the hit represent the input template
vector< vector< SPrimerHitInfo > > m_VariantHit
the hits represent the transcript variants from the same gene as the input template
CRef< CScope > m_FeatureScope
bool x_SequencesMappedToSameTarget(CSeq_id::EAccessionInfo hit_type, const CSeq_align &left_align, const CSeq_align &right_align)
SHspIndexInfo * m_HspOverlappingWithRightPrimerMinusStrand
TSeqPos m_SpecifiedProductLen
the requested pcr length for non-specific template
vector< map< SSlaveRange, CRange< TSeqPos >, slave_range_sort_order > > m_SlaveRangeCache
vector< vector< SPrimerHitInfo > > m_PrimerHit
the non-specific hit for the primer pair
vector< int > m_NumTargetFromSameSequence
max number of targets allowed from a single subject sequence for a primer.
void x_FindMatchInfoForAlignment(CDense_seg &primer_denseg, bool &end_gap, TSeqPos &num_total_mismatch, TSeqPos &num_3end_mismatch, TSeqPos &num_total_gap, TSeqPos &num_3end_gap, bool is_left_primer, int &max_num_continuous_match, CRange< TSignedSeqPos > &aln_range)
void x_SavePrimerInfo(CSeq_align &left_align, CSeq_align &right_align, TSeqPos left_total_mismatch, TSeqPos left_3end_mismatch, TSeqPos left_total_gap, TSeqPos left_3end_gap, TSeqPos right_total_mismatch, TSeqPos right_3end_mismatch, TSeqPos right_total_gap, TSeqPos right_3end_gap, int product_len, TSeqPos index, bool is_self_forward_primer, bool is_self_reverse_primer)
save the primer information
void x_AnalyzePrimerSpecificity()
Analyze the primer pair specificity.
vector< const SPrimerInfo * > m_PrimerInfoList
the information about primer to be checked
void x_AnalyzeOnePrimer(const vector< SHspInfo * > &plus_strand_hsp_list, const vector< SHspInfo * > &minus_strand_hsp_list, int HspOverlappingWithLeftPrimer_size, int HspOverlappingWithRightPrimer_size, int HspOverlappingWithLeftPrimerMinusStrand_size, int HspOverlappingWithRightPrimerMinusStrand_size, TSeqPos hit_index)
analyze the case where the left primer itself can serve as both left and right primer
CRef< CObjectManager > m_FeatureOM
void x_SortPrimerHit(vector< vector< SPrimerHitInfo > > &primer_hit_list_list)
CRange< TSeqPos > x_GetSlaveRangeGivenMasterRange(const CSeq_align &input_align, CRange< TSeqPos > &master_range, int index)
map< SAlnCache, SPrimerMatch, sort_order > m_Cache
cache coordinate-alignment mapping
CRef< CSeq_align > x_FillGlobalAlignInfo(const CRange< TSeqPos > &desired_align_range, SHspInfo *input_hsp_info, TSeqPos &num_total_mismatch, TSeqPos &num_3end_mismatch, TSeqPos &num_total_gap, TSeqPos &num_3end_gap, bool is_left_primer, TSeqPos index, ENa_strand hit_strand)
return alignment for the full primer window.
SHspIndexInfo * m_HspOverlappingWithRightPrimer
CRef< CDense_seg > x_NW_alignment(const CRange< TSeqPos > &desired_align_range, const CSeq_align &input_hit, TSeqPos &num_total_mismatch, TSeqPos &num_3end_mismatch, TSeqPos &num_total_gap, TSeqPos &num_3end_gap, bool is_left_primer, int &max_num_continuous_match, int &align_length, TSeqPos master_local_start, TSeqPos master_local_stop, ENa_strand hit_strand, bool &nw_align_modified)
vector< vector< SPrimerHitInfo > > m_AllowedHit
the hits that the user chooses to ignore for specificity
int m_NumNonSpecificTarget
the number of non-specific targets to return
void x_SortHit(CSeq_align_set &input)
sort the hit
CRange< TSeqPos > m_TemplateRange
range on the input template
CConstRef< CSeq_id > m_Id
seqid
COligoSpecificityTemplate(const CBioseq_Handle &template_handle, CSeq_align_set &input_seqalign, CScope &scope, int word_size, TSeqPos allowed_total_mismatch=1, TSeqPos allowed_3end_mismatch=1, TSeqPos max_mismatch=7)
constructor @template_handle: bioseq represents the pcr template
Definition: primercheck.cpp:70
TSeqPos m_MismatchRegionLength3End
the length of the region at the 3' end for checking mismatches
vector< TSeqPos > m_AllowedSeqidIndex
user specified hits that can be disregarded for specificity checking
CSeq_id::EAccessionInfo m_TemplateType
const CBioseq_Handle & m_TemplateHandle
bioseq handle for input bioseq
vector< TSortedHsp > m_SortHit
the processed sorted hit list corresponding to the input seqalign
const list< CRef< CSeq_id > > * m_Allowed_Splice_Variants
TSeqPos m_TargetSizeMax
the requested target max length
int m_WordSize
minimal continuous match required
const list< CRef< CSeq_loc > > * m_AllowedSeqloc
vector< CIntervalTree * > m_RangeTreeListMinusStrand
vector< CIntervalTree * > m_RangeTreeListPlusStrand
CScope –.
Definition: scope.hpp:92
void Reverse(void)
Reverse the segments' orientation NOTE: currently *only* works for dense-seg.
Definition: Seq_align.cpp:685
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
Definition: Seq_align.cpp:153
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
CSeq_loc_Mapper –.
@ eBackwards
Towards lower seq coord (to the left if plus strand, right if minus)
container_type::iterator iterator
Definition: map.hpp:54
Definition: map.hpp:338
#define false
Definition: bool.h:36
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static char tmp[3200]
Definition: utf8.c:42
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
iterator Insert(const interval_type &interval, const mapped_type &value)
Definition: itree.cpp:158
reference GetValue(void) const
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
EAccessionInfo
For IdentifyAccession (below)
Definition: Seq_id.hpp:220
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1065
static int WorstRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:776
@ eAcc_wgs
Definition: Seq_id.hpp:290
@ eAcc_htgs
Definition: Seq_id.hpp:288
@ eAcc_chromosome
Definition: Seq_id.hpp:292
@ eAcc_refseq_genomic
Definition: Seq_id.hpp:430
@ eAcc_con
Definition: Seq_id.hpp:289
@ eAcc_division_mask
Definition: Seq_id.hpp:299
TSeqPos TPoint
Definition: Seq_loc.hpp:102
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
ECompare
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
Definition: scope.cpp:510
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CSeq_loc_Mapper_Base & KeepNonmappingRanges(void)
Keep ranges which can not be mapped.
@ eSeqMap_Down
map from a segmented bioseq to segments
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
TSeqPos GetBioseqLength(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
Definition: ncbiobj.hpp:1385
position_type GetLength(void) const
Definition: range.hpp:158
bool IntersectingWith(const TThisType &r) const
Definition: range.hpp:331
TThisType IntersectionWith(const TThisType &r) const
Definition: range.hpp:312
static TThisType GetEmpty(void)
Definition: range.hpp:306
TThisType & Set(position_type from, position_type to)
Definition: range.hpp:188
bool Empty(void) const
Definition: range.hpp:148
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
Tdata & Set(void)
Assign a value to data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
vector< CRef< CScore > > TScores
Definition: Dense_seg_.hpp:110
list< CRef< CSeq_align > > Tdata
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
@ eType_partial
mapping pieces together
Definition: Seq_align_.hpp:103
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Local
local use
Definition: Seq_id_.hpp:95
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
static MDB_envinfo info
Definition: mdb_load.c:37
const struct ncbi::grid::netcache::search::fields::SIZE size
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
T max(T x_, T y_)
T min(T x_, T y_)
The Object manager core.
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
Definition: pcre2_match.c:594
#define count
static bool SortIndexListByScoreDescending(const COligoSpecificityCheck::SHspIndexInfo &info1, const COligoSpecificityCheck::SHspIndexInfo &info2)
USING_SCOPE(objects)
static bool SortPrimerHitByMismatchAscending(const vector< COligoSpecificityCheck::SPrimerHitInfo * > *info1, const vector< COligoSpecificityCheck::SPrimerHitInfo * > *info2)
static bool SortHitByTopHspScores(TSortedHsp const &info1, TSortedHsp const &info2)
static bool SeqLocAllowed(const list< CRef< CSeq_loc > > &allowed_seq, const CSeq_id &hit_id, const CRange< TSeqPos > &hit_range, CScope &scope)
static bool SortPrimerHitInGroupByMismatchAscending(const COligoSpecificityCheck::SPrimerHitInfo *info1, const COligoSpecificityCheck::SPrimerHitInfo *info2)
static const double k_Min_Percent_Identity
Definition: primercheck.cpp:67
static const int k_MaxReliableGapNum
Definition: primercheck.cpp:68
USING_NCBI_SCOPE
Definition: primercheck.cpp:61
CRef< CDense_seg > s_DoNWalign(const CRange< TSeqPos > &desired_align_range, string &master_seq, const CAlnVec &av, TSeqPos hit_full_start, TSeqPos hit_full_stop, ENa_strand hit_strand, string &xcript, bool &nw_align_modified)
static void s_CountGaps(const string &xcript, TSeqPos &master_start_gap, TSeqPos &master_end_gap, TSeqPos &slave_start_gap, TSeqPos &slave_end_gap, char master_gap_char, char slave_gap_char)
static bool SortHspByMasterStartAscending(const SHspInfo *info1, const SHspInfo *info2)
static const double k_MinOverlapLenFactor
Definition: primercheck.cpp:66
primer specificity checking tool
pair< vector< SHspInfo * >, vector< SHspInfo * > > TSortedHsp
Definition: primercheck.hpp:61
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
key for coordinate-alignment cache
primer hit to the blast database
value for coordinate-alignment cache
TSeqPos num_3end_mismatch
total mismatches
CRef< CSeq_align > aln
3' end gaps
TSeqPos num_total_gap
3' end mismatches
CRange< TSeqPos > master_range
Definition: primercheck.hpp:52
CRange< TSeqPos > slave_range
Definition: primercheck.hpp:53
double bit_score
Definition: primercheck.hpp:54
CConstRef< CSeq_align > hsp
Definition: primercheck.hpp:51
else result
Definition: token2.c:20
Modified on Fri Sep 20 14:58:03 2024 by modify_doxy.py rev. 669887