77 : m_TemplateHandle(template_handle),
79 m_WordSize(word_size),
80 m_AllowedTotalMismatch(allowed_total_mismatch),
81 m_Allowed3EndMismatch(allowed_3end_mismatch),
82 m_MaxMismatch(max_mismatch),
84 m_MismatchRegionLength3End(10),
86 m_NumNonSpecificTarget(20),
87 m_MaxTargetPerSequence(100)
90 if(!input_seqalign.
Get().empty()){
154 char master_gap_char,
155 char slave_gap_char) {
157 for(
int i = 0;
i < (
int)xcript.size();
i ++){
158 if (xcript[
i] == master_gap_char) {
165 for(
int i = (
int)xcript.size() - 1;
i >= 0;
i--){
166 if (xcript[
i] == master_gap_char) {
173 if (master_start_gap == 0) {
175 if (xcript[
i] == slave_gap_char) {
182 if (master_end_gap == 0){
183 for(
int i = (
int)xcript.size() - 1;
i >= 0;
i--){
184 if (xcript[
i] == slave_gap_char) {
200 int& max_num_continuous_match,
204 int num_continuous_match = 0;
205 num_total_mismatch = 0;
206 num_3end_mismatch = 0;
209 max_num_continuous_match = 0;
217 string master_string;
223 int master_letter_len = 0;
224 for(
int i=0;
i< (
int)master_string.size();
i++){
225 if (master_string[
i] != gap_char) {
226 master_letter_len ++;
229 for(
int i=0;
i < (
int)master_string.size();
i++){
231 if (master_string[
i] == gap_char) {
232 if (is_left_primer) {
242 }
else if (slave_string[
i] == gap_char) {
243 if (is_left_primer) {
254 }
else if(master_string[
i]!=slave_string[
i]){
255 if (is_left_primer) {
257 num_3end_mismatch ++;
261 num_3end_mismatch ++;
264 num_total_mismatch ++;
269 if(master_string[
i]== slave_string[
i]){
270 num_continuous_match ++;
271 if (max_num_continuous_match < num_continuous_match) {
272 max_num_continuous_match = num_continuous_match;
276 num_continuous_match = 0;
281 if (master_string[0] == gap_char ||
282 master_string[(
int)master_string.size() - 1] == gap_char ||
283 slave_string[0] == gap_char ||
284 slave_string[(
int)slave_string.size() - 1] == gap_char ||
298 bool& nw_align_modified) {
299 nw_align_modified =
false;
309 hit_full_start, hit_seq);
314 GetSeqData(hit_full_start, hit_full_stop + 1, hit_seq);
323 aligner->SetScoreMatrix(
NULL);
325 xcript = aligner->GetTranscriptString();
327 den_ref = aligner->GetDense_seg(desired_align_range.
GetFrom(),
344 s_CountGaps(xcript, master_start_gap, master_end_gap, slave_start_gap, slave_end_gap,
'I',
'D');
345 if (slave_start_gap > 0 || slave_end_gap > 0) {
351 new_hit_full_start =
max((
int)(hit_full_start - slave_start_gap), 0);
353 GetBioseqLength() - 1);
355 new_hit_full_start =
max((
int)(hit_full_start - slave_end_gap), 0);
357 GetBioseqLength() - 1);
365 if (!(new_hit_full_start == hit_full_start &&
366 new_hit_full_stop == hit_full_stop)) {
368 for(
int i = hit_full_start - new_hit_full_start - 1;
i >= 0 ;
i --){
371 for(
int i = (
int)xcript.size() - 1 - (new_hit_full_stop - hit_full_stop);
i < (
int)xcript.size();
i ++) {
375 for(
int i = new_hit_full_stop - hit_full_stop - 1;
i >= 0 ;
i --){
378 for(
int i = (
int)xcript.size() - 1 - (hit_full_start - new_hit_full_start);
i < (
int)xcript.size();
i ++) {
383 hit_full_start = new_hit_full_start;
384 hit_full_stop = new_hit_full_stop;
397 den_ref->
SetIds().push_back(master_id);
398 den_ref->
SetIds().push_back(slave_id);
399 nw_align_modified =
true;
404 if (master_start_gap > 0 || master_end_gap > 0) {
406 xcript = xcript.substr(master_start_gap);
407 xcript = xcript.substr(0, xcript.size() - master_end_gap);
412 new_hit_full_start = hit_full_start + master_start_gap;
413 new_hit_full_stop = hit_full_stop - master_end_gap;
415 new_hit_full_start = hit_full_start + master_end_gap;
416 new_hit_full_stop = hit_full_stop - master_start_gap;
435 den_ref->
SetIds().push_back(master_id);
436 den_ref->
SetIds().push_back(slave_id);
437 nw_align_modified =
true;
462 int& max_num_continuous_match,
467 bool& nw_align_modified) {
476 desired_align_range.
GetTo() + 1, master_seq);
488 TSeqPos full_master_stop =
min(desired_align_range.
GetTo(), master_local_stop);
492 CRange<int> full_master_range(full_master_start, full_master_stop);
500 int longest_chunk_index = 0;
501 int longest_chunk_size = 0;
504 for (
int chunk_index = 0; chunk_index < master_chunk->size(); chunk_index ++) {
506 chunk_ref = (*master_chunk)[chunk_index];
507 if (!chunk_ref->IsGap()) {
508 if (chunk_ref->GetAlnRange().GetLength() > longest_chunk_size) {
509 longest_chunk_size = chunk_ref->GetAlnRange().GetLength();
510 longest_chunk_index = chunk_index;
519 CRange<int> longest_chunk_range = (*master_chunk)[longest_chunk_index]->GetRange();
521 int hit_start_adjust = longest_chunk_range.
GetFrom() -
523 int hit_stop_adjust = desired_align_range.
GetTo() -
524 longest_chunk_range.
GetTo();
530 hit_start_adjust), 0);
536 GetBioseqLength() - 1);
541 hit_stop_adjust), 0);
560 nw_align_modified =
false;
562 master_seq, *av, hit_full_start,
564 hit_strand, xcript, nw_align_modified);
578 int num_continuous_match = 0;
579 max_num_continuous_match = 0;
581 if (!nw_align_modified) {
584 align_length = (
int)xcript.size();
586 ITERATE(
string, iter, xcript) {
596 TSeqPos master_letter_len = (
TSeqPos)xcript.size() - num_master_gap;
598 ITERATE(
string, the_iter, xcript) {
602 if (is_left_primer) {
613 num_continuous_match = 0;
617 if (is_left_primer) {
627 num_continuous_match ++;
628 if (max_num_continuous_match < num_continuous_match) {
629 max_num_continuous_match = num_continuous_match;
634 if (is_left_primer) {
645 num_continuous_match = 0;
649 if (is_left_primer) {
659 num_continuous_match = 0;
670 num_total_mismatch = total_mismatch;
671 num_total_gap = total_insertion + total_deletion;
672 num_3end_mismatch = mismatch_3end;
673 num_3end_gap = insertion_3end + deletion_3end;
696 num_total_mismatch = 0;
697 num_3end_mismatch = 0;
700 int max_num_continuous_match = 0;
707 IntersectionWith(master_range);
735 global_align =
tmp.aln;
736 num_total_mismatch =
tmp.num_total_mismatch;
737 num_3end_mismatch =
tmp.num_3end_mismatch;
738 num_total_gap =
tmp.num_total_gap;
739 num_3end_gap =
tmp.num_3end_gap;
743 bool do_global_alignment =
true;
744 if (desired_align_range.
GetFrom() >= master_local_start &&
745 desired_align_range.
GetTo() <= master_local_stop) {
746 do_global_alignment =
false;
750 desired_align_range.
GetTo());
753 num_3end_mismatch, num_total_gap,num_3end_gap,
755 max_num_continuous_match, aln_range);
757 if (!do_global_alignment) {
758 double percent_ident = 1 - ((double)(num_total_mismatch + num_total_gap))/aln_range.
GetLength();
761 num_total_mismatch + num_total_gap < m_Hits->m_MaxMismatch &&
762 (num_total_mismatch + num_total_gap <= m_Hits->m_AllowedTotalMismatch ||
763 num_3end_mismatch + num_3end_gap <= m_Hits->m_Allowed3EndMismatch)) {
767 aln_ref->
SetSegs().SetDenseg(*primer_denseg);
769 global_align = aln_ref;
774 if (do_global_alignment) {
775 int align_length = 1;
776 bool nw_align_modified =
false;
777 num_total_mismatch = 0;
778 num_3end_mismatch = 0;
781 max_num_continuous_match = 0;
782 double percent_ident;
784 num_total_mismatch, num_3end_mismatch,
785 num_total_gap, num_3end_gap,
786 is_left_primer, max_num_continuous_match,
789 master_local_stop, hit_strand,
791 if (nw_align_modified) {
793 num_total_mismatch = 0;
794 num_3end_mismatch = 0;
797 max_num_continuous_match = 0;
801 num_3end_mismatch, num_total_gap,num_3end_gap,
803 max_num_continuous_match, aln_range);
805 percent_ident = 1 - ((double)(num_total_mismatch + num_total_gap))/aln_range.
GetLength();
808 percent_ident = 1 - ((double)(num_total_mismatch + num_total_gap))/align_length;
812 num_total_mismatch + num_total_gap < m_Hits->m_MaxMismatch &&
813 (num_total_mismatch + num_total_gap <= m_Hits->m_AllowedTotalMismatch ||
814 num_3end_mismatch + num_3end_gap <= m_Hits->m_Allowed3EndMismatch)) {
819 aln_ref->
SetSegs().SetDenseg(*den_ref);
824 global_align = aln_ref;
833 temp_match.
aln = global_align;
835 m_Cache[cache_id] = temp_match;
851 bool same_target =
false;
858 cerr <<
"mapping triggered" << endl;
912 if (backbone_loc && component_loc) {
919 if (backbone_component) {
940 bool allowed =
false;
943 (*iter)->GetTotalRange().IntersectionWith(hit_range).GetLength() >= hit_range.
GetLength()*0.95){
964 bool is_self_forward_primer,
965 bool is_self_reverse_primer)
972 info.product_len = product_len;
973 info.left_total_mismatch = left_total_mismatch;
974 info.left_total_gap = left_total_gap;
975 info.left_3end_mismatch = left_3end_mismatch;
976 info.left_3end_gap = left_3end_gap;
978 info.right_total_mismatch = right_total_mismatch;
979 info.right_total_gap = right_total_gap;
980 info.right_3end_mismatch = right_3end_mismatch;
981 info.right_3end_gap = right_3end_gap;
982 info.aln.first = &left_align;
983 info.aln.second = &right_align;
985 info.self_forward_primer = is_self_forward_primer;
986 info.self_reverse_primer = is_self_reverse_primer;
995 GetBioseqCore()->
GetId(),
1004 if (template_hit_same_id && left_template_aln_overlap && right_template_aln_overlap) {
1023 !template_hit_same_id &&
1027 cerr <<
"self hit by mapping" << endl;
1030 bool hit_assigned =
false;
1033 if (index == *iter) {
1035 hit_assigned =
true;
1047 hit_assigned =
true;
1055 if (!hit_assigned) {
1068 int start1 = 0, start2 = 0;
1073 return start1 <= start2;
1091 int& left_window_index_list_size,
1093 int& right_window_index_list_size,
1098 const vector<SHspInfo*>& hsp_list) {
1099 left_window_index_list_size = 0;
1100 right_window_index_list_size = 0;
1105 left_window_desired_range_int.
SetFrom(left_window_desired_range.
GetFrom());
1106 left_window_desired_range_int.
SetTo(left_window_desired_range.
GetTo());
1109 right_window_desired_range_int.
SetFrom(right_window_desired_range.
GetFrom());
1110 right_window_desired_range_int.
SetTo(right_window_desired_range.
GetTo());
1122 for (; left_window_tree_it; ++ left_window_tree_it) {
1124 if (hsp_list[temp->index]->master_range.IntersectionWith(left_window_desired_range).GetLength() >=
1126 left_window_index_list[left_window_index_list_size].
index = temp->index;
1127 left_window_index_list[left_window_index_list_size].
bit_score = hsp_list[temp->index]->bit_score;
1128 left_window_index_list_size ++;
1131 for (; right_window_tree_it; ++ right_window_tree_it) {
1133 if (hsp_list[temp->index]->master_range.IntersectionWith(right_window_desired_range).GetLength() >=
1135 right_window_index_list[right_window_index_list_size].
index = temp->index;
1136 right_window_index_list[right_window_index_list_size].
bit_score = hsp_list[temp->index]->bit_score;
1137 right_window_index_list_size ++;
1143 for (
int i = 0;
i <(
int) hsp_list.size();
i ++ ) {
1146 if (hsp_list[
i]->master_range.GetFrom() >= right_window_desired_range.
GetTo()) {
1149 if (hsp_list[
i]->master_range.IntersectionWith(left_window_desired_range).
1151 left_window_index_list[left_window_index_list_size].
index =
i;
1152 left_window_index_list[left_window_index_list_size].
bit_score = hsp_list[
i]->bit_score;
1153 left_window_index_list_size ++;
1156 if (hsp_list[
i]->master_range.IntersectionWith(right_window_desired_range).
1158 right_window_index_list[right_window_index_list_size].
index =
i;
1159 right_window_index_list[right_window_index_list_size].
bit_score = hsp_list[
i]->bit_score;
1160 right_window_index_list_size ++;
1168 if (left_window_index_list_size > 0) {
1172 if (right_window_index_list_size > 0) {
1187 int HspOverlappingWithLeftPrimer_size;
1188 int HspOverlappingWithRightPrimer_size;
1189 int HspOverlappingWithLeftPrimerMinusStrand_size;
1190 int HspOverlappingWithRightPrimerMinusStrand_size;
1194 HspOverlappingWithLeftPrimer_size,
1196 HspOverlappingWithRightPrimer_size,
1203 HspOverlappingWithLeftPrimerMinusStrand_size,
1205 HspOverlappingWithRightPrimerMinusStrand_size,
1212 bool analyze_plus_strand_first =
true;
1218 if (sorted_hsp.first.size() > 0) {
1219 if (sorted_hsp.second.size() > 0 &&
1220 sorted_hsp.first[0]->bit_score < sorted_hsp.second[0]->bit_score - 1) {
1221 analyze_plus_strand_first =
false;
1224 analyze_plus_strand_first =
false;
1228 if (analyze_plus_strand_first) {
1232 HspOverlappingWithLeftPrimer_size,
1233 HspOverlappingWithRightPrimer_size,
1239 HspOverlappingWithLeftPrimerMinusStrand_size,
1240 HspOverlappingWithRightPrimerMinusStrand_size,
1248 HspOverlappingWithLeftPrimerMinusStrand_size,
1249 HspOverlappingWithRightPrimerMinusStrand_size,
1254 HspOverlappingWithLeftPrimer_size,
1255 HspOverlappingWithRightPrimer_size,
1261 HspOverlappingWithLeftPrimer_size,
1262 HspOverlappingWithRightPrimer_size,
1263 HspOverlappingWithLeftPrimerMinusStrand_size,
1264 HspOverlappingWithRightPrimerMinusStrand_size,
1284 slave_range = (*ij).second;
1291 ExtractSlice(0, master_range.
GetFrom(), master_range.
GetTo());
1296 cerr <<
"ExtractSlice error = " << e.what() << endl;
1310 int HspOverlappingWithLeftPrimer_size,
1311 int HspOverlappingWithRightPrimer_size,
1316 vector<CRange<TSeqPos> > right_slave_range_array(HspOverlappingWithRightPrimer_size);
1317 for (
int j = 0; HspOverlappingWithLeftPrimer_size > 0 && j < HspOverlappingWithRightPrimer_size; j ++) {
1325 right_slave_range_array[j] =
1327 right_primer_master_overlap,
1331 int left_primer_hsp_index = 0;
1332 for (
int i = 0;
i < HspOverlappingWithLeftPrimer_size;
i ++) {
1346 bool left_slave_range_filled =
false;
1347 bool left_global_align_filled =
false;
1351 TSeqPos left_total_mismatch = 0;
1353 TSeqPos left_3end_mismatch = 0;
1356 for (
int j = 0; j < HspOverlappingWithRightPrimer_size; j ++) {
1363 if (!left_slave_range_filled) {
1364 left_primer_hit_range =
1366 left_primer_master_overlap,
1368 left_slave_range_filled =
true;
1369 if (left_primer_hit_range.
Empty()) {
1374 TSeqPos left_primer_hit_stop = left_primer_hit_range.
GetTo();
1379 if (right_primer_hit_range.
Empty()) {
1382 TSeqPos right_primer_hit_stop = right_primer_hit_range.
GetTo();
1383 TSeqPos right_primer_hit_start = right_primer_hit_range.
GetFrom();
1389 (left_primer_hit_start - right_primer_hit_stop + 1) +
1394 (right_primer_hit_start - left_primer_hit_stop + 1) +
1399 if (product_len > 0 &&
1403 if (!left_global_align_filled) {
1405 left_primer_hit_global_align
1407 hsp_list[left_primer_hsp_index],
1408 left_total_mismatch,
1412 true, hit_index, hit_strand);
1413 left_global_align_filled =
true;
1414 if (!left_primer_hit_global_align) {
1419 TSeqPos right_total_mismatch = 0;
1421 TSeqPos right_3end_mismatch = 0;
1425 hsp_list[right_hsp_index],
1426 right_total_mismatch, right_3end_mismatch,
1427 right_total_gap, right_3end_gap,
false,
1428 hit_index, hit_strand);
1429 if (right_primer_hit_global_align) {
1431 int pcr_product_len = 0;
1433 *right_primer_hit_global_align,
1439 *right_primer_hit_global_align,
1440 left_total_mismatch,
1441 left_3end_mismatch, left_total_gap, left_3end_gap,
1442 right_total_mismatch, right_3end_mismatch,
1444 right_3end_gap, pcr_product_len,
1445 hit_index,
false,
false);
1458 bool primers_on_different_strand,
1470 if (primers_on_different_strand) {
1472 product_len = right_primer_hit_stop - left_primer_hit_start + 1;
1476 product_len = (left_primer_hit_stop - right_primer_hit_start + 1);
1478 product_len = (right_primer_hit_start - left_primer_hit_stop + 1) +
1496 const vector<SHspInfo*>& minus_strand_hsp_list,
1497 int HspOverlappingWithLeftPrimer_size,
1498 int HspOverlappingWithRightPrimer_size,
1499 int HspOverlappingWithLeftPrimerMinusStrand_size,
1500 int HspOverlappingWithRightPrimerMinusStrand_size,
1503 vector<CRange<TSeqPos> > right_primer_hit_range_array(HspOverlappingWithLeftPrimerMinusStrand_size);
1504 for (
int j = 0; HspOverlappingWithLeftPrimer_size > 0 && j < HspOverlappingWithLeftPrimerMinusStrand_size; j ++) {
1506 CRange<TSeqPos> right_master_range = minus_strand_hsp_list[right_hsp_index]->master_range;
1511 right_primer_hit_range_array[j] =
1513 left_primer_window_right_align_overlap,
1517 for (
int i = 0;
i < HspOverlappingWithLeftPrimer_size;
i ++) {
1530 bool left_slave_range_filled =
false;
1531 bool left_global_align_filled =
false;
1532 TSeqPos left_total_mismatch = 0;
1533 TSeqPos left_3end_mismatch = 0;
1538 for (
int j = 0; j < HspOverlappingWithLeftPrimerMinusStrand_size; j ++) {
1541 if (!left_slave_range_filled) {
1542 left_primer_hit_range =
1544 left_primer_master_overlap,
1546 left_slave_range_filled =
true;
1547 if (left_primer_hit_range.
Empty()) {
1552 TSeqPos left_primer_hit_stop = left_primer_hit_range.
GetTo();
1555 CRange<TSeqPos> right_primer_hit_range = right_primer_hit_range_array[j];
1556 if (right_primer_hit_range.
Empty()) {
1559 TSeqPos right_primer_hit_start = right_primer_hit_range.
GetFrom();
1563 int product_len = right_primer_hit_start - left_primer_hit_stop + 1 +
1567 if (!(product_len > 0 &&
1573 TSeqPos right_total_mismatch = 0;
1575 TSeqPos right_3end_mismatch = 0;
1578 if (!left_global_align_filled) {
1579 left_primer_hit_global_align =
1581 plus_strand_hsp_list[left_hsp_index],
1582 left_total_mismatch,
1584 left_total_gap, left_3end_gap,
1587 left_global_align_filled =
true;
1588 if(!left_primer_hit_global_align) {
1594 minus_strand_hsp_list[right_hsp_index],
1595 right_total_mismatch,
1596 right_3end_mismatch,
1597 right_total_gap, right_3end_gap,
1600 if (right_primer_hit_global_align) {
1603 int pcr_product_len = 0;
1604 bool valid_pcr_length;
1608 *right_primer_hit_global_align,
1613 if (valid_pcr_length) {
1617 new_align_left = left_primer_hit_global_align;
1618 new_align_right = right_primer_hit_global_align;
1622 left_total_mismatch,
1623 left_3end_mismatch, left_total_gap, left_3end_gap,
1624 right_total_mismatch, right_3end_mismatch,
1626 right_3end_gap, pcr_product_len, hit_index,
true,
false);
1636 vector<CRange<TSeqPos> > right_primer_hit_range_array2(HspOverlappingWithRightPrimerMinusStrand_size);
1637 for (
int j = 0; HspOverlappingWithRightPrimer_size > 0 && j < HspOverlappingWithRightPrimerMinusStrand_size; j ++) {
1640 CRange<TSeqPos> right_master_range = minus_strand_hsp_list[right_hsp_index]->master_range;
1645 right_primer_hit_range_array2[j] =
1647 right_primer_as_3_master_overlap,
1651 for (
int i = 0;
i < HspOverlappingWithRightPrimer_size;
i ++) {
1667 bool right_primer_as_5_slave_range_filled =
false;
1668 bool left_global_align_filled =
false;
1671 TSeqPos left_total_mismatch = 0;
1672 TSeqPos left_3end_mismatch = 0;
1679 for (
int j = 0; j < HspOverlappingWithRightPrimerMinusStrand_size; j ++) {
1682 if (!right_primer_as_5_slave_range_filled ) {
1683 right_primer_as_5_hit_range =
1685 right_primer_as_5_master_overlap,
1687 right_primer_as_5_slave_range_filled =
true;
1688 if (right_primer_as_5_hit_range.
Empty()) {
1695 TSeqPos right_primer_as_5_hit_start = right_primer_as_5_hit_range.
GetFrom();
1697 CRange<TSeqPos> right_primer_as_3_hit_range = right_primer_hit_range_array2[j];
1699 if (right_primer_as_3_hit_range.
Empty()) {
1702 TSeqPos right_primer_as_3_hit_stop = right_primer_as_3_hit_range.
GetTo();
1707 int product_len = right_primer_as_5_hit_start
1708 - right_primer_as_3_hit_stop + 1 +
1712 if (!(product_len > 0 &&
1718 TSeqPos right_total_mismatch = 0;
1720 TSeqPos right_3end_mismatch = 0;
1723 if (!left_global_align_filled) {
1724 left_primer_hit_global_align =
1726 plus_strand_hsp_list[left_hsp_index],
1727 left_total_mismatch,
1729 left_total_gap, left_3end_gap,
1731 left_global_align_filled =
true;
1732 if(!left_primer_hit_global_align) {
1738 right_total_mismatch,
1739 right_3end_mismatch,
1740 right_total_gap, right_3end_gap,
1743 if (right_primer_hit_global_align) {
1746 int pcr_product_len = 0;
1747 bool valid_pcr_length;
1751 *left_primer_hit_global_align,
1756 if (valid_pcr_length) {
1760 new_align_left->
Assign(*left_primer_hit_global_align);
1764 new_align_right->
Assign(*right_primer_hit_global_align);
1770 left_total_mismatch,
1771 left_3end_mismatch, left_total_gap, left_3end_gap,
1772 right_total_mismatch, right_3end_mismatch,
1774 right_3end_gap, pcr_product_len, hit_index,
false,
true);
1804 SPrimerHitInfo* info1,
1806 SPrimerHitInfo* info2) {
1810 mismatch1 =
min(mismatch1,
1811 (
int)info1->right_total_mismatch +
1812 (
int)info1->right_total_gap +
1813 (
int)info1->left_total_mismatch +
1814 (
int)info1->left_total_gap);
1816 mismatch2 =
min(mismatch2,
1817 (
int)info2->right_total_mismatch +
1818 (
int)info2->right_total_gap +
1819 (
int)info2->left_total_mismatch +
1820 (
int)info2->left_total_gap);
1822 return mismatch1 < mismatch2;
1826 SPrimerHitInfo*>* info1,
1828 SPrimerHitInfo*>* info2) {
1831 ITERATE(vector<COligoSpecificityCheck::SPrimerHitInfo*>, iter, *info1) {
1833 mismatch1 =
min(mismatch1,
1834 (
int)(*iter)->right_total_mismatch +
1835 (
int)(*iter)->right_total_gap +
1836 (
int)(*iter)->left_total_mismatch +
1837 (
int)(*iter)->left_total_gap);
1841 ITERATE(vector<COligoSpecificityCheck::SPrimerHitInfo*>, iter, *info2) {
1843 mismatch2 =
min(mismatch2,
1844 (
int)(*iter)->right_total_mismatch +
1845 (
int)(*iter)->right_total_gap +
1846 (
int)(*iter)->left_total_mismatch +
1847 (
int)(*iter)->left_total_gap);
1849 return mismatch1 < mismatch2;
1853 for (
int i = 0;
i < (
int) primer_hit_list_list.size();
i ++){
1854 vector<COligoSpecificityCheck::SPrimerHitInfo>* primer_hit_list = &primer_hit_list_list[
i];
1856 vector<vector<SPrimerHitInfo*>* >
result;
1858 vector<SPrimerHitInfo*>* temp;
1860 NON_CONST_ITERATE(vector<COligoSpecificityCheck::SPrimerHitInfo>, iter, *primer_hit_list) {
1861 const CSeq_id& cur_id = iter->aln.first->GetSeq_id(1);
1863 if(previous_id.
Empty()) {
1864 temp =
new vector<SPrimerHitInfo*>;
1865 temp->push_back(&(*iter));
1867 }
else if (cur_id.
Match(*previous_id)){
1868 temp->push_back(&(*iter));
1871 temp =
new vector<SPrimerHitInfo*>;
1872 temp->push_back(&(*iter));
1875 previous_id = &cur_id;
1881 vector<SPrimerHitInfo> temp2;
1886 ITERATE(vector<SPrimerHitInfo*>, iter2, **iter){
1887 temp2.push_back(**iter2);
1892 primer_hit_list->clear();
1893 *primer_hit_list = temp2;
1900 int end = primer_info_list.size();
1901 if (from >= end)
return;
1902 if (to > end || to < 0) to = end;
1903 for (
int i=from;
i<to; ++
i) {
1905 vector<SPrimerHitInfo> temp;
1919 vector<double> score1;
1920 static const int num_hsp = 2;
1921 if (info1.first.size() > 0) {
1923 for (
int i = 0;
i < (
int)info1.first.size() &&
i < num_hsp;
i ++) {
1924 if (
i == 0 || (
i > 0 && !(info1.first[
i]->master_range.IntersectingWith(previous_range)))) {
1925 score1.push_back(info1.first[
i]->bit_score);
1928 previous_range = info1.first[
i]->master_range;
1932 if (info1.second.size() > 0 && (score1.empty() || !(score1[0] > info1.second[0]->bit_score))) {
1934 for (
int i = 0;
i < (
int)info1.second.size() &&
i < num_hsp;
i ++) {
1935 if (
i == 0 || (
i > 0 && !(info1.second[
i]->master_range.IntersectingWith(previous_range)))) {
1936 score1.push_back(info1.second[
i]->bit_score);
1939 previous_range = info1.second[
i]->master_range;
1944 vector<double> score2;
1946 if (info2.first.size() > 0) {
1948 for (
int i = 0;
i < (
int)info2.first.size() &&
i < num_hsp;
i ++) {
1949 if (
i == 0 || (
i > 0 && !(info2.first[
i]->master_range.IntersectingWith(previous_range)))) {
1950 score2.push_back(info2.first[
i]->bit_score);
1953 previous_range = info2.first[
i]->master_range;
1957 if (info2.second.size() > 0 && (score2.empty() || !(score2[0] > info2.second[0]->bit_score))) {
1959 for (
int i = 0;
i < (
int)info2.second.size() &&
i < num_hsp;
i ++) {
1960 if (
i == 0 || (
i > 0 && !(info2.second[
i]->master_range.IntersectingWith(previous_range)))) {
1961 score2.push_back(info2.second[
i]->bit_score);
1964 previous_range = info2.second[
i]->master_range;
1970 stable_sort(score1.begin(), score1.end(), greater<double>());
1972 stable_sort(score2.begin(), score2.end(), greater<double>());
1974 if (score1[0] > score2[0]) {
1976 }
else if (score1[0] < score2[0]) {
1978 }
else if (score1.size() > 1 && score2.size() > 1) {
1980 return (score1[1] > score2[1]);
1981 }
else if (score1.size() > 1) {
1983 }
else if (score2.size() > 1) {
1996 if(input_hits.
Get().empty()) {
2001 bool is_first_aln =
true;
2002 double highest_hit_score = 0;
2003 bool last_highest_hit_score_index_found =
false;
2008 subid = &((*iter)->GetSeq_id(1));
2009 double cur_bit_score = 0;
2011 if (cur_bit_score == 0) {
2014 if (
id.IsStr() &&
id.GetStr() ==
"bit_score") {
2015 cur_bit_score = (*iter_score)->GetValue().GetReal();
2021 if (!is_first_aln && !subid->
Match(*previous_id)) {
2023 if (!last_highest_hit_score_index_found) {
2025 if (cur_bit_score < highest_hit_score) {
2026 last_highest_hit_score_index_found =
true;
2029 if (!(each_hit.first.empty())) {
2030 highest_hit_score =
max(highest_hit_score, each_hit.first[0]->bit_score);
2032 if (!(each_hit.second.empty())) {
2033 highest_hit_score =
max(highest_hit_score, each_hit.second[0]->bit_score);
2045 each_hit.first.clear();
2046 each_hit.second.clear();
2053 each_hit.second.push_back(temp);
2057 each_hit.first.push_back(temp);
2062 is_first_aln =
false;
2063 previous_id = subid;
2068 if(!(each_hit.first.empty() && each_hit.second.empty())) {
2076 int num_hsp = (
int)input_hits.
Get().size();
2077 int hsp_hit_ratio = 0;
2080 hsp_hit_ratio = num_hsp/num_hits;
2083 cerr <<
"hit = " << num_hits <<
" hsp = " << num_hsp
2084 <<
" hsp/hit ratio = " << hsp_hit_ratio << endl;
2087 if (hsp_hit_ratio > 100) {
2098 index_holder->index = j;
2100 m_SortHit[
i].first[j]->master_range.GetTo());
2101 RangeTreeForEachHitPlusStrand->
Insert(temp_master_range,
2112 for (
int j = 0; j < (
int)
m_SortHit[
i].second.size(); j ++) {
2114 index_holder->index = j;
2116 m_SortHit[
i].second[j]->master_range.GetTo());
2117 RangeTreeForEachHitMinusStrand->
Insert(temp_master_range,
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
bool GetSeqData(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq, Int4 nodetype, unsigned char *seqconv, Uint1 seq_data_type)
CRef< CAlnChunkVec > GetSeqChunks(TNumrow row, const TSignedRange &range, TGetChunkFlags flags=fAlnSegsOnly) const
const CSeq_id & GetSeqId(TNumrow row) const
bool IsPositiveStrand(TNumrow row) const
TSeqPos GetAlnStop(TNumseg seg) const
TSignedSeqPos GetSeqPosFromSeqPos(TNumrow for_row, TNumrow row, TSeqPos seq_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
const CBioseq_Handle & GetBioseqHandle(TNumrow row) const
TResidue GetGapChar(TNumrow row) const
void SetEndChar(TResidue gap_char)
void SetGapChar(TResidue gap_char)
string & GetAlnSeqString(string &buffer, TNumrow row, const CAlnMap::TSignedRange &aln_rng) const
void FromTranscript(TSeqPos query_start, ENa_strand query_strand, TSeqPos subj_start, ENa_strand subj_strand, const string &transcript)
Initialize from pairwise alignment transcript (a string representation produced by CNWAligner)
CRef< CDense_seg > ExtractSlice(TDim row, TSeqPos from, TSeqPos to) const
Extract a slice of the alignment that includes the specified range.
CRange< TSeqPos > GetSeqRange(TDim row) const
static string GetLoaderNameFromArgs(CReader *reader=0)
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
const SPrimerInfo * m_PrimerInfo
bool x_IsPcrLengthInRange(const CSeq_align &left_primer_hit_align, const CSeq_align &right_primer_hit_align, bool primers_on_different_strand, ENa_strand hit_strand, int &product_len)
Test if the primer pair generates the pcr product in specified length range and fill the actual lengt...
const COligoSpecificityTemplate * m_Hits
the information about the blast results
SHspIndexInfo * m_HspOverlappingWithLeftPrimer
CRef< CScope > m_Scope
scope to fetch sequence
void CheckSpecificity(const vector< SPrimerInfo > &primer_info_list, int from=0, int to=-1)
check the specificity of the primer pairs.
void x_FindOverlappingHSP(SHspIndexInfo *left_window_index_list, int &left_window_index_list_size, SHspIndexInfo *right_window_index_list, int &right_window_index_list_size, const CRange< TSeqPos > &left_window_desired_range, const CRange< TSeqPos > &right_window_desired_range, ENa_strand hit_strand, TSeqPos hit_index, const vector< SHspInfo * > &hsp_list)
SHspIndexInfo * m_HspOverlappingWithLeftPrimerMinusStrand
void x_AnalyzeLeftAndRightPrimer(const vector< SHspInfo * > &hsp_list, ENa_strand hit_strand, int HspOverlappingWithLeftPrimer_size, int HspOverlappingWithRightPrimer_size, TSeqPos hit_index)
void x_AnalyzeTwoPrimers(const TSortedHsp &sorted_hsp, TSeqPos index)
Analyze the the primer pair specificity usign both left and right primer at ends.
COligoSpecificityCheck(const COligoSpecificityTemplate *temp, CScope &scope)
vector< vector< SPrimerHitInfo > > m_SelfHit
the hit represent the input template
vector< vector< SPrimerHitInfo > > m_VariantHit
the hits represent the transcript variants from the same gene as the input template
CRef< CScope > m_FeatureScope
bool x_SequencesMappedToSameTarget(CSeq_id::EAccessionInfo hit_type, const CSeq_align &left_align, const CSeq_align &right_align)
SHspIndexInfo * m_HspOverlappingWithRightPrimerMinusStrand
TSeqPos m_SpecifiedProductLen
the requested pcr length for non-specific template
vector< map< SSlaveRange, CRange< TSeqPos >, slave_range_sort_order > > m_SlaveRangeCache
vector< vector< SPrimerHitInfo > > m_PrimerHit
the non-specific hit for the primer pair
vector< int > m_NumTargetFromSameSequence
max number of targets allowed from a single subject sequence for a primer.
void x_FindMatchInfoForAlignment(CDense_seg &primer_denseg, bool &end_gap, TSeqPos &num_total_mismatch, TSeqPos &num_3end_mismatch, TSeqPos &num_total_gap, TSeqPos &num_3end_gap, bool is_left_primer, int &max_num_continuous_match, CRange< TSignedSeqPos > &aln_range)
void x_SavePrimerInfo(CSeq_align &left_align, CSeq_align &right_align, TSeqPos left_total_mismatch, TSeqPos left_3end_mismatch, TSeqPos left_total_gap, TSeqPos left_3end_gap, TSeqPos right_total_mismatch, TSeqPos right_3end_mismatch, TSeqPos right_total_gap, TSeqPos right_3end_gap, int product_len, TSeqPos index, bool is_self_forward_primer, bool is_self_reverse_primer)
save the primer informaton
void x_AnalyzePrimerSpecificity()
Analyze the primer pair specificity.
vector< const SPrimerInfo * > m_PrimerInfoList
the information about primer to be checked
void x_AnalyzeOnePrimer(const vector< SHspInfo * > &plus_strand_hsp_list, const vector< SHspInfo * > &minus_strand_hsp_list, int HspOverlappingWithLeftPrimer_size, int HspOverlappingWithRightPrimer_size, int HspOverlappingWithLeftPrimerMinusStrand_size, int HspOverlappingWithRightPrimerMinusStrand_size, TSeqPos hit_index)
analyze the case where the left primer itself can serve as both left and right primer
CRef< CObjectManager > m_FeatureOM
void x_SortPrimerHit(vector< vector< SPrimerHitInfo > > &primer_hit_list_list)
~COligoSpecificityCheck()
CRange< TSeqPos > x_GetSlaveRangeGivenMasterRange(const CSeq_align &input_align, CRange< TSeqPos > &master_range, int index)
map< SAlnCache, SPrimerMatch, sort_order > m_Cache
cache coordinate-alignment mapping
CRef< CSeq_align > x_FillGlobalAlignInfo(const CRange< TSeqPos > &desired_align_range, SHspInfo *input_hsp_info, TSeqPos &num_total_mismatch, TSeqPos &num_3end_mismatch, TSeqPos &num_total_gap, TSeqPos &num_3end_gap, bool is_left_primer, TSeqPos index, ENa_strand hit_strand)
return alignment for the full primer window.
SHspIndexInfo * m_HspOverlappingWithRightPrimer
CRef< CDense_seg > x_NW_alignment(const CRange< TSeqPos > &desired_align_range, const CSeq_align &input_hit, TSeqPos &num_total_mismatch, TSeqPos &num_3end_mismatch, TSeqPos &num_total_gap, TSeqPos &num_3end_gap, bool is_left_primer, int &max_num_continuous_match, int &align_length, TSeqPos master_local_start, TSeqPos master_local_stop, ENa_strand hit_strand, bool &nw_align_modified)
vector< vector< SPrimerHitInfo > > m_AllowedHit
the hit that user choose to ingnore for specificity
int m_NumNonSpecificTarget
the number non-specific targets to return
void x_SortHit(CSeq_align_set &input)
sort the hit
CRange< TSeqPos > m_TemplateRange
range on the input template
CConstRef< CSeq_id > m_Id
seqid
COligoSpecificityTemplate(const CBioseq_Handle &template_handle, CSeq_align_set &input_seqalign, CScope &scope, int word_size, TSeqPos allowed_total_mismatch=1, TSeqPos allowed_3end_mismatch=1, TSeqPos max_mismatch=7)
constructor @template_handle: bioseq represents the pcr template
TSeqPos m_MismatchRegionLength3End
the length or region at the 3' end for checking mismatches
vector< TSeqPos > m_AllowedSeqidIndex
user specified hits that can be disregarded for specificity checking
CSeq_id::EAccessionInfo m_TemplateType
const CBioseq_Handle & m_TemplateHandle
bioseq handle for input bioseq
vector< TSortedHsp > m_SortHit
the processed sorted hit list corresponding to the input seqalign
const list< CRef< CSeq_id > > * m_Allowed_Splice_Variants
TSeqPos m_TargetSizeMax
the requested target max length
int m_WordSize
minimal continuous match required
int m_MaxTargetPerSequence
~COligoSpecificityTemplate()
const list< CRef< CSeq_loc > > * m_AllowedSeqloc
vector< CIntervalTree * > m_RangeTreeListMinusStrand
vector< CIntervalTree * > m_RangeTreeListPlusStrand
void Reverse(void)
Reverse the segments' orientation NOTE: currently *only* works for dense-seg.
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
TSeqPos GetSeqStop(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
TSeqPos GetSeqStart(TDim row) const
@ eBackwards
Towards lower seq coord (to the left if plus strand, right if minus)
container_type::iterator iterator
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
iterator Insert(const interval_type &interval, const mapped_type &value)
reference GetValue(void) const
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
EAccessionInfo
For IdentifyAccession (below)
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
static int WorstRank(const CRef< CSeq_id > &id)
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeq_loc_Mapper_Base & KeepNonmappingRanges(void)
Keep ranges which can not be mapped.
@ eSeqMap_Down
map from a segmented bioseq to segments
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
TSeqPos GetBioseqLength(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
position_type GetLength(void) const
bool IntersectingWith(const TThisType &r) const
TThisType IntersectionWith(const TThisType &r) const
static TThisType GetEmpty(void)
TThisType & Set(position_type from, position_type to)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
NCBI_NS_STD::string::size_type SIZE_TYPE
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
TId GetId(void) const
Get the variant data.
const TDenseg & GetDenseg(void) const
Get the variant data.
Tdata & Set(void)
Assign a value to data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
void SetType(TType value)
Assign a value to Type data member.
vector< CRef< CScore > > TScores
list< CRef< CSeq_align > > Tdata
TIds & SetIds(void)
Assign a value to Ids data member.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
@ eType_partial
mapping pieces together
ENa_strand
strand of nucleic acid
E_Choice Which(void) const
Which variant is currently selected.
const TId & GetId(void) const
Get the Id member data.
unsigned int
A callback function used to compare two keys in a database.
const struct ncbi::grid::netcache::search::fields::SIZE size
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
static bool SortIndexListByScoreDescending(const COligoSpecificityCheck::SHspIndexInfo &info1, const COligoSpecificityCheck::SHspIndexInfo &info2)
static bool SortPrimerHitByMismatchAscending(const vector< COligoSpecificityCheck::SPrimerHitInfo * > *info1, const vector< COligoSpecificityCheck::SPrimerHitInfo * > *info2)
static bool SortHitByTopHspScores(TSortedHsp const &info1, TSortedHsp const &info2)
static bool SeqLocAllowed(const list< CRef< CSeq_loc > > &allowed_seq, const CSeq_id &hit_id, const CRange< TSeqPos > &hit_range, CScope &scope)
static bool SortPrimerHitInGroupByMismatchAscending(const COligoSpecificityCheck::SPrimerHitInfo *info1, const COligoSpecificityCheck::SPrimerHitInfo *info2)
static const double k_Min_Percent_Identity
static const int k_MaxReliableGapNum
CRef< CDense_seg > s_DoNWalign(const CRange< TSeqPos > &desired_align_range, string &master_seq, const CAlnVec &av, TSeqPos hit_full_start, TSeqPos hit_full_stop, ENa_strand hit_strand, string &xcript, bool &nw_align_modified)
static void s_CountGaps(const string &xcript, TSeqPos &master_start_gap, TSeqPos &master_end_gap, TSeqPos &slave_start_gap, TSeqPos &slave_end_gap, char master_gap_char, char slave_gap_char)
static bool SortHspByMasterStartAscending(const SHspInfo *info1, const SHspInfo *info2)
static const double k_MinOverlapLenFactor
primer specificity checking tool
pair< vector< SHspInfo * >, vector< SHspInfo * > > TSortedHsp
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
key for coordinate-alignment cache
primer hit to the blast dababase
value for coordinate-alignment cache
TSeqPos num_3end_mismatch
total mismatchs
CRef< CSeq_align > aln
3' end gaps
TSeqPos num_3end_gap
total gaps
TSeqPos num_total_mismatch
TSeqPos num_total_gap
3' end mismatches
const CSeq_align * align_index
CRange< TSeqPos > master_range
CRange< TSeqPos > slave_range
CConstRef< CSeq_align > hsp