NCBI C++ ToolKit
vectorscreen.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: vectorscreen.cpp 102916 2024-08-06 15:09:32Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jian Ye
27  *
28  * @file vectorscreen.cpp
29  * vector screen graphic (using HTML table)
30  *
31  */
32 #include <ncbi_pch.hpp>
34 #include <util/range.hpp>
35 #include <serial/iterator.hpp>
36 #include <objects/seq/Bioseq.hpp>
47 #include <html/html.hpp>
48 
51 BEGIN_SCOPE(align_format)
52 
53 
55 
56 //Note: the score arrays below are indexed in the order strong, moderate, weak
//Minimum alignment score for an internal (non-terminal) hit to be classified
//as strong / moderate / weak; compared against in x_GetMatchType.
//NOTE(review): listing line 57 is absent here; kTerminalMatchScore, which the
//code below also indexes, is presumably defined on it -- confirm.
58 static const int kInternalMatchScore[kNumSeqalignMatchTypes] = {30, 25, 23};
59 // + 2: two image entries beyond the strong/moderate/weak ones
//(presumably yellow.gif for the suspect type and white.gif for no match --
// confirm against the MatchType enum order)
60 static const char* kGif[kNumSeqalignMatchTypes + 2] =
61  {"red.gif", "purple.gif", "green.gif", "yellow.gif", "white.gif"};
//Short labels: printed in the legend and appended to the help URL as "#label"
62 static const char* kGifLegend[] =
63  {"Strong", "Moderate", "Weak", "Suspect"};
//Link text used in the "Segments matching vector" footnote
64 static const char* kMatchUrlLegend[] =
65  {"Strong match", "Moderate match", "Weak match", "Suspect origin"};
66 
//A gap between matched ranges longer than this is typed eNoMatch,
//otherwise eSuspect (see x_BuildNonOverlappingRange)
67 static const TSeqPos kSupectLength = 50;
68 
//Pixel width that the full master sequence is scaled to in the HTML bar
69 static const TSeqPos kMasterPixel = 600;
//Height passed to each bar image; also the width and height of legend images
70 static const TSeqPos kBarHeight = 20;
//Number of position labels printed on the scale row
71 static const TSeqPos kNumScales = 5;
72 
73 
74 ///Returns the legend label describing the strength of a match type.
///@param match_type: the match category to describe
///@return the kGifLegend entry at index match_type, or NcbiEmptyString when
/// match_type is CVecscreen::eNoMatch
/// NOTE(review): the declarator line between "string" and "{" (listing line 76)
/// is absent from this listing -- restore it from the original file.
75 string
77 {
78  if (match_type == CVecscreen::eNoMatch)
79  return NcbiEmptyString;
// match_type is used directly as the index into kGifLegend
80  return string(kGifLegend[match_type]);
81 }
82 
83 
84 ///group hsp with same id togather
85 ///@param seqalign: the seqalign
86 ///
87 static void s_RestoreHspPos(CSeq_align_set& seqalign){
88  CSeq_align_set::Tdata::iterator next_iter;
89  CSeq_align_set::Tdata::iterator cur_iter = seqalign.Set().begin();
90 
91  while(cur_iter != seqalign.Set().end()){
92  bool is_first = true;
93  next_iter = cur_iter;
94  next_iter ++;
95  const CSeq_id& cur_id = (*cur_iter)->GetSeq_id(1);
96  while(next_iter != seqalign.Set().end()){
97  //only care the ones starting from the next next one
98  //because we don't need to do anything for the next one
99  if(is_first){
100  next_iter ++;
101  is_first = false;
102  }
103  if(next_iter != seqalign.Set().end()){
104  const CSeq_id& next_id = (*next_iter)->GetSeq_id(1);
105  if (cur_id.Match(next_id)){
106  CSeq_align_set::Tdata::iterator temp_iter = next_iter;
107  next_iter ++;
108  //insert after cur_iter
109  cur_iter ++;
110  seqalign.Set().insert(cur_iter, *temp_iter);
111  //move back to the newly inserted one
112  cur_iter --;
113  seqalign.Set().erase(temp_iter);
114  } else {
115  next_iter ++;
116  }
117  }
118  }
119  cur_iter ++;
120  }
121 
122 }
123 
124 ///Sort predicate: ascending start position of row 0 (GetSeqRange(0).GetFrom()).
///When both alignments start at the same position, the one with the lower
///score is ordered first (returns score2 > score1).
125 ///@param info1: the first seqalign
126 ///@param info2: the second seqalign
///@return true if info1 should be placed before info2
/// NOTE(review): the first signature line (listing line 128) is absent from
/// this listing -- restore it from the original file.
127 ///
129  CRef<CSeq_align> const& info2)
130 {
131  int score1, score2, sum_n, num_ident;
132  double bits, evalue;
133  list<TGi> use_this_gi;
134  TSeqPos from1, from2;
135 
// only the scores are used; the remaining outputs are required by the call
136  CAlignFormatUtil::GetAlnScores(*info1, score1, bits, evalue,
137  sum_n, num_ident, use_this_gi);
138  CAlignFormatUtil::GetAlnScores(*info2, score2, bits, evalue,
139  sum_n, num_ident, use_this_gi);
140  from1 = info1->GetSeqRange(0).GetFrom();
141  from2 = info2->GetSeqRange(0).GetFrom();
142  if(from1 == from2) {
// tie on start position: lower score first
143  return score2 > score1;
144  } else {
145  return from1 < from2;
146  }
147 }
148 
149 
150 ///Sort on sealign score
151 ///@param info1: the first seqalign
152 ///@param info2: the second seqalign
153 ///
154 static bool AlnScoreDescendingSort(CRef<CSeq_align> const& info1,
155  CRef<CSeq_align> const& info2)
156 {
157  int score1, score2, sum_n, num_ident;
158  double bits, evalue;
159  list<TGi> use_this_gi;
160 
161  CAlignFormatUtil::GetAlnScores(*info1, score1, bits, evalue,
162  sum_n, num_ident, use_this_gi);
163  CAlignFormatUtil::GetAlnScores(*info2, score2, bits, evalue,
164  sum_n, num_ident, use_this_gi);
165 
166  return (score1 > score2);
167 }
168 
169 
170 void CVecscreen::x_GetEdgeRanges(const objects::CSeq_align& seqalign,
171  TSeqPos master_len,
172  TSeqPos& start_edge,
173  TSeqPos& end_edge)
174 {
175  int score, sum_n, num_ident;
176  TSeqPos aln_start, aln_stop;
177  double bits, evalue;
178  list<TGi> use_this_gi;
179 
180  aln_start = min(seqalign.GetSeqRange(0).GetTo(),
181  seqalign.GetSeqRange(0).GetFrom());
182  aln_stop = max(seqalign.GetSeqRange(0).GetTo(),
183  seqalign.GetSeqRange(0).GetFrom());
184  CAlignFormatUtil::GetAlnScores(seqalign, score, bits, evalue,
185  sum_n, num_ident,use_this_gi);
186  if(aln_start < m_TerminalFlexibility ){
187  if (aln_stop > start_edge) {
188  if(score >= kTerminalMatchScore[eStrong]){
189  start_edge=aln_stop;
190  } else if (score >= kTerminalMatchScore[eModerate]){
191  start_edge=aln_stop;
192  } else if (score >= kTerminalMatchScore[eWeak] && m_ShowWeakMatch){
193  start_edge=aln_stop;
194  }
195  }
196  } else if (aln_stop > master_len - 1 - m_TerminalFlexibility){
197  if (aln_start < end_edge) {
198  if(score >= kTerminalMatchScore[eStrong]){
199  end_edge = aln_start;
200  } else if (score >= kTerminalMatchScore[eModerate]){
201  end_edge = aln_start;
202  } else if (score >= kTerminalMatchScore[eWeak] && m_ShowWeakMatch){
203  end_edge = aln_start;
204  }
205 
206  }
207  }
208 }
209 
// Classifies one alignment as eStrong, eModerate, eWeak or eNoMatch from its
// score and from where its row-0 range lies on the master sequence:
//  - within m_TerminalFlexibility of either end: kTerminalMatchScore applies;
//  - abutting or inside the terminal edges (start_edge / end_edge):
//    kTerminalMatchScore applies as well;
//  - anywhere else: kInternalMatchScore applies.
// eWeak is returned only when m_ShowWeakMatch is set; eNoMatch when no
// threshold is reached.
// NOTE(review): the first signature line (listing line 210) is absent from
// this listing -- restore it from the original file.
211  TSeqPos master_len,
212  TSeqPos start_edge,
213  TSeqPos end_edge)
214 {
215  int score, sum_n, num_ident;
216  TSeqPos aln_start, aln_stop;
217  double bits, evalue;
218  list<TGi> use_this_gi;
219 
// normalise so that aln_start <= aln_stop whatever order From/To come in
220  aln_start = min(seqalign.GetSeqRange(0).GetTo(),
221  seqalign.GetSeqRange(0).GetFrom());
222  aln_stop = max(seqalign.GetSeqRange(0).GetTo(),
223  seqalign.GetSeqRange(0).GetFrom());
224  CAlignFormatUtil::GetAlnScores(seqalign, score, bits, evalue,
225  sum_n, num_ident,use_this_gi);
226 
// NOTE(review): TSeqPos is unsigned, so master_len - 1 - m_TerminalFlexibility
// wraps around when master_len <= m_TerminalFlexibility -- confirm that such
// short sequences cannot reach this comparison.
227  if(aln_start < m_TerminalFlexibility ||
228  aln_stop > master_len - 1 - m_TerminalFlexibility){
229  //terminal match
230  if(score >= kTerminalMatchScore[eStrong]){
231  return eStrong;
232  } else if (score >= kTerminalMatchScore[eModerate]){
233  return eModerate;
234  } else if (score >= kTerminalMatchScore[eWeak] && m_ShowWeakMatch){
235  return eWeak;
236  }
// the int casts keep the subtraction signed when aln_start < start_edge
237  } else if ((int)aln_start - (int)start_edge <= 1 ||
238  (int)aln_stop + 1 >= (int) end_edge){
239  //terminal match if abutting or within another terminal hit
240  if(score >= kTerminalMatchScore[eStrong]){
241  return eStrong;
242  } else if (score >= kTerminalMatchScore[eModerate]){
243  return eModerate;
244  } else if (score >= kTerminalMatchScore[eWeak] && m_ShowWeakMatch){
245  return eWeak;
246  }
247  } else {
248  //internal match
249  if(score >= kInternalMatchScore[eStrong]){
250  return eStrong;
251  } else if (score >= kInternalMatchScore[eModerate]){
252  return eModerate;
253  } else if (score >= kInternalMatchScore[eWeak] && m_ShowWeakMatch){
254  return eWeak;
255  }
256  }
// score below every applicable threshold (or weak matches are disabled)
257  return eNoMatch;
258 }
259 
// Removes from seqalign_lower every alignment whose row-0 range lies inside a
// (merged) range of seqalign_higher, recording each removal in
// x_DropToKeepMap and x_IdToDropIdMap.  A lower alignment that only
// intersects a higher range is kept, but that higher range is widened to the
// combination of the two, so later lower alignments are tested against the
// widened range.
// Side effect: seqalign_lower is re-sorted by descending score.
// NOTE(review): the first signature line (listing line 260) is absent from
// this listing -- restore it from the original file.
261  CSeq_align_set& seqalign_lower)
262 {
263  //get merged range for higher seqalign
264  map<TSeqRange, int> range_to_id; // id is whichever came first, not perfect
265  list<CRange<TSeqPos> > range_list;
266  CRange<TSeqPos> prev_range, cur_range;
267  int j = 0;
268  ITERATE(CSeq_align_set::Tdata, iter, seqalign_higher.Get()){
269  cur_range.Set((*iter)->GetSeqRange(0).GetFrom(),
270  (*iter)->GetSeqRange(0).GetTo());
271  //merge if previous range intersect with current range
272  if(j > 0){
273  prev_range = range_list.back();
274  if(prev_range.IntersectingWith(cur_range)){
275  range_list.back() =
276  range_list.back().CombinationWith(cur_range);
// the widened range inherits the id stored for the range it grew from
277  range_to_id[range_list.back()] = range_to_id[prev_range];
278  } else {
279  range_to_id[cur_range]=x_GetId(**iter);
280  range_list.push_back(cur_range);
281  }
282  } else {
283  range_to_id[cur_range]=x_GetId(**iter);
284  range_list.push_back(cur_range); //store current range
285  }
286  j ++;
287  }
288 
289  //merge lower rank seqalign if it's contained in higher rank seqalign
290  //or if it's contained in the new range formed by higher and lower
291  //seqalign with a higher score
292  seqalign_lower.Set().sort(AlnScoreDescendingSort);
293 
294  NON_CONST_ITERATE(list<CRange<TSeqPos> >, iter_higher, range_list){
295  CSeq_align_set::Tdata::iterator iter_lower =
296  seqalign_lower.Set().begin();
297  while(iter_lower != seqalign_lower.Set().end()){
298  if((*iter_lower)->GetSeqRange(0).GetFrom() >=
299  iter_higher->GetFrom() &&
300  (*iter_lower)->GetSeqRange(0).GetTo() <=
301  iter_higher->GetTo()){
302 
// fully contained: remember which kept id this dropped alignment maps to
303  x_DropToKeepMap[x_GetId(**iter_lower)] = range_to_id[*iter_higher];
304  x_IdToDropIdMap[range_to_id[*iter_higher]].push_back(x_GetId(**iter_lower));
305 
// advance before erasing so iter_lower stays valid
306  CSeq_align_set::Tdata::iterator temp_iter = iter_lower;
307  iter_lower ++;
308  seqalign_lower.Set().erase(temp_iter);
309  } else if ((*iter_lower)->GetSeqRange(0).
310  IntersectingWith(*iter_higher)){
// partial overlap: keep the lower alignment, widen the higher range and
// carry its id over to the widened range
311  int id= range_to_id[*iter_higher];
312  CRange<TSeqPos> lower_range = (*iter_lower)->GetSeqRange(0);
313  *iter_higher =
314  iter_higher->CombinationWith(lower_range);
315  iter_lower ++;
316  range_to_id[*iter_higher]=id;
317  }else {
318  iter_lower ++;
319  }
320  }
321  }
322 
323 }
324 
/// Constructor: stores the input alignment set and the display defaults.
/// @param seqalign: the alignment set to process (held in m_SeqalignSetRef)
/// @param master_length: length of the master sequence
/// @param terminal_flexibility: distance from either sequence end within
///  which a hit is treated as terminal
/// NOTE(review): listing line 329 is absent between the m_MasterLen and
/// m_HelpDocsUrl assignments; m_FinalSeqalign is dereferenced in
/// x_MergeSeqalign, so it is presumably created there -- confirm against the
/// original file.
325 CVecscreen::CVecscreen(const CSeq_align_set& seqalign, TSeqPos master_length, TSeqPos terminal_flexibility){
326  m_SeqalignSetRef = &seqalign;
// default image directory: the current directory
327  m_ImagePath = "./";
328  m_MasterLen = master_length;
330  m_HelpDocsUrl = "//www.ncbi.nlm.nih.gov/tools/vecscreen/about/";
// weak matches are reported by default
331  m_ShowWeakMatch = true;
332  m_TerminalFlexibility = terminal_flexibility;
333 }
334 
// Destructor body: frees every AlnInfo still held in m_AlnInfoList.
// NOTE(review): the signature line (listing line 335) is absent from this
// listing -- restore it from the original file.
336 {
337  ITERATE(list<AlnInfo*>, iter, m_AlnInfoList){
338  delete (*iter);
339  }
340 }
341 
342 
// Drops every alignment whose row-0 range is contained in the (growing) range
// of an earlier alignment in the list, recording each drop in
// x_DropToKeepMap and x_IdToDropIdMap.  The list is expected to be sorted by
// descending score on entry, so earlier entries are the higher-scoring ones.
// NOTE(review): the signature line (listing line 343) is absent from this
// listing -- restore it from the original file.
344 {
345  //seqalign is presorted by score already. Delete ones that are contained
346  //in seqaligns with higher scores
347  CSeq_align_set::Tdata::iterator next_iter;
348  CSeq_align_set::Tdata::iterator cur_iter = seqalign.Set().begin();
349 
350  while(cur_iter != seqalign.Set().end()){
351  next_iter = cur_iter;
352  next_iter ++;
353 
// cur_range starts as the current alignment's range and may be widened below
354  CRange<TSeqPos> cur_range = (*cur_iter)->GetSeqRange(0);
355  while(next_iter != seqalign.Set().end()){
356  CRange<TSeqPos> next_range = (*next_iter)->GetSeqRange(0);
357  if (cur_range.GetFrom() <= next_range.GetFrom() &&
358  cur_range.GetTo() >= next_range.GetTo()){
359  //if cur_range contains next_range
360  x_DropToKeepMap[x_GetId(**next_iter)] = x_GetId(**cur_iter);
361  x_IdToDropIdMap[x_GetId(**cur_iter)].push_back(x_GetId(**next_iter));
// advance before erasing so next_iter stays valid
362  CSeq_align_set::Tdata::iterator temp_iter = next_iter;
363  next_iter ++;
364  seqalign.Set().erase(temp_iter);
365  } else if (cur_range.IntersectingWith(next_range)){
// partial overlap: keep the alignment but test later ones against the
// combined range
366  cur_range =
367  cur_range.CombinationWith(next_range);
368  next_iter ++;
369  } else {
370  next_iter ++;
371  }
372  }
373  cur_iter ++;
374  }
375 }
376 
// Classifies every alignment in seqalign by match type, removes alignments
// covered by better ones, and fills m_FinalSeqalign and m_AlnInfoList.
// Steps, in order:
//  1. find the terminal edges over all alignments (x_GetEdgeRanges);
//  2. copy each alignment that has a match type into its category, tagging
//     the copy with the named scores "vs_match_type" and "vs_id";
//  3. within each category drop contained alignments
//     (x_MergeInclusiveSeqalign);
//  4. across categories drop lower-rank alignments covered by higher-rank
//     ones (x_MergeLowerRankSeqalign);
//  5. append the survivors to m_FinalSeqalign, build the non-overlapping
//     range list, then attach the dropped alignments to each range.
// NOTE(review): the signature line (listing line 377) is absent from this
// listing -- restore it from the original file.
378 {
379 
380  //different match types, no eSuspect or eNoMatch
381  //as they are not contained in seqalign
382  vector<CRef<CSeq_align_set> > catagorized_seqalign(kNumSeqalignMatchTypes);
383  for(unsigned int i = 0; i < catagorized_seqalign.size(); i ++){
384  catagorized_seqalign[i] = new CSeq_align_set;
385  }
386 
387  //find edges of terminal hits
388 
389  TSeqPos start_edge = 0, end_dege = m_MasterLen - 1 ;
390  ITERATE(CSeq_align_set::Tdata, iter, seqalign.Get()){
391  x_GetEdgeRanges(**iter, m_MasterLen, start_edge, end_dege);
392  }
393 
394  //seperate seqalign with different catagory
// ids start at 1 so that an id can index x_OrigAlignsById directly
395  int next_id=1;
396  x_OrigAlignsById.push_back(CRef<CSeq_align>()); // zero is not being used
397  ITERATE(CSeq_align_set::Tdata, iter, seqalign.Get()){
398  MatchType type = x_GetMatchType(**iter, m_MasterLen, start_edge, end_dege);
399  if(type != eNoMatch){
// work on a copy so the input alignment is left untouched
400  CRef<CSeq_align> new_align(new CSeq_align);
401  new_align->Assign(**iter);
402  if(new_align->GetSeqStrand(0) == eNa_strand_minus){
403  new_align->Reverse();
404  }
405  new_align->SetNamedScore("vs_match_type", type);
406  new_align->SetNamedScore("vs_id", next_id); next_id++;
407  x_OrigAlignsById.push_back(new_align);
408  catagorized_seqalign[type]->Set().push_back(new_align);
409  }
410  }
411 
412  for(unsigned int i = 0; i < catagorized_seqalign.size(); i ++){
413  //sort for x_MergeInclusiveSeqalign
414  catagorized_seqalign[i]->Set().sort(AlnScoreDescendingSort);
415  x_MergeInclusiveSeqalign(*(catagorized_seqalign[i]));
416  //restore alnrangesort
417  catagorized_seqalign[i]->Set().sort(AlnFromRangeAscendingSort);
418  }
419 
420 
// every higher category is merged against every lower one
421  for(int i = eStrong; i < kNumSeqalignMatchTypes - 1 ; i ++){
422  for(int j = i + 1; j < kNumSeqalignMatchTypes; j ++){
423  x_MergeLowerRankSeqalign(*(catagorized_seqalign[i]),
424  *(catagorized_seqalign[j]));
425  }
426  }
427  //set final seqalign
428  for(unsigned int i = 0; i < catagorized_seqalign.size(); i ++){
429  //restore alnrangesort
430  catagorized_seqalign[i]->Set().sort(AlnFromRangeAscendingSort);
431  ITERATE(CSeq_align_set::Tdata, iter, catagorized_seqalign[i]->Get()){
432  m_FinalSeqalign->Set().push_back(*iter);
433 
434  }
435  }
436 
437  x_BuildNonOverlappingRange(catagorized_seqalign);
438 
439 
// attach to each displayed range the alignments that were dropped in favour
// of the alignments it holds
440  NON_CONST_ITERATE(list<AlnInfo*>, aln_info_iter, m_AlnInfoList) {
441  AlnInfo::TAlignList aligns_to_add;
442  ITERATE(AlnInfo::TAlignList, aln_iter, (*aln_info_iter)->get_aligns()) {
443  set<int> drop_ids;
444  x_GetAllDropIdsForKeepId(x_GetId(**aln_iter), drop_ids);
445  ITERATE(set<int>, di, drop_ids) {
// NOTE(review): 'da' is never read after this line; it looks like a
// leftover that could be removed -- confirm.
446  const CSeq_align& da = *x_OrigAlignsById[*di];
447  aligns_to_add.push_back(x_OrigAlignsById[*di]);
448  }
449  }
450  (*aln_info_iter)->add_drops(aligns_to_add);
451  }
452 }
453 
// Writes the HTML graphic for m_AlnInfoList to 'out': a title, a row of scale
// labels, a bar made of one coloured image per range, a colour legend, and a
// footnote listing the matched ranges (1-based) grouped by match type.
// Returns without output when m_AlnInfoList is empty.
// NOTE(review): listing lines 454 (signature), 532 and 541 are absent from
// this listing -- restore them from the original file.
455 CRef<CHTML_table> tbl;
456  CRef<CHTML_img> image;
457  CHTML_tc* tc;
// pixels per residue when the master sequence spans kMasterPixel pixels
458  double pixel_factor = ((double)kMasterPixel)/m_MasterLen;
459  int column = 0;
460 
461  if(m_AlnInfoList.empty()){
462  return;
463  }
464 
465  //title
466  CRef<CHTML_b> b(new CHTML_b);
467  b->AppendPlainText("Distribution of Vector Matches on the Query Sequence");
468  b->Print(out, CNCBINode::eXHTML);
469  out << "\n\n";
470 
471  tbl = new CHTML_table;
472  tbl->SetCellSpacing(0)->SetCellPadding(0)->SetAttribute("border", "0");
// table is one label-cell wider than the bar so the last label has room
473  tbl->SetAttribute("width", kNumScales*kMasterPixel/(kNumScales - 1));
474 
475  //scale bar
476  double scale = ((double)m_MasterLen)/(kNumScales - 1);
477  for(TSeqPos i = 0; i < kNumScales; i ++){
// the first label is printed as 1 rather than 0
478  CNodeRef font(new CHTML_font(1, true, NStr::IntToString((int)(scale*i == 0?
479  1 : scale*i))));
480  tc = tbl->InsertAt(0, column, font);
481  tc->SetAttribute("align", "LEFT");
482  tc->SetAttribute("valign", "CENTER");
483  tc->SetAttribute("width", kMasterPixel/(kNumScales - 1));
484  column ++;
485  }
486  tbl->Print(out, CNCBINode::eXHTML);
487  //the actual bar
488 
489  column = 0;
490  tbl = new CHTML_table;
491  tbl->SetCellSpacing(0)->SetCellPadding(0)->SetAttribute("border", "0");
492 
493  int width_adjust = 1;
494  ITERATE(list<AlnInfo*>, iter, m_AlnInfoList){
495  double width = (*iter)->range.GetLength()*pixel_factor;
496  //rounding to int this way as round() is not portable
497  width = width + (width < 0.0 ? -0.5 : 0.5);
498  if(((int)width) > 1){
499  //no show for less than one pixel as the border already
500  //looks like one pixel
501  //width_adjust to compensate for the border width
502 
// the range's match type selects the image file
503  image = new CHTML_img(m_ImagePath + kGif[(*iter)->type],
504  (int)width - width_adjust, kBarHeight);
505  image->SetAttribute("border", 1);
506  tc = tbl->InsertAt(0, column, image);
507  tc->SetAttribute("align", "LEFT");
508  tc->SetAttribute("valign", "CENTER");
509  column ++;
510  }
511  }
512  tbl->Print(out, CNCBINode::eXHTML);
513  out << "\n\n";
514 
515  //legend
516  b = new CHTML_b;
517  b->AppendPlainText("Match to Vector: ");
518  b->Print(out, CNCBINode::eXHTML);
// one square swatch plus label for each of strong / moderate / weak
519  for(int i = 0; i < kNumSeqalignMatchTypes; i++){
520  image = new CHTML_img(m_ImagePath + kGif[i], kBarHeight, kBarHeight);
521  image->SetAttribute("border", "1");
522  image->Print(out, CNCBINode::eXHTML);
523  b = new CHTML_b;
524  b->AppendPlainText(string(" ") + kGifLegend[i] + " ");
525  b->Print(out, CNCBINode::eXHTML);
526  }
527  out << "\n";
528  //suspected origin
529  b = new CHTML_b;
530  b->AppendPlainText("Segment of suspect origin: ");
531  b->Print(out, CNCBINode::eXHTML);
// NOTE(review): listing line 532 is absent; presumably it assigns 'image' the
// suspect-origin swatch before it is printed below -- confirm.
533  image->SetAttribute("border", "1");
534  image->Print(out, CNCBINode::eXHTML);
535 
536  //footnote
537  out << "\n\n";
538  b = new CHTML_b;
539  b->AppendPlainText("Segments matching vector: ");
540  b->Print(out, CNCBINode::eXHTML);
// NOTE(review): listing line 541 is absent; 'a' used below is presumably
// declared on it -- confirm.
542 
// the + 1 makes this loop cover one type beyond strong / moderate / weak
543  for (int i = 0; i < kNumSeqalignMatchTypes + 1; i ++){
544  bool is_first = true;
545  ITERATE(list<AlnInfo*>, iter, m_AlnInfoList){
546  if((*iter)->type == i){
547  if(is_first){
// the first range of a type gets a heading that links into the help page
548  out << "\n";
549  a = new CHTML_a(m_HelpDocsUrl + "#" +
550  kGifLegend[(*iter)->type]);
551  a->SetAttribute("TARGET", "VecScreenInfo");
552  a->AppendPlainText(string(kMatchUrlLegend[(*iter)->type]) + ":");
553  a->Print(out, CNCBINode::eXHTML);
554  is_first = false;
555  } else {
556  out << ",";
557  }
// positions are printed 1-based; a one-residue range prints a single number
558  if((*iter)->range.GetFrom() == (*iter)->range.GetTo()){
559  out << " " << (*iter)->range.GetFrom() + 1;
560  } else {
561  out << " " << (*iter)->range.GetFrom() + 1 << "-"
562  << (*iter)->range.GetTo() + 1;
563  }
564 
565  }
566  }
567  }
568 
569  out << "\n\n";
570 }
571 
572 
// Runs the merge step on a local alignment set and returns m_FinalSeqalign.
// NOTE(review): listing lines 573 (signature), 575, 576, 579 and 580 are
// absent from this listing; whatever fills actual_aln_list before the merge
// is not visible here -- restore them from the original file.
574 CSeq_align_set actual_aln_list;
577 x_MergeSeqalign(actual_aln_list);
// graphic output is not produced on this path (call left disabled)
578 //x_BuildHtmlBar(out);
581 return m_FinalSeqalign;
582 }
583 
586 }
587 
// Allocates a new AlnInfo covering [from, to] with the given match type and
// alignment list, and returns it as a raw pointer; nothing in this function
// frees it.
// NOTE(review): the first signature line (listing line 588) is absent from
// this listing -- restore it from the original file.
589  const AlnInfo::TAlignList aligns){
590  AlnInfo* aln_info = new AlnInfo;
591  aln_info->range.Set(from, to);
592  aln_info->type = type;
593  aln_info->add_aligns(aligns);
594  return aln_info;
595 }
596 
// Builds m_AlnInfoList from the categorised alignments in three passes:
//  1. per match type, merge intersecting or abutting ranges into one AlnInfo;
//  2. for every higher-rank range, trim, split or remove the lower-rank ranges
//     that overlap it, so that no two ranges overlap;
//  3. append all ranges to m_AlnInfoList and fill the uncovered stretches with
//     eSuspect (length <= kSupectLength) or eNoMatch ranges.
// NOTE(review): listing lines 597 (signature), 723, 745, 758, 772 and 776 are
// absent from this listing -- restore them from the original file.
598  seqalign_vec){
599  vector< list<AlnInfo*> > aln_info_vec(seqalign_vec.size());
600  CRange<TSeqPos>* prev_range;
601 
602  //merge overlaps within the same type
603  for(unsigned int i = 0; i < seqalign_vec.size(); i ++){
604  int j = 0;
605  ITERATE(CSeq_align_set::Tdata, iter, seqalign_vec[i]->Get()){
606  AlnInfo* cur_aln_info = new AlnInfo;
607  cur_aln_info->range.Set((*iter)->GetSeqRange(0).GetFrom(),
608  (*iter)->GetSeqRange(0).GetTo());
609  cur_aln_info->type = (MatchType)i;
610  cur_aln_info->add_align(*iter);
611  //merge if previous range intersect with current range
612  if(j > 0){
613  prev_range = &(aln_info_vec[i].back()->range);
614  if(prev_range->IntersectingWith(cur_aln_info->range) ||
615  prev_range->AbuttingWith(cur_aln_info->range)){
616  aln_info_vec[i].back()->range =
617  aln_info_vec[i].back()->range.CombinationWith(cur_aln_info->range);
// the temporary is not needed once its range is folded into the previous one
618  delete cur_aln_info;
619  aln_info_vec[i].back()->add_align(*iter);
620  } else {
621  aln_info_vec[i].push_back(cur_aln_info);
622  }
623  } else {
624  aln_info_vec[i].push_back(cur_aln_info); //store current range
625  }
626  j ++;
627  }
628  }
629 
630  //merge overlapping range of lower ranks to higher rank range
631  for(unsigned int i = 0; i < aln_info_vec.size(); i ++){
632  ITERATE(list<AlnInfo*>, iter_higher, aln_info_vec[i]){
633  for(unsigned int j = i + 1; j < aln_info_vec.size(); j ++){
634  list<AlnInfo*>::iterator iter_temp;
635  list<AlnInfo*>::iterator iter_lower = aln_info_vec[j].begin();
636  while(iter_lower != aln_info_vec[j].end()){
637  CRange<TSeqPos> higher_range, lower_range;
638  higher_range = (*iter_higher)->range;
639  lower_range = (*iter_lower)->range;
640  if((*iter_higher)->range.IntersectingWith((*iter_lower)->range)){
641  //overlaps. Need to handle
642  if((*iter_higher)->range.GetFrom() <=
643  (*iter_lower)->range.GetFrom()){
644  //higher from comes first
645  if((*iter_higher)->range.GetTo() >=
646  (*iter_lower)->range.GetTo()){
647  //higher include lower. delete lower.
648  (*iter_higher)->add_aligns((*iter_lower)->get_aligns());
649  iter_temp = iter_lower;
650  iter_lower ++;
// NOTE(review): only the list node is erased; the AlnInfo it points to is not
// deleted here and is no longer in any list the destructor walks -- looks
// like a leak, confirm.
651  aln_info_vec[j].erase(iter_temp);
652  } else {
653  //partially overlaps
654  //reduce the first part of the lower one
655  (*iter_lower)->range.
656  Set((*iter_higher)->range.GetTo() + 1,
657  (*iter_lower)->range.GetTo());
658  iter_lower ++;
659  }
660  } else {
661  //lower from comes first
662  if((*iter_higher)->range.GetTo() <=
663  (*iter_lower)->range.GetTo()){
664  //lower includes higher. need to break up lower
665  //to 3 parts and delete the middle one(included
666  //in higher one)
667 
// leading piece: from the lower start up to just before the higher start
668  aln_info_vec[j].
669  insert(iter_lower,
670  x_GetAlnInfo((*iter_lower)->range.
671  GetFrom(),
672  (*iter_higher)->range.
673  GetFrom() - 1 ,
674  (MatchType)j,
675  (*iter_lower)->get_aligns() ));
676 
677  if ((*iter_higher)->range.GetTo() <
678  (*iter_lower)->range.GetTo()) {
679  //insert another piece only if lower has extra piece
680  aln_info_vec[j].
681  insert(iter_lower,
682  x_GetAlnInfo((*iter_higher)->range.
683  GetTo() + 1,
684  (*iter_lower)->range.GetTo() ,
685  (MatchType)j,
686  (*iter_lower)->get_aligns() ));
687  }
688 
689  iter_temp = iter_lower;
690  iter_lower ++;
// NOTE(review): as above, the original lower AlnInfo is erased from the list
// without being deleted -- confirm whether it is freed elsewhere.
691  aln_info_vec[j].erase(iter_temp);
692 
693  } else {
694  //partially overlap
695  //reduce latter part of lower
696 
697  (*iter_lower)->range.
698  Set((*iter_lower)->range.GetFrom(),
699  (*iter_higher)->range.GetFrom() - 1);
700  iter_lower ++;
701  }
702  }
703  } else {
704  //no overlaps, do nothing
705  if ((*iter_lower)->range.GetFrom() > (*iter_higher)->range.GetFrom()) {
706  //no comparing again as it's already sorted
707  break;
708  }
709  iter_lower ++;
710  }
711  }
712  }
713  }
714  }
715 
716  //Set final list
717  for(unsigned int i = 0; i < aln_info_vec.size(); i++){
718  ITERATE(list<AlnInfo*>, iter, aln_info_vec[i]){
719  m_AlnInfoList.push_back(*iter);
720  }
721 
722  }
// NOTE(review): listing line 723 is absent; the gap filling below compares
// neighbouring entries, so the list is presumably sorted by range start there
// -- confirm.
724 
725 
726  //adding range for suspected match and no match
727  list<AlnInfo*>::iterator prev_iter = m_AlnInfoList.end();
728  list<AlnInfo*>::iterator cur_iter = m_AlnInfoList.begin();
729  list<AlnInfo*>::iterator temp_iter;
730  int count = 0;
731 
732  while(cur_iter != m_AlnInfoList.end()){
733  if(count > 0){
734  CRange<TSeqPos> prev_range, cur_range;
735  prev_range = (*prev_iter)->range;
736  cur_range = (*cur_iter)->range;
// diff >= 2 means at least one uncovered position lies between the two ranges
737  int diff = cur_range.GetFrom() - prev_range.GetTo();
738  if(diff >= 2){
739  //no overlaps, insert the range in between
740 
// the gap length decides the type: longer than kSupectLength is eNoMatch
741  MatchType type = ((*cur_iter)->range.GetFrom() - 1) -
742  ((*prev_iter)->range.GetTo() + 1) + 1 > kSupectLength ?
743  eNoMatch : eSuspect;
744 
746  insert(cur_iter,
747  x_GetAlnInfo(prev_range.GetTo() + 1,
748  cur_range.GetFrom() - 1,
749  type));
750  }
751 
752  } else {
753  if((*cur_iter)->range.GetFrom() > 0){
754  //insert the range infront of first align range
755  MatchType type = ((*cur_iter)->range.GetFrom() - 1) + 1 >
756  kSupectLength ?
757  eNoMatch : eSuspect;
759  insert(cur_iter,
760  x_GetAlnInfo(0, (*cur_iter)->range.GetFrom() - 1,
761  type));
762  }
763  }
764  prev_iter = cur_iter;
765  cur_iter ++;
766  count ++;
767  }
768 
769  //add the last possible no match range
770  if(prev_iter != m_AlnInfoList.end()){
771  if(m_MasterLen -1 > (*prev_iter)->range.GetTo()){
773  ((*prev_iter)->range.GetTo() + 1) +1 >
774  kSupectLength ?
775  eNoMatch : eSuspect;
777  push_back(x_GetAlnInfo((*prev_iter)->range.GetTo() + 1,
778  m_MasterLen - 1,
779  type));
780  }
781  }
782 }
783 
784 END_SCOPE(align_format)
User-defined methods of the data storage class.
#define static
User-defined methods of the data storage class.
static void GetAlnScores(const objects::CSeq_align &aln, int &score, double &bits, double &evalue, int &sum_n, int &num_ident, list< TGi > &use_this_gi)
Extract score info from blast alignment.
static void ExtractSeqalignSetFromDiscSegs(objects::CSeq_align_set &target, const objects::CSeq_align_set &source)
If a Seq-align-set contains Seq-aligns with discontinuous type segments, extract the underlying Seq-a...
void Reverse(void)
Reverse the segments' orientation NOTE: currently *only* works for dense-seg.
Definition: Seq_align.cpp:685
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
Definition: Seq_align.cpp:153
void SetNamedScore(const string &id, int score)
Definition: Seq_align.cpp:636
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
void x_MergeSeqalign(objects::CSeq_align_set &seqalign)
merge overlapping seqalign
MatchType
vector match defines
TIdToDropIdMap x_IdToDropIdMap
AlnInfo * x_GetAlnInfo(TSeqPos from, TSeqPos to, MatchType type, const AlnInfo::TAlignList aligns=AlnInfo::TAlignList())
get align info
CVecscreen(const objects::CSeq_align_set &seqalign, TSeqPos master_length, TSeqPos terminal_flexibility=25)
Constructors.
void x_MergeInclusiveSeqalign(objects::CSeq_align_set &seqalign)
merge a seqalign if its range is in another seqalign
TSeqPos m_TerminalFlexibility
void x_BuildHtmlBar(CNcbiOstream &out)
Output the graphic.
static string GetStrengthString(MatchType match_type)
Returns a string concerning the strength of the match for a given enum value.
void x_GetEdgeRanges(const objects::CSeq_align &seqalign, TSeqPos master_len, TSeqPos &start_edge, TSeqPos &end_edge)
list< AlnInfo * > m_AlnInfoList
internal match list
CConstRef< objects::CSeq_align_set > m_SeqalignSetRef
the current seqalign
int x_GetId(const objects::CSeq_align &a)
void x_MergeLowerRankSeqalign(objects::CSeq_align_set &seqalign_higher, objects::CSeq_align_set &seqalign_lower)
merge a seqalign if its range is in another higher ranked seqalign
~CVecscreen()
Destructor.
string m_ImagePath
gif image file path
string m_HelpDocsUrl
help url
static bool FromRangeAscendingSort(AlnInfo *const &info1, AlnInfo *const &info2)
Sort on range from.
MatchType x_GetMatchType(const objects::CSeq_align &seqalign, TSeqPos master_len, TSeqPos start_edge, TSeqPos end_edge)
Get match type.
void VecscreenPrint(CNcbiOstream &out)
show alignment graphic view
TSeqPos m_MasterLen
master seq length
vector< CRef< objects::CSeq_align > > x_OrigAlignsById
bool m_ShowWeakMatch
Show weak match?
TDropToKeepMap x_DropToKeepMap
void x_BuildNonOverlappingRange(vector< CRef< objects::CSeq_align_set > > seqalign_vec)
Build non overlapping internal match list.
void x_GetAllDropIdsForKeepId(int keep_id, set< int > &drop_ids)
CRef< objects::CSeq_align_set > ProcessSeqAlign(void)
Process alignment to show.
CRef< objects::CSeq_align_set > m_FinalSeqalign
the processed seqalign
Definition: map.hpp:338
std::ofstream out("events_result.xml")
main entry point for tests
static int type
Definition: getdata.c:31
static const char * column
Definition: stats.c:23
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
string
Definition: cgiapp.hpp:690
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
void SetAttribute(const string &name, const string &value)
@ eXHTML
Definition: node.hpp:111
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1065
TThisType CombinationWith(const TThisType &r) const
Definition: range.hpp:358
bool AbuttingWith(const TThisType &r) const
Definition: range.hpp:336
bool IntersectingWith(const TThisType &r) const
Definition: range.hpp:331
TThisType & Set(position_type from, position_type to)
Definition: range.hpp:188
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
#define NcbiEmptyString
Definition: ncbistr.hpp:122
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
USING_SCOPE(objects)
HTML classes.
int i
const TYPE & Get(const CNamedParameterList *param)
unsigned int a
Definition: ncbi_localip.c:102
T max(T x_, T y_)
T min(T x_, T y_)
static void s_RestoreHspPos(CSeq_align_set &seqalign)
group hsp with same id together
static const TSeqPos kSupectLength
static bool AlnScoreDescendingSort(CRef< CSeq_align > const &info1, CRef< CSeq_align > const &info2)
Sort on seqalign score.
static const int kTerminalMatchScore[kNumSeqalignMatchTypes]
static const char * kGif[kNumSeqalignMatchTypes+2]
static const char * kMatchUrlLegend[]
static bool AlnFromRangeAscendingSort(CRef< CSeq_align > const &info1, CRef< CSeq_align > const &info2)
Sort on seqalign range from.
static const char * kGifLegend[]
static const TSeqPos kMasterPixel
static const TSeqPos kNumScales
static const int kNumSeqalignMatchTypes
static const TSeqPos kBarHeight
static const int kInternalMatchScore[kNumSeqalignMatchTypes]
#define count
void add_align(CRef< objects::CSeq_align > a)
void add_aligns(const TAlignList &al)
list< CRef< objects::CSeq_align > > TAlignList
Definition: type.c:6
#define const
Definition: zconf.h:232
Modified on Fri Sep 20 14:58:15 2024 by modify_doxy.py rev. 669887