NCBI C++ ToolKit
vectorscreen.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: vectorscreen.cpp 98214 2022-10-12 15:36:06Z boukn $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jian Ye
27  *
28  * @file vectorscreen.cpp
29  * vector screen graphic (using HTML table)
30  *
31  */
32 #include <ncbi_pch.hpp>
34 #include <util/range.hpp>
35 #include <serial/iterator.hpp>
36 #include <objects/seq/Bioseq.hpp>
47 #include <html/html.hpp>
48 
51 BEGIN_SCOPE(align_format)
52 
53 
55 
56 //Note these arrays are in the order strong, moderate, weak
// Minimum score for an internal (non-terminal) alignment to be classified as
// strong, moderate or weak; compared with ">=" in x_GetMatchType.
58 static const int kInternalMatchScore[kNumSeqalignMatchTypes] = {30, 25, 23};
59 // + 2: the image table is indexed by match type and carries two entries
// beyond the seqalign match types (presumably suspect and no match, in that
// order -- confirm against the MatchType enum).
60 static const string kGif[kNumSeqalignMatchTypes + 2] =
61  {"red.gif", "purple.gif", "green.gif", "yellow.gif", "white.gif"};
// Short legend text per match type; also used as the URL fragment appended to
// the help URL in the HTML footnote.
62 static const string kGifLegend[] =
63  {"Strong", "Moderate", "Weak", "Suspect"};
// Link text per match type used in the HTML footnote.
64 static const string kMatchUrlLegend[] =
65  {"Strong match", "Moderate match", "Weak match", "Suspect origin"};
66 
// An uncovered gap longer than this many bases is typed eNoMatch, otherwise
// eSuspect. (Identifier is misspelled; kept because other code refers to it.)
67 static const TSeqPos kSupectLength = 50;
68 
// Width in pixels that the full master sequence is scaled to in the bar.
69 static const TSeqPos kMasterPixel = 600;
// Height in pixels of the bar images (also the side of the legend squares).
70 static const TSeqPos kBarHeight = 20;
// Number of position labels printed on the scale line above the bar.
71 static const TSeqPos kNumScales = 5;
72 
73 
74 ///Returns a string concerning the strength of the match for a given enum value
75 const string&
77 {
78  if (match_type == CVecscreen::eNoMatch)
79  return NcbiEmptyString;
80 
81  return kGifLegend[match_type];
82 }
83 
84 
85 
86 ///group hsp with same id togather
87 ///@param seqalign: the seqalign
88 ///
89 static void s_RestoreHspPos(CSeq_align_set& seqalign){
90  CSeq_align_set::Tdata::iterator next_iter;
91  CSeq_align_set::Tdata::iterator cur_iter = seqalign.Set().begin();
92 
93  while(cur_iter != seqalign.Set().end()){
94  bool is_first = true;
95  next_iter = cur_iter;
96  next_iter ++;
97  const CSeq_id& cur_id = (*cur_iter)->GetSeq_id(1);
98  while(next_iter != seqalign.Set().end()){
99  //only care the ones starting from the next next one
100  //because we don't need to do anything for the next one
101  if(is_first){
102  next_iter ++;
103  is_first = false;
104  }
105  if(next_iter != seqalign.Set().end()){
106  const CSeq_id& next_id = (*next_iter)->GetSeq_id(1);
107  if (cur_id.Match(next_id)){
108  CSeq_align_set::Tdata::iterator temp_iter = next_iter;
109  next_iter ++;
110  //insert after cur_iter
111  cur_iter ++;
112  seqalign.Set().insert(cur_iter, *temp_iter);
113  //move back to the newly inserted one
114  cur_iter --;
115  seqalign.Set().erase(temp_iter);
116  } else {
117  next_iter ++;
118  }
119  }
120  }
121  cur_iter ++;
122  }
123 
124 }
125 
126 ///Sort on seqalign range from
127 ///@param info1: the first seqalign
128 ///@param info2: the second seqalign
129 ///
131  CRef<CSeq_align> const& info2)
132 {
133  int score1, score2, sum_n, num_ident;
134  double bits, evalue;
135  list<TGi> use_this_gi;
136  TSeqPos from1, from2;
137 
138  CAlignFormatUtil::GetAlnScores(*info1, score1, bits, evalue,
139  sum_n, num_ident, use_this_gi);
140  CAlignFormatUtil::GetAlnScores(*info2, score2, bits, evalue,
141  sum_n, num_ident, use_this_gi);
142  from1 = info1->GetSeqRange(0).GetFrom();
143  from2 = info2->GetSeqRange(0).GetFrom();
144  if(from1 == from2) {
145  return score2 > score1;
146  } else {
147  return from1 < from2;
148  }
149 }
150 
151 
152 ///Sort on sealign score
153 ///@param info1: the first seqalign
154 ///@param info2: the second seqalign
155 ///
156 static bool AlnScoreDescendingSort(CRef<CSeq_align> const& info1,
157  CRef<CSeq_align> const& info2)
158 {
159  int score1, score2, sum_n, num_ident;
160  double bits, evalue;
161  list<TGi> use_this_gi;
162 
163  CAlignFormatUtil::GetAlnScores(*info1, score1, bits, evalue,
164  sum_n, num_ident, use_this_gi);
165  CAlignFormatUtil::GetAlnScores(*info2, score2, bits, evalue,
166  sum_n, num_ident, use_this_gi);
167 
168  return (score1 > score2);
169 }
170 
171 
172 void CVecscreen::x_GetEdgeRanges(const objects::CSeq_align& seqalign,
173  TSeqPos master_len,
174  TSeqPos& start_edge,
175  TSeqPos& end_edge)
176 {
177  int score, sum_n, num_ident;
178  TSeqPos aln_start, aln_stop;
179  double bits, evalue;
180  list<TGi> use_this_gi;
181 
182  aln_start = min(seqalign.GetSeqRange(0).GetTo(),
183  seqalign.GetSeqRange(0).GetFrom());
184  aln_stop = max(seqalign.GetSeqRange(0).GetTo(),
185  seqalign.GetSeqRange(0).GetFrom());
186  CAlignFormatUtil::GetAlnScores(seqalign, score, bits, evalue,
187  sum_n, num_ident,use_this_gi);
188  if(aln_start < m_TerminalFlexibility ){
189  if (aln_stop > start_edge) {
190  if(score >= kTerminalMatchScore[eStrong]){
191  start_edge=aln_stop;
192  } else if (score >= kTerminalMatchScore[eModerate]){
193  start_edge=aln_stop;
194  } else if (score >= kTerminalMatchScore[eWeak] && m_ShowWeakMatch){
195  start_edge=aln_stop;
196  }
197  }
198  } else if (aln_stop > master_len - 1 - m_TerminalFlexibility){
199  if (aln_start < end_edge) {
200  if(score >= kTerminalMatchScore[eStrong]){
201  end_edge = aln_start;
202  } else if (score >= kTerminalMatchScore[eModerate]){
203  end_edge = aln_start;
204  } else if (score >= kTerminalMatchScore[eWeak] && m_ShowWeakMatch){
205  end_edge = aln_start;
206  }
207 
208  }
209  }
210 }
211 
213  TSeqPos master_len,
214  TSeqPos start_edge,
215  TSeqPos end_edge)
216 {
217  int score, sum_n, num_ident;
218  TSeqPos aln_start, aln_stop;
219  double bits, evalue;
220  list<TGi> use_this_gi;
221 
222  aln_start = min(seqalign.GetSeqRange(0).GetTo(),
223  seqalign.GetSeqRange(0).GetFrom());
224  aln_stop = max(seqalign.GetSeqRange(0).GetTo(),
225  seqalign.GetSeqRange(0).GetFrom());
226  CAlignFormatUtil::GetAlnScores(seqalign, score, bits, evalue,
227  sum_n, num_ident,use_this_gi);
228 
229  if(aln_start < m_TerminalFlexibility ||
230  aln_stop > master_len - 1 - m_TerminalFlexibility){
231  //terminal match
232  if(score >= kTerminalMatchScore[eStrong]){
233  return eStrong;
234  } else if (score >= kTerminalMatchScore[eModerate]){
235  return eModerate;
236  } else if (score >= kTerminalMatchScore[eWeak] && m_ShowWeakMatch){
237  return eWeak;
238  }
239  } else if ((int)aln_start - (int)start_edge <= 1 ||
240  (int)aln_stop + 1 >= (int) end_edge){
241  //terminal match if abutting or within another terminal hit
242  if(score >= kTerminalMatchScore[eStrong]){
243  return eStrong;
244  } else if (score >= kTerminalMatchScore[eModerate]){
245  return eModerate;
246  } else if (score >= kTerminalMatchScore[eWeak] && m_ShowWeakMatch){
247  return eWeak;
248  }
249  } else {
250  //internal match
251  if(score >= kInternalMatchScore[eStrong]){
252  return eStrong;
253  } else if (score >= kInternalMatchScore[eModerate]){
254  return eModerate;
255  } else if (score >= kInternalMatchScore[eWeak] && m_ShowWeakMatch){
256  return eWeak;
257  }
258  }
259  return eNoMatch;
260 }
261 
263  CSeq_align_set& seqalign_lower)
264 {
265  //get merged range for higher seqalign
266  map<TSeqRange, int> range_to_id; // id is whichever came first, not perfect
267  list<CRange<TSeqPos> > range_list;
268  CRange<TSeqPos> prev_range, cur_range;
269  int j = 0;
270  ITERATE(CSeq_align_set::Tdata, iter, seqalign_higher.Get()){
271  cur_range.Set((*iter)->GetSeqRange(0).GetFrom(),
272  (*iter)->GetSeqRange(0).GetTo());
273  //merge if previous range intersect with current range
274  if(j > 0){
275  prev_range = range_list.back();
276  if(prev_range.IntersectingWith(cur_range)){
277  range_list.back() =
278  range_list.back().CombinationWith(cur_range);
279  range_to_id[range_list.back()] = range_to_id[prev_range];
280  } else {
281  range_to_id[cur_range]=x_GetId(**iter);
282  range_list.push_back(cur_range);
283  }
284  } else {
285  range_to_id[cur_range]=x_GetId(**iter);
286  range_list.push_back(cur_range); //store current range
287  }
288  j ++;
289  }
290 
291  //merge lower rank seqalign if it's contained in higher rank seqalign
292  //or if it's contained in the new range formed by higher and lower
293  //seqalign with a higher score
294  seqalign_lower.Set().sort(AlnScoreDescendingSort);
295 
296  NON_CONST_ITERATE(list<CRange<TSeqPos> >, iter_higher, range_list){
297  CSeq_align_set::Tdata::iterator iter_lower =
298  seqalign_lower.Set().begin();
299  while(iter_lower != seqalign_lower.Set().end()){
300  if((*iter_lower)->GetSeqRange(0).GetFrom() >=
301  iter_higher->GetFrom() &&
302  (*iter_lower)->GetSeqRange(0).GetTo() <=
303  iter_higher->GetTo()){
304 
305  x_DropToKeepMap[x_GetId(**iter_lower)] = range_to_id[*iter_higher];
306  x_IdToDropIdMap[range_to_id[*iter_higher]].push_back(x_GetId(**iter_lower));
307 
308  CSeq_align_set::Tdata::iterator temp_iter = iter_lower;
309  iter_lower ++;
310  seqalign_lower.Set().erase(temp_iter);
311  } else if ((*iter_lower)->GetSeqRange(0).
312  IntersectingWith(*iter_higher)){
313  int id= range_to_id[*iter_higher];
314  CRange<TSeqPos> lower_range = (*iter_lower)->GetSeqRange(0);
315  *iter_higher =
316  iter_higher->CombinationWith(lower_range);
317  iter_lower ++;
318  range_to_id[*iter_higher]=id;
319  }else {
320  iter_lower ++;
321  }
322  }
323  }
324 
325 }
326 
327 CVecscreen::CVecscreen(const CSeq_align_set& seqalign, TSeqPos master_length, TSeqPos terminal_flexibility){
328  m_SeqalignSetRef = &seqalign;
329  m_ImagePath = "./";
330  m_MasterLen = master_length;
332  m_HelpDocsUrl = "//www.ncbi.nlm.nih.gov/tools/vecscreen/about/";
333  m_ShowWeakMatch = true;
334  m_TerminalFlexibility = terminal_flexibility;
335 }
336 
338 {
339  ITERATE(list<AlnInfo*>, iter, m_AlnInfoList){
340  delete (*iter);
341  }
342 }
343 
344 
346 {
347  //seqalign is presorted by score already. Delete ones that are contained
348  //in seqaligns with higher scores
349  CSeq_align_set::Tdata::iterator next_iter;
350  CSeq_align_set::Tdata::iterator cur_iter = seqalign.Set().begin();
351 
352  while(cur_iter != seqalign.Set().end()){
353  next_iter = cur_iter;
354  next_iter ++;
355 
356  CRange<TSeqPos> cur_range = (*cur_iter)->GetSeqRange(0);
357  while(next_iter != seqalign.Set().end()){
358  CRange<TSeqPos> next_range = (*next_iter)->GetSeqRange(0);
359  if (cur_range.GetFrom() <= next_range.GetFrom() &&
360  cur_range.GetTo() >= next_range.GetTo()){
361  //if cur_range contains next_range
362  x_DropToKeepMap[x_GetId(**next_iter)] = x_GetId(**cur_iter);
363  x_IdToDropIdMap[x_GetId(**cur_iter)].push_back(x_GetId(**next_iter));
364  CSeq_align_set::Tdata::iterator temp_iter = next_iter;
365  next_iter ++;
366  seqalign.Set().erase(temp_iter);
367  } else if (cur_range.IntersectingWith(next_range)){
368  cur_range =
369  cur_range.CombinationWith(next_range);
370  next_iter ++;
371  } else {
372  next_iter ++;
373  }
374  }
375  cur_iter ++;
376  }
377 }
378 
380 {
381 
382  //different match types, no eSuspect or eNoMatch
383  //as they are not contained in seqalign
384  vector<CRef<CSeq_align_set> > catagorized_seqalign(kNumSeqalignMatchTypes);
385  for(unsigned int i = 0; i < catagorized_seqalign.size(); i ++){
386  catagorized_seqalign[i] = new CSeq_align_set;
387  }
388 
389  //find edges of terminal hits
390 
391  TSeqPos start_edge = 0, end_dege = m_MasterLen - 1 ;
392  ITERATE(CSeq_align_set::Tdata, iter, seqalign.Get()){
393  x_GetEdgeRanges(**iter, m_MasterLen, start_edge, end_dege);
394  }
395 
396  //seperate seqalign with different catagory
397  int next_id=1;
398  x_OrigAlignsById.push_back(CRef<CSeq_align>()); // zero is not being used
399  ITERATE(CSeq_align_set::Tdata, iter, seqalign.Get()){
400  MatchType type = x_GetMatchType(**iter, m_MasterLen, start_edge, end_dege);
401  if(type != eNoMatch){
402  CRef<CSeq_align> new_align(new CSeq_align);
403  new_align->Assign(**iter);
404  if(new_align->GetSeqStrand(0) == eNa_strand_minus){
405  new_align->Reverse();
406  }
407  new_align->SetNamedScore("vs_match_type", type);
408  new_align->SetNamedScore("vs_id", next_id); next_id++;
409  x_OrigAlignsById.push_back(new_align);
410  catagorized_seqalign[type]->Set().push_back(new_align);
411  }
412  }
413 
414  for(unsigned int i = 0; i < catagorized_seqalign.size(); i ++){
415  //sort for x_MergeInclusiveSeqalign
416  catagorized_seqalign[i]->Set().sort(AlnScoreDescendingSort);
417  x_MergeInclusiveSeqalign(*(catagorized_seqalign[i]));
418  //restore alnrangesort
419  catagorized_seqalign[i]->Set().sort(AlnFromRangeAscendingSort);
420  }
421 
422 
423  for(int i = eStrong; i < kNumSeqalignMatchTypes - 1 ; i ++){
424  for(int j = i + 1; j < kNumSeqalignMatchTypes; j ++){
425  x_MergeLowerRankSeqalign(*(catagorized_seqalign[i]),
426  *(catagorized_seqalign[j]));
427  }
428  }
429  //set final seqalign
430  for(unsigned int i = 0; i < catagorized_seqalign.size(); i ++){
431  //restore alnrangesort
432  catagorized_seqalign[i]->Set().sort(AlnFromRangeAscendingSort);
433  ITERATE(CSeq_align_set::Tdata, iter, catagorized_seqalign[i]->Get()){
434  m_FinalSeqalign->Set().push_back(*iter);
435 
436  }
437  }
438 
439  x_BuildNonOverlappingRange(catagorized_seqalign);
440 
441 
442  NON_CONST_ITERATE(list<AlnInfo*>, aln_info_iter, m_AlnInfoList) {
443  AlnInfo::TAlignList aligns_to_add;
444  ITERATE(AlnInfo::TAlignList, aln_iter, (*aln_info_iter)->get_aligns()) {
445  set<int> drop_ids;
446  x_GetAllDropIdsForKeepId(x_GetId(**aln_iter), drop_ids);
447  ITERATE(set<int>, di, drop_ids) {
448  const CSeq_align& da = *x_OrigAlignsById[*di];
449  aligns_to_add.push_back(x_OrigAlignsById[*di]);
450  }
451  }
452  (*aln_info_iter)->add_drops(aligns_to_add);
453  }
454 }
455 
457  CRef<CHTML_table> tbl;
458  CRef<CHTML_img> image;
459  CHTML_tc* tc;
460  double pixel_factor = ((double)kMasterPixel)/m_MasterLen;
461  int column = 0;
462 
463  if(m_AlnInfoList.empty()){
464  return;
465  }
466 
467  //title
468  CRef<CHTML_b> b(new CHTML_b);
469  b->AppendPlainText("Distribution of Vector Matches on the Query Sequence");
470  b->Print(out, CNCBINode::eXHTML);
471  out << "\n\n";
472 
473  tbl = new CHTML_table;
474  tbl->SetCellSpacing(0)->SetCellPadding(0)->SetAttribute("border", "0");
475  tbl->SetAttribute("width", kNumScales*kMasterPixel/(kNumScales - 1));
476 
477  //scale bar
478  double scale = ((double)m_MasterLen)/(kNumScales - 1);
479  for(TSeqPos i = 0; i < kNumScales; i ++){
480  CNodeRef font(new CHTML_font(1, true, NStr::IntToString((int)(scale*i == 0?
481  1 : scale*i))));
482  tc = tbl->InsertAt(0, column, font);
483  tc->SetAttribute("align", "LEFT");
484  tc->SetAttribute("valign", "CENTER");
485  tc->SetAttribute("width", kMasterPixel/(kNumScales - 1));
486  column ++;
487  }
488  tbl->Print(out, CNCBINode::eXHTML);
489  //the actual bar
490 
491  column = 0;
492  tbl = new CHTML_table;
493  tbl->SetCellSpacing(0)->SetCellPadding(0)->SetAttribute("border", "0");
494 
495  int width_adjust = 1;
496  ITERATE(list<AlnInfo*>, iter, m_AlnInfoList){
497  double width = (*iter)->range.GetLength()*pixel_factor;
498  //rounding to int this way as round() is not portable
499  width = width + (width < 0.0 ? -0.5 : 0.5);
500  if(((int)width) > 1){
501  //no show for less than one pixel as the border already
502  //looks like one pixel
503  //width_adjust to compensate for the border width
504 
505  image = new CHTML_img(m_ImagePath + kGif[(*iter)->type],
506  (int)width - width_adjust, kBarHeight);
507  image->SetAttribute("border", 1);
508  tc = tbl->InsertAt(0, column, image);
509  tc->SetAttribute("align", "LEFT");
510  tc->SetAttribute("valign", "CENTER");
511  column ++;
512  }
513  }
514  tbl->Print(out, CNCBINode::eXHTML);
515  out << "\n\n";
516 
517  //legend
518  b = new CHTML_b;
519  b->AppendPlainText("Match to Vector: ");
520  b->Print(out, CNCBINode::eXHTML);
521  for(int i = 0; i < kNumSeqalignMatchTypes; i++){
522  image = new CHTML_img(m_ImagePath + kGif[i], kBarHeight, kBarHeight);
523  image->SetAttribute("border", "1");
524  image->Print(out, CNCBINode::eXHTML);
525  b = new CHTML_b;
526  b->AppendPlainText(" " + kGifLegend[i] + " ");
527  b->Print(out, CNCBINode::eXHTML);
528  }
529  out << "\n";
530  //suspected origin
531  b = new CHTML_b;
532  b->AppendPlainText("Segment of suspect origin: ");
533  b->Print(out, CNCBINode::eXHTML);
535  image->SetAttribute("border", "1");
536  image->Print(out, CNCBINode::eXHTML);
537 
538  //footnote
539  out << "\n\n";
540  b = new CHTML_b;
541  b->AppendPlainText("Segments matching vector: ");
542  b->Print(out, CNCBINode::eXHTML);
544 
545  for (int i = 0; i < kNumSeqalignMatchTypes + 1; i ++){
546  bool is_first = true;
547  ITERATE(list<AlnInfo*>, iter, m_AlnInfoList){
548  if((*iter)->type == i){
549  if(is_first){
550  out << "\n";
551  a = new CHTML_a(m_HelpDocsUrl + "#" +
552  kGifLegend[(*iter)->type]);
553  a->SetAttribute("TARGET", "VecScreenInfo");
554  a->AppendPlainText(kMatchUrlLegend[(*iter)->type] + ":");
555  a->Print(out, CNCBINode::eXHTML);
556  is_first = false;
557  } else {
558  out << ",";
559  }
560  if((*iter)->range.GetFrom() == (*iter)->range.GetTo()){
561  out << " " << (*iter)->range.GetFrom() + 1;
562  } else {
563  out << " " << (*iter)->range.GetFrom() + 1 << "-"
564  << (*iter)->range.GetTo() + 1;
565  }
566 
567  }
568  }
569  }
570 
571  out << "\n\n";
572 }
573 
574 
576  CSeq_align_set actual_aln_list;
579  x_MergeSeqalign(actual_aln_list);
580  //x_BuildHtmlBar(out);
583  return m_FinalSeqalign;
584 }
585 
588 }
589 
591  const AlnInfo::TAlignList aligns){
592  AlnInfo* aln_info = new AlnInfo;
593  aln_info->range.Set(from, to);
594  aln_info->type = type;
595  aln_info->add_aligns(aligns);
596  return aln_info;
597 }
598 
600  seqalign_vec){
601  vector< list<AlnInfo*> > aln_info_vec(seqalign_vec.size());
602  CRange<TSeqPos>* prev_range;
603 
604  //merge overlaps within the same type
605  for(unsigned int i = 0; i < seqalign_vec.size(); i ++){
606  int j = 0;
607  ITERATE(CSeq_align_set::Tdata, iter, seqalign_vec[i]->Get()){
608  AlnInfo* cur_aln_info = new AlnInfo;
609  cur_aln_info->range.Set((*iter)->GetSeqRange(0).GetFrom(),
610  (*iter)->GetSeqRange(0).GetTo());
611  cur_aln_info->type = (MatchType)i;
612  cur_aln_info->add_align(*iter);
613  //merge if previous range intersect with current range
614  if(j > 0){
615  prev_range = &(aln_info_vec[i].back()->range);
616  if(prev_range->IntersectingWith(cur_aln_info->range) ||
617  prev_range->AbuttingWith(cur_aln_info->range)){
618  aln_info_vec[i].back()->range =
619  aln_info_vec[i].back()->range.CombinationWith(cur_aln_info->range);
620  delete cur_aln_info;
621  aln_info_vec[i].back()->add_align(*iter);
622  } else {
623  aln_info_vec[i].push_back(cur_aln_info);
624  }
625  } else {
626  aln_info_vec[i].push_back(cur_aln_info); //store current range
627  }
628  j ++;
629  }
630  }
631 
632  //merge overlapping range of lower ranks to higher rank range
633  for(unsigned int i = 0; i < aln_info_vec.size(); i ++){
634  ITERATE(list<AlnInfo*>, iter_higher, aln_info_vec[i]){
635  for(unsigned int j = i + 1; j < aln_info_vec.size(); j ++){
636  list<AlnInfo*>::iterator iter_temp;
637  list<AlnInfo*>::iterator iter_lower = aln_info_vec[j].begin();
638  while(iter_lower != aln_info_vec[j].end()){
639  CRange<TSeqPos> higher_range, lower_range;
640  higher_range = (*iter_higher)->range;
641  lower_range = (*iter_lower)->range;
642  if((*iter_higher)->range.IntersectingWith((*iter_lower)->range)){
643  //overlaps. Need to handle
644  if((*iter_higher)->range.GetFrom() <=
645  (*iter_lower)->range.GetFrom()){
646  //higher from comes first
647  if((*iter_higher)->range.GetTo() >=
648  (*iter_lower)->range.GetTo()){
649  //higher include lower. delete lower.
650  (*iter_higher)->add_aligns((*iter_lower)->get_aligns());
651  iter_temp = iter_lower;
652  iter_lower ++;
653  aln_info_vec[j].erase(iter_temp);
654  } else {
655  //partially overlaps
656  //reduce the first part of the lower one
657  (*iter_lower)->range.
658  Set((*iter_higher)->range.GetTo() + 1,
659  (*iter_lower)->range.GetTo());
660  iter_lower ++;
661  }
662  } else {
663  //lower from comes first
664  if((*iter_higher)->range.GetTo() <=
665  (*iter_lower)->range.GetTo()){
666  //lower includes higher. need to break up lower
667  //to 3 parts and delete the middle one(included
668  //in higher one)
669 
670  aln_info_vec[j].
671  insert(iter_lower,
672  x_GetAlnInfo((*iter_lower)->range.
673  GetFrom(),
674  (*iter_higher)->range.
675  GetFrom() - 1 ,
676  (MatchType)j,
677  (*iter_lower)->get_aligns() ));
678 
679  if ((*iter_higher)->range.GetTo() <
680  (*iter_lower)->range.GetTo()) {
681  //insert another piece only if lower has extra piece
682  aln_info_vec[j].
683  insert(iter_lower,
684  x_GetAlnInfo((*iter_higher)->range.
685  GetTo() + 1,
686  (*iter_lower)->range.GetTo() ,
687  (MatchType)j,
688  (*iter_lower)->get_aligns() ));
689  }
690 
691  iter_temp = iter_lower;
692  iter_lower ++;
693  aln_info_vec[j].erase(iter_temp);
694 
695  } else {
696  //partially overlap
697  //reduce latter part of lower
698 
699  (*iter_lower)->range.
700  Set((*iter_lower)->range.GetFrom(),
701  (*iter_higher)->range.GetFrom() - 1);
702  iter_lower ++;
703  }
704  }
705  } else {
706  //no overlaps, do nothing
707  if ((*iter_lower)->range.GetFrom() > (*iter_higher)->range.GetFrom()) {
708  //no comparing again as it's already sorted
709  break;
710  }
711  iter_lower ++;
712  }
713  }
714  }
715  }
716  }
717 
718  //Set final list
719  for(unsigned int i = 0; i < aln_info_vec.size(); i++){
720  ITERATE(list<AlnInfo*>, iter, aln_info_vec[i]){
721  m_AlnInfoList.push_back(*iter);
722  }
723 
724  }
726 
727 
728  //adding range for suspected match and no match
729  list<AlnInfo*>::iterator prev_iter = m_AlnInfoList.end();
730  list<AlnInfo*>::iterator cur_iter = m_AlnInfoList.begin();
731  list<AlnInfo*>::iterator temp_iter;
732  int count = 0;
733 
734  while(cur_iter != m_AlnInfoList.end()){
735  if(count > 0){
736  CRange<TSeqPos> prev_range, cur_range;
737  prev_range = (*prev_iter)->range;
738  cur_range = (*cur_iter)->range;
739  int diff = cur_range.GetFrom() - prev_range.GetTo();
740  if(diff >= 2){
741  //no overlaps, insert the range in between
742 
743  MatchType type = ((*cur_iter)->range.GetFrom() - 1) -
744  ((*prev_iter)->range.GetTo() + 1) + 1 > kSupectLength ?
745  eNoMatch : eSuspect;
746 
748  insert(cur_iter,
749  x_GetAlnInfo(prev_range.GetTo() + 1,
750  cur_range.GetFrom() - 1,
751  type));
752  }
753 
754  } else {
755  if((*cur_iter)->range.GetFrom() > 0){
756  //insert the range infront of first align range
757  MatchType type = ((*cur_iter)->range.GetFrom() - 1) + 1 >
758  kSupectLength ?
759  eNoMatch : eSuspect;
761  insert(cur_iter,
762  x_GetAlnInfo(0, (*cur_iter)->range.GetFrom() - 1,
763  type));
764  }
765  }
766  prev_iter = cur_iter;
767  cur_iter ++;
768  count ++;
769  }
770 
771  //add the last possible no match range
772  if(prev_iter != m_AlnInfoList.end()){
773  if(m_MasterLen -1 > (*prev_iter)->range.GetTo()){
775  ((*prev_iter)->range.GetTo() + 1) +1 >
776  kSupectLength ?
777  eNoMatch : eSuspect;
779  push_back(x_GetAlnInfo((*prev_iter)->range.GetTo() + 1,
780  m_MasterLen - 1,
781  type));
782  }
783  }
784 }
785 
786 END_SCOPE(align_format)
User-defined methods of the data storage class.
#define static
User-defined methods of the data storage class.
static void GetAlnScores(const objects::CSeq_align &aln, int &score, double &bits, double &evalue, int &sum_n, int &num_ident, list< TGi > &use_this_gi)
Extract score info from blast alignment.
static void ExtractSeqalignSetFromDiscSegs(objects::CSeq_align_set &target, const objects::CSeq_align_set &source)
If a Seq-align-set contains Seq-aligns with discontinuous type segments, extract the underlying Seq-a...
void Reverse(void)
Reverse the segments' orientation NOTE: currently *only* works for dense-seg.
Definition: Seq_align.cpp:685
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
Definition: Seq_align.cpp:153
void SetNamedScore(const string &id, int score)
Definition: Seq_align.cpp:636
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
void x_MergeSeqalign(objects::CSeq_align_set &seqalign)
merge overlapping seqalign
MatchType
vector match defines
TIdToDropIdMap x_IdToDropIdMap
AlnInfo * x_GetAlnInfo(TSeqPos from, TSeqPos to, MatchType type, const AlnInfo::TAlignList aligns=AlnInfo::TAlignList())
get align info
CVecscreen(const objects::CSeq_align_set &seqalign, TSeqPos master_length, TSeqPos terminal_flexibility=25)
Constructors.
void x_MergeInclusiveSeqalign(objects::CSeq_align_set &seqalign)
merge a seqalign if its range is in another seqalign
TSeqPos m_TerminalFlexibility
void x_BuildHtmlBar(CNcbiOstream &out)
Output the graphic.
void x_GetEdgeRanges(const objects::CSeq_align &seqalign, TSeqPos master_len, TSeqPos &start_edge, TSeqPos &end_edge)
list< AlnInfo * > m_AlnInfoList
internal match list
CConstRef< objects::CSeq_align_set > m_SeqalignSetRef
the current seqalign
int x_GetId(const objects::CSeq_align &a)
void x_MergeLowerRankSeqalign(objects::CSeq_align_set &seqalign_higher, objects::CSeq_align_set &seqalign_lower)
merge a seqalign if its range is in another higher ranked seqalign
~CVecscreen()
Destructor.
string m_ImagePath
gif image file path
string m_HelpDocsUrl
help url
static bool FromRangeAscendingSort(AlnInfo *const &info1, AlnInfo *const &info2)
Sort on range from.
MatchType x_GetMatchType(const objects::CSeq_align &seqalign, TSeqPos master_len, TSeqPos start_edge, TSeqPos end_edge)
Get match type.
void VecscreenPrint(CNcbiOstream &out)
show alignment graphic view
TSeqPos m_MasterLen
master seq length
static const string & GetStrengthString(MatchType match_type)
Returns a string concerning the strength of the match for a given enum value.
vector< CRef< objects::CSeq_align > > x_OrigAlignsById
bool m_ShowWeakMatch
Show weak match?
TDropToKeepMap x_DropToKeepMap
void x_BuildNonOverlappingRange(vector< CRef< objects::CSeq_align_set > > seqalign_vec)
Build non overlapping internal match list.
void x_GetAllDropIdsForKeepId(int keep_id, set< int > &drop_ids)
CRef< objects::CSeq_align_set > ProcessSeqAlign(void)
Process alignment to show.
CRef< objects::CSeq_align_set > m_FinalSeqalign
the processed seqalign
Definition: map.hpp:338
std::ofstream out("events_result.xml")
main entry point for tests
static int type
Definition: getdata.c:31
static const char * column
Definition: stats.c:23
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
void SetAttribute(const string &name, const string &value)
@ eXHTML
Definition: node.hpp:111
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1065
TThisType CombinationWith(const TThisType &r) const
Definition: range.hpp:358
bool AbuttingWith(const TThisType &r) const
Definition: range.hpp:336
bool IntersectingWith(const TThisType &r) const
Definition: range.hpp:331
TThisType & Set(position_type from, position_type to)
Definition: range.hpp:188
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
#define NcbiEmptyString
Definition: ncbistr.hpp:122
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
USING_SCOPE(objects)
HTML classes.
int i
const TYPE & Get(const CNamedParameterList *param)
unsigned int a
Definition: ncbi_localip.c:102
T max(T x_, T y_)
T min(T x_, T y_)
static void s_RestoreHspPos(CSeq_align_set &seqalign)
group hsp with same id together
static const TSeqPos kSupectLength
static const string kMatchUrlLegend[]
static bool AlnScoreDescendingSort(CRef< CSeq_align > const &info1, CRef< CSeq_align > const &info2)
Sort on seqalign score.
static const int kTerminalMatchScore[kNumSeqalignMatchTypes]
static bool AlnFromRangeAscendingSort(CRef< CSeq_align > const &info1, CRef< CSeq_align > const &info2)
Sort on seqalign range from.
static const TSeqPos kMasterPixel
static const TSeqPos kNumScales
static const string kGif[kNumSeqalignMatchTypes+2]
static const int kNumSeqalignMatchTypes
static const TSeqPos kBarHeight
static const int kInternalMatchScore[kNumSeqalignMatchTypes]
static const string kGifLegend[]
void add_align(CRef< objects::CSeq_align > a)
void add_aligns(const TAlignList &al)
list< CRef< objects::CSeq_align > > TAlignList
Definition: type.c:6
#define const
Definition: zconf.h:232
Modified on Wed Apr 17 13:10:58 2024 by modify_doxy.py rev. 669887