NCBI C++ ToolKit
seq_text_ds.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seq_text_ds.cpp 47485 2023-05-02 14:46:59Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Colleen Bollin (adapted from a file by Andrey Yazhuk)
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
35 
36 #include <gui/objutils/label.hpp> // TODO
37 #include <gui/objutils/utils.hpp>
38 
39 #include <objects/seq/MolInfo.hpp>
40 
41 #include <objmgr/bioseq_ci.hpp>
42 #include <objmgr/seq_vector.hpp>
43 #include <objmgr/util/sequence.hpp>
45 #include <objmgr/feat_ci.hpp>
46 #include <objmgr/seqdesc_ci.hpp>
47 
48 #include <objmgr/util/feature.hpp>
49 
50 #include <gui/objutils/utils.hpp>
52 
55 
56 
58  CScope& scope)
59 {
60  m_Sep = &sep;
61  m_Scope = &scope;
62 
63  CSeq_entry_Handle handle = scope.GetSeq_entryHandle(sep);
64  CBioseq_CI it(handle);
65 
66  m_SubjectHandle = *it;
67  m_ID = m_SubjectHandle.GetSeqId();
69  if (idh) {
70  m_ID = idh.GetSeqId();
71  }
72  m_Loc.Reset ();
73 
74  m_Loc = new CSeq_loc(const_cast<CSeq_id&>(*CBioseq_Handle(*it).
75  GetSeqId()),
76  0,CBioseq_Handle(*it).GetBioseqLength() - 1);
78 }
79 
80 
82  CScope& scope)
83 {
84  m_Scope = &scope;
85 
86  m_SubjectHandle = handle;
87 
88  m_ID = handle.GetSeqId();
90  if (idh) {
91  m_ID = idh.GetSeqId();
92  }
93 
94  m_Loc = new CSeq_loc(const_cast<CSeq_id&>(*CBioseq_Handle(handle).
95  GetSeqId()),
96  0,CBioseq_Handle(handle).GetBioseqLength() - 1);
98 }
99 
101 {
102  m_Scope = &scope;
103  m_SubjectHandle = scope.GetBioseqHandle(*loc.GetId());
104 
105  if (!m_SubjectHandle)
106  NCBI_THROW(CException, eUnknown, "failed to retrieve sequence");
107 
108  m_ID = m_SubjectHandle.GetSeqId();
110  if (idh) {
111  m_ID = idh.GetSeqId();
112  }
113 
114  m_Loc = new CSeq_loc();
115  m_Loc->Add(loc);
117 }
118 
119 
121 {
122 }
123 
124 
126 {
127  return m_Scope.GetObject();
128 }
129 
130 
131 TSeqPos CSeqTextDataSource::SequencePosToSourcePos (TSeqPos sequence_pos, bool *found_in_source)
132 {
133  bool found = false;
134  bool in_source = false;
135  TSeqPos source_pos = 0;
136 
137  if (m_Intervals.size() < 1) {
138  return -1;
139  }
140 
142  if (sequence_pos >= it->GetSequenceStart() && sequence_pos <= it->GetSequenceStop()) {
143  source_pos = it->GetSourceStart() + sequence_pos - it->GetSequenceStart();
144  found = true;
145  in_source = true;
146  break;
147  } else if (sequence_pos < it->GetSequenceStart()) {
148  source_pos = it->GetSourceStart();
149  found = true;
150  break;
151  }
152  }
153  if (!found) {
154  source_pos = m_Intervals.back().GetSourceStart() + m_Intervals.back().GetLength() - 1;
155  }
156 
157  if (found_in_source != NULL) {
158  *found_in_source = in_source;
159  }
160  return source_pos;
161 }
162 
163 
165 {
166  TSeqPos sequence_pos = 0;
167 
169  if (source_pos >= it->GetSourceStart() && source_pos <= it->GetSourceStop()) {
170  sequence_pos = it->GetSequenceStart() + source_pos - it->GetSourceStart();
171  break;
172  }
173  }
174 
175  return sequence_pos;
176 }
177 
178 
180 {
181  string seq_piece;
182 
183  // initialize buffer to empty
184  buffer.erase();
185 
187  if (it->GetSourceStart() > stop) {
188  break;
189  }
190  it->GetSeqString(start, stop, buffer);
191  }
192 }
193 
194 
/// Invert the case of every character of @a seq in place.
///
/// Characters for which islower() is true are replaced by their toupper()
/// value; every other character is passed through tolower(), so uppercase
/// letters become lowercase and non-letters are left as they are.
///
/// @param seq  String to modify in place; may be empty.
void InvertCase(string& seq)
{
    for (char& ch : seq) {
        // The <cctype> classification/conversion functions require an
        // argument representable as unsigned char (or EOF).  Passing a
        // plain char that happens to be negative is undefined behavior,
        // so convert explicitly before calling them.
        const unsigned char uch = static_cast<unsigned char>(ch);
        ch = static_cast<char>(islower(uch) ? toupper(uch) : tolower(uch));
    }
}
205 
206 
207 void CSeqTextDataSource::GetSeqData (TSeqPos start, TSeqPos stop, string& buffer, SAnnotSelector *feat_sel, bool showFeatAsLower)
208 {
209  buffer.clear();
210 
212  if (it->GetSourceStart() > stop) {
213  break;
214  }
215  it->GetSeqData(start, stop, buffer, feat_sel /*CSeqFeatData::eSubtype_any*/);
216  }
217 
218  if (showFeatAsLower) {
220  }
221 }
222 
223 
224 int CSeqTextDataSource::ChooseBetterSubtype (int subtype1, int subtype2)
225 {
226  if (subtype1 == CSeqFeatData::eSubtype_cdregion || subtype2 == CSeqFeatData::eSubtype_cdregion) {
228  } else if (subtype1 == CSeqFeatData::eSubtype_mRNA || subtype2 == CSeqFeatData::eSubtype_mRNA) {
230  } else if (subtype1 == CSeqFeatData::eSubtype_gene || subtype2 == CSeqFeatData::eSubtype_gene) {
232  } else if (subtype1 == CSeqFeatData::eSubtype_bad) {
233  return subtype2;
234  } else if (subtype2 == CSeqFeatData::eSubtype_bad) {
235  return subtype1;
236  } else if (subtype1 < subtype2) {
237  return subtype1;
238  } else {
239  return subtype2;
240  }
241 }
242 
243 
244 void
246 (TSeqPos start_offset,
247  TSeqPos stop_offset,
248  CSeqTextConfig *cfg,
249  ISeqTextGeometry* pParent,
251  CSeqTextDefs::TSpliceSiteVector &splice_sites,
253 {
254 
255  subtypes.clear();
256  splice_sites.clear();
257  variations.clear();
258 
259  // no configuration - leave the feature data blank
260  if (cfg == NULL) {
261  return;
262  }
263 
264  // output_range_offset contains the lengths of output data that have already
265  // been filled in. data for a range should be filled in based on the
266  // distance from the start of the range plus the output_range_offset
267  //TSeqPos output_range_offset = 0;
268 
269  int l = (stop_offset - start_offset) + 1;
270  splice_sites.reserve(l);
271  splice_sites.resize(l, false);
272  subtypes.reserve(l);
273  subtypes.resize(l, CSeqFeatData::eSubtype_bad);
274 
275 
276  // don't look for features after the end of the data
277  stop_offset = min (stop_offset, GetDataLen() - 1);
278 
279  // examine each interval in the source data
281  if (it->GetSourceStart() > stop_offset) {
282  break;
283  }
284  it->GetFeatureData(start_offset, stop_offset, cfg, pParent, subtypes, splice_sites, variations);
285 
286  }
287 
288 }
289 
290 
292 {
293  bool is_mRNA = true;
294 
295  for (CSeq_loc_CI seq_loc_it (*m_Loc); seq_loc_it && is_mRNA; ++ seq_loc_it) {
296  CBioseq_Handle handle = GetScope().GetBioseqHandle(*seq_loc_it.GetEmbeddingSeq_loc().GetId());
297  if (!handle.CanGetInst_Mol() || handle.GetInst_Mol() != CSeq_inst::eMol_rna) {
298  is_mRNA = false;
299  } else {
300  CSeqdesc_CI di (handle, CSeqdesc::e_Molinfo);
301  if (!di || di->GetMolinfo().GetBiomol() != CMolInfo::eBiomol_mRNA) {
302  is_mRNA = false;
303  }
304  }
305  }
306  return is_mRNA;
307 }
308 
309 
310 void
312 (TSeqPos start_offset,
313  TSeqPos stop_offset,
314  ISeqTextGeometry* pParent,
316 {
317  bool even = false;
318 
319  // only do this when sequence is mRNA
320 
321  if (!IsmRNASequence ()) return;
322 
323  subtypes.clear();
324  // this fills in the output range
325  //for (int i = 0; i < stop_offset - start_offset + 2; i++) {
326  //subtypes.push_back (CSeqFeatData::eSubtype_bad);
327  //}
328  if (!pParent) return;
329  subtypes.reserve((stop_offset - start_offset) + 1);
330  subtypes.resize((stop_offset - start_offset) + 1, CSeqFeatData::eSubtype_bad);
331 
332  // examine each interval in the source data
334  if (it->GetSourceStart() > stop_offset) {
335  break;
336  }
337  it->GetSubtypesForAlternatingExons(start_offset, stop_offset, pParent, subtypes, even);
338  }
339 
340 }
341 
342 
344 {
345  variations.clear();
346 
347  // examine each interval in the source data
349  if (it->GetSourceStart() > stop_offset) {
350  break;
351  }
352  it->GetVariations(start_offset, stop_offset, variations);
353  }
354 
355 }
356 
357 
358 void CSeqTextDataSource::RenderFeatureExtras (ISeqTextGeometry* pParent, CGlPane &pane, TSeqPos start_offset, TSeqPos stop_offset)
359 {
360  if (pParent == NULL) return;
361 
362  // open pane for drawing
363  pane.OpenOrtho();
364 
365  // examine each interval in the source data
367  it->RenderFeatureExtras (pParent, start_offset, stop_offset);
368  }
369 
370  pane.Close();
371 }
372 
373 
374 void
376 (TSeqPos start_offset,
377  TSeqPos stop_offset,
379 {
380  bool is_first = true;
381  TSeqPos offset = 0;
382 
383  breaks.clear();
384 
385 
386  for (CSeq_loc_CI seq_loc_it (*m_Loc); seq_loc_it && offset < stop_offset; ++ seq_loc_it) {
387  if (is_first) {
388  is_first = false;
389  }
390  else if (offset >= start_offset)
391  {
392  breaks.push_back (offset);
393  }
394  CSeq_loc_CI::TRange seq_range = seq_loc_it.GetRange();
395  TSeqPos seq_start = seq_range.GetFrom();
396  TSeqPos seq_stop = seq_range.GetTo();
397  offset += seq_stop - seq_start + 1;
398  }
399 }
400 
401 
403 {
405 }
406 
408 {
409  string s = "Sequence : ";
410  unique_ptr<sequence::CDeflineGenerator> gen(new sequence::CDeflineGenerator());
411  s += gen->GenerateDefline(m_SubjectHandle);
412  return s;
413 }
414 
415 
417 {
418  return m_ID;
419 }
420 
421 
422 vector<CConstRef<CSeq_feat> > CSeqTextDataSource::GetFeaturesAtPosition(TSeqPos source_pos)
423 {
424  vector<CConstRef<CSeq_feat> > features;
425 
426  const CSeq_id *seq_id = m_Loc->GetId();
427  if (!seq_id) return features;
428 
429  if (source_pos > GetDataLen() - 1) {
430  return features;
431  }
432 
433 #if 1
435  it->GetFeaturesAtPosition(source_pos, features);
436  }
437  return features;
438 #else
439  TSeqPos seq_pos = SourcePosToSequencePos(source_pos);
440  CBioseq_Handle handle = GetScope().GetBioseqHandle(*seq_id);
441 
442  TSeqRange range (seq_pos, seq_pos);
443  if (feat_sel) {
444  return new CFeat_CI(handle, range, *feat_sel);
445  }
446 
447  objects::SAnnotSelector sel = CSeqUtils::GetAnnotSelector();
448  return new CFeat_CI(handle, range, sel);
449 #endif
450 }
451 
453 {
454  string tooltip_text = "";
455 
457  it->GetToolTipForPosition(source_pos, tooltip_text);
458  }
459  return tooltip_text;
460 }
461 
462 
464 {
465  return m_Loc.GetNCPointer();
466 }
467 
468 int CSeqTextDataSource::FindSequenceFragment (const string& fragment, TSeqPos start_search)
469 {
470  TSeqPos data_len = GetDataLen();
471  TSeqPos search_buffer_len;
472  TSeqPos end_search;
473  string search_buffer;
474 
475  if (fragment.length() > data_len - start_search) {
476  return -1;
477  }
478 
479  search_buffer_len = max ((int) 1000, (int)(3 * fragment.length()));
480  search_buffer_len = min (search_buffer_len, data_len - start_search);
481 
482  while (start_search < data_len - fragment.length() + 1) {
483  end_search = start_search + search_buffer_len;
484  GetSeqString (start_search, end_search, search_buffer);
485  string::size_type pos = NStr::FindNoCase(search_buffer, fragment);
486  if (pos != string::npos) {
487  return static_cast<int>(pos + start_search);
488  }
489  start_search = static_cast<TSeqPos>(end_search - fragment.length() + 1);
490  }
491  return -1;
492 }
493 
494 
495 void CSeqTextDataSource::FindSequenceFragmentList (const string& fragment,
496  CSeqTextDefs::TSeqPosVector &locations,
497  ICanceled* cancel)
498 {
499  if (cancel->IsCanceled()) {
500  return;
501  }
502  TSeqPos data_len = GetDataLen();
503  TSeqPos search_buffer_len;
504  TSeqPos start_search = 0, end_search;
505  string search_buffer;
506 
507  locations.clear();
508 
509  if (fragment.length() > data_len - start_search) {
510  return;
511  }
512 
513  search_buffer_len = max ((int) 1000, (int)(3 * fragment.length()));
514  search_buffer_len = min (search_buffer_len, data_len - start_search);
515 
516  while (!cancel->IsCanceled() && start_search < data_len - fragment.length() + 1) {
517  end_search = start_search + search_buffer_len;
518  GetSeqString (start_search, end_search, search_buffer);
519  string::size_type pos = NStr::FindNoCase(search_buffer, fragment);
520  while (pos != string::npos) {
521  locations.push_back(static_cast<TSeqPos>(pos + start_search));
522  if (end_search > pos + fragment.length()) {
523  pos = NStr::FindNoCase(search_buffer, fragment, pos + 1);
524  } else {
525  pos = string::npos;
526  }
527  }
528  start_search = static_cast<TSeqPos>(end_search - fragment.length() + 1);
529  }
530 
531  if (cancel->IsCanceled()) {
532  locations.clear();
533  }
534 }
535 
536 
538 {
539  m_Intervals.clear();
540  TSeqPos offset = 0;
541  for (CSeq_loc_CI seq_loc_it (*m_Loc); seq_loc_it; ++ seq_loc_it) {
542  m_Intervals.push_back(CSeqTextDataSourceInterval(*(seq_loc_it.GetRangeAsSeq_loc()), *m_Scope, offset));
543  offset += seq_loc_it.GetRange().GetLength();
544  }
545 }
546 
547 
548 CSeqTextDataSourceInterval::CSeqTextDataSourceInterval(const objects::CSeq_loc& loc, objects::CScope& scope, TSeqPos offset)
549  : m_Feat(scope, loc), m_Offset(offset)
550 {
551  m_Loc.Reset(new CSeq_loc());
552  m_Loc->Assign(loc);
553  m_Seq = scope.GetBioseqHandle(*(loc.GetId()));
554  m_Length = sequence::GetLength(loc, &scope);
555  m_Vect = m_Seq.GetSeqVector (CBioseq_Handle::eCoding_Iupac);
556 }
557 
558 
560 {
561 }
562 
563 
565 {
566  bool rval = true;
567  if (pos < m_Offset) {
568  pos = 0;
569  rval = false;
570  } else if (pos > m_Offset + m_Length - 1) {
571  pos = m_Offset + m_Length - 1;
572  rval = false;
573  } else {
574  pos -= m_Offset;
575  rval = true;
576  }
577  return rval;
578 }
579 
580 
582 {
583  bool rval = true;
584  if (pos < GetSequenceStart()) {
585  pos = 0;
586  rval = false;
587  } else if (pos > GetSequenceStop()) {
588  pos = GetSequenceStop();
589  rval = false;
590  } else {
591  pos -= GetSequenceStart();
592  }
593  return rval;
594 }
595 
596 
598 {
599  bool rval = true;
600  if (src_start >= m_Offset + m_Length || src_stop < m_Offset) {
601  rval = false;
602  }
603  SourcePosToIntervalPos(src_start);
604  SourcePosToIntervalPos(src_stop);
605  return rval;
606 }
607 
608 
609 void CSeqTextDataSourceInterval::GetSeqString(TSeqPos src_start, TSeqPos src_stop, string& buffer) const
610 {
611  if (IntersectingSourceInterval(src_start, src_stop)) {
612  //CSeqVector vect = m_Seq.GetSeqVector (CBioseq_Handle::eCoding_Iupac);
613  string seq_piece;
614  m_Vect.GetSeqData (src_start + GetSequenceStart(), src_stop + GetSequenceStart(), seq_piece);
615  buffer.append (seq_piece);
616  }
617 }
618 
619 
620 void CSeqTextDataSourceInterval::GetSeqData (TSeqPos src_start, TSeqPos src_stop, string& buffer, SAnnotSelector *feat_sel) const
621  //CSeqFeatData::ESubtype subtype) const
622 {
623  if (IntersectingSourceInterval(src_start, src_stop)) {
624  //CSeqVector vect = m_Seq.GetSeqVector (CBioseq_Handle::eCoding_Iupac);
625  string seq_piece;
626  m_Vect.GetSeqData (src_start + GetSequenceStart(), src_stop + GetSequenceStart() + 1, seq_piece);
627 // CFeat_CI feat_it = m_Feat;
628  SAnnotSelector sel;
629  CFeat_CI feat_it(m_Seq.GetScope(), *m_Loc, feat_sel ? *feat_sel : sel);
630 
631  while (feat_it) {
632  //if (subtype != CSeqFeatData::eSubtype_any && subtype != feat_it->GetData().GetSubtype()) {
633  // continue;
634  //}
635  const CSeq_loc& loc = feat_it->GetLocation();
636 
637  for(CSeq_loc_CI loc_it(loc); loc_it; ++loc_it) {
638  CSeq_loc_CI::TRange feat_range = loc_it.GetRange();
639  TSeqPos feat_start = feat_range.GetFrom();
640  TSeqPos feat_stop = feat_range.GetTo();
641  if (feat_stop < GetSequenceStart() || feat_start > GetSequenceStop()) {
642  // if the section of the feature is not in this sequence piece,
643  // don't draw it
644  continue;
645  }
646  feat_start = max(0, int(feat_start - GetSequenceStart()));
647  feat_stop = max(0, int(feat_stop - GetSequenceStart()));
648 
649  if (feat_stop < src_start || feat_start > src_stop) {
650  // if the section of the feature is not in the viewed area, don't draw it
651  continue;
652  }
653 
654  feat_start = (feat_start < src_start) ? 0 : feat_start - src_start;
655  feat_stop = (feat_stop - src_start > seq_piece.size() - 1) ? static_cast<TSeqPos>(seq_piece.size() - 1) : feat_stop - src_start;
656 
657  while (feat_start <= feat_stop)
658  {
659  seq_piece [feat_start] = tolower(seq_piece [feat_start]);
660  feat_start ++;
661  }
662  }
663 
664  ++feat_it;
665  }
666 
667  buffer.append (seq_piece);
668  }
669 
670 }
671 
672 
674 {
675  if (SourcePosToIntervalPos(src_pos)) {
677  CFeat_CI f(m_Seq, TSeqRange(src_pos + GetSequenceStart(), src_pos + GetSequenceStart()), sel);
678  while (f) {
679  string feat_title = "";
680  CLabel::GetLabel(f->GetOriginalFeature(), &feat_title,
681  CLabel::eUserTypeAndContent, &m_Seq.GetScope());
682  if (!feat_title.empty()) {
683  if (!tooltip_text.empty()) {
684  tooltip_text.append ("\n");
685  }
686  tooltip_text.append (feat_title);
687  }
688  ++f;
689  }
690  }
691 }
692 
693 
695 {
696  if (pParent == NULL) return;
697 
698  // if this interval contains locations between start_offset and stop_offset, draw feature extras
699  if (IntersectingSourceInterval(start_offset, stop_offset)) {
700  CScope * scope = &(m_Seq.GetScope());
701  CRef<CSeq_loc> cmp = x_GetSeqLocForInterval(start_offset, stop_offset);
703  CFeat_CI f(*scope, *cmp, sel);
704  while (f) {
705  pParent->STG_RenderFeatureExtras(*f);
706  ++f;
707  }
708  }
709 
710 }
711 
712 
713 void
715 (TSeqPos start_offset,
716  TSeqPos stop_offset,
717  CSeqTextConfig *cfg,
718  ISeqTextGeometry* pParent,
720  CSeqTextDefs::TSpliceSiteVector &splice_sites,
722 {
723  if (IntersectingSourceInterval(start_offset, stop_offset)) {
724  CScope * scope = &(m_Seq.GetScope());
725  CRef<CSeq_loc> cmp = x_GetSeqLocForInterval(start_offset, stop_offset);
727  CFeat_CI f(*scope, *cmp, sel);
728  while (f) {
729  // feature subtypes
730  int subtype = f->GetFeatSubtype();
731  if (cfg->GetFeatureColorationChoice() == CSeqTextPaneConfig::eAll && pParent && cfg->GetShow (subtype)) {
732  pParent->STG_SetSubtypesForFeature (subtypes, f->GetLocation(), f->GetFeatSubtype(), start_offset, stop_offset);
733  }
734 
735  if (subtype == CSeqFeatData::eSubtype_cdregion) {
736  LookForSpliceJunctions (*f, splice_sites, start_offset, stop_offset);
737  }
738 
739  if (subtype == CSeqFeatData::eSubtype_variation) {
740  x_AddVariationsFromFeature (f->GetOriginalFeature(), f->GetLocation(), variations);
741  }
742  ++f;
743  }
744  }
745 
746 }
747 
748 
750 {
751  if (IntersectingSourceInterval(start_offset, stop_offset)) {
752  CRef<CSeq_loc> cmp = x_GetSeqLocForInterval(start_offset, stop_offset);
754  CFeat_CI f(m_Seq.GetScope(), *cmp, sel);
755  while (f) {
756  x_AddVariationsFromFeature (f->GetOriginalFeature(), f->GetLocation(), variations);
757  ++f;
758  }
759  }
760 }
761 
762 
763 void
765 (const CSeq_feat &feat,
766  const CSeq_loc &loc,
767  CSeqTextDefs::TVariationGraphVector &variations) const
768 {
769  TSeqPos feat_left = loc.GetStart(eExtreme_Positional);
770  TSeqPos feat_right = loc.GetStop(eExtreme_Positional);
771  bool replace_found = false;
772 
773  // get contents of sequence at this location, so we know which /replace
774  // represents the original text (and we can display the other instead)
775 
776  string seq_piece;
777  //CSeqVector vect = m_Seq.GetSeqVector (CBioseq_Handle::eCoding_Iupac);
778  m_Vect.GetSeqData (feat_left, feat_right + 1, seq_piece);
779 
780  // needed sequence coordinates for getting actual sequence text
781  // store source coordinates in variation holder
782 
783  feat_left -= GetSequenceStart();
784  feat_right -= GetSequenceStart();
785 
786  if (feat.CanGetQual()) {
787  ITERATE( CSeq_feat::TQual, it, feat.GetQual()) {
788  if (!NStr::CompareNocase((*it)->GetQual(),"replace")
789  && NStr::CompareNocase((*it)->GetVal(),seq_piece))
790  {
791  variations.push_back (CSeqTextVariationGraph(feat_left, feat_right, (*it)->GetVal()));
792  replace_found = true;
793  }
794  }
795  }
796  if (!replace_found) {
797  variations.push_back (CSeqTextVariationGraph(feat_left, feat_right, "-"));
798  }
799 }
800 
801 
802 void
804 (TSeqPos start_offset,
805  TSeqPos stop_offset,
806  ISeqTextGeometry* pParent,
808  bool& even)
809 {
810  if (!pParent) return;
811 
812  if (IntersectingSourceInterval(start_offset, stop_offset)) {
814  CFeat_CI f(m_Seq, TSeqRange(start_offset + GetSequenceStart(), stop_offset + GetSequenceStart()), sel);
815  while (f) {
816  if (f->GetData().GetSubtype() == CSeqFeatData::eSubtype_exon) {
817  TSeqPos feat_stop = f->GetLocation().GetStop(eExtreme_Positional);
818  TSeqPos feat_start = f->GetLocation().GetStart(eExtreme_Positional);
819  if (feat_stop < GetSequenceStart() || feat_stop - GetSequenceStart() < start_offset) {
820  // just alternate
821  even = !even;
822  } else if (feat_start - GetSequenceStart() > stop_offset) {
823  // we're done
824  break;
825  } else {
826  // color the portion of this exon in the view range
827  // only color odd exons
828  if (!even) {
829  pParent->STG_SetSubtypesForFeature (subtypes, f->GetLocation(), f->GetFeatSubtype(), start_offset, stop_offset);
830  }
831  even = !even;
832  }
833  }
834  ++f;
835  }
836  }
837 
838 }
839 
840 
842 {
843  TSeqPos start_offset = source_pos;
844  TSeqPos stop_offset = start_offset;
845  if (IntersectingSourceInterval(start_offset, stop_offset)) {
846  start_offset += GetSequenceStart();
847  stop_offset += GetSequenceStart();
849  CFeat_CI f(m_Seq, TSeqRange(start_offset, stop_offset), sel);
850  while (f) {
851  for(CSeq_loc_CI loc_it(f->GetLocation()); loc_it; ++loc_it) {
852  CSeq_loc_CI::TRange this_range = loc_it.GetRange();
853  if (this_range.GetFrom() <= start_offset && this_range.GetTo() >= start_offset) {
854  features.push_back(f->GetSeq_feat());
855  break;
856  }
857  }
858  ++f;
859  }
860  }
861 
862 }
863 
864 
865 // adds splice site information in the visible range start_offset to stop_offset (source coordinates)
866 // splice_sites is an array with positions from start_offset to stop_offset
868 {
869  const CSeq_loc& loc = feat.GetLocation();
870  string splice_buffer;
871  bool is_start = true;
872  TSeqPos unset_pos[2];
873  bool need_unset[2];
874 
875  need_unset[0] = false;
876  need_unset[1] = false;
877  unset_pos[0] = 0;
878  unset_pos[1] = 0;
879 
880  for(CSeq_loc_CI loc_it(loc); loc_it; ++loc_it) {
881  CSeq_loc_CI::TRange this_range = loc_it.GetRange();
882 
883  TSeqPos feat_start = this_range.GetFrom();
884  TSeqPos feat_stop = this_range.GetTo();
885  if (feat_stop <= start_offset || feat_start >= stop_offset)
886  continue;
887  feat_start -= start_offset;
888  feat_stop -= start_offset;
889 
890  bool is_minus = false;
891 
892  if (loc_it.GetStrand() == eNa_strand_minus) {
893  is_minus = true;
894  }
895 
896  CBioseq_Handle handle = m_Seq.GetScope().GetBioseqHandle(*loc_it.GetEmbeddingSeq_loc().GetId());
898  if (feat_start > 2 && ! is_start && feat_start < stop_offset && feat_start - 2 >= start_offset) {
899  // check the two positions before the start of this location
900  // to see if they contain A and G
901  vect.GetSeqData (feat_start - 2, feat_start, splice_buffer);
902  if (x_IsSpliceSite(splice_buffer, true, is_minus)) {
903  TSeqPos interval_pos = feat_start - 2;
904  if (SequencePosToIntervalPos(interval_pos)) {
905  splice_sites[interval_pos + m_Offset] = true;
906  }
907  interval_pos = feat_start - 1;
908  if (SequencePosToIntervalPos(interval_pos)) {
909  splice_sites[interval_pos + m_Offset] = true;
910  }
911  }
912  }
913  need_unset [0] = false;
914  need_unset [1] = false;
915  if (feat_stop < splice_sites.size() - 2 && feat_stop <= handle.GetBioseqLength() - 2
916  && feat_stop > start_offset && feat_stop + 3 < stop_offset) {
917  // check the two positions after the stop of this location
918  // to see if they contain G and T
919  vect.GetSeqData (feat_stop + 1, feat_stop + 3, splice_buffer);
920  if (x_IsSpliceSite(splice_buffer, false, is_minus)) {
921  TSeqPos interval_pos = feat_stop + 1;
922  if (SequencePosToIntervalPos(interval_pos)) {
923  splice_sites[interval_pos] = true;
924  need_unset [0] = true;
925  unset_pos [0] = interval_pos;
926  }
927  interval_pos = feat_stop + 2;
928  if (SequencePosToIntervalPos(interval_pos)) {
929  splice_sites[interval_pos] = true;
930  need_unset [1] = true;
931  unset_pos [1] = interval_pos;
932  }
933  }
934  }
935  is_start = false;
936  }
937 
938  /* unset splice site after last feature interval */
939  if (need_unset [0]) {
940  splice_sites[unset_pos[0]] = false;
941  }
942  if (need_unset[1]) {
943  splice_sites[unset_pos[1]] = false;
944  }
945 
946 }
947 
948 
950 {
951  CRef<CSeq_loc> cmp(new CSeq_loc());
952  CRef<CSeq_id> id(new CSeq_id());
953  id->Assign(*(m_Seq.GetId().front().GetSeqId()));
954  cmp->SetInt().SetId(*id);
955  cmp->SetInt().SetFrom(interval_start + GetSequenceStart());
956  cmp->SetInt().SetTo(interval_stop + GetSequenceStart());
957  return cmp;
958 }
959 
960 
961 bool CSeqTextDataSourceInterval::x_IsSpliceSite(string splice_buffer, bool before_loc, bool is_minus) const
962 {
963  bool is_splice = false;
964 
965  if (splice_buffer.length() < 2) {
966  is_splice = false;
967  } else if (before_loc) {
968  if (is_minus) {
969  if ((splice_buffer[0] == 'A' && (splice_buffer[1] == 'C' || splice_buffer[1] == 'G'))
970  || (splice_buffer[0] == 'G' && splice_buffer[1] == 'C')) {
971  is_splice = true;
972  } else {
973  is_splice = false;
974  }
975  } else {
976  if (splice_buffer[0] == 'A' && (splice_buffer[1] == 'G' || splice_buffer[1] == 'C')) {
977  is_splice = true;
978  } else {
979  is_splice = false;
980  }
981  }
982  } else {
983  if (is_minus) {
984  if ((splice_buffer[0] == 'C' || splice_buffer[0] == 'G') && splice_buffer[1] == 'T') {
985  is_splice = true;
986  } else {
987  is_splice = false;
988  }
989  } else {
990  if ((splice_buffer[0] == 'G' && (splice_buffer[1] == 'T' || splice_buffer[1] == 'C'))
991  || (splice_buffer[0] == 'C' && splice_buffer[1] == 'T')) {
992  is_splice = true;
993  } else {
994  is_splice = false;
995  }
996  }
997  }
998  return is_splice;
999 }
1000 
1001 
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
CFeat_CI –.
Definition: feat_ci.hpp:64
class CGlPane
Definition: glpane.hpp:62
CMappedFeat –.
Definition: mapped_feat.hpp:59
CScope –.
Definition: scope.hpp:92
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
bool GetShow(int subtype) const
CSeqTextPaneConfig::EFeatureDisplayType GetFeatureColorationChoice()
bool SourcePosToIntervalPos(TSeqPos &pos) const
void GetFeaturesAtPosition(TSeqPos source_pos, vector< CConstRef< objects::CSeq_feat > > &features)
bool IntersectingSourceInterval(TSeqPos &src_start, TSeqPos &src_stop) const
void GetSeqData(TSeqPos src_start, TSeqPos src_stop, string &buffer, objects::SAnnotSelector *feat_sel) const
void RenderFeatureExtras(ISeqTextGeometry *pParent, TSeqPos start_offset, TSeqPos stop_offset)
objects::CBioseq_Handle m_Seq
objects::CSeqVector m_Vect
bool SequencePosToIntervalPos(TSeqPos &pos) const
TSeqPos GetSequenceStart() const
Definition: seq_text_ds.hpp:70
void GetSubtypesForAlternatingExons(TSeqPos start_offset, TSeqPos stop_offset, ISeqTextGeometry *pParent, CSeqTextDefs::TSubtypeVector &subtypes, bool &even)
void x_AddVariationsFromFeature(const objects::CSeq_feat &feat, const objects::CSeq_loc &loc, CSeqTextDefs::TVariationGraphVector &variations) const
void GetSeqString(TSeqPos src_start, TSeqPos src_stop, string &buffer) const
void LookForSpliceJunctions(const objects::CMappedFeat &feat, CSeqTextDefs::TSpliceSiteVector &splice_sites, TSeqPos start_offset, TSeqPos stop_offset) const
CSeqTextDataSourceInterval(const objects::CSeq_loc &loc, objects::CScope &scope, TSeqPos offset)
bool x_IsSpliceSite(string splice_buffer, bool before_loc, bool is_minus) const
void GetFeatureData(TSeqPos start_offset, TSeqPos stop_offset, CSeqTextConfig *cfg, ISeqTextGeometry *pParent, CSeqTextDefs::TSubtypeVector &subtypes, CSeqTextDefs::TSpliceSiteVector &splice_sites, CSeqTextDefs::TVariationGraphVector &variations)
CRef< objects::CSeq_loc > m_Loc
Definition: seq_text_ds.hpp:99
void GetToolTipForPosition(TSeqPos src_pos, string &tooltip_text)
CRef< objects::CSeq_loc > x_GetSeqLocForInterval(TSeqPos interval_start, TSeqPos interval_stop) const
TSeqPos GetSequenceStop() const
Definition: seq_text_ds.hpp:71
void GetVariations(TSeqPos start_offset, TSeqPos stop_offset, CSeqTextDefs::TVariationGraphVector &variations)
vector< CSeqTextDataSourceInterval > TIntervalList
CRef< objects::CSeq_entry > m_Sep
virtual ~CSeqTextDataSource()
void RenderFeatureExtras(ISeqTextGeometry *pParent, CGlPane &pane, TSeqPos seq_start, TSeqPos seq_stop)
CRef< objects::CSeq_loc > m_Loc
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer, objects::SAnnotSelector *feat_sel=NULL, bool showFeatAsLower=false)
void FindSequenceFragmentList(const string &fragment, CSeqTextDefs::TSeqPosVector &locations, ICanceled *cancel)
const objects::CSeq_loc * GetLoc() const
objects::CBioseq_Handle m_SubjectHandle
void GetFeatureData(TSeqPos start_offset, TSeqPos stop_offset, CSeqTextConfig *cfg, ISeqTextGeometry *pParent, CSeqTextDefs::TSubtypeVector &subtypes, CSeqTextDefs::TSpliceSiteVector &splice_sites, CSeqTextDefs::TVariationGraphVector &variations)
TIdRef GetId() const
static int ChooseBetterSubtype(int subtype1, int subtype2)
void GetIntervalBreaks(TSeqPos start_offset, TSeqPos stop_offset, CSeqTextDefs::TSeqPosVector &breaks)
string GetToolTipForSourcePos(TSeqPos source_pos)
CSeqTextDataSource(objects::CSeq_entry &sep, objects::CScope &scope)
int FindSequenceFragment(const string &fragment, TSeqPos start_search)
vector< CConstRef< objects::CSeq_feat > > GetFeaturesAtPosition(TSeqPos source_pos)
TSeqPos SequencePosToSourcePos(TSeqPos sequence_pos, bool *found_in_source=NULL)
objects::CScope & GetScope()
void GetSubtypesForAlternatingExons(TSeqPos start_offset, TSeqPos stop_offset, ISeqTextGeometry *pParent, CSeqTextDefs::TSubtypeVector &subtypes)
TIntervalList m_Intervals
void GetVariations(TSeqPos start_offset, TSeqPos stop_offset, CSeqTextDefs::TVariationGraphVector &variations)
TSeqPos SourcePosToSequencePos(TSeqPos source_pos)
CRef< objects::CScope > m_Scope
void x_PopulateFeatureIterators()
void GetSeqString(TSeqPos start, TSeqPos stop, string &buffer)
vector< CSeqTextVariationGraph > TVariationGraphVector
vector< bool > TSpliceSiteVector
vector< int > TSubtypeVector
vector< TSeqPos > TSeqPosVector
class CSeqTextVariationGraph
CSeqVector –.
Definition: seq_vector.hpp:65
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
Interface for testing cancellation request in a long lasting operation.
Definition: icanceled.hpp:51
class ISeqTextGeometry
virtual void STG_RenderFeatureExtras(const objects::CMappedFeat &feat)=0
virtual void STG_SetSubtypesForFeature(CSeqTextDefs::TSubtypeVector &subtypes, const objects::CSeq_loc &loc, int subtype, TSeqPos start_offset, TSeqPos stop_offset)=0
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
static objects::SAnnotSelector GetAnnotSelector(TAnnotFlags flags=0)
request an annotation selector for a given type
Definition: utils.cpp:167
bool OpenOrtho()
Definition: glpane.hpp:427
void Close(void)
Definition: glpane.cpp:178
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
@ eUserTypeAndContent
Definition: label.hpp:66
CConstRef< CSeq_id > GetSeqId(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
Definition: sequence.hpp:101
CSeq_entry_Handle GetSeq_entryHandle(CDataLoader *loader, const TBlobId &blob_id, EMissing action=eMissing_Default)
Get Seq-entry handle by its blob-id, with possible loading.
Definition: scope.cpp:113
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
TSeqPos GetBioseqLength(void) const
TInst_Mol GetInst_Mol(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
bool CanGetInst_Mol(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
const CSeq_loc & GetLocation(void) const
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TObjectType * GetNCPointer(void) const THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:1174
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TObjectType & GetObject(void)
Get object.
Definition: ncbiobj.hpp:1011
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
Definition: range.hpp:419
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
virtual bool IsCanceled(void) const =0
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2989
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool CanGetQual(void) const
Check if it is safe to call GetQual method.
Definition: Seq_feat_.hpp:1141
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1147
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
TBiomol GetBiomol(void) const
Get the Biomol member data.
Definition: MolInfo_.hpp:447
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
int i
range(_Ty, _Ty) -> range< _Ty >
int tolower(Uchar c)
Definition: ncbictype.hpp:72
int toupper(Uchar c)
Definition: ncbictype.hpp:73
int islower(Uchar c)
Definition: ncbictype.hpp:66
T max(T x_, T y_)
T min(T x_, T y_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static pcre_uint8 * buffer
Definition: pcretest.c:1051
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
int offset
Definition: replacements.h:160
USING_SCOPE(ncbi::objects)
void InvertCase(string &seq)
SAnnotSelector –.
static const char *const features[]
Modified on Sat Dec 02 09:19:33 2023 by modify_doxy.py rev. 669887