NCBI C++ ToolKit
objcoords.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: objcoords.cpp 91363 2020-10-19 14:54:50Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Vlad Lebedev, Victor Joukov
27  *
28  * File Description: Library to retrieve mapped sequence coordinates of
29  * the feature products given a sequence location
30  *
31  *
32  */
33 
34 #include <ncbi_pch.hpp>
35 
36 #include <misc/hgvs/objcoords.hpp>
37 
38 #include <misc/hgvs/sequtils.hpp>
39 
40 #include <util/checksum.hpp>
41 
42 #include <util/sequtil/sequtil.hpp>
44 
45 #include <objmgr/feat_ci.hpp>
46 #include <objmgr/align_ci.hpp>
47 #include <objmgr/seqdesc_ci.hpp>
49 #include <objmgr/seq_vector.hpp>
51 #include <objmgr/util/sequence.hpp>
52 #include <objmgr/util/feature.hpp>
55 #include <objects/seq/Seqdesc.hpp>
63 
64 #include <math.h>
65 
66 
69 
70 
71 // size of sequence frame
72 static const TSeqPos kSeqFrame = 10;
73 
74 // Service functions
75 //static string x_GetSequence(CSeqVector& seq_vec, TSignedSeqPos pos, bool complement=false);
76 static string x_GetSequence(CSeqVector& seq_vec, TSignedSeqPos pos, TSignedSeqPos adjustment=0);
77 
79 
80 
81 /////////////////////////////////////////////////////////////////////////////
82 // CReportEntry
83 // Container for all information related to one logical entry - mRNA/CDS
84 // combination or component.
85 
86 class CReportEntry : public CObject
87 {
88 public:
89  CReportEntry(CScope& scope, const CSeq_id& seq_id,
90  feature::CFeatTree& feat_tree, const CMappedFeat& root_feat);
91  CReportEntry(CScope& scope, const CSeq_id& seq_id, const CMappedFeat& gene,
92  feature::CFeatTree& feat_tree, const CMappedFeat& root_feat);
93  CReportEntry(CScope& scope, const CSeq_id& seq_id, const CSeq_align& align);
95  void GetCoordinates(CScope& scope,
97  TSeqPos pos,
98  CHGVS_Coordinate_Set& coords) const;
99  bool IsValid() { return m_mrna || m_cds; }
100 
101  // Specific to feature/alignment based mRNA/CDS group
102  void SetGene(CScope& scope, const CSeq_feat& feat);
103  void SetMrna(CScope& scope, const CSeq_feat& feat);
104  void SetCds(CScope& scope, const CSeq_feat& feat);
105  void SetAlignment(CScope& scope, const CSeq_align& align);
106  // ??? is it specific, or can be made generic
108 
109 private:
110  CConstRef<CSeq_feat> m_mrna; /// mRNA feature on main sequence (genomic or transcript)
111  CConstRef<CSeq_feat> m_cds; /// CDS feature on main sequence
112  CConstRef<CSeq_feat> m_rna_cds; /// CDS feature on transcript, where main is genomic
119  // Some features occur on many kinds of sequences, so
120  // we have an alias m_main_id for such cases
125  string m_locus;
126 
127  void x_SetId(CScope& scope, const CSeq_id& seq_id);
128  void x_SetFeature(CScope& scope, feature::CFeatTree& feat_tree, const CMappedFeat& feat);
129  void x_SetRnaCds(CScope& scope, const CSeq_feat& cds);
130  void x_SetMrnaMapper(CScope& scope, const CSeq_feat& feat);
131 
132  CRef<CSeq_loc_Mapper> x_GetCdsMapper(CScope& scope, const CSeq_feat& cds) const;
134  bool x_CheckExonGap(TSeqPos pos) const;
135  TSeqPos x_GetCDSOffset() const;
136  CRef<CHGVS_Coordinate> x_NewCoordinate(CScope& scope, const CSeq_id* id, TSeqPos pos) const;
137  void x_SetSequence(CHGVS_Coordinate& ref, CScope& scope, const CSeq_id* id,
138  TSignedSeqPos pos, TSignedSeqPos adjustment=0, bool plus_strand=true) const;
139  void x_SetHgvs(CHGVS_Coordinate& ref, const char* prefix, TSignedSeqPos pos,
140  TSignedSeqPos adjustment = 0, bool utr3_tail = false,
141  bool in_gap = false) const;
142  bool x_MapPos(const CSeq_loc_Mapper& mapper,
143  const CSeq_id& id, TSeqPos pos,
144  TSeqPos& mapped_pos) const;
145  void x_CalculateUTRAdjustments(TSignedSeqPos& feature_pos,
146  TSignedSeqPos& seq_pos,
147  TSignedSeqPos& adjustment,
148  bool& utr3_tail) const;
149  void x_GetRCoordinate(CScope& scope, TSeqPos pos,
150  CHGVS_Coordinate_Set& coords) const;
151  void x_GetCCoordinate(CScope& scope, TSeqPos pos, TSignedSeqPos adj,
152  CHGVS_Coordinate_Set& coords) const;
153  void x_GetPCoordinate(CScope& scope, TSeqPos pos,
154  CHGVS_Coordinate_Set& coords) const;
155 };
156 
157 
158 
160  feature::CFeatTree& feat_tree, const CMappedFeat& root_feat)
161  : m_cdr_length(0), m_cds_offset_set(false)
162 {
163  x_SetId(scope, seq_id);
164  x_SetFeature(scope, feat_tree, root_feat);
165 }
166 
167 
168 CReportEntry::CReportEntry(CScope& scope, const CSeq_id& seq_id, const CMappedFeat& gene,
169  feature::CFeatTree& feat_tree, const CMappedFeat& root_feat)
170  : m_cdr_length(0), m_cds_offset_set(false)
171 {
172  x_SetId(scope, seq_id);
173  SetGene(scope, gene.GetMappedFeature());
174  x_SetFeature(scope, feat_tree, root_feat);
175 }
176 
177 
178 CReportEntry::CReportEntry(CScope& scope, const CSeq_id& seq_id, const CSeq_align& align)
179 {
180  x_SetId(scope, seq_id);
181  SetAlignment(scope, align);
182 }
183 
184 
185 void CReportEntry::x_SetId(CScope& scope, const CSeq_id& seq_id)
186 {
187  m_main_id.Reset(&seq_id);
189  /// check for protein
191  m_prot_id.Reset(&seq_id);
192  }
193  /// check for mRNA
194  else if (type_flags & CSeq_id::eAcc_mrna) {
195  m_transcript_id.Reset(&seq_id);
196  }
197  /// check for genomic
198  else if (type_flags & CSeq_id::fAcc_genomic) {
199  m_genomic_id.Reset(&seq_id);
200  }
201 }
202 
203 
204 void CReportEntry::x_SetFeature(CScope& scope, feature::CFeatTree& feat_tree, const CMappedFeat& feat)
205 {
206  const CSeq_feat& seq_feat = feat.GetMappedFeature();
207  CSeqFeatData::ESubtype subtype =
208  seq_feat.GetData().GetSubtype();
209 
210  switch (subtype) {
211  // The following case is useless, handled in different manner in calling code
213  SetGene(scope, seq_feat);
214  {{
215  vector<CMappedFeat> cc = feat_tree.GetChildren(feat);
216  ITERATE (vector<CMappedFeat>, iter, cc) {
217  x_SetFeature(scope, feat_tree, *iter);
218  }
219  }}
220  break;
222  SetMrna(scope, seq_feat);
223  if (feat.IsSetProduct()) {
224  CBioseq_Handle product_bsh =scope.GetBioseqHandle(feat.GetProduct());
225  if (product_bsh) {
228  for (CFeat_CI feat_iter(product_bsh, sel); feat_iter; ++feat_iter)
229  {
230  x_SetRnaCds(scope, feat_iter->GetMappedFeature());
231  }
232  }
233  } else {
234  vector<CMappedFeat> cc = feat_tree.GetChildren(feat);
235  // TODO: We don't handle multiple CDS per mRNA. Do they actually happen?
236  ITERATE (vector<CMappedFeat>, iter, cc) {
237  const CSeq_feat& seq_feat = iter->GetMappedFeature();
238  if (seq_feat.GetData().GetSubtype() ==
240  {
241  SetCds(scope, seq_feat);
242  break;
243  }
244  }
245  }
246  break;
248  SetCds(scope, seq_feat);
249  break;
250  default:
251  break;
252  }
253 }
254 
255 
258  TSeqPos pos,
259  CHGVS_Coordinate_Set& coords) const
260 {
261  // Fill out g. r. c. and p. coordinates based on the entry and type of
262  // original sequence
264  // do nothing
265  return;
266  }
267  // TODO: handle component here in the future
268 
269  // Calculate adjustment to the nearest exon, 0 if we are in exon
270  TSignedSeqPos adjustment = x_GetAdjustment(pos);
271  // check for genomic
273  // r. c. and p. - r. here, c. and p. same as for mRNA
274  if (!adjustment)
275  x_GetRCoordinate(scope, pos, coords);
276  }
277  // check for mRNA, effectively union of cases with genomic
279  // c. and p.
280  x_GetCCoordinate(scope, pos, adjustment, coords);
281  if (!adjustment)
282  x_GetPCoordinate(scope, pos, coords);
283  }
284 }
285 
286 
288 {
289  // Calculate minimal distance to the exon boundary
290  if (!m_mrna_mapper) return 0;
291  //bool plus_strand = true;
292  if (m_mrna_align) {
293  // TODO: Use alignment, take into account exons
294  // const CSpliced_seg& seg = m_mrna_align->GetSegs().GetSpliced();
295  // ??? what should we do if we're in exon gap? How to handle exon overlap
296  // (ambiguous positions in an exon)
297  }
298  TSignedSeqPos signed_pos = static_cast<TSignedSeqPos>(pos);
299  TSignedSeqPos adj = 0;
300  const CMappingRanges& ranges = m_mrna_mapper->GetMappingRanges();
303  idh,
306  for ( ; rg_it; ++rg_it) {
307  const CMappingRange& cvt = *rg_it->second;
308  TSignedSeqPos f = cvt.GetSrc_from();
309  TSignedSeqPos t = f + cvt.GetLength();
310  // TODO: Is it reliable enough to test for mRNA strand?
311  // plus_strand = !cvt.GetReverse();
312 // LOG_POST("Pos " << pos << " from " << f << " to " << t);
313  int adj_for_intron;
314  if (signed_pos < f) {
315  adj_for_intron = signed_pos - f;
316  } else if (signed_pos > t) {
317  adj_for_intron = signed_pos - t;
318  } else {
319  return 0; // no adjustment needed
320  }
321  if (adj == 0 || std::abs(adj_for_intron) < std::abs(adj)) {
322  adj = adj_for_intron;
323  }
324  }
325 // LOG_POST("Adjustment calculated " << adj);
326  return m_mrna_plus_strand ? adj : -adj;
327 }
328 
329 
331 {
332  if (!m_mrna_align) return false;
333  // Use alignment, take into account exons
334  const CSpliced_seg& seg = m_mrna_align->GetSegs().GetSpliced();
335  ITERATE (CSpliced_seg::TExons, exon_it, seg.GetExons()) {
336  const CSpliced_exon& exon = **exon_it;
337  if (!exon.IsSetGenomic_start() || pos < exon.GetGenomic_start() ||
338  !exon.IsSetGenomic_end() || pos > exon.GetGenomic_end() ||
339  !exon.IsSetParts()
340  ) {
341  continue;
342  }
343  TSeqPos exon_pos = m_mrna_plus_strand ?
344  pos - exon.GetGenomic_start() :
345  exon.GetGenomic_end() - pos;
346  ITERATE (CSpliced_exon::TParts, part_it, exon.GetParts()) {
347  TSeqPos l = 0;
348  bool in_gap = false;
349  const CSpliced_exon_chunk& part = **part_it;
350  if (part.IsMatch()) {
351  l = part.GetMatch();
352  } else if (part.IsMismatch()) {
353  l = part.GetMismatch();
354  } else if (part.IsDiag()) {
355  l = part.GetDiag();
356  } else if (part.IsProduct_ins()) {
357  l = 0; // redundant
358  } else if (part.IsGenomic_ins()) {
359  l = part.GetGenomic_ins();
360  in_gap = true;
361  }
362  if (l > exon_pos) return in_gap;
363  exon_pos -= l;
364  }
365  }
366  return false;
367 }
368 
369 
371 {
372  return m_cds_offset_set ? m_cds_offset : 0;
373 }
374 
375 
377 {
378  // get the title (best id)
379  string title;
380  if (id) {
381  CSeq_id_Handle idh = sequence::GetId(*id, scope,
383  idh.GetSeqId()->GetLabel(&title, CSeq_id::eContent);
384  } else if (!m_locus.empty()) {
385  title = m_locus;
386  }
388  ref->SetTitle(title);
389  ref->SetObject_id("");
390  ref->SetMarker_pos(pos);
391  ref->SetSequence("");
392  ref->SetHgvs_position("");
393  return ref;
394 }
395 
396 
398  TSignedSeqPos pos, TSignedSeqPos adjustment, bool plus_strand) const
399 {
400  if (!id) return;
401  CBioseq_Handle prod_bsh =
402  scope.GetBioseqHandle(*id);
403  CSeqVector prod_vec(prod_bsh, CBioseq_Handle::eCoding_Iupac,
404  plus_strand ? eNa_strand_plus : eNa_strand_minus);
405  ref.SetSequence( x_GetSequence(prod_vec, plus_strand ? pos : prod_vec.size() - pos - 1, adjustment) );
406 }
407 
408 
410  TSignedSeqPos adjustment, bool utr3_tail,
411  bool in_gap) const
412 {
413  string hgvs(prefix);
414  if (in_gap) {
415  TSignedSeqPos second_pos;
416  if (adjustment > 0) {
417  second_pos = pos + 1;
418  } else {
419  second_pos = pos;
420  pos -= 1;
421  }
422  hgvs += "(" + NStr::IntToString(pos+1) + "_" + NStr::IntToString(second_pos+1) + ")";
423  ref.SetHgvs_position(hgvs);
424  return;
425  }
426  TSignedSeqPos pos_1_based =
427  pos >= 0 ? pos + 1 : pos;
428  if (adjustment == 0 || !utr3_tail) {
429  hgvs += NStr::IntToString(pos_1_based);
430  }
431  if (adjustment != 0) {
432  if (utr3_tail) {
433  hgvs += '*';
434  } else if (adjustment > 0) {
435  hgvs += '+';
436  }
437  hgvs += NStr::IntToString(adjustment);
438  }
439  ref.SetHgvs_position(hgvs);
440 }
441 
442 
444  const CSeq_id& id, TSeqPos pos,
445  TSeqPos& mapped_pos) const
446 {
447  // fake location for seq_mapper
448  CRef<CSeq_loc> fake_loc(new CSeq_loc());
449  fake_loc->SetPnt().SetPoint(pos);
450  fake_loc->SetId(id);
451 
452  // Get the feature position
453  CRef<CSeq_loc> mapped_loc;
454  // Due to defect in definition of Map (non-const) we need to use coercion
455  mapped_loc = const_cast<CSeq_loc_Mapper*>(&mapper)->Map(fake_loc.GetObject());
456  if (mapped_loc->Which() == CSeq_loc::e_Null) {
457  ERR_POST(Warning << "loc mapping did not work");
458  return false;
459  }
460  mapped_pos = mapped_loc->GetPnt().GetPoint();
461  return true;
462 }
463 
464 
466  TSignedSeqPos& seq_pos,
467  TSignedSeqPos& adjustment,
468  bool& utr3_tail) const
469 {
470  utr3_tail = false;
471  // To calculate whether we are in UTR 3' region we need to know if our
472  // unadjusted position would map beyond the coding feature length.
473  // There are two cases:
474  // If the right flank is not zero, even adjusted position would map
475  // after cdr_length.
476  // The second one is worse - its adjusted pos always maps to the last
477  // valid position of last exon. For this case we check it and add
478  // adjustment to be used in UTR 3' test.
479  TSignedSeqPos utr3_adjusted_pos = feature_pos;
480  if (m_cdr_length && feature_pos >= TSignedSeqPos(m_cdr_length) - 1) {
481  utr3_adjusted_pos += adjustment;
482  }
483  // If position is out of bounds of the feature, we just add
484  // adjustment so it does not matter is it in intron or not.
485  // According to Alex Astashin, if the position is inside mRNA,
486  // it is reported in detail, so we check seq_pos, not feature_pos
487  if (seq_pos <= 0) {
488  feature_pos += adjustment;
489  seq_pos += adjustment;
490  adjustment = 0;
491  } else
492  if (m_cdr_length && utr3_adjusted_pos >= TSignedSeqPos(m_cdr_length)) {
493  feature_pos += adjustment;
494  seq_pos += adjustment;
495  // Adjustment signifies the offset from the stop codon into 3' UTR
496  adjustment = feature_pos - m_cdr_length + 1;
497  utr3_tail = true;
498  feature_pos = m_cdr_length - 1;
499  }
500 }
501 
502 
504  CHGVS_Coordinate_Set& coords) const
505 {
507 
508  if (!m_mrna_mapper) {
509  ERR_POST(Warning << "mRNA mapper is empty");
510  return;
511  }
512  TSeqPos mapped_pos;
513  if (x_MapPos(*m_mrna_mapper, *m_genomic_id, pos, mapped_pos)) {
514  ref->SetPos_mapped(mapped_pos);
515  x_SetHgvs(*ref, "r.", mapped_pos);
516  if (m_transcript_id) {
517  x_SetSequence(*ref, scope, m_transcript_id, mapped_pos);
518  } else if (m_genomic_id) {
519  // Product-less mRNA feature - we're faking it a bit using
520  // original sequence and original, not mapped position and
521  // appropriate strand. Same trick works for c. coordinate below
522  x_SetSequence(*ref, scope, m_genomic_id, pos, 0, m_mrna_plus_strand);
523  }
524  coords.Set().push_back(ref);
525  }
526 }
527 
528 
530  CHGVS_Coordinate_Set& coords) const
531 {
532  if (!m_cds && !m_rna_cds) return;
534  bool mapped = false;
535 
536  // adjusted_pos is for safely mapping the coordinate, it is guaranteed to be in
537  // exon, and even if the exon has a gap, not in the gap.
538  // adjustment is opposite the product, that is why we need to know
539  // the strand of the feature to find adjustment to mappable position
540  TSeqPos adjusted_pos = pos + (m_mrna_plus_strand ? -adjustment : adjustment);
541 
542  TSignedSeqPos mapped_pos;
543  TSeqPos seq_pos;
544  TSeqPos offset = 0;
545  bool utr3_tail = false;
546  bool in_gap = false;
547  if (m_mrna_mapper) {
548  // To use mRNA mapper to map to CDS coordinate we need to calculate
549  // CDS to mRNA offset here and use it to fix mapped coordinate
551  if (x_MapPos(*m_mrna_mapper, *m_main_id, adjusted_pos, seq_pos)) {
552  in_gap = x_CheckExonGap(pos);
553  mapped_pos = seq_pos - offset;
554  TSignedSeqPos utr_adjusted_pos = seq_pos;
555  x_CalculateUTRAdjustments(mapped_pos, utr_adjusted_pos, adjustment, utr3_tail);
556  if (m_transcript_id) {
557  x_SetSequence(*ref, scope, m_transcript_id, utr_adjusted_pos, adjustment);
558  } else if (m_genomic_id) {
559  // Product-less mRNA feature - see comment for r. coordinate
560  x_SetSequence(*ref, scope, m_genomic_id, pos, adjustment, m_mrna_plus_strand);
561  }
562  mapped = true;
563  }
564  } else if (m_cds) { // This condition is trivially true, but stays here to denote the case
565  // CDS feature only - map through fake product
566  CRef<CSeq_loc> fake_product(new CSeq_loc);
567  fake_product->SetWhole().Assign(*m_main_id);
569  (m_cds->GetLocation(),
570  *fake_product,
571  &scope));
572  if (x_MapPos(*mapper, *m_main_id, adjusted_pos, seq_pos)) {
573  mapped_pos = seq_pos;
574  // Sequence coming from original id, use original pos
575  x_SetSequence(*ref, scope, m_main_id, pos, adjustment);
576  mapped = true;
577  }
578  } else {
579  // Sorry, not enough info to calculate the coordinate
580  return;
581  }
582  x_SetHgvs(*ref, "c.", mapped_pos, adjustment, utr3_tail, in_gap);
583 
584  if (!adjustment) {
585  ref->SetPos_mapped(mapped_pos);
586  }
587  if (mapped) coords.Set().push_back(ref);
588 }
589 
590 
592  CHGVS_Coordinate_Set& coords) const
593 {
594  TSeqPos mapped_pos = pos;
595  if (m_rna_cds && m_mrna_mapper) {
596  // We have both DNA to RNA and RNA to protein mappings, so map 2 times
597  if (!x_MapPos(*m_mrna_mapper, *m_genomic_id, pos, mapped_pos))
598  return;
599  if (!x_MapPos(*x_GetCdsMapper(scope, *m_rna_cds), *m_transcript_id, mapped_pos, mapped_pos))
600  return;
601  } else if (m_cds) {
602  if (!x_MapPos(*x_GetCdsMapper(scope, *m_cds), *m_main_id, pos, mapped_pos))
603  return;
604  } else {
605  return;
606  }
608  ref->SetPos_mapped(mapped_pos);
609  x_SetHgvs(*ref, "p.", mapped_pos);
610  x_SetSequence(*ref, scope, m_prot_id, mapped_pos);
611  coords.Set().push_back(ref);
612 }
613 
614 
615 void CReportEntry::SetGene(CScope& scope, const CSeq_feat& feat)
616 {
617  if (feat.GetData().GetGene().IsSetLocus_tag()) {
618  m_locus = feat.GetData().GetGene().GetLocus_tag();
619  } else if (feat.GetData().GetGene().IsSetLocus()) {
620  m_locus = feat.GetData().GetGene().GetLocus();
621  }
622  // CLabel::GetLabel(feat, &m_locus, CLabel::eContent, &scope);
623 }
624 
625 
626 void CReportEntry::SetMrna(CScope& scope, const CSeq_feat& feat)
627 {
628  m_mrna.Reset(&feat);
629  if (feat.IsSetProduct())
630  // mRNA feature's product always points to transcript
634  x_SetMrnaMapper(scope, feat);
635 }
636 
637 
638 void CReportEntry::SetCds(CScope& scope, const CSeq_feat& cds)
639 {
640  m_cds.Reset(&cds);
641 // if (!m_mrna) m_main_id.Reset(cds.GetLocation().GetId());
642  m_prot_id.Reset(cds.GetProduct().GetId());
643  if (!cds.GetLocation().IsPnt() && (!cds.IsSetPartial() || !cds.GetPartial())) {
645  }
646 
647  // Prefer direct calculation (see x_SetRnaCds) and check
648  // that mRNA is already set
649  if (m_cds_offset_set || !m_mrna) return;
650  // Offset is inferred given the
651  // locations of the mRNA and CDS feature
652  CSeq_loc_CI loc_iter(m_mrna->GetLocation());
654  ENa_strand strand = cds.GetLocation().GetStrand();
655  TSeqRange r(start, start);
656  TSeqPos frame = 0;
657  if (cds.GetData().GetCdregion().IsSetFrame()) {
658  frame = cds.GetData().GetCdregion().GetFrame();
659  if (frame) {
660  frame -= 1;
661  }
662  }
663 
664  TSeqPos offset_acc = 0;
665  for (; loc_iter; ++loc_iter) {
666  if (loc_iter.GetRange()
667  .IntersectingWith(r)) {
668  if (strand == eNa_strand_minus) {
669  m_cds_offset = offset_acc +
670  loc_iter.GetRange().GetTo() - start - frame;
671  } else {
672  m_cds_offset = offset_acc +
673  start - loc_iter.GetRange().GetFrom() + frame;
674  }
675  m_cds_offset_set = true;
676  break;
677  }
678  offset_acc += loc_iter.GetRange().GetLength();
679  }
680 }
681 
682 
684 {
685  m_rna_cds.Reset(&cds);
686  m_prot_id.Reset(cds.GetProduct().GetId());
687  if (!cds.GetLocation().IsPnt() && (!cds.IsSetPartial() || !cds.GetPartial())) {
689  }
690  // Offset is determined by the
691  // placement of a CDS feature
692  // directly on the RNA sequence
694  if (cds.GetData().GetCdregion().IsSetFrame()) {
695  TSeqPos frame = cds.GetData().GetCdregion().GetFrame();
696  if (frame) {
697  m_cds_offset += frame - 1;
698  }
699  }
700  m_cds_offset_set = true;
701 }
702 
703 
705 {
706  if (feat.IsSetProduct()) {
708  (feat,
710  &scope));
711  } else {
712  CRef<CSeq_loc> fake_product(new CSeq_loc);
713  fake_product->SetWhole().Assign(*m_main_id);
715  (feat.GetLocation(),
716  *fake_product,
717  &scope));
718  }
719 }
720 
721 
722 void CReportEntry::SetAlignment(CScope& scope, const CSeq_align& align)
723 {
724  // cerr << MSerial_AsnText << align;
725 
726  if (align.GetSegs().Which() != CSeq_align::TSegs::e_Spliced)
727  return;
728 
729  m_main_id.Reset(&align.GetSeq_id(1));
730  m_genomic_id.Reset(&align.GetSeq_id(1));
731  m_transcript_id.Reset(&align.GetSeq_id(0));
732 
734 // Work-around for CXX-1555 - pass id of the sequence as a reliable way
735 // to indicate desirable direction of the mapping.
736 // (*align, 0, scope));
737  (align, align.GetSeq_id(0), &scope));
738  m_mrna_align.Reset(&align);
739  const CSpliced_seg& seg = m_mrna_align->GetSegs().GetSpliced();
741 
742  if (!m_cds) {
743  // There is no CDS feature because this entry is derived from
744  // alignment. Try to use CDS feature if any on target mRNA
745  CBioseq_Handle product_bsh =
746  scope.GetBioseqHandle
747  (align.GetSeq_id(0));
748  if (product_bsh) {
749  //
750  // our offset is determined by the
751  // placement of a CDS feature
752  // directly on the product RNA sequence
753  //
754 // LOG_POST("Deriving CDS mapping from alignment");
757  // TODO: ??? Now we prefer LAST of CDSs for a given RNA. Can there be
758  // more than one, and what is the meaning for it?
759  for (CFeat_CI feat_iter(product_bsh, sel);
760  feat_iter; ++feat_iter)
761  {
762  x_SetRnaCds(scope, feat_iter->GetMappedFeature());
763  }
764  }
765  } else {
766  // Recalculate CDS offset using the alignment
767  // Check that first exon is not partial
768  const CSpliced_exon &first_exon = *(seg.GetExons().front());
769  if (!(first_exon.IsSetPartial() && first_exon.GetPartial())) {
770  TSeqPos cds_start_on_dna = m_cds->GetLocation().GetStart(eExtreme_Biological);
771  ENa_strand strand = m_cds->GetLocation().GetStrand();
772  TSeqPos rna_start_on_dna;
773  if (strand == eNa_strand_minus) {
774  rna_start_on_dna = seg.GetSeqStop(1);
775  } else {
776  rna_start_on_dna = seg.GetSeqStart(1);
777  }
778  TSeqPos rna_start, cds_start;
779  x_MapPos(*m_mrna_mapper, *m_main_id, rna_start_on_dna, rna_start);
780  x_MapPos(*m_mrna_mapper, *m_main_id, cds_start_on_dna, cds_start);
781  m_cds_offset = cds_start - rna_start;
782  m_cds_offset_set = true;
783  }
784  }
785 }
786 
787 
789 {
791  (cds,
793  &scope));
794  return cds_mapper;
795 }
796 
797 
798 typedef vector<CRef<CReportEntry> > TReportEntryList;
799 
800 static
801 TReportEntryList s_GetFeaturesForRange(CScope& scope, const CBioseq_Handle& bsh, TSeqRange search_range);
802 static
804 static
805 bool s_IsRefSeqGene(const CBioseq_Handle& bsh);
806 
807 
808 /////////////////////////////////////////////////////////////////////////////
809 // CObjectCoords::
810 //
811 
813  m_Scope(&scope)
814 {
815 }
816 
817 
819 {
821  GetCoordinates(*coords, id, pos, window);
822  return coords;
823 }
824 
825 void
827 {
828  CBioseq_Handle bsh = m_Scope->GetBioseqHandle(id);
829  if ( !bsh ) {
831  "failed to retrieve sequence: " + id.AsFastaString());
832  }
833  CSeqVector seq_vec =
835 
836  string title;
838  idh.GetSeqId()->GetLabel(&title, CSeq_id::eContent);
839 
840  // First entry is for the gi itself
842 
843  // identify sequence
845  string hgvs;
846 
847  /// check for protein
849  hgvs = "p.";
850  }
851  /// check for mRNA
852  else if (type_flags & CSeq_id::eAcc_mrna) {
853  hgvs = "r.";
854  }
855  /// check for genomic
856  else if (type_flags & CSeq_id::fAcc_genomic) {
857  hgvs = "g.";
858  }
859 
860  // Get Main sequence
861  ref->SetTitle(title);
862  ref->SetHgvs_position( hgvs + NStr::IntToString(pos + 1) );
863  ref->SetSequence( x_GetSequence(seq_vec, pos) );
864  ref->SetMarker_pos(pos);
865  ref->SetPos_mapped(pos);
866 
867  coords.Set().push_back(ref);
868 
869  // Get all RNAs and CDSs in the +-2k (default) vicinity of the
870  // position in question and build a CFeatTree.
871  // Main cases are:
872  // 1. Genomic sequence. Tree will contain many mRNA->CDS chains
873  // E.g. NT_011515 pos 1585115 (alignment present)
874  // NC_000007.13 pos 65338429 (no alignment)
875  // NC_000019.9 pos 45016991 - forward ribosomal slippage
876  // NC_000007.13 v 94283650:94301032 - back ribosomal slippage,
877  // encoded through weird mRNA/CDS relation
878  // NG_005895.1 - RefSeq Gene, features through alignment only
879  // 1a. mRNA can be w/o product (GenBank DNA)
880  // E.g. BX571818.3
881  // 1b. No mRNA
882  // E.g. NC_000913.2 pos 2282398
883  // 2. Transcript. There will be CDSs alone
884  // E.g. NM_002020
885  // 2a. There can be mRNA w/o product (GenBank mRNA)
886  // E.g. M11313
887  // 2b. No CDS
888  // E.g. NR_002767
889  // 3. Protein sequence
890  // Overall procedure is following:
891  // Get all pairwise alignments for position into map by product id.
892  // Sort all top-level into ReportEntries, adding alignment for mRNA
893  // products from the map (removing from map).
894  // Convert all remaning alignment to ReportEntries
895  // Get all components for position into GeneInfos
896  // Process all ReportEntries getting r., c., and p. (g. for components)
897  TSeqPos min_pos = pos >= window ? pos - window : pos;
898  TSeqPos max_pos = pos + window;
899  TSeqRange search_range(min_pos, max_pos);
900  // Per SV-487, if we deal with RefSeq Gene record only use
901  // alignments to get features
902  bool is_ref_seq_gene = s_IsRefSeqGene(bsh);
903  // is_ref_seq_gene = false; // DEBUG
905  if (!is_ref_seq_gene) entries = s_GetFeaturesForRange(*m_Scope, bsh, search_range);
906  s_ExtendEntriesFromAlignments(*m_Scope, bsh, search_range, entries);
907  ///
908  /// now, evaluate each feature
909  /// we may need to inject an artificial mRNA feature to capture
910  /// the 'c.' coordinate.
911  ///
913  (*iter)->GetCoordinates(
914  *m_Scope,
915  type_flags,
916  pos,
917  coords);
918 
919  }
920 }
921 
922 
923 static
925 {
926  // TODO: Use CSeq_descr_CI on bhs itself here
927  bool is_ref_seq_gene = false;
928  CSeq_entry_Handle seh = bsh.GetParentEntry();
929  if (seh.IsSetDescr()) {
930  const CSeq_descr& descr = seh.GetDescr();
931  CSeq_descr::Tdata descr_list = descr.Get();
932  ITERATE(CSeq_descr::Tdata, it, descr_list) {
933  if (it->GetObject().IsGenbank()) {
934  const CSeqdesc::TGenbank& gb = it->GetObject().GetGenbank();
935  if (gb.IsSetKeywords()) {
936  ITERATE(list<string>, it1, gb.GetKeywords()) {
937  if (*it1 == "RefSeqGene") {
938  is_ref_seq_gene = true;
939  break;
940  }
941  }
942  }
943  }
944  if (is_ref_seq_gene) break;
945  }
946  }
947  return is_ref_seq_gene;
948 }
949 
950 
951 static
953  CScope& scope,
954  const CBioseq_Handle& bsh,
955  TSeqRange search_range)
956 {
958  // find features on the sequence
960  // We don't need gene record per se, only mRNA and CDS
964 
965  CFeat_CI feat_iter(bsh, search_range, sel);
966  feature::CFeatTree feat_tree;
967  feat_tree.AddFeatures(feat_iter);
968 
969  /*
970  Here we have following structures:
971  Gene
972  |\
973  | mRNA
974  | \
975  | CDS
976  |\
977  | mRNA
978  |\
979  | CDS
980  We convert them into list of ReportEntries which hold Gene, mRNA, and CDS
981  records (possibly empty). We need to pass only hierarchy root to ReportEntry
982  constructor except in case of Gene because Gene can have multiple distinct
983  mRNA descendants.
984  */
985  vector<CMappedFeat> feat_roots =
986  feat_tree.GetChildren(CMappedFeat());
987  CConstRef<CSeq_id> seq_id(bsh.GetSeqId());
988  ITERATE (vector<CMappedFeat>, iter, feat_roots) {
989  const CSeq_feat& seq_feat = iter->GetMappedFeature();
990  CSeqFeatData::ESubtype subtype =
991  seq_feat.GetData().GetSubtype();
992  if (subtype == CSeqFeatData::eSubtype_gene) {
993  vector<CMappedFeat> cc = feat_tree.GetChildren(*iter);
994  ITERATE (vector<CMappedFeat>, iter1, cc) {
996  scope,
997  *seq_id,
998  *iter, // pass Gene explicitly
999  feat_tree,
1000  *iter1));
1001  if (entry->IsValid()) entries.push_back(entry);
1002  }
1003  } else {
1004  CRef<CReportEntry> entry(new CReportEntry(
1005  scope,
1006  *seq_id,
1007  feat_tree,
1008  *iter));
1009  if (entry->IsValid()) entries.push_back(entry);
1010  }
1011  }
1012  return entries;
1013 }
1014 
1015 
1016 static
1018  CScope& scope,
1019  const CBioseq_Handle& bsh,
1020  TSeqRange search_range,
1022 {
1023  CConstRef<CSeq_id> seq_id(bsh.GetSeqId());
1024  TReportEntryList new_entries;
1025  SAnnotSelector sel;
1026  //sel.SetAdaptiveDepth(true);
1027  // Slow, but otherwise it can't find all relevant alignments
1028  // for composite sequences
1029  sel.SetResolveAll();
1030  for (CAlign_CI align_it(bsh, search_range, sel); align_it; ++align_it) {
1031  const CSeq_align& al = *align_it;
1032  if (al.CheckNumRows() == 2) {
1033  string label; al.GetSeq_id(0).GetLabel(&label);
1034  bool found = false;
1036  CConstRef<CSeq_id> product_id((*it)->GetTranscriptId());
1037  string entry_label; product_id->GetLabel(&entry_label);
1038 // LOG_POST("Check match for alignment " + label + " with existing entry " + entry_label);
1040  *product_id, al.GetSeq_id(0),
1041  &scope))
1042  {
1043  // Found exising entry - extend it with alignment
1044  (*it)->SetAlignment(scope, al);
1045 // LOG_POST("Entry for " + label + " extended by alignment");
1046  found = true;
1047  break;
1048  }
1049  }
1050  // If we did not match this alignment to an existing entry, make a new one.
1051  if (!found) {
1052  CBioseq_Handle target_bsh = scope.GetBioseqHandle(al.GetSeq_id(0));
1053  if (target_bsh && (GetSequenceType(target_bsh) & CSeq_id::eAcc_mrna)) {
1054  // Make new entry
1055  CRef<CReportEntry> entry(new CReportEntry(scope, *seq_id, al));
1056  new_entries.push_back(entry);
1057 // LOG_POST("New entry " + label + " created from alignment");
1058  }
1059  }
1060  }
1061  }
1062  entries.insert(entries.end(), new_entries.begin(), new_entries.end());
1063 }
1064 
1065 
1066 //static string x_GetSequence(CSeqVector& seq_vec, TSignedSeqPos pos, bool complement)
1067 static string x_GetSequence(CSeqVector& seq_vec, TSignedSeqPos pos, TSignedSeqPos adjustment)
1068 {
1069  string seq_before, seq_pos, seq_after;
1070  TSignedSeqPos seq_len = seq_vec.size();
1071  TSignedSeqPos max_seq_frame = TSignedSeqPos(kSeqFrame);
1072 
1073  pos += adjustment;
1074 
1075  if (adjustment >=0) {
1076  TSeqPos before_from = pos <= max_seq_frame ?
1077  0 : max(0, pos - max_seq_frame);
1078  seq_vec.GetSeqData(before_from, max(0, pos), seq_before);
1079  // This filler takes into account both before and after, so we don't
1080  // need to repeat it twice. We need to compensate for very negative
1081  // position to not overfill it, so we limit overfill by 2*kSeqFrame+1
1082  // which is total length of reported sequence context.
1083  if (pos < max_seq_frame) {
1084  string filler;
1085  int max_filler_len = min(2*max_seq_frame+1, max_seq_frame - pos);
1086  for (int i = 0; i < max_filler_len; i++) filler += "&nbsp;";
1087  seq_before = filler + seq_before;
1088  }
1089  if (adjustment != 0) {
1090  int fill_len = min(int(adjustment-1), int(seq_before.size()));
1091  seq_before.replace(seq_before.size()-fill_len, fill_len, fill_len, '-');
1092  }
1093  } else {
1094  for (int i = 0; i < max_seq_frame; i++) seq_before += "-";
1095  }
1096  if (adjustment == 0) {
1097  seq_vec.GetSeqData(pos, pos+1, seq_pos);
1098  } else {
1099  seq_pos = "-";
1100  }
1101  if (adjustment <= 0) {
1102  // If pos is less than -1, it starts eating into seq_after. The filler is taken
1103  // care of, now we ensure that we're not generating negative positions.
1104 
1105  seq_vec.GetSeqData(max(0, pos+1),
1106  max(0, min(seq_len, pos+max_seq_frame+1)), seq_after);
1107  if (adjustment != 0) {
1108  int fill_len = min(int(-adjustment-1), int(seq_after.size()));
1109  seq_after.replace(0, fill_len, fill_len, '-');
1110  }
1111  } else {
1112  for (int i = 0; i < max_seq_frame; i++) seq_after += "-";
1113  }
1114 /*
1115  if (complement) {
1116  string comp;
1117  CSeqManip::Complement(seq_before, CSeqUtil::e_Iupacna, 0, seq_before.size(), comp);
1118  CSeqManip::Complement(seq_after, CSeqUtil::e_Iupacna, 0, seq_after.size(), seq_before);
1119  seq_before = comp;
1120  CSeqManip::Complement(seq_pos, CSeqUtil::e_Iupacna, 0, seq_pos.size(), seq_pos);
1121  }
1122 */
1123 
1124  return
1125  seq_before + "<span class='xsv-seq_pos_highlight'>" +
1126  seq_pos + "</span>" + seq_after;
1127 }
1128 
1130 {
1131  #ifdef __GUI_SEQ_UTILS_DEFINED__
1132  return CSeqUtils::GetAnnotSelector();
1133  #endif
1134 
1135  SAnnotSelector sel;
1136  sel
1137  // consider overlaps by total range...
1139  // resolve all segments...
1140  .SetResolveAll()
1141  ;
1142 
1143  sel.SetExcludeExternal(false);
1144 
1145  ///
1146  /// known external annotations
1147  ///
1148  sel.ExcludeNamedAnnots("SNP"); /// SNPs = variation features
1149  sel.ExcludeNamedAnnots("CDD"); /// CDD = conserved domains
1150  sel.ExcludeNamedAnnots("STS"); /// STS = sequence tagged sites
1151 
1152  sel.SetAdaptiveDepth(true);
1153  sel.SetResolveAll();
1154 
1155  return sel;
1156 }
1157 
1158 /*
1159 /////////////////////////////////////////////////////////////////////////////
1160 // Service functions
1161 
1162 static
1163 TSignedSeqPos x_GetAdjustment(const CSeq_feat& feat, TSeqPos pos)
1164 {
1165  // Calculate minimal distance to the exon boundary
1166  int adj = 0;
1167  bool plus_strand =
1168  sequence::GetStrand(feat.GetLocation()) != eNa_strand_minus;
1169  TSignedSeqPos signed_pos = static_cast<TSignedSeqPos>(pos);
1170  CSeq_loc_CI loc_iter(feat.GetLocation());
1171  for ( ; loc_iter; ++loc_iter) {
1172  const TSeqRange& curr = loc_iter.GetRange();
1173  TSignedSeqPos f = curr.GetFrom();
1174  TSignedSeqPos t = curr.GetTo();
1175 
1176  int adj_for_intron;
1177  if (signed_pos < f) {
1178  adj_for_intron = signed_pos - f;
1179  } else if (signed_pos > t) {
1180  adj_for_intron = signed_pos - t;
1181  } else {
1182  return 0; // no adjustment needed
1183  }
1184  if (adj == 0 || std::abs(adj_for_intron) < std::abs(adj)) {
1185  adj = adj_for_intron;
1186  }
1187  }
1188  return plus_strand ? adj : -adj;
1189 }
1190 */
1191 
1192 
1193 // END_NCBI_SCOPE
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Checksum and hash calculation classes.
CAlign_CI –.
Definition: align_ci.hpp:63
CBioseq_Handle –.
CFeat_CI –.
Definition: feat_ci.hpp:64
CHGVS_Coordinate_Set –.
CHGVS_Coordinate –.
CMappedFeat –.
Definition: mapped_feat.hpp:59
CMappingRange - describes a single interval to interval mapping.
Storage for multiple mapping ranges.
CObjectCoords(objects::CScope &scope)
Definition: objcoords.cpp:812
CRef< objects::CScope > m_Scope
Definition: objcoords.hpp:58
CRef< CHGVS_Coordinate_Set > GetCoordinates(const CSeq_id &id, TSeqPos pos, TSeqPos window=kSearchWindow)
Definition: objcoords.cpp:818
CObject –.
Definition: ncbiobj.hpp:180
bool x_MapPos(const CSeq_loc_Mapper &mapper, const CSeq_id &id, TSeqPos pos, TSeqPos &mapped_pos) const
Definition: objcoords.cpp:443
void x_SetId(CScope &scope, const CSeq_id &seq_id)
Definition: objcoords.cpp:185
void x_SetHgvs(CHGVS_Coordinate &ref, const char *prefix, TSignedSeqPos pos, TSignedSeqPos adjustment=0, bool utr3_tail=false, bool in_gap=false) const
Definition: objcoords.cpp:409
void SetGene(CScope &scope, const CSeq_feat &feat)
Definition: objcoords.cpp:615
void x_SetRnaCds(CScope &scope, const CSeq_feat &cds)
Definition: objcoords.cpp:683
TSeqPos m_cds_offset
Definition: objcoords.cpp:115
TSeqPos m_cdr_length
CDS feature on transcript, where main is genomic.
Definition: objcoords.cpp:113
void SetAlignment(CScope &scope, const CSeq_align &align)
Definition: objcoords.cpp:722
string m_locus
Definition: objcoords.cpp:125
void x_GetCCoordinate(CScope &scope, TSeqPos pos, TSignedSeqPos adj, CHGVS_Coordinate_Set &coords) const
Definition: objcoords.cpp:529
CConstRef< CSeq_id > m_main_id
Definition: objcoords.cpp:121
CRef< CSeq_loc_Mapper > x_GetCdsMapper(CScope &scope, const CSeq_feat &cds) const
Definition: objcoords.cpp:788
CRef< CHGVS_Coordinate > x_NewCoordinate(CScope &scope, const CSeq_id *id, TSeqPos pos) const
Definition: objcoords.cpp:376
void x_SetFeature(CScope &scope, feature::CFeatTree &feat_tree, const CMappedFeat &feat)
Definition: objcoords.cpp:204
void SetCds(CScope &scope, const CSeq_feat &feat)
Definition: objcoords.cpp:638
CConstRef< CSeq_id > m_prot_id
Definition: objcoords.cpp:124
bool x_CheckExonGap(TSeqPos pos) const
Definition: objcoords.cpp:330
CConstRef< CSeq_feat > m_mrna
Definition: objcoords.cpp:110
CConstRef< CSeq_align > m_mrna_align
Definition: objcoords.cpp:117
CRef< CSeq_loc_Mapper > m_mrna_mapper
Definition: objcoords.cpp:118
void x_GetRCoordinate(CScope &scope, TSeqPos pos, CHGVS_Coordinate_Set &coords) const
Definition: objcoords.cpp:503
CConstRef< CSeq_id > m_transcript_id
Definition: objcoords.cpp:123
bool IsValid()
Definition: objcoords.cpp:99
bool m_cds_offset_set
Definition: objcoords.cpp:114
TSignedSeqPos x_GetAdjustment(TSeqPos pos) const
Definition: objcoords.cpp:287
void x_SetMrnaMapper(CScope &scope, const CSeq_feat &feat)
Definition: objcoords.cpp:704
CConstRef< CSeq_feat > m_cds
mRNA feature on main sequence (genomic or transcript)
Definition: objcoords.cpp:111
void x_GetPCoordinate(CScope &scope, TSeqPos pos, CHGVS_Coordinate_Set &coords) const
Definition: objcoords.cpp:591
CConstRef< CSeq_id > GetTranscriptId()
Definition: objcoords.cpp:107
void SetMrna(CScope &scope, const CSeq_feat &feat)
Definition: objcoords.cpp:626
void x_SetSequence(CHGVS_Coordinate &ref, CScope &scope, const CSeq_id *id, TSignedSeqPos pos, TSignedSeqPos adjustment=0, bool plus_strand=true) const
Definition: objcoords.cpp:397
CConstRef< CSeq_feat > m_rna_cds
CDS feature on main sequence.
Definition: objcoords.cpp:112
CConstRef< CSeq_id > m_genomic_id
Definition: objcoords.cpp:122
void GetCoordinates(CScope &scope, Uint4 type_flags, TSeqPos pos, CHGVS_Coordinate_Set &coords) const
Definition: objcoords.cpp:256
TSeqPos x_GetCDSOffset() const
Definition: objcoords.cpp:370
bool m_mrna_plus_strand
Definition: objcoords.cpp:116
void x_CalculateUTRAdjustments(TSignedSeqPos &feature_pos, TSignedSeqPos &seq_pos, TSignedSeqPos &adjustment, bool &utr3_tail) const
Definition: objcoords.cpp:465
CScope –.
Definition: scope.hpp:92
ESubtype GetSubtype(void) const
CSeqVector –.
Definition: seq_vector.hpp:65
TDim CheckNumRows(void) const
Validatiors.
Definition: Seq_align.cpp:73
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
CSeq_entry_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeq_loc_Mapper –.
CSpliced_exon_chunk –.
TSeqPos GetSeqStop(TDim row) const
TSeqPos GetSeqStart(TDim row) const
#define false
Definition: bool.h:36
type_flags
Definition: proto.h:431
int offset
Definition: replacements.h:160
CRange< Position > Map(const CRange< Position > &target, const CRange< Position > &range)
Definition: blast_aux.cpp:826
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
static objects::SAnnotSelector GetAnnotSelector(TAnnotFlags flags=0)
request an annotation selector for a given type
Definition: utils.cpp:167
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ fAcc_prot
Definition: Seq_id.hpp:252
@ eAcc_mrna
Definition: Seq_id.hpp:268
@ fAcc_genomic
Definition: Seq_id.hpp:286
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
Definition: Seq_loc.cpp:3474
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
void SetPnt(TPnt &v)
Definition: Seq_loc.hpp:985
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
TRange GetRange(void) const
Get the range.
Definition: Seq_loc.hpp:1042
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
Definition: sequence.hpp:101
TSeqPos GetLength(void) const
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
const CMappingRanges & GetMappingRanges(void) const
Get mapping ranges.
TSeqPos GetSrc_from(void) const
TRangeIterator BeginMappingRanges(CSeq_id_Handle id, TSeqPos from, TSeqPos to) const
Get mapping ranges iterator for the given seq-id and range.
@ eLocationToProduct
Map from the feature's location to product.
const TDescr & GetDescr(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
bool IsSetProduct(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
bool IsSetDescr(void) const
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
SAnnotSelector & SetOverlapTotalRange(void)
Check overlapping only of total ranges.
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
SAnnotSelector & SetExcludeExternal(bool exclude=true)
External annotations for the Object Manger are annotations located in top level Seq-entry different f...
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
const CSeq_loc & GetProduct(void) const
SAnnotSelector & ExcludeNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to exclude.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TObjectType & GetObject(void)
Get object.
Definition: ncbiobj.hpp:1011
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
position_type GetLength(void) const
Definition: range.hpp:158
bool IntersectingWith(const TThisType &r) const
Definition: range.hpp:331
static TThisType GetWhole(void)
Definition: range.hpp:272
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static const char label[]
const TKeywords & GetKeywords(void) const
Get the Keywords member data.
Definition: GB_block_.hpp:526
bool IsSetKeywords(void) const
Check if a value has been assigned to Keywords data member.
Definition: GB_block_.hpp:514
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...
Definition: Gene_ref_.hpp:781
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
Definition: Gene_ref_.hpp:493
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
Definition: Gene_ref_.hpp:793
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
void SetHgvs_position(const THgvs_position &value)
Assign a value to Hgvs_position data member.
Tdata & Set(void)
Assign a value to data member.
void SetSequence(const TSequence &value)
Assign a value to Sequence data member.
bool IsSetGenomic_end(void) const
Check if a value has been assigned to Genomic_end data member.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
bool IsSetParts(void) const
basic seqments always are in biologic order Check if a value has been assigned to Parts data member.
TMatch GetMatch(void) const
Get the variant data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
bool IsSetPartial(void) const
flag: is this exon complete or partial? Check if a value has been assigned to Partial data member.
bool IsMismatch(void) const
Check if variant Mismatch is selected.
TDiag GetDiag(void) const
Get the variant data.
TMismatch GetMismatch(void) const
Get the variant data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
const TParts & GetParts(void) const
Get the Parts member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
Definition: Seq_align_.cpp:219
bool IsSetGenomic_start(void) const
genomic-end >= genomic-start Check if a value has been assigned to Genomic_start data member.
bool IsGenomic_ins(void) const
Check if variant Genomic_ins is selected.
bool IsMatch(void) const
Check if variant Match is selected.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
bool IsSetGenomic_strand(void) const
Check if a value has been assigned to Genomic_strand data member.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
TPartial GetPartial(void) const
Get the Partial member data.
bool IsDiag(void) const
Check if variant Diag is selected.
list< CRef< CSpliced_exon_chunk > > TParts
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsProduct_ins(void) const
Check if variant Product_ins is selected.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
Definition: Seq_feat_.hpp:943
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TCdregion & GetCdregion(void) const
Get the variant data.
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
const TGene & GetGene(void) const
Get the variant data.
TPartial GetPartial(void) const
Get the Partial member data.
Definition: Seq_feat_.hpp:962
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:509
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
const TPnt & GetPnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:238
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
bool IsPnt(void) const
Check if variant Pnt is selected.
Definition: Seq_loc_.hpp:540
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Null
not placed
Definition: Seq_loc_.hpp:98
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
int i
Magic spell ;-) needed for some weird compilers... very empiric.
#define abs(a)
Definition: ncbi_heapmgr.c:130
EIPRangeType t
Definition: ncbi_localip.c:101
T max(T x_, T y_)
T min(T x_, T y_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
vector< CRef< CReportEntry > > TReportEntryList
Definition: objcoords.cpp:798
static void s_ExtendEntriesFromAlignments(CScope &scope, const CBioseq_Handle &bsh, TSeqRange search_range, TReportEntryList &entries)
Definition: objcoords.cpp:1017
static TReportEntryList s_GetFeaturesForRange(CScope &scope, const CBioseq_Handle &bsh, TSeqRange search_range)
Definition: objcoords.cpp:952
static const TSeqPos kSeqFrame
Definition: objcoords.cpp:72
static bool s_IsRefSeqGene(const CBioseq_Handle &bsh)
Definition: objcoords.cpp:924
static SAnnotSelector x_GetAnnotSelector()
Definition: objcoords.cpp:1129
static string x_GetSequence(CSeqVector &seq_vec, TSignedSeqPos pos, TSignedSeqPos adjustment=0)
Definition: objcoords.cpp:1067
USING_SCOPE(ncbi)
static const char * prefix[]
Definition: pcregrep.c:405
Uint4 GetSequenceType(const CBioseq_Handle &bsh)
Return a (corrected) set of flags identifying the sequence type.
Definition: sequtils.cpp:42
SAnnotSelector –.
static wxAcceleratorEntry entries[3]
Modified on Wed Apr 17 13:08:01 2024 by modify_doxy.py rev. 669887