NCBI C++ ToolKit
micro_introns.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: micro_introns.cpp 91545 2020-11-11 15:10:23Z ludwigf $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:
27 *
28 * File Description:
29 * fasta-file generator application
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <objmgr/util/sequence.hpp>
35 #include <objmgr/align_ci.hpp>
37 
40 
// Copies "partial" and "truncated" end markers from `src` onto `dest`, but
// only for the ends at which both locations report the same position.
//
//   src   location whose end markers are read (never modified).
//   dest  location that receives the markers (modified in place).
//
// Nothing is changed when either location has no single id (GetId() is
// null), when the two ids are not Equal, or when the strands differ.
// Markers are only ever switched on (set to true); markers already present
// on `dest` are never cleared here.
41 // ============================================================================
42 static void InheritPartialness(
43  const CSeq_loc& src,
44  CSeq_loc& dest)
45 // ============================================================================
46 {
// Guard: the two locations must be on the same sequence id and strand,
// otherwise comparing their start/stop positions is not meaningful.
47  if( !src.GetId() || !dest.GetId() || !src.GetId()->Equals(*dest.GetId())
48  || src.GetStrand() != dest.GetStrand() ) {
49  return;
50  }
51 
// NOTE(review): listing line 52 is missing from this rendering; it presumably
// declares `ext` (an ESeqLocExtremes value, likely eExtreme_Biological, both
// of which appear in the symbol index of this page) - confirm against the
// repository copy.
53  const bool same_start = src.GetStart(ext) == dest.GetStart(ext);
54  const bool same_stop = src.GetStop(ext) == dest.GetStop(ext);
55 
// Start end: propagate the partial marker, then the truncated marker.
56  if(same_start && src.IsPartialStart(ext)) {
57  dest.SetPartialStart(true, ext);
58  }
59 
60  if(same_start && src.IsTruncatedStart(ext)) {
61  dest.SetTruncatedStart(true, ext);
62  }
63 
// Stop end: same two markers, gated on the stop positions coinciding.
64  if(same_stop && src.IsPartialStop(ext)) {
65  dest.SetPartialStop(true, ext);
66  }
67 
68  if(same_stop && src.IsTruncatedStop(ext)) {
69  dest.SetTruncatedStop(true, ext);
70  }
71 }
72 
73 
// GetAlignmentForRna: finds the alignment that belongs to the product of
// feature `a_feat`.
//
// NOTE(review): listing lines 75-76 (return type and function name) are
// missing from this rendering; the symbol index of this page gives the
// signature as
//   CConstRef<CSeq_align> GetAlignmentForRna(const CMappedFeat&, CScope&, bool)
//
//   a_feat         feature whose product id and location drive the search.
//   a_scope        scope used to canonicalize ids and to iterate alignments.
//   ignore_errors  when false, more than one matching alignment is an error.
//
// Returns the first matching alignment, or an empty CConstRef when the
// feature has no usable product id or nothing matches.
// Throws CAlgoFeatureGeneratorException (eMicroIntrons) when several
// alignments match and `ignore_errors` is false.
74 // ============================================================================
77  const CMappedFeat& a_feat,
78  CScope& a_scope,
79  bool ignore_errors)
80 // ============================================================================
81 {
// Prefer the canonical form of the product id when the scope can supply one;
// otherwise keep the id exactly as stored on the feature.
82  CSeq_id_Handle feat_idh = a_feat.GetProductId();
83  CSeq_id_Handle canonical = sequence::GetId(feat_idh, a_scope, sequence::eGetId_Canonical);
84 
85  if (canonical) {
86  feat_idh = canonical;
87  }
// No product id at all: there is nothing to match alignments against.
88  if (!feat_idh) {
89  return CConstRef<CSeq_align>();
90  }
// Collect every alignment overlapping the feature location whose row-0 id
// (canonicalized the same way as above) matches the product id.
91  list<CConstRef<CSeq_align> > align_list;
92  for(CAlign_CI align_it(a_scope, a_feat.GetLocation()); align_it; ++align_it) {
93  const CSeq_align& cur_align = *align_it;
94  CSeq_id_Handle align_product_idh = CSeq_id_Handle::GetHandle(cur_align.GetSeq_id(0));
95  canonical = sequence::GetId(align_product_idh, a_scope, sequence::eGetId_Canonical);
96  if (canonical) {
97  align_product_idh = canonical;
98  }
99  if(feat_idh.MatchesTo(align_product_idh)) {
100  align_list.push_back(CConstRef<CSeq_align>(&*align_it));
101  }
102  }
// Ambiguity is fatal unless the caller asked to ignore errors, in which case
// the first match found is used.
103  if(align_list.size() > 1) {
104  if(!ignore_errors) {
105  NCBI_THROW(
106  CAlgoFeatureGeneratorException, eMicroIntrons, "Multiple alignments found.");
107  }
108  }
109  return (align_list.size() > 0 ? align_list.front() : CConstRef<CSeq_align>());
110 }
111 
// x_SetAnnotName: restricts annotation selector `sel` to the annotation
// called `annot_name`.
//
// NOTE(review): listing line 113 (return type and function name) is missing
// from this rendering; the symbol index of this page gives the signature as
//   static void x_SetAnnotName(objects::SAnnotSelector& sel, const string& annot_name)
//
//   sel         selector to configure (modified in place).
//   annot_name  empty      -> `sel` is left untouched;
//               "Unnamed"  -> only unnamed annotations are selected;
//               otherwise  -> only the annotation with that name is selected.
112 // ============================================================================
114  SAnnotSelector& sel,
115  const string& annot_name)
116 // ============================================================================
117 {
118  if(!annot_name.empty()) {
// Start from a clean slate so earlier name filters on `sel` do not leak in.
119  sel.ResetAnnotsNames();
120  if(annot_name =="Unnamed") {
121  sel.AddUnnamedAnnots();
122  } else {
123  sel.AddNamedAnnots(annot_name);
124  sel.ExcludeUnnamedAnnots();
// Names beginning with "NA0" are additionally registered as named-annotation
// accessions; a name without a '.' gets ".1" appended before registration.
125  if (NStr::StartsWith(annot_name, "NA0")) {
126  sel.IncludeNamedAnnotAccession(annot_name.find(".") == string::npos ?
127  annot_name + ".1"
128  :
129  annot_name);
130  }
131  }
132  }
133 }
134 
135 
// CreateMicroIntrons: for each selected feature on `bsh` that has an RNA
// parent, replaces the location of the RNA feature and of the feature itself
// with locations projected through the RNA's alignment.
//
// NOTE(review): this rendering dropped several listing lines (137, 145,
// 152-153, 160, 164, 177, 187, 196, 205, 209, 214, 218, 222). The symbol
// index of this page gives the signature as
//   static void CreateMicroIntrons(objects::CScope& scope,
//       objects::CBioseq_Handle bsh, const string& annot_name = "",
//       TSeqRange* range = NULL, bool ignore_errors = false)
// Notes below marked "presumably" describe dropped lines and must be checked
// against the repository copy.
//
//   scope          scope used for lookups and for obtaining edit handles.
//   bsh            sequence whose features are processed.
//   annot_name     annotation filter, passed to x_SetAnnotName.
//   range          when non-null, only features in *range are visited;
//                  when null, all selected features on `bsh` are visited.
//   ignore_errors  when true, a feature that cannot be processed is skipped
//                  instead of raising an error.
136 // ============================================================================
138  CScope& scope,
139  CBioseq_Handle bsh,
140  const string& annot_name,
141  TSeqRange* range,
142  bool ignore_errors)
143 // ============================================================================
144 {
// NOTE(review): listing line 145 is missing; presumably it declares `changes`,
// which is used below as a map from CSeq_feat_EditHandle to CRef<CSeq_feat>.
146 
147  feature::CFeatTree feat_tree;
// The returned edit handle is discarded; presumably the call is made only to
// put the bioseq into an editable state - confirm.
148  scope.GetEditHandle(bsh);
// Scoped block: build the feature tree that is later used to find RNA parents.
149  {{
150  SAnnotSelector sel;
151  x_SetAnnotName(sel, annot_name);
// NOTE(review): listing lines 152-153 are missing (further selector setup).
154  sel.SetResolveAll().SetAdaptiveDepth(true);
155  CFeat_CI feat_it(range ? CFeat_CI(bsh, *range, sel) : CFeat_CI(bsh, sel));
156  feat_tree.AddFeatures(feat_it);
157  }}
158 
// orig_sel drives the main loop over features on `bsh`; mrna_sel is used to
// look for features on each RNA product sequence.
// NOTE(review): listing lines 160 and 164 are missing (further setup of
// orig_sel and mrna_sel respectively).
159  SAnnotSelector orig_sel;
161  x_SetAnnotName(orig_sel, annot_name);
162  orig_sel.SetResolveAll().SetAdaptiveDepth(true);
163  SAnnotSelector mrna_sel;
165  mrna_sel.SetResolveAll().SetAdaptiveDepth(true);
166  for(CFeat_CI feat_it(range ? CFeat_CI(bsh, *range, orig_sel) : CFeat_CI(bsh, orig_sel)); feat_it; ++feat_it) {
167  CSeq_feat_Handle cur_feat = feat_it->GetSeq_feat_Handle();
168  CSeq_feat_Handle mrna_feat = feat_tree.GetParent(cur_feat, CSeqFeatData::e_Rna).GetSeq_feat_Handle();
// Features without an RNA parent in the tree are left untouched.
169  if(!mrna_feat) {
170  continue;
171  }
172  CConstRef<CSeq_align> align_ref = GetAlignmentForRna(mrna_feat, scope, ignore_errors);
173  if(!align_ref) {
174  if(ignore_errors) {
175  continue;
176  }
// NOTE(review): listing line 177 is missing; presumably the head of a
// NCBI_THROW whose message is the string on the next line.
178  "Unable to get alignment for mRNA.");
179  }
180 
181  const CSeq_align& cur_align = *align_ref;
182  CBioseq_Handle mrna_bsh = scope.GetBioseqHandle(mrna_feat.GetProductId());
183  if(!mrna_bsh) {
184  if(ignore_errors) {
185  continue;
186  }
// NOTE(review): listing line 187 is missing; presumably a NCBI_THROW head.
188  "Unable to get mRNA sequence.");
189  }
// Find the feature selected by mrna_sel on the product sequence. The
// `continue` inside this inner loop only advances the inner loop, so with
// ignore_errors the first feature found is kept and later ones are skipped.
190  CMappedFeat prod_cd_feat;
191  for(CFeat_CI prod_feat_it(mrna_bsh, mrna_sel); prod_feat_it; ++prod_feat_it) {
192  if(prod_cd_feat) {
193  if(ignore_errors) {
194  continue;
195  }
// NOTE(review): listing line 196 is missing; presumably a NCBI_THROW head.
197  "Multiple cdregion features found on mRNA.");
198  }
199  prod_cd_feat = *prod_feat_it;
200  }
201  if(!prod_cd_feat) {
202  if(ignore_errors) {
203  continue;
204  }
// NOTE(review): listing line 205 is missing; presumably a NCBI_THROW head.
206  "Unable to find cdregion on mRNA: " + mrna_feat.GetProductId().AsString());
207  }
// Project the RNA and the CDS through the alignment.
// NOTE(review): listing lines 209 and 214 (the callee names) are missing;
// the argument lists match s_ProjectRNA and s_ProjectCDS from the symbol
// index of this page - confirm.
208  CRef<CSeq_loc> projected_mrna_loc =
210  cur_align,
211  CConstRef<CSeq_loc>(&prod_cd_feat.GetLocation()));
212 
213  CRef<CSeq_loc> projected_cd_loc =
215  cur_align,
216  prod_cd_feat.GetLocation());
217 
// NOTE(review): listing lines 218 and 222 (the callee names) are missing; the
// (original location, projected location) argument pairs match
// InheritPartialness(src, dest) - confirm.
219  mrna_feat.GetOriginalSeq_feat()->GetLocation(),
220  *projected_mrna_loc);
221 
223  cur_feat.GetOriginalSeq_feat()->GetLocation(),
224  *projected_cd_loc);
225 
// Edit handles are requested for the annotations holding both features
// before edit handles for the features themselves are constructed below.
226  scope.GetEditHandle(mrna_feat.GetAnnot());
227  scope.GetEditHandle(cur_feat.GetAnnot());
228 
// Build replacement features: a copy of each original feature with only its
// location swapped for the projected one. The replacement is recorded in
// `changes` rather than applied here.
229  CRef<CSeq_feat> new_mrna_feat(new CSeq_feat);
230  new_mrna_feat->Assign(*mrna_feat.GetOriginalSeq_feat());
231  new_mrna_feat->SetLocation(*projected_mrna_loc);
232  CSeq_feat_EditHandle mrna_eh(mrna_feat);
233  changes[mrna_eh] = new_mrna_feat;
234 
235  CRef<CSeq_feat> new_cds_feat(new CSeq_feat);
236  new_cds_feat->Assign(*cur_feat.GetOriginalSeq_feat());
237  new_cds_feat->SetLocation(*projected_cd_loc);
238  CSeq_feat_EditHandle cds_eh(cur_feat);
239  changes[cds_eh] = new_cds_feat;
240  }
// Apply all recorded replacements only after the feature iteration above has
// finished.
241  for (auto mit = changes.begin(); mit != changes.end(); mit++) {
242  mit->first.Replace(*mit->second);
243  }
244 }
ESeqLocExtremes
Used to determine the meaning of a location's Start/Stop positions.
Definition: Na_strand.hpp:61
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
CAlign_CI –.
Definition: align_ci.hpp:63
CBioseq_Handle –.
CFeat_CI –.
Definition: feat_ci.hpp:64
static void CreateMicroIntrons(objects::CScope &scope, objects::CBioseq_Handle bsh, const string &annot_name="", TSeqRange *range=NULL, bool ignore_errors=false)
static CRef< objects::CSeq_loc > s_ProjectCDS(const objects::CSeq_align &spliced_aln, const objects::CSeq_loc &product_cds_loc, bool convert_overlaps=true)
Similar to s_ProjectRNA(...) Postcondition: seq-vector of the returned loc is of exact same length an...
static CRef< objects::CSeq_loc > s_ProjectRNA(const objects::CSeq_align &spliced_aln, CConstRef< objects::CSeq_loc > product_cds_loc=CConstRef< objects::CSeq_loc >(NULL), size_t unaligned_ends_partialness_thr=kDefaultAllowedUnaligned)
Project RNA, preserving discontinuities in the CDS.
static void x_SetAnnotName(objects::SAnnotSelector &sel, const string &annot_name)
CMappedFeat –.
Definition: mapped_feat.hpp:59
CScope –.
Definition: scope.hpp:92
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
CSeq_feat_EditHandle –.
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
Definition: map.hpp:338
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
bool MatchesTo(const CSeq_id_Handle &h) const
True if *this matches to h.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
void SetTruncatedStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (tl/tr - indicating removed parts of the seq-loc)
Definition: Seq_loc.cpp:3398
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
bool IsTruncatedStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3372
bool IsTruncatedStart(ESeqLocExtremes ext) const
check if parts of the seq-loc are missing
Definition: Seq_loc.cpp:3346
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
void SetPartialStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)
Definition: Seq_loc.cpp:3280
void SetPartialStop(bool val, ESeqLocExtremes ext)
Definition: Seq_loc.cpp:3313
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
void SetTruncatedStop(bool val, ESeqLocExtremes ext)
Definition: Seq_loc.cpp:3431
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
@ eGetId_Canonical
Definition: sequence.hpp:114
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CBioseq_EditHandle GetEditHandle(const CBioseq_Handle &seq)
Get editable Bioseq handle by regular one.
Definition: scope.cpp:301
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to seq-annot for this feature.
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
virtual CSeq_id_Handle GetProductId(void) const
CSeq_id_Handle GetProductId(void) const
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
const CSeq_loc & GetLocation(void) const
SAnnotSelector & ResetAnnotsNames(void)
Select annotations from all Seq-annots.
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
SAnnotSelector & IncludeNamedAnnotAccession(const string &acc, int zoom_level=0)
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
SAnnotSelector & AddNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to look for.
SAnnotSelector & ExcludeUnnamedAnnots(void)
Add unnamed annots to set of annots names to exclude.
SAnnotSelector & AddUnnamedAnnots(void)
Add unnamed annots to set of annots names to look for.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5411
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
USING_SCOPE(objects)
static void InheritPartialness(const CSeq_loc &src, CSeq_loc &dest)
CConstRef< CSeq_align > GetAlignmentForRna(const CMappedFeat &a_feat, CScope &a_scope, bool ignore_errors)
range(_Ty, _Ty) -> range< _Ty >
SAnnotSelector –.
Modified on Sat Dec 02 09:24:08 2023 by modify_doxy.py rev. 669887