NCBI C++ ToolKit
NSeq.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * $Id: NSeq.cpp 71537 2016-03-11 18:54:17Z kiryutin $
3 *
4 * =========================================================================
5 *
6 * PUBLIC DOMAIN NOTICE
7 * National Center for Biotechnology Information
8 *
9 * This software/database is a "United States Government Work" under the
10 * terms of the United States Copyright Act. It was written as part of
11 * the author's official duties as a United States Government employee and
12 * thus cannot be copyrighted. This software/database is freely available
13 * to the public for use. The National Library of Medicine and the U.S.
14 * Government have not placed any restriction on its use or reproduction.
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * Please cite the author in any work or product based on this material.
25 *
26 * =========================================================================
27 *
28 * Author: Boris Kiryutin
29 *
30 * =========================================================================
31 */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
37 
38 #include "NSeq.hpp"
39 #include "nucprot.hpp"
40 
42 #include <objmgr/seq_vector.hpp>
43 
45 BEGIN_SCOPE(prosplign)
47 
48 CNSeq::CNSeq(void)
49 {
50  m_size = 0;
51 }
52 
54 {
55 }
56 
57 // letter by position
58 char CNSeq::Upper(int pos) const
59 {
60  return CTranslationTable::NucToChar(seq[pos]);
61 }
62 
63 
64 void CNSeq::Init(const CNSeq& fullseq, const vector<pair<int, int> >& igi)
65 {
66  seq.clear();
67  NSEQ::const_iterator beg, end;
68  beg = fullseq.seq.begin();
69  m_size = fullseq.m_size;
70  _ASSERT((int)fullseq.seq.size() >= fullseq.size());
71  for(vector<pair<int, int> >::const_iterator it = igi.begin(); it != igi.end(); ++it) {
72  if(it->first < 0 || it->second < 1) NCBI_THROW(CProSplignException, eGenericError, "Intron coordinates are invalid");
73  if(it->first + it->second > fullseq.size()) NCBI_THROW(CProSplignException, eGenericError, "Intron coordinate is out of sequence");
74  end = fullseq.seq.begin() + it->first;
75  if(end < beg) NCBI_THROW(CProSplignException, eGenericError, "Intron coordinates have wrong order");
76  seq.insert(seq.end(), beg, end);
77  beg = end + it->second;
78  m_size -= it->second;
79  }
80  if(beg < fullseq.seq.end()) seq.insert(seq.end(), beg, fullseq.seq.end());
81 }
82 
83 void CNSeq::Init(CScope& scope, CSeq_loc& genomic)
84 {
85  CRef<CSeq_id> seqid( new CSeq_id );
86  seqid->Assign(*genomic.GetId());
87 
88  TSeqPos loc_end = sequence::GetStop(genomic, &scope);
89  TSeqPos seq_end = sequence::GetLength(*genomic.GetId(), &scope)-1;
90 
91  CRef<CSeq_loc> extended_seqloc(new CSeq_loc);
92  if (loc_end<=seq_end)
93  extended_seqloc->Assign(genomic);
94  else {
95  CRef<CSeq_loc> extra_seqloc(new CSeq_loc(*seqid,seq_end+1,loc_end,genomic.GetStrand()) );
96  extended_seqloc = sequence::Seq_loc_Subtract(genomic,*extra_seqloc,CSeq_loc::fMerge_All|CSeq_loc::fSort,&scope);
97  if(extended_seqloc.Empty() || extended_seqloc->IsNull() || extended_seqloc->IsEmpty() ) {
98  NCBI_THROW(CProSplignException, eGenericError, "[from,to] range provided is out of sequence.");
99  }
100  extended_seqloc->SetId(*seqid); // Seq_loc_Subtract might change the id, e.g. replace accession with gi
101  genomic.Assign(*extended_seqloc);
102  }
103 
104  m_size = sequence::GetLength(*extended_seqloc,&scope);
105 
106  if (IsForward(genomic.GetStrand())) {
107  TSeqPos pos = sequence::GetStop(*extended_seqloc, &scope);
108  if (pos<seq_end) {
109  TSeqPos pos1 = pos + 3;
110  if (pos1 > seq_end)
111  pos1 = seq_end;
112  CRef<CSeq_loc> extra_seqloc(new CSeq_loc(*seqid,pos,pos1,eNa_strand_plus) );
113  extended_seqloc = sequence::Seq_loc_Add(*extended_seqloc,*extra_seqloc,CSeq_loc::fMerge_All|CSeq_loc::fSort,&scope);
114  }
115  } else {
116  TSeqPos pos = sequence::GetStart(*extended_seqloc, &scope);
117  if (pos > 0) {
118  TSeqPos pos0 = pos < 3 ? 0 : pos - 3;
119  CRef<CSeq_loc> extra_seqloc(new CSeq_loc(*seqid,pos0,pos-1,eNa_strand_minus) );
120  extended_seqloc = sequence::Seq_loc_Add(*extended_seqloc,*extra_seqloc,CSeq_loc::fMerge_All|CSeq_loc::fSort,&scope);
121  }
122  }
123 
124  CSeqVector seq_vec(*extended_seqloc,scope,CBioseq_Handle::eCoding_Ncbi);
125 
126  vector<char> convert(16,nN);
127  convert[1] = nA;
128  convert[2] = nC;
129  convert[4] = nG;
130  convert[8] = nT;
131 
132  seq.clear();
133  for (CSeqVector_CI i(seq_vec); i; ++i) {
134  seq.push_back(convert[*i&0xf]);
135  }
136 }
137 
138 
139 END_SCOPE(prosplign)
USING_SCOPE(ncbi::objects)
@ nA
Definition: NSeq.hpp:47
@ nG
Definition: NSeq.hpp:47
@ nT
Definition: NSeq.hpp:47
@ nN
Definition: NSeq.hpp:47
@ nC
Definition: NSeq.hpp:47
bool IsForward(ENa_strand s)
Definition: Na_strand.hpp:68
Definition: NSeq.hpp:52
int m_size
Definition: NSeq.hpp:75
int size(void) const
Definition: NSeq.hpp:71
~CNSeq(void)
Definition: NSeq.cpp:53
void Init(CScope &scope, CSeq_loc &genomic)
Definition: NSeq.cpp:83
char Upper(int pos) const
Definition: NSeq.cpp:58
NSEQ seq
Definition: NSeq.hpp:76
CScope –.
Definition: scope.hpp:92
CSeqVector –.
Definition: seq_vector.hpp:65
static char NucToChar(int n)
Definition: nucprot.cpp:114
Include a standard set of the NCBI C++ Toolkit most basic headers.
static TDSRET convert(TDSSOCKET *tds, TDSICONV *conv, TDS_ICONV_DIRECTION direction, const char *from, size_t from_len, char *dest, size_t *dest_len)
Definition: charconv.c:57
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
Definition: Seq_loc.cpp:3474
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
@ fMerge_All
Definition: Seq_loc.hpp:331
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
CRef< CSeq_loc > Seq_loc_Subtract(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Subtract the second seq-loc from the first one.
CRef< CSeq_loc > Seq_loc_Add(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Add two seq-locs.
@ eCoding_Ncbi
Set coding to binary coding (Ncbi4na or Ncbistdaa)
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
bool IsEmpty(void) const
Check if variant Empty is selected.
Definition: Seq_loc_.hpp:516
bool IsNull(void) const
Check if variant Null is selected.
Definition: Seq_loc_.hpp:504
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
int i
Magic spell ;-) needed for some weird compilers... very empiric.
#define _ASSERT
Modified on Mon Apr 22 04:03:41 2024 by modify_doxy.py rev. 669887