NCBI C++ ToolKit
snpread_packed.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: snpread_packed.cpp 100334 2023-07-20 14:37:04Z vasilche $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Eugene Vasilchenko
27  *
28  * File Description:
29  * Access to SNP files
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
35 #include <sra/error_codes.hpp>
39 #include <unordered_map>
40 
42 
43 template<>
44 struct hash<ncbi::CTempString>
45 {
46  size_t operator()(ncbi::CTempString val) const
47  {
48  unsigned long __h = 5381;
49  for ( auto c : val ) {
50  __h = __h*17 + c;
51  }
52  return size_t(__h);
53  }
54 };
55 
57 
59 
60 #define NCBI_USE_ERRCODE_X SNPReader
62 
64 BEGIN_NAMESPACE(SNPDbPacked);
65 
66 
// Longest allele string that is still stored in the packed SNP table;
// x_ParseSNP_Info() rejects SNPs having any allele longer than this.
static constexpr size_t kMax_AlleleLength = 32;
// Annotation name used by x_NewAnnot() when the caller does not supply one.
static constexpr char kDefaultAnnotName[] = "SNP";
69 
70 
72 
73 inline
75  const CSNPDbSeqIterator& it)
76 {
77  range = range.IntersectionWith(it.GetSNPRange());
78 }
79 
80 
81 inline
83 {
84  info.m_Flags = info.fQualityCodesOs | info.fAlleleReplace;
85  info.m_CommentIndex = info.kNo_CommentIndex;
86  info.m_Weight = 0;
87  info.m_ExtraIndex = info.kNo_ExtraIndex;
88 }
89 
90 
91 inline
93  const CSNPDbFeatIterator& it,
94  CSeq_annot_SNP_Info& packed)
95 {
96  TSeqPos len = it.GetSNPLength();
97  if ( len > info.kMax_PositionDelta+1 ) {
98  return false;
99  }
100  info.m_PositionDelta = len-1;
101  info.m_ToPosition = it.GetSNPPosition()+len-1;
102 
104  if ( range.second > info.kMax_AllelesCount ) {
105  return false;
106  }
107  size_t index = 0;
108  for ( ; index < range.second; ++index ) {
109  CTempString allele = it.GetAllele(range, index);
110  if ( allele.size() > kMax_AlleleLength ) {
111  return false;
112  }
113  SSNP_Info::TAlleleIndex a_index = packed.x_GetAlleleIndex(allele);
114  if ( a_index == info.kNo_AlleleIndex ) {
115  return false;
116  }
117  info.m_AllelesIndices[index] = a_index;
118  }
119  for ( ; index < info.kMax_AllelesCount; ++index ) {
120  info.m_AllelesIndices[index] = info.kNo_AlleleIndex;
121  }
122 
123  vector<char> os;
124  it.GetBitfieldOS(os);
125  info.m_QualityCodesIndex = packed.x_GetQualityCodesIndex(os);
126  if ( info.m_QualityCodesIndex == info.kNo_QualityCodesIndex ) {
127  return false;
128  }
129 
130  auto feat_id = it.GetFeatId();
131  if ( feat_id > kMax_Int ) {
132  NCBI_THROW(CSraException, eDataError,
133  "CSNPDbSeqIterator: FEAT_ID doesn't fit into table SNPId");
134  }
135  info.m_SNP_Id = SSNP_Info::TSNPId(feat_id);
136 
137  packed.x_AddSNP(info);
138  return true;
139 }
140 
141 
142 CRef<CSeq_annot> x_NewAnnot(const string& annot_name = kDefaultAnnotName)
143 {
144  CRef<CSeq_annot> annot(new CSeq_annot);
145  annot->SetNameDesc(annot_name);
146  return annot;
147 }
148 
149 
151 
152 
// GetPackedFeatAnnot: collect the SNP features of a sequence range into a
// (regular annotation, packed SNP table) pair.
// NOTE(review): listing lines 153-154 (start of the signature) are missing
// from this extract; the symbol index gives the declaration as
//   TPackedAnnot GetPackedFeatAnnot(const CSNPDbSeqIterator &seq,
//       CRange< TSeqPos > range,
//       const CSNPDbSeqIterator::SFilter &filter,
//       CSNPDbSeqIterator::TFlags flags)
155  const CSNPDbSeqIterator::SFilter& filter,
156  CSNPDbSeqIterator::TFlags flags)
157 {
// Clip the requested range to the SNP range of the sequence.
158  x_AdjustRange(range, seq);
// Annotation with the default name; it receives features that cannot be packed.
159  CRef<CSeq_annot> annot = x_NewAnnot();
// NOTE(review): listing line 160 is missing here; presumably it declares
// 'packed' (used below as a CSeq_annot_SNP_Info reference) - confirm.
161  CSeq_annot::TData::TFtable& feats = annot->SetData().SetFtable();
162 

163  SSNP_Info info;
// NOTE(review): listing lines 164-165 are missing here; presumably they
// initialize 'info' and declare the selector 'sel' used by the loop - confirm.
// Each SNP is packed when possible; otherwise it is kept as a full Seq-feat.
166  for ( CSNPDbFeatIterator it(seq, range, sel); it; ++it ) {
167  if ( !x_ParseSNP_Info(info, it, *packed) ) {
168  feats.push_back(it.GetSeq_feat());
169  }
170  }
// Drop the packed table if nothing was packed, and the annotation too if it
// holds no features either.
171  if ( packed->empty() ) {
172  packed = null;
173  if ( feats.empty() ) {
174  annot = null;
175  }
176  }
177  else {
// A non-empty packed table is tagged with the Seq-id of the sequence.
178  packed->SetSeq_id(*seq.GetSeqId());
179  }
// Either member of the returned pair may be null (see above).
180  return TPackedAnnot(annot, packed);
181 }
182 
183 
// Convenience overload of GetPackedFeatAnnot: forwards to the four-argument
// version, using the filter obtained from the sequence iterator itself
// (seq.GetFilter()).
// NOTE(review): listing lines 184-185 (start of the signature) are missing
// from this extract; judging by the call below the leading parameters are
// 'seq' and 'range' - confirm against the header.
186  CSNPDbSeqIterator::TFlags flags)
187 {
188  return GetPackedFeatAnnot(seq, range, seq.GetFilter(), flags);
189 }
190 
191 END_NAMESPACE(SNPDbPacked);
void GetBitfieldOS(vector< char > &octet_stream) const
Definition: snpread.cpp:2502
CTempString GetAllele(const TExtraRange &range, size_t index) const
Definition: snpread.cpp:2487
pair< TVDBRowId, size_t > TExtraRange
Definition: snpread.hpp:890
TSeqPos GetSNPLength(void) const
Definition: snpread.hpp:917
TSeqPos GetSNPPosition(void) const
Definition: snpread.hpp:914
Uint8 GetFeatId(void) const
Definition: snpread.cpp:2464
TExtraRange GetExtraRange(void) const
Definition: snpread.cpp:2471
const SFilter & GetFilter() const
Definition: snpread.hpp:610
CRef< CSeq_id > GetSeqId(void) const
Definition: snpread.hpp:500
CRange< TSeqPos > GetSNPRange(void) const
Definition: snpread.cpp:837
SSNP_Info::TAlleleIndex x_GetAlleleIndex(const string &allele)
SSNP_Info::TQualityCodesIndex x_GetQualityCodesIndex(const string &str)
void SetSeq_id(const CSeq_id &id)
bool empty(void) const
void x_AddSNP(const SSNP_Info &snp_info)
void SetNameDesc(const string &name)
Definition: Seq_annot.cpp:66
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
static uch flags
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define kMax_Int
Definition: ncbi_limits.h:184
size_type size(void) const
Return the length of the represented array.
Definition: tempstr.hpp:327
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
list< CRef< CSeq_feat > > TFtable
Definition: Seq_annot_.hpp:193
Definition of all error codes used in SRA C++ support libraries.
int len
static MDB_envinfo info
Definition: mdb_load.c:37
range(_Ty, _Ty) -> range< _Ty >
Magic spell ;-) needed for some weird compilers... very empiric.
NCBI_DEFINE_ERR_SUBCODE_X(1)
BEGIN_STD_NAMESPACE
bool x_ParseSNP_Info(SSNP_Info &info, const CSNPDbFeatIterator &it, CSeq_annot_SNP_Info &packed)
BEGIN_NAMESPACE(objects)
static const char kDefaultAnnotName[]
BEGIN_LOCAL_NAMESPACE
END_STD_NAMESPACE
void x_InitSNP_Info(SSNP_Info &info)
END_LOCAL_NAMESPACE
static const size_t kMax_AlleleLength
TPackedAnnot GetPackedFeatAnnot(const CSNPDbSeqIterator &seq, CRange< TSeqPos > range, const CSNPDbSeqIterator::SFilter &filter, CSNPDbSeqIterator::TFlags flags)
END_NAMESPACE(SNPDbPacked)
CRef< CSeq_annot > x_NewAnnot(const string &annot_name=kDefaultAnnotName)
END_NCBI_NAMESPACE
BEGIN_NCBI_NAMESPACE
void x_AdjustRange(CRange< TSeqPos > &range, const CSNPDbSeqIterator &it)
pair< CRef< CSeq_annot >, CRef< CSeq_annot_SNP_Info > > TPackedAnnot
@ eSearchByStart
Definition: snpread.hpp:70
Uint2 TAlleleIndex
Definition: snp_info.hpp:152
int TSNPId
Definition: snp_info.hpp:122
size_t operator()(ncbi::CTempString val) const
Definition: _hash_fun.h:40
Modified on Wed Sep 04 15:03:24 2024 by modify_doxy.py rev. 669887