NCBI C++ ToolKit
local_blastdb_adapter.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: local_blastdb_adapter.cpp 96780 2022-05-10 12:31:06Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  * ===========================================================================
29  */
30 
31 /** @file local_blastdb_adapter.cpp
32  * Defines the CLocalBlastDbAdapter class
33  */
34 #include <ncbi_pch.hpp>
36 #include <objects/seq/Seq_inst.hpp>
39 #include <objects/seq/Seq_ext.hpp>
41 
44 
47 {
48  return m_SeqDB->GetSequenceType();
49 }
50 
51 TTaxId
53 {
54  TTaxId retval = INVALID_TAX_ID;
55  CConstRef<CSeq_id> id = idh.GetSeqId();
56  if (id.NotEmpty()) {
57  int oid = 0;
58  if (SeqidToOid(*id, oid)) {
59  map<TGi, TTaxId> gi_to_taxid;
60  m_SeqDB->GetTaxIDs(oid, gi_to_taxid);
61  if (idh.IsGi()) {
62  retval = gi_to_taxid[idh.GetGi()];
63  } else {
64  retval = gi_to_taxid.begin()->second;
65  }
66  }
67  }
68  return retval;
69 }
70 
71 int
73 {
74  return m_SeqDB->GetSeqLength(oid);
75 }
76 
79 {
80  return m_SeqDB->GetSeqIDs(oid);
81 }
82 
84 CLocalBlastDbAdapter::GetBioseqNoData(int oid, TGi target_gi /* = 0 */, const CSeq_id * target_id /* = NULL */)
85 {
86  return m_SeqDB->GetBioseqNoData(oid, target_gi, target_id);
87 }
88 
89 /// Assigns a buffer of nucleotide sequence data as retrieved from CSeqDB into
90 /// the CSeq_data object
91 /// @param buffer contains the sequence data to assign [in]
92 /// @param seq_data object to assign the data to in ncbi4na format [in|out]
93 /// @param length sequence length [in]
94 static void
96  CSeq_data& seq_data,
97  TSeqPos length)
98 {
99  // This code works around the fact that SeqDB
100  // currently only produces 8 bit output -- it builds an array and
101  // packs the output into it in 4 bit format. SeqDB should probably
102  // provide more formats and combinations so that this code can
103  // disappear.
104 
105  vector<char>& v4 = seq_data.SetNcbi4na().Set();
106  v4.reserve((length+1)/2);
107 
108  const TSeqPos length_whole = length & ~1;
109 
110  for(TSeqPos i = 0; i < length_whole; i += 2) {
111  v4.push_back((buffer[i] << 4) | buffer[i+1]);
112  }
113  if (length_whole != length) {
114  _ASSERT((length_whole) == (length-1));
115  v4.push_back(buffer[length_whole] << 4);
116  }
117 }
118 
121  int begin /* = 0 */,
122  int end /* = 0*/)
123 {
124  const bool kIsProtein = (GetSequenceType() == CSeqDB::eProtein)
125  ? true : false;
126  const int kNuclCode(kSeqDBNuclNcbiNA8);
127  CRef<CSeq_data> retval(new CSeq_data);
128  const char* buffer = NULL;
129 
130  if (begin == end && begin == 0) {
131  // Get full sequence
132  if (kIsProtein) {
133  TSeqPos length = m_SeqDB->GetSequence(oid, &buffer);
134  retval->SetNcbistdaa().Set().assign(buffer, buffer+length);
136  } else {
137  TSeqPos length = m_SeqDB->GetAmbigSeq(oid, &buffer, kNuclCode);
138  s_AssignBufferToSeqData(buffer, *retval, length);
140  }
141  } else {
142  // Get parts of the sequence
143  if (kIsProtein) {
144  TSeqPos length = m_SeqDB->GetSequence(oid, &buffer);
145  _ASSERT((end-begin) <= (int)length);
146  retval->SetNcbistdaa().Set().assign(buffer + begin, buffer + end);
148  length += 0; // to avoid compiler warning
149  } else {
150  TSeqPos length =
151  m_SeqDB->GetAmbigSeq(oid, &buffer, kNuclCode, begin, end);
152  _ASSERT((end-begin) == (int)length);
153  s_AssignBufferToSeqData(buffer, *retval, length);
155  }
156  }
157  return retval;
158 }
159 
160 bool
162 {
163  return m_SeqDB->SeqidToOid(id, oid);
164 }
165 
168 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
virtual CSeqDB::ESeqType GetSequenceType()
@inheritDoc
virtual CRef< CBioseq > GetBioseqNoData(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_id=NULL)
@inheritDoc
CRef< CSeqDB > m_SeqDB
The BLAST database handle.
virtual TSeqIdList GetSeqIDs(int oid)
@inheritDoc
virtual TTaxId GetTaxId(const CSeq_id_Handle &id)
@inheritDoc
virtual int GetSeqLength(int oid)
@inheritDoc
virtual bool SeqidToOid(const CSeq_id &id, int &oid)
@inheritDoc
virtual CRef< CSeq_data > GetSequence(int oid, int begin=0, int end=0)
@inheritDoc
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Gets a list of sequence identifiers.
Definition: seqdb.cpp:765
int GetSeqLength(int oid) const
Returns the sequence length in base pairs or residues.
Definition: seqdb.cpp:400
ESeqType GetSequenceType() const
Returns the type of database opened - protein or nucleotide.
Definition: seqdb.cpp:427
ESeqType
Sequence types (eUnknown tries protein, then nucleotide).
Definition: seqdb.hpp:173
@ eProtein
Definition: seqdb.hpp:174
bool SeqidToOid(const CSeq_id &seqid, int &oid) const
Translate a Seq-id to any matching OID.
Definition: seqdb.cpp:903
void RetAmbigSeq(const char **buffer) const
Returns any resources associated with the sequence.
Definition: seqdb.cpp:563
CRef< CBioseq > GetBioseqNoData(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence without sequence data.
Definition: seqdb.cpp:514
void GetTaxIDs(int oid, map< TGi, TTaxId > &gi_to_taxid, bool persist=false) const
Get taxid for an OID.
Definition: seqdb.cpp:441
void RetSequence(const char **buffer) const
Returns any resources associated with the sequence.
Definition: seqdb.cpp:523
int GetSequence(int oid, const char **buffer) const
Get a pointer to raw sequence data.
Definition: seqdb.cpp:530
int GetAmbigSeq(int oid, const char **buffer, int nucl_code) const
Get a pointer to sequence data with ambiguities.
Definition: seqdb.cpp:550
list< CRef< CSeq_id > > TSeqIdList
Convenience typedef for a list of CSeq_id-s.
const_iterator begin() const
Definition: map.hpp:151
Definition: map.hpp:338
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define INVALID_TAX_ID
Definition: ncbimisc.hpp:1116
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define NULL
Definition: ncbistd.hpp:225
TPrim & Set(void)
Definition: serialbase.hpp:351
CConstRef< CSeq_id > GetSeqId(void) const
bool IsGi(void) const
TGi GetGi(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
TNcbistdaa & SetNcbistdaa(void)
Select the variant.
Definition: Seq_data_.hpp:697
TNcbi4na & SetNcbi4na(void)
Select the variant.
Definition: Seq_data_.hpp:577
int i
static void s_AssignBufferToSeqData(const char *buffer, CSeq_data &seq_data, TSeqPos length)
Assigns a buffer of nucleotide sequence data as retrieved from CSeqDB into the CSeq_data object.
Declaration of the CLocalBlastDbAdapter class.
const string kIsProtein
static pcre_uint8 * buffer
Definition: pcretest.c:1051
const int kSeqDBNuclNcbiNA8
Used to request ambiguities in Ncbi/NA8 format.
#define _ASSERT
Modified on Fri May 24 14:57:46 2024 by modify_doxy.py rev. 669887