49 bool use_fixed_size_slices)
50 : m_DbName(db_name), m_DbType(db_type), m_NextLocalId(1),
51 m_UseFixedSizeSlices(use_fixed_size_slices)
57 out << (
kIsProtein ?
"Protein" :
"Nucleotide") <<
" BLAST database "
58 <<
"'" << db_name <<
"' does not exist in the NCBI servers";
67 return m_Cache[oid].GetLength();
74 return m_Cache[oid].GetIdList();
96 const bool retval =
false;
104 if ( !errors.empty() ) {
107 if ( !warnings.empty() ) {
108 msg += (msg.empty() ? warnings :
" " + warnings);
111 msg =
"Failed to retrieve sequence data via remote BLAST database ";
112 msg +=
"data loader";
132 string errors, warnings;
133 const bool kVerbose = (getenv(
"VERBOSE") ?
true :
false);
136 errors, warnings, kVerbose);
137 if (seq_data.empty() || !errors.empty() || !warnings.empty() ||
141 _ASSERT(ids.size() == seq_data.size());
147 const vector<TSeqRange>& ranges)
155 seqids.reserve(oids.size());
156 for (vector<int>::size_type
i = 0;
i < oids.size();
i++) {
159 ranges[
i].GetToOpen()) );
166 ranges[
i].GetFrom(), ranges[
i].GetToOpen()));
167 seqids.push_back(seq_int);
172 string errors, warnings;
173 const bool kVerbose = (getenv(
"VERBOSE") ?
true :
false);
176 errors, warnings, kVerbose);
177 if (seq_data.empty() || !errors.empty() || !warnings.empty() ||
181 _ASSERT(seqids.size() == ids.size());
182 _ASSERT(ids.size() == seq_data.size());
184 for (vector<int>::size_type
i = 0;
i < oids.size();
i++) {
187 ranges[
i].GetToOpen()) = seq_data[
i];
189 ranges[
i].GetToOpen()));
208 const vector<TSeqRange>& ranges,
212 _ASSERT(oids.size() == ranges.size());
213 sequence_data.clear();
215 vector<int> oids2fetch;
216 vector<TSeqRange> ranges2fetch;
217 for (vector<int>::size_type
i = 0;
i < oids.size();
i++) {
221 int begin = 0, end = cached_seqdata.
GetLength();
223 begin = ranges[
i].GetFrom();
224 end = ranges[
i].GetToOpen();
227 oids2fetch.push_back(oids[
i]);
228 ranges2fetch.push_back(
TSeqRange(begin, end-1));
230 _ASSERT(ranges[
i] == ranges2fetch.back());
238 sequence_data.reserve(oids.size());
239 for (vector<int>::size_type
i = 0;
i < oids.size();
i++) {
242 int begin = 0, end = cached_seqdata.
GetLength();
244 begin = ranges[
i].GetFrom();
245 end = ranges[
i].GetToOpen();
250 _ASSERT(sequence_data.size() == oids.size());
253 for (vector<int>::size_type
i = 0;
i < sequence_data.size();
i++) {
272 const bool kVerbose = (getenv(
"VERBOSE") ?
true :
false);
273 string errors, warnings;
277 warnings, kVerbose,
true);
278 if ( !errors.empty() || !warnings.empty() || bioseqs.empty() ) {
281 _ASSERT(bioseqs.size() == seqids.size());
285 cached_seqdata.
SetLength(bioseqs.front()->GetLength(),
287 cached_seqdata.
SetIdList(bioseqs.front()->SetId());
288 cached_seqdata.
SetBioseq(bioseqs.front());
289 return cached_seqdata.
IsValid();
304 oids.reserve(ids.size());
305 for (vector<int>::size_type
i = 0;
i < ids.size();
i++) {
311 const bool kVerbose = (getenv(
"VERBOSE") ?
true :
false);
312 string errors, warnings;
315 (
const_cast< vector< CRef<CSeq_id>
>& >(*&ids),
m_DbName, seqtype,
316 bioseqs, errors, warnings, kVerbose,
true);
317 if ( !errors.empty() || !warnings.empty() || bioseqs.empty() ) {
320 _ASSERT(bioseqs.size() == ids.size());
323 for (vector<int>::size_type
i = 0;
i < oids.size();
i++) {
327 cached_seqdata.
SetIdList(bioseqs[
i]->SetId());
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Declares the CBlastServices class.
API for Remote Blast Services.
This class defines a "bundle" of elements to cache which are then returned by CRemoteBlastDbAdapter.
void SetBioseq(CRef< CBioseq > bioseq)
Set the Bioseq associated with a given sequence.
IBlastDbAdapter::TSeqIdList GetIdList() const
Retrieve the Seq-id's associated with a given sequence.
TSeqPos GetLength() const
Retrieve the sequence length.
bool IsValid()
Returns true if this object has been properly initialized and is ready to be used.
bool HasSequenceData(int begin, int end)
Returns true if the requested range has sequence data already.
void SetLength(TSeqPos length, bool use_fixed_size_slices)
Sets the length of the sequence data for a given Bioseq.
CRef< CSeq_data > & GetSeqDataChunk(int begin, int end)
Access the sequence data chunk for a given starting and ending offset.
void SetIdList(const IBlastDbAdapter::TSeqIdList &idlist)
Sets the Seq-id's associated with a given sequence. Parameter idlist: IDs to assign to this object [in].
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
This class allows retrieval of sequence data from BLAST databases at NCBI.
virtual bool SeqidToOid(const CSeq_id &id, int &oid)
@inheritDoc
bool SeqidToOidBatch(const vector< CRef< CSeq_id > > &ids, vector< int > &oids)
Batch-version of SeqidToOid.
virtual CSeqDB::ESeqType GetSequenceType()
@inheritDoc
virtual TSeqIdList GetSeqIDs(int oid)
@inheritDoc
void GetSequenceBatch(const vector< int > &oids, const vector< TSeqRange > &ranges, vector< CRef< CSeq_data > > &sequence_data)
Batch-version of GetSequence.
int m_NextLocalId
Our local "OID generator".
virtual int GetSeqLength(int oid)
@inheritDoc
map< int, CCachedSeqDataForRemote > m_Cache
Internal cache, maps OIDs to CCachedSeqDataForRemote.
virtual CRef< CSeq_data > GetSequence(int oid, int begin=0, int end=0)
@inheritDoc
virtual CRef< CBioseq > GetBioseqNoData(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_id=NULL)
@inheritDoc
string m_DbName
BLAST database name.
void x_FetchData(int oid, int begin, int end)
This method actually retrieves the sequence data.
bool m_UseFixedSizeSlices
Determines whether sequences should be fetched in fixed size slices or in incrementally larger sizes.
void x_FetchDataByBatch(const vector< int > &oids, const vector< TSeqRange > &ranges)
list< CRef< CSeq_id > > TSeqIdList
Convenience typedef for a list of CSeq_id-s.
std::ofstream out("events_result.xml")
main entry point for tests
static void GetSequenceParts(const TSeqIntervalVector &seqids, const string &database, char seqtype, TSeqIdVector &ids, TSeqDataVector &seq_data, string &errors, string &warnings, bool verbose=false)
This retrieves (partial) sequence data from the remote BLAST server.
static void GetSequencesInfo(TSeqIdVector &seqids, const string &database, char seqtype, TBioseqVector &bioseqs, string &errors, string &warnings, bool verbose=false, bool target_only=false)
Get a set of Bioseqs without their sequence data given an input set of Seq-ids.
bool IsValidBlastDb(const string &dbname, bool is_protein)
Returns true if the BLAST database specified exists in the NCBI servers.
vector< CRef< objects::CSeq_data > > TSeqDataVector
Defines a std::vector of CRef<CSeq_data>
vector< CRef< objects::CBioseq > > TBioseqVector
Defines a std::vector of CRef<CBioseq>
vector< CRef< objects::CSeq_interval > > TSeqIntervalVector
Defines a std::vector of CRef<CSeq_interval>
vector< CRef< objects::CSeq_id > > TSeqIdVector
Defines a std::vector of CRef<CSeq_id>
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
bool IsValid(const CSeq_point &pt, CScope *scope)
Checks that point >= 0 and point < length of Bioseq.
static TThisType GetEmpty(void)
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
CBioseq_Info & GetBioseq(CTSE_Info &tse, const CBioObjectId &id)
static bool RemoteBlastDbLoader_ErrorHandler(const string &errors, const string &warnings)
Returns false always.
Declaration of the CRemoteBlastDbAdapter class.