NCBI C++ ToolKit
remote_blast.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef ALGO_BLAST_API___REMOTE_BLAST__HPP
2 #define ALGO_BLAST_API___REMOTE_BLAST__HPP
3 
4 /* $Id: remote_blast.hpp 91977 2020-12-17 15:26:40Z grichenk $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Kevin Bealer
30  *
31  */
32 
33 /// @file remote_blast.hpp
34 /// Declares the CRemoteBlast class.
35 
40 #include <objects/blast/names.hpp>
41 #include <util/format_guess.hpp>
42 
43 /** @addtogroup AlgoBlast
44  *
45  * @{
46  */
47 
49 
51  /// forward declaration of ASN.1 object containing PSSM (scoremat.asn)
52  class CPssmWithParameters;
53  class CBioseq_set;
54  class CSeq_loc;
55  class CSeq_id;
56  class CSeq_align_set;
58 
59 BEGIN_SCOPE(blast)
60 
61 struct SInteractingOptions;
62 
63 /// Exception class for the CRemoteBlast class
65 {
66 public:
67  /// Types of exceptions generated by the CRemoteBlast class
68  enum EErrCode {
69  eServiceNotAvailable, ///< Service is not available
70  eIncompleteConfig ///< Remote BLAST object not fully configured
71  };
72 
73  /// Translate from the error code value to its string representation.
74  virtual const char* GetErrCodeString(void) const override {
    // Map each EErrCode enumerator to a string literal spelling its own name.
75  switch (GetErrCode()) {
76  case eServiceNotAvailable: return "eServiceNotAvailable";
77  case eIncompleteConfig: return "eIncompleteConfig";
    // Any other code is delegated to CException::GetErrCodeString().
78  default: return CException::GetErrCodeString();
79  }
80  }
81 #ifndef SKIP_DOXYGEN_PROCESSING
83 #endif /* SKIP_DOXYGEN_PROCESSING */
84 };
85 
86 
87 /// API for Remote Blast Requests
88 ///
89 /// API Class to facilitate submission of Remote Blast requests.
90 /// Provides an interface to build a Remote Blast request given an
91 /// object of a subclass of CBlastOptionsHandle.
92 
94 {
95 public:
96  /// Use the specified RID to get results for an existing search.
97  CRemoteBlast(const string & RID);
98 
99  /// Uses the file to populate results.
100  /// The file may be text or binary ASN.1 or XML; the type is automatically detected.
101  ///@param f istream to archive file
103 
104  /// Create a search using any kind of options handle.
105  CRemoteBlast(CBlastOptionsHandle * any_opts);
106 
107  /// Create a sequence search and set options, queries, and database.
108  /// @param queries Queries corresponding to Seq-loc-list or Bioseq-set.
109  /// @param opts_handle Blast options handle.
110  /// @param db Database used for this search.
112  CRef<CBlastOptionsHandle> opts_handle,
113  const CSearchDatabase & db);
114 
115  /// Create a search and set options, queries, and subject sequences.
116  /// @param queries Queries corresponding to Seq-loc-list or Bioseq-set.
117  /// @param opts_handle Blast options handle.
118  /// @param subjects Subject corresponding to Seq-loc-list or Bioseq-set.
120  CRef<CBlastOptionsHandle> opts_handle,
121  CRef<IQueryFactory> subjects);
122 
123  /// Create a PSSM search and set options, queries, and database.
124  /// @param pssm Search matrix for a PSSM search.
125  /// @param opts_handle Blast options handle.
126  /// @param db Database used for this search.
128  CRef<CBlastOptionsHandle> opts_handle,
129  const CSearchDatabase & db);
130 
131  /// Destruct the search object.
132  ~CRemoteBlast();
133 
134  /// This restricts the subject database to this list of GIs (this is not
135  /// supported yet on the server end).
136  /// @param gi_list list of GIs to restrict the search to [in]
137  void SetGIList(const list<TGi> & gi_list);
138 
139  /// This excludes the provided GIs from the subject database (this is not
140  /// supported yet on the server end).
141  /// @param gi_list list of GIs to exclude [in]
142  void SetNegativeGIList(const list<TGi> & gi_list);
143 
144  /// Sets the filtering algorithm ID to be applied to the BLAST database
145  /// (not supported by server yet)
146  /// @param algo_id algorithm ID to use (ignored if -1)
147  void SetDbFilteringAlgorithmId(int algo_id, ESubjectMaskingType mask_type=eSoftSubjMasking);
148 
149  /// Sets the filtering algorithm key to be applied to the BLAST database
150  /// (not supported by server yet)
151  /// @param algo_key algorithm key to use
152  void SetDbFilteringAlgorithmKey(string algo_key, ESubjectMaskingType mask_type=eSoftSubjMasking);
153 
154  ESubjectMaskingType GetSubjectMaskingType() const;
155 
156  /// Set the name of the database to search against.
157  void SetDatabase(const string & x);
158 
159  /// Set a list of subject sequences to search against.
160  void SetSubjectSequences(CRef<IQueryFactory> subj);
161 
162  /// Set a list of subject sequences to search against.
163  void SetSubjectSequences(const list< CRef<objects::CBioseq> > & subj);
164 
165  /// Restrict search to sequences matching this Entrez query.
166  void SetEntrezQuery(const char * x);
167 
168  /// Set the query as a Bioseq_set.
169  void SetQueries(CRef<objects::CBioseq_set> bioseqs);
170 
171  /// Convert a TSeqLocInfoVector to a list< CRef<CBlast4_mask> > objects
172  /// @param masking_locations Masks to convert [in]
173  /// @param program CORE BLAST program type [in]
174  /// @param warnings optional argument where warnings will be returned
175  /// [in|out]
176  static objects::CBlast4_get_search_results_reply::TMasks
177  ConvertToRemoteMasks(const TSeqLocInfoVector& masking_locations,
178  EBlastProgramType program,
179  vector<string>* warnings = NULL);
180 
181  /// Set the query as a Bioseq_set along with the corresponding masking
182  /// locations.
183  /// @param bioseqs Query sequence data [in]
184  /// @param masking_locations Masked regions for the queries above [in]
185  void SetQueries(CRef<objects::CBioseq_set> bioseqs,
186  const TSeqLocInfoVector& masking_locations);
187 
188  /// Set the masking locations for queries.
189  /// @param masking_locations Masked regions for the queries above [in]
190  void SetQueryMasks(const TSeqLocInfoVector& masking_locations);
191 
192  /// Typedef for a list of Seq-locs
193  typedef list< CRef<objects::CSeq_loc> > TSeqLocList;
194 
195  /// Set the query as a list of Seq_locs.
196  /// @param seqlocs One interval Seq_loc or a list of whole Seq_locs.
197  void SetQueries(TSeqLocList& seqlocs);
198 
199  /// Set the query as a list of Seq_locs.
200  /// @param seqlocs One interval Seq_loc or a list of whole Seq_locs.
201  /// @param masking_locations Masked regions for the queries above [in]
202  void SetQueries(TSeqLocList& seqlocs,
203  const TSeqLocInfoVector& masking_locations);
204 
205  /// Set a PSSM query (as for PSI blast), which must include a bioseq set.
206  void SetQueries(CRef<objects::CPssmWithParameters> pssm);
207 
208 
209  /* Getting Results */
210 
211 
212  /// Submit the search (if necessary) and return the results.
213  /// @return Search results.
214  CRef<CSearchResultSet> GetResultSet();
215 
216  /// This submits the search (if necessary) and polls for results.
217  ///
218  /// If a new search is configured, and not already submitted, this will
219  /// submit it. It then polls for results until either completion or error
220  /// state is reached, or until the search times out. The polling is done
221  /// at an increasing interval, which starts out at 10 seconds, increasing
222  /// by 30% after each check to a maximum of 300 seconds per sleep.
223  ///
224  /// @return true if the search was submitted, otherwise false.
225  bool SubmitSync(void);
226 
227  /// This submits the search (if necessary) and polls for results.
228  ///
229  /// If a new search is configured, and not already submitted, this will
230  /// submit it. It then polls for results until either completion or error
231  /// state is reached, or until the search times out. The polling is done
232  /// at an increasing interval, which starts out at 10 seconds, increasing
233  /// by 30% after each check to a maximum of 300 seconds per sleep.
234  ///
235  /// @param timeout Search timeout specified as a number of seconds.
236  /// @return true if the search was submitted, otherwise false.
237  bool SubmitSync(int timeout);
238 
239  /// This submits the search (if necessary) and returns immediately.
240  ///
241  /// If a new search is configured, and not already submitted, this will
242  /// submit it. Unlike SubmitSync(), this method does not poll for
243  /// results: it returns as soon as the submission attempt completes.
244  /// Use CheckDone() or CheckStatus() afterwards to monitor the search
245  /// and to find out when results are available.
246  ///
247  /// @return true if the search was submitted, otherwise false.
248  bool Submit(void);
249 
250  /// Represents the status of previously submitted search/RID
252  /// Never submitted or purged from the system
254  /// Completed successfully
256  /// Not completed yet
258  /// Completed but failed, call GetErrors/GetErrorVector()
259  eStatus_Failed
260  };
261 
262  /// Returns the status of a previously submitted search/RID
263  ESearchStatus CheckStatus();
264 
265  /// Check whether the search has completed.
266  ///
267  /// This checks the status of the search. Please delay at least
268  /// 10 seconds between subsequent calls. If this function returns
269  /// true, it will already have gotten the results as part of its
270  /// processing. With the common technique of polling with
271  /// CheckDone before calling GetAlignments (or other results
272  /// access operations), the first CheckDone call after results are
273  /// available will perform the CPU, network, and memory intensive
274  /// processing, and the GetAlignments() (for example) call will
275  /// simply return a pointer to part of this data.
276  /// @return true if the search is no longer running.
277  bool CheckDone(void);
278 
279  /// This returns a string containing any errors that were produced by the
280  /// search. A successful search should return an empty string here.
281  ///
282  /// @return An empty string or a newline separated list of errors.
283  string GetErrors(void);
284 
285  /// This returns any warnings encountered. These do not necessarily
286  /// indicate an error or even a potential error; some warnings are always
287  /// returned from certain types of searches. (This is a debugging feature
288  /// and warnings should probably not be returned to the end-user).
289  ///
290  /// @return Empty string or newline separated list of warnings.
291  string GetWarnings(void);
292 
293  /// This returns any warnings encountered as a vector of strings.
294  /// @sa CRemoteBlast::GetWarnings
295  /// @return Reference to a vector of warnings.
296  const vector<string> & GetWarningVector();
297 
298  /// This returns any errors encountered as a vector of strings.
299  /// @sa CRemoteBlast::GetErrors
300  /// @return Reference to a vector of errors.
301  const vector<string> & GetErrorVector();
302 
303  /* Getting Results */
304 
305  /// Gets the request id (RID) associated with the search.
306  ///
307  /// If the search was not successfully submitted, this will be empty.
308  const string & GetRID(void);
309 
310  /// Get the seqalign set from the results.
311  ///
312  /// This method returns the alignment data as a seq align set. If
313  /// this search contains multiple queries, this method will return
314  /// all data as a single set. Most users will probably prefer to
315  /// call GetSeqAlignSets() in this case.
316  ///
317  /// @return Reference to a seqalign set.
318  CRef<objects::CSeq_align_set> GetAlignments(void);
319 
320  /// Get the seqalign vector from the results.
321  ///
322  /// This method returns the alignment data from the search as a
323  /// TSeqAlignVector, which is a vector of CSeq_align_set objects.
324  /// For multiple query searches, this method will normally return
325  /// each alignment as a separate element of the TSeqAlignVector.
326  /// Note that in some cases, the TSeqAlignVector will not have an
327  /// entry for some queries. If the vector contains fewer
328  /// alignments than there were queries, it may be necessary for
329  /// the calling code to match queries to alignments by comparing
330  /// Seq_ids. This normally happens only if the same query is
331  /// specified multiple times, or if one of the searches does not
332  /// find any matches.
333  ///
334  /// @return A seqalign vector.
335  ///
336  /// @todo Separate the results for each query into discontinuous seq-aligns
337  /// separated by the subject sequence. Also, using the upcoming feature of
338  /// retrieving the query sequences, insert empty seqaligns into vector
339  /// elements where there are no results for a given query (use
340  /// x_CreateEmptySeq_align_set from blast_seqalign.cpp)
341  TSeqAlignVector GetSeqAlignSets();
342 
343  /// Get the results of a PHI-Align request, if PHI pattern was set.
344  /// @return Reference to PHI alignment set.
345  CRef<objects::CBlast4_phi_alignments> GetPhiAlignments(void);
346 
347  /// Convenience typedef for a list of CRef<CBlast4_ka_block>
348  typedef list< CRef<objects::CBlast4_ka_block > > TKarlinAltschulBlocks;
349 
350  /// Get the Karlin/Altschul parameter blocks produced by the search.
351  /// @return List of references to KA blocks.
352  TKarlinAltschulBlocks GetKABlocks(void);
353 
354  /// Get the queries' masked locations
355  TSeqLocInfoVector GetMasks(void);
356 
357  /// Get the search statistics block as a list of strings.
358  ///
359  /// Search statistics describe the data flow during each step of a BLAST
360  /// search. They are subject to change, and are not formally defined, but
361  /// can sometimes provide insight when investigating software problems.
362  ///
363  /// @return List of strings, each contains one line of search stats block.
364  list< string > GetSearchStats(void);
365 
366  /// Get the PSSM produced by the search.
367  /// @return Reference to a Score-matrix-parameters object.
369 
370  /// Debugging support can be turned on with eDebug or off with eSilent.
371  enum EDebugMode {
372  eDebug = 0,
373  eSilent
374  };
375 
376  /// Adjust the debugging level.
377  ///
378  /// This causes debugging trace data to be dumped to standard output,
379  /// along with ASN.1 objects used during the search and other text. It
380  /// produces a great deal of output, none of which is expected to be
381  /// useful to the end-user.
382  void SetVerbose(EDebugMode verb = eDebug);
383 
384  /// Defines a std::vector of CRef<CSeq_interval>
385  typedef vector< CRef<objects::CSeq_interval> > TSeqIntervalVector;
386  /// Defines a std::vector of CRef<CSeq_data>
387  typedef vector< CRef<objects::CSeq_data> > TSeqDataVector;
388 
389  /// Return value states whether GetDatabases or GetSubjectSequences
390  /// call should be used.
391  /// @return true indicates that GetDatabases should be used.
392  bool IsDbSearch();
393 
394  /// Get the database used by the search.
395  ///
396  /// An object is returned, describing the name and type of
397  /// database used as the subject of this search.
398  ///
399  /// @return An object describing the searched database(s).
400  CRef<objects::CBlast4_database> GetDatabases();
401 
402  /// Returns subject sequences if "bl2seq" mode used.
403  /// @return a list of bioseqs
404  list< CRef<objects::CBioseq> > GetSubjectSequences();
405 
406  CBlast4_subject::TSeq_loc_list GetSubjectSeqLocs();
407 
408  /// Get the program used for this search.
409  /// @return The value of the program parameter.
410  string GetProgram();
411 
412  /// Get the service used for this search.
413  /// @return The value of the service parameter.
414  string GetService();
415 
416  /// Get the created-by string associated with this search.
417  ///
418  /// The created by string for this search will be returned.
419  ///
420  /// @return The value of the created-by string.
421  string GetCreatedBy();
422 
423  /// Get the queries used for this search.
424  ///
425  /// The queries specified for this search will be returned. The
426  /// returned object will include either a list of seq-locs, a
427  /// CBioseq, or a PSSM query.
428  ///
429  /// @return The queries used for this search.
430  CRef<objects::CBlast4_queries> GetQueries();
431 
432  /// Get the search options used for this search.
433  ///
434  /// This returns the CBlastOptionsHandle for this search. If this
435  /// object was constructed with an RID, a CBlastOptionsHandle will
436  /// be constructed from the search options stored on the remote
437  /// server. In this case the returned CBlastOptionsHandle will
438  /// have a concrete type that corresponds to the program+service,
439  /// and a locality of "eLocal".
440  CRef<CBlastOptionsHandle> GetSearchOptions();
441 
442  /// Fetch the search strategy for this object without submitting the search
443  CRef<objects::CBlast4_request> GetSearchStrategy();
444 
445  /// Returns the filtering algorithm ID used in the database
447  return m_DbFilteringAlgorithmId;
448  }
449 
450  /// Returns the filtering algorithm key used in the database
451  string GetDbFilteringAlgorithmKey() const {
    // Returned by value: the caller gets its own copy of the stored key.
452  return m_DbFilteringAlgorithmKey;
453  }
454 
455  /// Returns the task used to create the remote search (if any)
456  string GetTask() const {
    // Returned by value: the caller gets its own copy of m_Task.
457  return m_Task;
458  }
459 
460  /// Retrieves the client ID used by this object to send requests
461  string GetClientId() const { return m_ClientId; } // returns a copy of m_ClientId
462  /// Sets the client ID used by this object to send requests
463  void SetClientId(const string& client_id) { m_ClientId = client_id; } // overwrites m_ClientId with client_id
464 
465  /// Loads next chunk of archive from file.
466  bool LoadFromArchive();
467 
468  /// Get the title assigned for this search.
469  string GetTitle(void);
470 
471  /// Controls disk cache usage for results retrieval
472  void EnableDiskCacheUse() { m_use_disk_cache = true; } // sets m_use_disk_cache to true
473  void DisableDiskCacheUse(){ m_use_disk_cache = false; } // sets m_use_disk_cache to false
474  bool IsDiskCacheActive(void) { return m_use_disk_cache; } // current value of m_use_disk_cache
475  /// disk cache error handling
476  // m_disk_cache_error_flag
477  bool IsDiskCacheError(void) { return m_disk_cache_error_flag; } // current value of m_disk_cache_error_flag
478  void ClearDiskCacheError(void){ m_disk_cache_error_flag = false;} // resets the flag only; m_disk_cache_error_msg is not touched here
479  string GetDiskCacheErrorMessahe(void) { return m_disk_cache_error_msg; } // returns a copy of m_disk_cache_error_msg. NOTE(review): "Messahe" looks like a typo for "Message"; the name is public API, so it is left unchanged.
480  // ask for search stats to check status.
481  CRef<objects::CBlast4_reply> x_GetSearchStatsOnly(void);
482  // actual code to get results using disk cache intermediate storage
483  CRef<objects::CBlast4_reply> x_GetSearchResultsHTTP(void);
484 
485  // For Psiblast
486  unsigned int GetPsiNumberOfIterations(void);
487 
488  bool IsErrMsgArchive(void);
489 
490  set<TTaxId> & GetTaxidList() { return m_TaxidList; }; // non-const reference to m_TaxidList: callers can modify the set in place
491  set<TTaxId> & GetNegativeTaxidList() { return m_NegativeTaxidList; }; // non-const reference to m_NegativeTaxidList: callers can modify the set in place
492 
493 private:
494 
495  bool x_HasRetrievedSubjects() const {
    // True when at least one of the two subject containers is non-empty:
    // m_SubjectSeqLocs or m_SubjectSequences.
496  return !m_SubjectSeqLocs.empty() || !m_SubjectSequences.empty();
497  }
498 
499  /// Retrieve the request body for a search submission
500  CRef<objects::CBlast4_request_body> x_GetBlast4SearchRequestBody();
501 
502  /// Sets a subset (only m_Dbs) of what the public SetDatabase sets.
503  ///@param x name of database.
504  void x_SetDatabase(const string & x);
505 
506  /// Set a list of subject sequences to search against (only
507  /// m_SubjectSequences)
508  ///@param subj subject bioseqs
509  void x_SetSubjectSequences(const list< CRef<objects::CBioseq> > & subj);
510 
511  /// Value list.
512  typedef list< CRef<objects::CBlast4_parameter> > TValueList;
513 
514  /// An alias for the most commonly used part of the Blast4 search results.
515  typedef objects::CBlast4_get_search_results_reply TGSRR;
516 
517  /// Get the query masks from the results.
518  /// @return list of references to Blast4_mask object.
519  TGSRR::TMasks x_GetMasks(void);
520 
521  /// Various states the search can be in.
522  ///
523  /// eStart Not submitted, can still be configured.
524  /// eFailed An error was encountered.
525  /// eWait The search is still running.
526  /// eDone Results are available.
527  enum EState {
528  eStart = 0,
531  eDone
532  };
533 
534  /// Indicates whether to use async mode.
535  enum EImmediacy {
536  ePollAsync = 0,
537  ePollImmed
538  };
539 
540  /// This class attempts to verify whether all necessary configuration is
541  /// complete before attempting to submit the search.
542  enum ENeedConfig {
543  eNoConfig = 0x0,
544  eProgram = 0x1,
545  eService = 0x2,
546  eQueries = 0x4,
547  eSubject = 0x8,
548  eNeedAll = 0xF
549  };
550 
551  /// The default timeout is 3.5 hours.
552  int x_DefaultTimeout(void);
553 
554  /// Uses the file to populate results.
555  /// The file may be text or binary ASN.1 or XML; the type is automatically detected.
556  ///@param f istream to archive file
557  void x_Init(CNcbiIstream& f);
558 
559  /// Called by new search constructors: initialize a new search.
560  void x_Init(CBlastOptionsHandle * algo_opts,
561  const string & program,
562  const string & service);
563 
564  /// Called by new search constructors: initialize a new search.
565  void x_Init(CBlastOptionsHandle * algo_opts);
566 
567  /// Called by RID constructor: set up monitoring of existing search.
568  void x_Init(const string & RID);
569 
570  /// Initialize a search with a database and options handle.
571  /// @param opts_handle Blast options handle.
572  /// @param db Database used for this search.
573  void x_Init(CRef<CBlastOptionsHandle> opts_handle,
574  const CSearchDatabase & db);
575 
576  /// Initialize queries based on a query factory.
577  /// @param queries Query factory from which to pull queries.
578  void x_InitQueries(CRef<IQueryFactory> queries);
579 
580  /// Initialize disk caching
581  void x_InitDiskCache(void);
582 
583  /// Configure new search from options handle passed to constructor.
584  void x_SetAlgoOpts(void);
585 
586  /// Set an integer parameter (not used yet).
587  /// @param field CBlast4Field object corresponding to option.
588  /// @param value Pointer to integer value to use.
589  void x_SetOneParam(objects::CBlast4Field & field, const int * value);
590 
591  /// Set a list of integers.
592  /// @param field CBlast4Field object corresponding to option.
593  /// @param value Pointer to list of integers to use.
594  void x_SetOneParam(objects::CBlast4Field & field, const list<int> * value);
595 
596  /// Set a list of 8 byte integers.
597  /// @param field CBlast4Field object corresponding to option.
598  /// @param value Pointer to list of integers to use.
599  void x_SetOneParam(objects::CBlast4Field & field, const list<Int8> * value);
600 
601  /// Set a string parameter.
602  /// @param field CBlast4Field object corresponding to option.
603  /// @param value Pointer to pointer to null-terminated string.
604  void x_SetOneParam(objects::CBlast4Field & field, const char ** value);
605 
606  /// Set a masking location for query
607  /// @param field CBlast4Field object corresponding to option.
608  /// @param mask masking location [in]
609  void x_SetOneParam(objects::CBlast4Field & field, CRef<objects::CBlast4_mask> mask);
610 
611  /// Determine what state the search is in.
612  EState x_GetState(void);
613 
614  /// Determine if this is an unknown RID.
615  /// @note caller must have contacted the server for this method to work
616  /// (e.g.: via CheckDone());
617  bool x_IsUnknownRID(void);
618 
619  /// Poll until done, return the CBlast4_get_search_results_reply.
620  /// @return Pointer to GSR reply object or NULL if search failed.
621  TGSRR * x_GetGSRR(void);
622 
623  /// Send a Blast4 request and get a reply.
624  /// @return The blast4 server response.
626  x_SendRequest(CRef<objects::CBlast4_request_body> body);
627 
628  /// Try to get the search results.
629  /// @return The blast4 server response.
631  x_GetSearchResults(void);
632 
633  /// Verify that search object contains mandatory fields.
634  void x_CheckConfig(void);
635 
636  /// Submit the search and process results (of submit action).
637  void x_SubmitSearch(void);
638 
639  /// Try to get and process results.
640  void x_CheckResults(void);
641 
642  /// Try to get and process results using disk cache.
643  void x_CheckResultsDC(void);
644 
645 
646  /// Iterate over error list, splitting into errors and warnings.
647  void x_SearchErrors(CRef<objects::CBlast4_reply> reply);
648 
649  /// Poll until results are found, error occurs, or timeout expires.
650  void x_PollUntilDone(EImmediacy poll_immed, int seconds);
651 
652  /// Fetch the request info (wait for completion if necessary).
653  void x_GetRequestInfo();
654  /// Fetch the requested info from splitd.
655  void x_GetRequestInfoFromRID();
656  /// Fetch the requested info from an archive file.
657  void x_GetRequestInfoFromFile();
658 
659  /// Extract the query IDs from the CBlast4_queries for a given search
660  /// @param query_ids the query IDs to be returned [in|out]
661  void x_ExtractQueryIds(CSearchResultSet::TQueryIdVector& query_ids);
662 
663  /// Set the masking locations AFTER the queries have been set in the
664  /// m_QSR field
665  void x_SetMaskingLocationsForQueries(const TSeqLocInfoVector&
666  masking_locations);
667 
668  /// Extract the user specified masking locations from the query factory
669  /// @note this method only extracts masking locations for
670  /// CObjMgr_QueryFactory objects, for other types use the SetQueries method
671  /// @param query_factory source of query sequence data [in]
672  /// @param masks masking locations extracted [out]
673  void
674  x_ExtractUserSpecifiedMasks(CRef<IQueryFactory> query_factory,
675  TSeqLocInfoVector& masks);
676 
677  /// Converts the provided query masking locations (if any) to the network
678  /// representation following the BLAST 4 ASN.1 spec
679  void x_QueryMaskingLocationsToNetwork();
680 
681  /// Add an algorithm or program option to the provided handle.
683  const string & nm,
684  const objects::CBlast4_value & v,
685  struct SInteractingOptions & io);
686 
687  /// Add algorithm and program options to the provided handle.
689  const TValueList & L,
690  struct SInteractingOptions & io);
691 
692  /// Adjust the EProgram based on option values.
693  ///
694  /// The blast4 protocol uses a notion of program and service to
695  /// represent the type of search to do. However, for some values
696  /// of program and service, it is necessary to look at options
697  /// values in order to determine the precise EProgram value. This
698  /// is particularly true when dealing with discontiguous megablast
699  /// for example.
700  ///
701  /// @param L The list of options used for this search.
702  /// @param pstr The program string used by the blast4 protocol.
703  /// @param program The EProgram suggested by program+service.
704  /// @return The EProgram value as adjusted by options.
706  const string & pstr,
707  EProgram program);
708 
709  // Retrieve the subject sequences using the get-search-info functionality
710  void x_GetSubjects(void);
711 
712  string x_GetStringFromSearchInfoReply(CRef<CBlast4_reply> reply,
713  const string& name,
714  const string& value);
715 
716  // Get psi iterations for an RID
717  unsigned int x_GetPsiIterationsFromServer(void);
718  /// Prohibit copy construction.
720 
721  /// Prohibit assignment.
723 
724 
725  // Data
726 
727  /// Options for new search.
729 
730  /// Request object for new search.
732 
733  /// Results of BLAST search.
735 
736  /// Archive of BLAST search and results.
738 
739  /// true if a CBlast4_archive should be read in.
741 
742  /// Used to read CBlast4_archive
743  unique_ptr<CObjectIStream> m_ObjectStream;
744 
745  /// Type of object CBlast4_archive as determined by CFormatGuess
747 
748  /// List of errors encountered.
749  vector<string> m_Errs;
750 
751  /// List of warnings encountered.
752  vector<string> m_Warn;
753 
754  /// Request ID of submitted or pre-existing search.
755  string m_RID;
756 
757  /// Count of server glitches (communication errors) to ignore.
758  int m_ErrIgn;
759 
760  /// Pending state: indicates that search still needs to be queued.
761  bool m_Pending;
762 
763  /// Verbosity mode: whether to produce debugging info on stdout.
765 
766  /// Bitfield to track whether all necessary configuration is done.
768 
769 
770  // "Get request info" fields.
771 
772  /// Databases
774 
775  /// Subject Sequences
776  list< CRef<objects::CBioseq> > m_SubjectSequences;
777  /// This field is populated when dealing with a remote bl2seq search (e.g.:
778  /// when formatting an RID using blast_formatter)
780 
781  /// Program value used when submitting this search
782  string m_Program;
783 
784  /// Service value used when submitting this search
785  string m_Service;
786 
787  /// Created-by attribution for this search.
788  string m_CreatedBy;
789 
790  /// Queries associated with this search.
792 
793  /// Options relevant to the search algorithm.
795 
796  /// Options relevant to the search application.
798 
799  /// Options relevant to the format application.
801 
802  /// Masking locations for queries.
804 
805  /// Entrez Query, if any.
807 
808  /// First database sequence.
809  string m_FirstDbSeq;
810 
811  /// Final database sequence.
812  string m_FinalDbSeq;
813 
814  /// GI list.
815  list<TGi> m_GiList;
816 
817  /// Negative GI list.
818  list<TGi> m_NegativeGiList;
819 
820  /// filtering algorithm to use in the database
822 
823  /// filtering algorithm key to use in the database
825 
827 
828  /// Task used when the search was submitted (recovered via
829  /// CBlastOptionsBuilder)
830  string m_Task;
831 
832  /// Client ID submitting requests through this interface
833  string m_ClientId;
834 
835  /// Use disk cache for retrieving results
836  /// default: false
838  /// disk cache error flag
840  /// disk cache error message
844 };
845 
846 /** Converts the return value of CSeqLocInfo::GetFrame into the
847  * Blast4-frame-type field. Note that for non-translated queries, this value
848  * should be set to notset (value = 0).
849  * @param frame frame as specified by CSeqLocInfo::ETranslationFrame
850  * @param program CORE BLAST program type
851  */
854 FrameNumber2NetworkFrame(int frame, EBlastProgramType program);
855 
856 
857 /** Converts Blast4-frame-type into CSeqLocInfo::ETranslationFrame
858  * @param frame frame as specified by objects::EBlast4_frame_type
859  * @param program CORE BLAST program type
860  */
864  EBlastProgramType program);
865 
866 
867 /// Function to convert from program and service name into the CORE BLAST
868 /// program type
869 /// This is based on the values set in the various CBlastOptionsHandle
870 /// subclasses (look for SetRemoteProgramAndService_Blast3 in include tree)
871 /// @note This function needs to be updated if the program/service name
872 /// combinations change
875 NetworkProgram2BlastProgramType(const string& program, const string& service);
876 
877 /// Extract a Blast4-request (a.k.a.: a search strategy) from an input stream.
878 /// This function supports reading binary and text ASN.1 as well as XML.
879 /// A Blast4-get-search-strategy-reply will be attempted to read first (output
880 /// of BLAST SOAP server), then a Blast4-request (output of BLAST C++ command
881 /// line binaries).
882 /// @param in stream to read the data from [in]
883 /// @throws CException if input cannot be recognized
887 
888 void
889 FlattenBioseqSet(const CBioseq_set & bss, list< CRef<CBioseq> > & seqs);
890 
891 END_SCOPE(blast)
893 
894 /* @} */
895 
896 #endif /* ALGO_BLAST_API___REMOTE_BLAST__HPP */
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
ESubjectMaskingType
Define the possible subject masking types.
Definition: blast_def.h:235
@ eSoftSubjMasking
Definition: blast_def.h:237
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
Definition: blast_export.h:65
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Definition: blast_types.hpp:56
ncbi::TMaskedQueryRegions mask
Defines BLAST error codes (user errors included)
Handle to the options to the BLAST algorithm.
EFormat
The formats are checked in the same order as declared here.
CObject –.
Definition: ncbiobj.hpp:180
Exception class for the CRemoteBlast class.
API for Remote Blast Requests.
Blast Search Subject.
list< CRef< objects::CBlast4_parameter > > TValueList
Value list.
objects::CBlast4_get_search_results_reply TGSRR
An alias for the most commonly used part of the Blast4 search results.
ENeedConfig m_NeedConfig
Bitfield to track whether all necessary configuration is done.
list< CRef< objects::CBioseq > > m_SubjectSequences
Subject Sequences.
CRef< objects::CBlast4_queries > m_Queries
Queries associated with this search.
objects::EBlast4_frame_type FrameNumber2NetworkFrame(int frame, EBlastProgramType program)
Converts the return value of CSeqLocInfo::GetFrame into the Blast4-frame-type field.
list< CRef< objects::CBlast4_ka_block > > TKarlinAltschulBlocks
Convenience typedef for a list of CRef<CBlast4_ka_block>
void EnableDiskCacheUse()
Controls disk cache usage for results retrieval.
list< TGi > m_GiList
GI list.
void x_ProcessOneOption(CBlastOptionsHandle &opts, const string &nm, const objects::CBlast4_value &v, struct SInteractingOptions &io)
Add an algorithm or program option to the provided handle.
EErrCode
Error types that BLAST can generate.
string m_RID
Request ID of submitted or pre-existing search.
TSeqLocInfoVector m_QueryMaskingLocations
Masking locations for queries.
list< TGi > m_NegativeGiList
Negative GI list.
CFormatGuess::EFormat m_ObjectType
Type of object CBlast4_archive as determined by CFormatGuess.
bool m_use_disk_cache
Use disk cache for retrieving results (default: false).
ENeedConfig
This class attempts to verify whether all necessary configuration is complete before attempting to su...
CRef< objects::CBlast4_archive > m_Archive
Archive of BLAST search and results.
set< TTaxId > & GetNegativeTaxidList()
CRemoteBlast & operator=(const CRemoteBlast &)
Prohibit assignment.
CRef< blast::CBlastOptionsHandle > m_CBOH
Options for new search.
string GetTask() const
Returns the task used to create the remote search (if any)
ESearchStatus
Represents the status of previously submitted search/RID.
vector< CConstRef< objects::CSeq_id > > TQueryIdVector
List of query ids.
CRef< objects::CBlast4_parameters > m_FormatOpts
Options relevant to the format application.
vector< string > m_Warn
List of warnings encountered.
EDebugMode
Debugging support can be turned on with eDebug or off with eSilent.
void SetClientId(const string &client_id)
Sets the client ID used by this object to send requests.
int m_DbFilteringAlgorithmId
filtering algorithm to use in the database
EDebugMode m_Verbose
Verbosity mode: whether to produce debugging info on stdout.
EImmediacy
Indicates whether to use async mode.
set< TTaxId > m_NegativeTaxidList
vector< string > m_Errs
List of errors encountered.
string GetDiskCacheErrorMessahe(void)
virtual const char * GetErrCodeString(void) const override
Translate from the error code value to its string representation.
void FlattenBioseqSet(const CBioseq_set &bss, list< CRef< CBioseq > > &seqs)
void SetQueries(TSeqLocList &seqlocs)
Set the query as a list of Seq_locs.
unique_ptr< CObjectIStream > m_ObjectStream
Used to read CBlast4_archive.
CRef< objects::CBlast4_request > ExtractBlast4Request(CNcbiIstream &in)
Extract a Blast4-request (a.k.a.
string m_Service
Service value used when submitting this search.
bool m_ReadFile
true if a CBlast4_archive should be read in.
CBlast4_subject::TSeq_loc_list m_SubjectSeqLocs
This field is populated when dealing with a remote bl2seq search (e.g.
string m_FinalDbSeq
Final database sequence.
CRef< objects::CBlast4_parameters > m_AlgoOpts
Options relevant to the search algorithm.
string m_FirstDbSeq
First database sequence.
CRemoteBlast(const CRemoteBlast &)
Prohibit copy construction.
NCBI_EXCEPTION_DEFAULT(CRemoteBlastException, CBlastException)
bool IsDiskCacheActive(void)
CRef< objects::CBlast4_database > m_Dbs
Databases.
EBlastProgramType NetworkProgram2BlastProgramType(const string &program, const string &service)
Function to convert from program and service name into the CORE BLAST program type. This is based on t...
string m_ClientId
Client ID submitting requests through this interface.
CRef< objects::CBlast4_parameters > m_ProgramOpts
Options relevant to the search application.
string GetDbFilteringAlgorithmKey() const
Returns the filtering algorithm key used in the database.
vector< CRef< objects::CSeq_data > > TSeqDataVector
Defines a std::vector of CRef<CSeq_data>
EProgram x_AdjustProgram(const TValueList &L, const string &pstr, EProgram program)
Adjust the EProgram based on option values.
CRef< objects::CBlast4_queue_search_request > m_QSR
Request object for new search.
set< TTaxId > m_TaxidList
string m_DbFilteringAlgorithmKey
filtering algorithm key to use in the database
CRef< objects::CBlast4_reply > m_Reply
Results of BLAST search.
void DisableDiskCacheUse()
list< CRef< objects::CSeq_loc > > TSeqLocList
Typedef for a list of Seq-locs.
void x_ProcessOptions(CBlastOptionsHandle &opts, const TValueList &L, struct SInteractingOptions &io)
Add algorithm and program options to the provided handle.
vector< CRef< objects::CSeq_interval > > TSeqIntervalVector
Defines a std::vector of CRef<CSeq_interval>
string m_disk_cache_error_msg
disk cache error message
void SetQueries(TSeqLocList &seqlocs, const TSeqLocInfoVector &masking_locations)
Set the query as a list of Seq_locs.
set< TTaxId > & GetTaxidList()
bool m_Pending
Pending state: indicates that search still needs to be queued.
string m_Task
Task used when the search was submitted (recovered via CBlastOptionsBuilder)
bool m_disk_cache_error_flag
disk cache error flag
Int4 GetDbFilteringAlgorithmId() const
Returns the filtering algorithm ID used in the database.
ESubjectMaskingType m_SubjectMaskingType
string m_Program
Program value used when submitting this search.
string m_EntrezQuery
Entrez Query, if any.
bool x_HasRetrievedSubjects() const
string m_CreatedBy
Created-by attribution for this search.
string GetClientId() const
Retrieves the client ID used by this object to send requests.
int m_ErrIgn
Count of server glitches (communication errors) to ignore.
bool IsDiskCacheError(void)
disk cache error handling
void ClearDiskCacheError(void)
CSeqLocInfo::ETranslationFrame NetworkFrame2FrameNumber(objects::EBlast4_frame_type frame, EBlastProgramType program)
Converts Blast4-frame-type into CSeqLocInfo::ETranslationFrame.
@ eStatus_Pending
Not completed yet.
@ eStatus_Unknown
Never submitted or purged from the system.
@ eStatus_Done
Completed successfully.
@ eServiceNotAvailable
Service is not available.
@ eIncompleteConfig
Remote BLAST object not fully configured.
#define NULL
Definition: ncbistd.hpp:225
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbiexpt.cpp:453
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
NCBI_XOBJUTIL_EXPORT string GetTitle(const CBioseq_Handle &hnd, TGetTitleFlags flags=0)
Definition: seqtitle.cpp:106
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
EBlast4_frame_type
Access to EBlast4_frame_type's attributes (values, names) as defined in spec.
list< CRef< CSeq_loc > > TSeq_loc_list
Names used in blast4 network communications.
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
std::istream & in(std::istream &in_, double &x_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
Definition: seqlocinfo.hpp:139
Uniform BLAST Search Interface.
Modified on Sat Apr 13 11:43:32 2024 by modify_doxy.py rev. 669887