NCBI C++ ToolKit
writedb.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJTOOLS_BLAST_SEQDB_WRITER___WRITEDB__HPP
2 #define OBJTOOLS_BLAST_SEQDB_WRITER___WRITEDB__HPP
3 
4 /* $Id: writedb.hpp 101152 2023-11-07 15:39:13Z camacho $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Kevin Bealer
30  *
31  */
32 
33 /// @file writedb.hpp
34 /// Defines BLAST database construction classes.
35 ///
36 /// Defines classes:
37 /// CWriteDB
38 ///
39 /// Implemented for: UNIX, MS-Windows
40 
47 #include <objects/seq/seq__.hpp>
48 
49 #include <objmgr/bioseq_handle.hpp>
50 
51 
53 
54 /// Include definitions from the objects namespace.
56 
57 /// Forward definition for PIMPL idiom.
58 class CWriteDB_Impl;
59 
60 /// This represents a set of masks for a given sequence. It is represented as a
61 /// vector because there can be multiple types of filtering applied to a single
62 /// sequence (e.g.: DUST, WINDOWMASKER, REPEATS, etc).
63 /// The type of masking data produced in IMaskDataSource
64 class NCBI_XOBJWRITE_EXPORT CMaskedRangesVector: public vector<SBlastDbMaskData>
65 {
66 public:
67  /// Alias for our parent (base container) class
68  typedef vector<SBlastDbMaskData> TParent;
69 
70  /// Redefine empty() (hiding vector::empty()) to mean no elements, or every element being empty
71  bool empty() const {
72  ITERATE(TParent, itr, *this) {
73  if ( !itr->empty() ) {
74  return false; // at least one element reports itself as non-empty
75  }
76  }
77  return true; // no elements at all, or every element is empty
78  }
79 };
80 
81 
82 /// CWriteDB
83 ///
84 /// User interface class for blast databases.
85 ///
86 /// This class provides the top-level interface class for BLAST
87 /// database users. It defines access to the database component by
88 /// calling methods on objects which represent the various database
89 /// files, such as the index, header, sequence, and alias files.
90 
92 {
93 public:
94  /// Sequence types.
95  enum ESeqType {
96  /// Protein database.
97  eProtein = 0,
98 
99  /// Nucleotide database.
100  eNucleotide = 1
101  };
102 
103  /// Whether and what kind of indices to build.
104  enum EIndexType {
105  /// Build a database without any indices.
106  eNoIndex = 0,
107 
108  /// Use only simple accessions in the string index.
109  eSparseIndex = 0x1,
110 
111  /// Use several forms of each Seq-id in the string index.
112  eFullIndex = 0x2,
113 
114  /// OR this in to add an index for trace IDs.
115  eAddTrace = 0x4,
116 
117  /// Like eFullIndex but also build a numeric Trace ID index.
118  eFullWithTrace = eFullIndex | eAddTrace,
119 
120  /// Default index set: same as eFullWithTrace (eFullIndex plus a numeric Trace ID index).
121  eDefault = eFullIndex | eAddTrace,
122 
123  // Specialized ISAMs; these can be ORred into the above.
124 
125  /// Add an index from sequence hash to OID.
126  eAddHash = 0x100
127  };
128  typedef int TIndexType; ///< Bitwise OR of "EIndexType"
129 
130  //
131  // Setup
132  //
133 
134  /// Constructor
135  ///
136  /// Starts construction of a blast database.
137  ///
138  /// @param dbname
139  /// A list of database or alias names, separated by spaces. [in]
140  /// @param seqtype
141  /// Specify eProtein or eNucleotide (the values of ESeqType). [in]
142  /// @param title
143  /// The database title. [in]
144  /// @param itype
145  /// Indicates the type of indices to build if specified. [in]
146  /// @param parse_ids
147  /// If true, generate ISAM files [in]
148  /// @param long_ids
149  /// If true, assume long sequence ids (database|accession) when parsing
150  /// string ids [in]
151  /// @param use_gi_mask
152  /// If true, generate GI-based mask files [in]
153  /// @param dbver
154  /// version of BLAST database to generate [in]
155  /// @param scan_bioseq_4_cfastareader_usrobj [in]
156  /// If true, scan the Bioseq objects for a CFastaReader-created User-object
157  /// containing a defline
158  CWriteDB(const string & dbname,
159  ESeqType seqtype,
160  const string & title,
161  int itype = eDefault,
162  bool parse_ids = true,
163  bool long_ids = false,
164  bool use_gi_mask = false,
166  bool limit_defline = false,
167  Uint8 oid_masks = EOidMaskType::fNone,
168  bool scan_bioseq_4_cfastareader_usrobj = false);
169 
170  /// Destructor.
171  ///
172  /// This will return resources acquired by this object, and call Close()
173  /// if it has not already been called.
174  ~CWriteDB();
175 
176  //
177  // Adding data
178  //
179 
180  // Each new sequence is started when the client calls one of the
181  // AddSequence() methods. This can optionally be followed by one
182  // or more calls to Set...() methods or AddDefline(), to add or
183  // change other data. The accumulated data for the sequence is
184  // combined and written when the sequence after it is started
185  // (with another AddSequence() call), or when Close() is called.
186 
187  /// Add a sequence as a CBioseq.
188  ///
189  /// This adds the sequence data in the specified CBioseq to the
190  /// database. If the CBioseq contains deflines, they will also be
191  /// used unless there is a call to SetDeflines() or AddDefline().
192  /// Note that the CBioseq will be held by CWriteDB at least until
193  /// the next sequence is provided. If this method is used, the
194  /// CBioseq is expected to contain sequence data accessible via
195  /// GetInst().GetSeq_data(). If this might not be true, it may be
196  /// better to use the version of this function that also takes a
197  /// CSeqVector.
198  ///
199  /// @param bs The sequence and related data as a CBioseq. [in]
200  void AddSequence(const CBioseq & bs);
201 
202  /// Add a sequence as a CBioseq.
203  ///
204  /// This adds the sequence data in the specified CSeqVector, and
205  /// the meta data in the specified CBioseq, to the database. If
206  /// the CBioseq contains deflines, they will also be used unless
207  /// there is a call to SetDeflines() or AddDefline(). Note that
208  /// the CBioseq will be held by CWriteDB at least until the next
209  /// sequence is provided. This version will use the CSeqVector if
210  /// the sequence data is not found in the CBioseq.
211  ///
212  /// @param bs A CBioseq containing meta data for the sequence. [in]
213  /// @param sv The sequence data for the sequence. [in]
214  void AddSequence(const CBioseq & bs, CSeqVector & sv);
215 
216  /// Add a sequence as a CBioseq_Handle.
217  ///
218  /// This adds the sequence found in the given CBioseq_Handle to
219  /// the database.
220  ///
221  /// @param bsh The sequence and related data as a CBioseq_Handle. [in]
222  void AddSequence(const CBioseq_Handle & bsh);
223 
224  /// Add a sequence as raw data.
225  ///
226  /// This adds a sequence provided as raw sequence data. The raw
227  /// data must be (and is assumed to be) encoded correctly for the
228  /// format of database being produced. For protein databases, the
229  /// ambiguities string should be empty (and is thus optional). If
230  /// this version of AddSequence() is used, the user must also
231  /// provide one or more deflines with SetDeflines() or
232  /// AddDefline() calls.
233  ///
234  /// @param sequence The sequence data as a string of bytes. [in]
235  /// @param ambiguities The ambiguity data as a string of bytes. [in]
236  void AddSequence(const CTempString & sequence,
237  const CTempString & ambiguities = "");
238 
239  /// Set the PIG to be used for the sequence.
240  ///
241  /// For proteins, this sets the PIG of the protein sequence.
242  ///
243  /// @param pig PIG identifier as an integer. [in]
244  void SetPig(int pig);
245 
246  /// Set the deflines to be used for the sequence.
247  ///
248  /// This method sets all the deflines at once as a complete set,
249  /// overriding any deflines provided by AddSequence(). If this
250  /// method is used with the CBioseq version of AddSequence, it
251  /// replaces the deflines found in the CBioseq.
252  ///
253  /// @param deflines Deflines to use for this sequence. [in]
254  void SetDeflines(const CBlast_def_line_set & deflines);
255 
256  /// Register a type of filtering data found in this database.
257  ///
258  /// @return algorithm ID for the filtering data.
259  /// @param program Program used to produce this masking data. [in]
260  /// @param options Algorithm options provided to the program. [in]
261  /// @param name Name of the GI-based mask. [in]
263  const string & options = string(),
264  const string & name = string());
265 
266  /// Register a type of filtering data found in this database.
267  ///
268  /// @return algorithm ID for the filtering data.
269  /// @param id A string to identify the masking data. [in]
270  /// @param description Details about the masking data. [in]
271  /// @param options Algorithm options provided to the program. [in]
272  int RegisterMaskAlgorithm(const string & id,
273  const string & description = string(),
274  const string & options = string());
275 
276  /// Set filtering data for a sequence.
277  ///
278  /// This method specifies filtered regions for this sequence. A
279  /// sequence may have filtering data from one or more algorithms.
280  /// For each algorithm_id value specified in ranges, a description
281  /// should be added to the database using RegisterMaskAlgorithm().
282  /// This must be done before the first call to SetMaskData() that
283  /// uses the algorithm id for a non-empty offset range list.
284  ///
285  /// @param ranges Filtered ranges for this sequence and algorithm.
286  /// @param gis GIs associated with this sequence.
287  void SetMaskData(const CMaskedRangesVector & ranges,
288  const vector<TGi> & gis);
289 
290  //
291  // Output
292  //
293 
294  /// List Volumes
295  ///
296  /// Returns the base names of all volumes constructed by this
297  /// class; the returned list may not be complete until Close() has
298  /// been called.
299  ///
300  /// @param vols The set of volumes produced by this class. [out]
301  void ListVolumes(vector<string> & vols);
302 
303  /// List Filenames
304  ///
305  /// Returns a list of the files constructed by this class; the
306  /// returned list may not be complete until Close() has been
307  /// called.
308  ///
309  /// @param files The set of resolved database path names. [out]
310  void ListFiles(vector<string> & files);
311 
312  /// Close the Database.
313  ///
314  /// Flush all data to disk and close any open files.
315  void Close();
316 
317  //
318  // Controls
319  //
320 
321  // The blast volume format has internal limits for these fields;
322  // these are called 'hard limits' here. If the value specified
323  // here exceeds that limit, it will be silently reduced. Limits
324  // are applied simultaneously; creation of a new volume is
325  // triggered as soon as any of the limits is reached (unless the
326  // current volume is empty).
327 
328  /// Set maximum size for output files.
329  ///
330  /// The provided size is applied as a limit on the size of output
331  /// files. If adding a sequence would cause any output file to
332  /// exceed this size, the volume is closed and a new volume is
333  /// started (unless the current volume is empty, in which case the
334  /// size limit is ignored and a one-sequence volume is created).
335  /// The default value is 2^30-1. There is also a hard limit
336  /// required by the database format.
337  ///
338  /// @param sz Maximum size in bytes of any volume component file. [in]
339  void SetMaxFileSize(Uint8 sz);
340 
341  /// Set maximum letters for output volumes.
342  ///
343  /// The provided size is applied as a limit on the size of output
344  /// volumes. If adding a sequence would cause a volume to exceed
345  /// this many protein or nucleotide letters (*not* bytes), the
346  /// volume is closed and a new volume is started (unless the
347  /// volume is currently empty). There is no default, but there is
348  /// a hard limit required by the format definition. Ambiguity
349  /// encoding is not counted toward this limit.
350  ///
351  /// @param letters Maximum letters to pack in one volume. [in]
352  void SetMaxVolumeLetters(Uint8 letters);
353 
354  /// Extract Deflines From Bioseq.
355  ///
356  /// Deflines are extracted from the CBioseq and returned to the
357  /// user. The caller can then modify or inspect the deflines, and
358  /// apply them to a sequence with SetDeflines().
359  ///
360  /// @param bs The bioseq from which to extract a defline set. [in]
361  /// @param parse_ids If seqid should be parsed [in]
362  /// @param long_ids If true, use long sequence ids (database|accession) [in]
363  /// @param scan_bioseq_4_cfastareader_usrobj [in]
364  /// If true, scan the Bioseq objects for a CFastaReader-created User-object
365  /// containing a defline
366  /// @return A set of deflines for this CBioseq.
368  ExtractBioseqDeflines(const CBioseq & bs, bool parse_ids=true,
369  bool long_ids=false,
370  bool scan_bioseq_4_cfastareader_usrobj=false);
371 
372  /// Set letters that should not be used in sequences.
373  ///
374  /// This method specifies letters that should not be used in the
375  /// resulting database. The masked letters are expected to be
376  /// specified in an IUPAC (alphabetic) encoding, and will be
377  /// replaced by 'X' (for protein) when the sequences are packed.
378  /// This method should be called before any sequences are added.
379  /// This method only works with protein (the motivating case
380  /// cannot happen with nucleotide).
381  ///
382  /// @param masked Letters to exclude. [in]
383  void SetMaskedLetters(const string & masked);
384 
385 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
386  (!defined(NCBI_COMPILER_MIPSPRO)) )
387  /// Find an existing column.
388  ///
389  /// This looks for an existing column with the specified title and
390  /// returns the column ID if found.
391  ///
392  /// @param title The column title to look for.
393  /// @return The column ID if this title is defined, otherwise -1.
394  int FindColumn(const string & title) const;
395 
396  /// Set up a user-defined CWriteDB column.
397  ///
398  /// This method creates a user-defined column associated with this
399  /// database. The column is indexed by OID and contains arbitrary
400  /// binary data, which is applied using the SetBlobData method
401  /// below. The `title' parameter identifies the column and must
402  /// be unique within this database. Because tables are accessed
403  /// by title, it is not necessary to permanently associate file
404  /// extensions with specific purposes or data types. The return
405  /// value of this method is an integer that identifies this column
406  /// for the purpose of inserting blob data. (The number of columns
407  /// allowed is currently limited due to the file naming scheme,
408  /// but some columns are used for built-in purposes.)
409  ///
410  /// @param title Name identifying this column.
411  /// @return Column identifier (a positive integer).
412  int CreateUserColumn(const string & title);
413 
414  /// Add meta data to a user-defined column.
415  ///
416  /// In addition to normal blob data, database columns can store a
417  /// `dictionary' of user-defined metadata in key/value form. This
418  /// method adds one such key/value pair to the column. Specifying
419  /// a key a second time causes replacement of the previous value.
420  /// Using this mechanism to store large amounts of data may have a
421  /// negative impact on performance.
422  ///
423  /// @param col_id Specifies the column to add this metadata to.
424  /// @param key A unique key string.
425  /// @param value A value string.
426  void AddColumnMetaData(int col_id,
427  const string & key,
428  const string & value);
429 
430  /// Add blob data to a user-defined column.
431  ///
432  /// To add data to a user-defined blob column, call this method,
433  /// providing the column handle. A blob object will be returned;
434  /// the user data should be stored in this object. The data can
435  /// be stored any time up to the next call to an `AddSequence'
436  /// method (just as with any other per-sequence data) but access
437  /// to the returned object after that point is incorrect
438  /// and will have undefined consequences.
439  ///
440  /// @param column_id Identifier for a user-defined column.
441  /// @return Blob data should be written to this object.
442  CBlastDbBlob & SetBlobData(int column_id);
443 #endif
444 
445 protected:
446  /// Implementation object.
448 };
449 
450 
451 /// Binary GI or TI List Builder.
452 ///
453 /// This class assists in building binary GI or TI lists for use with
454 /// BLAST databases and associated software.
456 {
457 public:
458  /// Type definition of the container that stores the IDs for this class
459  typedef vector<Int8> TContainerType;
460 
461  /// Standard size_type definition
462  typedef TContainerType::size_type size_type;
463 
464  /// Identifier types.
465  enum EIdType {
466  /// Genomic id.
468 
469  /// Trace id.
470  eTi
471  };
472 
473  /// Construct a list of a given type.
474  CBinaryListBuilder(EIdType id_type);
475 
476  /// Write the list to a file.
477  /// @param fname Filename of the file to write the object to.
478  void Write(const string & fname);
479 
480  /// Write the list to a stream
481  /// @param stream Stream to write the object to.
482  void Write(CNcbiOstream& stream);
483 
484  /// Append a single identifier to the end of the stored ID list (m_Ids).
485  void AppendId(const Int8 & id)
486  {
487  m_Ids.push_back(id);
488  }
489 
490  /// Add several IDs to the list.
491  ///
492  /// This should take begin and end indicators, such as pointers to
493  /// the beginning and end (past the last element) of an array of
494  /// integers, or begin() and end() iterators to a compatible STL
495  /// collection type such as vector<Int4> or set<int>. Each element is converted to Int8 and appended via AppendId().
496  ///
497  /// @param a Iterator to the first included element.
498  /// @param b Iterator to element after the last included element.
499  template<class T>
500  void AppendIdList(const T & a, const T & b)
501  {
502  for(T c = a; c != b; ++c) {
503  Int8 id = *c; // convert the element to the Int8 type used for storage
504  AppendId(id);
505  }
506  }
507 
508  /// Returns the number of IDs currently held in this builder's ID list (m_Ids)
509  size_type Size() const {
510  return m_Ids.size();
511  }
512 
513 private:
514  /// List of identifiers to use.
516 
517  /// Whether to use GIs or TIs.
519 
520  /// Prevent copy construction.
522 
523  /// Prevent copy assignment.
525 };
526 
527 
528 /// Builder for BlastDb format column files.
529 ///
530 /// This class supports construction of BlastDb format column files
531 /// outside of BlastDb volumes. To build column files as part of a
532 /// volume, use CWriteDB's column related methods. This class is an
533 /// interface to the column file construction functionality, but is
534 /// intended for data not associated with specific BlastDb volumes.
535 /// Columns built with CWriteDB::CreateUserColumn participate in WriteDB's
536 /// other volume-oriented policies such as volume breaking to enforce
537 /// file size limits, and compatibility with component file naming
538 /// conventions for CWriteDB and CSeqDB.
539 
541 public:
542  /// Construct a BlastDb format column.
543  ///
544  /// The `title' string names this column, and can be used to
545  /// uniquely identify it in cases where the file name must be
546  /// chosen arbitrarily. This version chooses file extensions
547  /// using a basic pattern (<name>.x?[ab]) designed to not conflict
548  /// with columns created by WriteDB as part of a volume. The
549  /// file_id character must be alphanumeric.
550  ///
551  /// @param title Internal name of this column.
552  /// @param basename Column filename (minus extension).
553  /// @param file_id Identifier for this column.
554  CWriteDB_ColumnBuilder(const string & title,
555  const string & basename,
556  char file_id = 'a');
557 
558  /// Add meta data to the column.
559  ///
560  /// In addition to normal blob data, database columns can store a
561  /// `dictionary' of user-defined metadata in key/value form. This
562  /// method adds one such key/value pair to the column. Specifying
563  /// a key a second time causes replacement of the previous value.
564  /// Using this mechanism to store large amounts of data may have a
565  /// negative impact on performance.
566  ///
567  /// @param key Key string.
568  /// @param value Value string.
569  void AddMetaData(const string & key, const string & value);
570 
571  /// Destructor.
573 
574 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
575  (!defined(NCBI_COMPILER_MIPSPRO)) )
576  /// Add a blob to the column.
577  ///
578  /// The data described by `blob' is added to the column. If the
579  /// blob is empty, no data is stored but the OID is incremented.
580  ///
581  /// @param blob The blob to add to the column.
582  void AddBlob(const CBlastDbBlob & blob);
583 #endif
584 
585  /// Complete and close the column files.
586  void Close();
587 
588  /// List Filenames
589  ///
590  /// Returns a list of the files constructed by this class; the
591  /// returned list may not be complete until Close() has been
592  /// called.
593  ///
594  /// @param files The list of files created for this column.
595  void ListFiles(vector<string> & files) const;
596 
597 private:
598  /// Prevent the copy constructor.
600 
601  /// Prevent copy assignment.
603 
604  /// Implementation object.
606 };
607 
608 /// Defines the possible filtering types that can be applied to an alias file
610  eNoAliasFilterType, ///< Sentinel value
611  eGiList, ///< Filter a BLAST database via GIs
612  eTiList, ///< Filter a BLAST database via TIs (Trace IDs)
613  eSeqIdList, ///< Filter a BLAST database via a Seq-id list
614  eTaxIdList ///< Filter a BLAST database via Taxonomy Id list
615 };
616 
617 /**
618  * @brief Writes an alias file that restricts a database with a gi list.
619  *
620  * @param file_name alias file name to create, it will overwrite any existing
621  * files of that name. It can be specified as an absolute path, or a path
622  * relative to the current working directory [in]
623  * @param db_name database name to restrict. Can be specified as an absolute path,
624  * or a path relative to the target directory or the default directory [in]
625  * @param seq_type type of sequences stored in the database [in]
626  * @param gi_file_name name of the file containing gis [in]
627  * @param title title to use in this alias file [in]
628  * @param alias_type Type of alias file to create [in]
629  */
632  const string& db_name,
633  CWriteDB::ESeqType seq_type,
634  const string& gi_file_name,
635  const string& title = string(),
636  EAliasFileFilterType alias_type = eGiList);
637 
638 /**
639  * @brief Writes an alias file that aggregates multiple existing BLAST
640  * databases.
641  *
642  * @param file_name alias file name to create, it will overwrite any existing
643  * files of that name. It can be specified as an absolute path, or a path
644  * relative to the current working directory [in]
645  * @param db_names database names to aggregate. Can be specified as absolute paths,
646  * or paths relative to the target directory or the default directory [in]
647  * @param gi_file_name name of the file containing gis [in]
648  * @param seq_type type of sequences stored in the database [in]
649  * @param title title to use in this alias file [in]
650  * @param alias_type Type of alias file to create [in]
651  */
654  const vector <string> & db_names,
655  CWriteDB::ESeqType seq_type,
656  const string& gi_file_name,
657  const string& title = string(),
658  EAliasFileFilterType alias_type = eGiList);
659 
660 /**
661  * @brief Writes an alias file that aggregates multiple existing BLAST
662  * database volumes. For instance, it can be used to request a top level alias
663  * file for a database called wgs composed of 3 volumes, creating wgs.nal,
664  * which refers to wgs.00, wgs.01, and wgs.02
665  *
666  * @param file_name alias file name to create, it will overwrite any existing
667  * files of that name [in]
668  * @param num_volumes Number of volumes that will be referred to in the alias
669  * file [in]
670  * @param seq_type type of sequences stored in the database [in]
671  * @param title title to use in this alias file [in]
672  */
675  unsigned int num_volumes,
676  CWriteDB::ESeqType seq_type,
677  const string& title = string());
678 
681  const vector<string>& db_names,
682  CWriteDB::ESeqType seq_type,
683  const TSeqRange& oid_range,
684  const string& title = string());
685 
686 /** Consolidate the alias files specified into a group alias file.
687  * @param alias_files list of alias file names with extension to
688  * consolidate [in]
689  * @param delete_source_alias_files if true, the alias files in the alias_files
690  * argument are deleted [in]
691  * @post a group alias file is written in the current working directory
692  * @throws CWriteDBException if no alias files are provided to write group
693  * alias file
694  */
696 void CWriteDB_ConsolidateAliasFiles(const list<string>& alias_files,
697  const string& output_directory = kEmptyStr,
698  bool delete_source_alias_files = false);
699 
700 /** Consolidate all the alias files in the current working directory.
701  * @param delete_source_alias_files if true, the alias files consolidated are
702  * deleted [in]
703  * @throws CWriteDBException if no alias files can be consolidated
704  */
706 void CWriteDB_ConsolidateAliasFiles(bool delete_source_alias_files = false);
707 
708 
710 void CWriteDB_CreateOidMaskDB(const string& input_db,
711  const string & output_db,
712  CWriteDB::ESeqType seq_type,
713  int oid_mask_type,
714  const string & title = string());
715 
717 
718 #endif // OBJTOOLS_BLAST_SEQDB_WRITER___WRITEDB__HPP
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Binary GI or TI List Builder.
Definition: writedb.hpp:456
TContainerType m_Ids
List of identifiers to use.
Definition: writedb.hpp:515
TContainerType::size_type size_type
Standard size_type definition.
Definition: writedb.hpp:462
vector< Int8 > TContainerType
Type definition of the container that stores the IDs for this class.
Definition: writedb.hpp:459
CBinaryListBuilder(CBinaryListBuilder &)
Prevent copy construction.
EIdType m_IdType
Whether to use GIs or TIs.
Definition: writedb.hpp:518
CBinaryListBuilder & operator=(CBinaryListBuilder &)
Prevent copy assignment.
size_type Size() const
Returns the number of IDs stored in an instance of this class.
Definition: writedb.hpp:509
void AppendIdList(const T &a, const T &b)
Add several IDs to the list.
Definition: writedb.hpp:500
EIdType
Identifier types.
Definition: writedb.hpp:465
@ eGi
Genomic id.
Definition: writedb.hpp:467
void AppendId(const Int8 &id)
Add an identifier to the list.
Definition: writedb.hpp:485
CBioseq_Handle –.
`Blob' Class for SeqDB (and WriteDB).
Definition: seqdbblob.hpp:56
This represents a set of masks for a given sequence.
Definition: writedb.hpp:65
bool empty() const
Redefine empty to mean no elements, or every element being empty.
Definition: writedb.hpp:71
vector< SBlastDbMaskData > TParent
Our parent class.
Definition: writedb.hpp:68
CObject –.
Definition: ncbiobj.hpp:180
CSeqVector –.
Definition: seq_vector.hpp:65
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
Builder for BlastDb format column files.
Definition: writedb.hpp:540
CWriteDB_ColumnBuilder(const CWriteDB_ColumnBuilder &)
Prevent the copy constructor.
class CWriteDB_Column * m_Impl
Implementation object.
Definition: writedb.hpp:605
CWriteDB_Column class.
CWriteDB_Impl class.
CWriteDB.
Definition: writedb.hpp:92
ESeqType
Sequence types.
Definition: writedb.hpp:95
int RegisterMaskAlgorithm(EBlast_filter_program program, const string &options=string(), const string &name=string())
Register a type of filtering data found in this database.
CWriteDB_Impl * m_Impl
Implementation object.
Definition: writedb.hpp:447
EIndexType
Whether and what kind of indices to build.
Definition: writedb.hpp:104
int RegisterMaskAlgorithm(const string &id, const string &description=string(), const string &options=string())
Register a type of filtering data found in this database.
int TIndexType
Bitwise OR of "EIndexType".
Definition: writedb.hpp:128
#define T(s)
Definition: common.h:230
const char * file_name[]
Blast defline related defines.
#define basename(path)
Definition: replacements.h:116
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
void Write(CObjectOStream &out, TConstObjectPtr object, const CTypeRef &type)
Definition: serial.cpp:55
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define kEmptyStr
Definition: ncbistr.hpp:123
#define NCBI_XOBJWRITE_EXPORT
Definition: ncbi_export.h:1347
EBlast_filter_program
This defines the possible sequence filtering algorithms to be used in a BLAST database.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
unsigned int a
Definition: ncbi_localip.c:102
Defines BlastDb `Blob' class for SeqDB and WriteDB.
Defines exception class and several constants for SeqDB.
EBlastDbVersion
BLAST database version.
Definition: seqdbcommon.hpp:51
@ eBDB_Version4
Definition: seqdbcommon.hpp:52
@ fNone
USING_SCOPE(objects)
Include definitions from the objects namespace.
void CWriteDB_ConsolidateAliasFiles(const list< string > &alias_files, const string &output_directory=kEmptyStr, bool delete_source_alias_files=false)
Consolidate the alias files specified into a group alias file.
void CWriteDB_CreateAliasFile(const string &file_name, const string &db_name, CWriteDB::ESeqType seq_type, const string &gi_file_name, const string &title=string(), EAliasFileFilterType alias_type=eGiList)
Writes an alias file that restricts a database with a gi list.
void CWriteDB_CreateOidMaskDB(const string &input_db, const string &output_db, CWriteDB::ESeqType seq_type, int oid_mask_type, const string &title=string())
EAliasFileFilterType
Defines the possible filtering types that can be applied to an alias file.
Definition: writedb.hpp:609
@ eTiList
Filter a BLAST database via TIs (Trace IDs)
Definition: writedb.hpp:612
@ eSeqIdList
Filter a BLAST database via a Seq-id list.
Definition: writedb.hpp:613
@ eTaxIdList
Filter a BLAST database via Taxonomy Id list.
Definition: writedb.hpp:614
@ eGiList
Filter a BLAST database via GIs.
Definition: writedb.hpp:611
@ eNoAliasFilterType
Sentinel value.
Definition: writedb.hpp:610
Defines exception class for WriteDB.
Modified on Fri Sep 20 14:57:45 2024 by modify_doxy.py rev. 669887