NCBI C++ ToolKit
win_mask_util.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: win_mask_util.hpp 91951 2020-12-17 12:53:00Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Aleksandr Morgulis
27  *
28  * File Description:
29  * WindowMasker helper functions (prototypes).
30  *
31  */
32 
33 #ifndef C_WIN_MASK_UTIL_HPP
34 #define C_WIN_MASK_UTIL_HPP
35 
36 #include <set>
37 
38 #include <objmgr/bioseq_ci.hpp>
40 #include <objmgr/scope.hpp>
42 #include <objmgr/bioseq_handle.hpp>
45 
46 // #include "win_mask_config.hpp"
47 
49 
51 {
52  public:
53 
54  /**\brief Base class for sets of seq_id representations used with
55  -ids and -exclude-ids options.
56  */
58  {
59  public:
60 
61  /**\brief Object destructor.
62  */
63  virtual ~CIdSet() {}
64 
65  /**\brief Add a string to the id set.
66  \param id_str id to add
67  */
68  virtual void insert( const string & id_str ) = 0;
69 
70  /**\brief Check if the id set is empty.
71  \return true if the id set is empty, false otherwise
72  */
73  virtual bool empty() const = 0;
74 
75  /**\brief Check if the id of the given bioseq is in the id set.
76  \param bsh bioseq handle whose id is to be checked
77  \return true if the id of the bsh is found in the id set;
78  false otherwise
79  */
80  virtual bool find( const objects::CBioseq_Handle & bsh ) const = 0;
81  };
82 
83  /**\brief Implementation of CIdSet that compares CSeq_id handles.
84  */
86  {
87  public:
88 
89  /**\brief Object destructor.
90  */
91  virtual ~CIdSet_SeqId() {}
92 
93  /**\brief See documentation for CIdSet::insert().
94  */
95  virtual void insert( const string & id_str );
96 
97  /**\brief See documentation for CIdSet::empty().
98  */
99  virtual bool empty() const { return idset.empty(); }
100 
101  /**\brief See documentation for CIdSet::find().
102  */
103  virtual bool find( const objects::CBioseq_Handle & ) const;
104 
105  private:
106 
107  /**\internal
108  \brief Container to store id handles.
109  */
111  };
112 
113  /**\brief Implementation of CIdSet that does substring matching.
114  */
116  {
117  public:
118 
119  /**\brief Object destructor.
120  */
121  virtual ~CIdSet_TextMatch() {}
122 
123  /**\brief See documentation for CIdSet::insert().
124  */
125  virtual void insert( const string & id_str );
126 
127  /**\brief See documentation for CIdSet::empty().
128  */
129  virtual bool empty() const { return nword_sets_.empty(); }
130 
131  /**\brief See documentation for CIdSet::find().
132  */
133  virtual bool find( const objects::CBioseq_Handle & ) const;
134 
135  private:
136 
137  /**\internal\brief Set of ids consisting of the same number of words.
138  */
140 
141  /**\internal\brief Split a string into words and return an array
142  of word start offsets.
143 
144  The last element is always one past the end of the last word.
145 
146  \param id_str string to split into words
147  \return vector of word start offsets
148  */
149  static const vector< Uint4 > split( const string & id_str );
150 
151  /**\internal\brief Match an id by string.
152  \param id_str string to match against.
153  \return true if some id in the id set is a whole word substring
154  of id_str, false otherwise
155  */
156  bool find( const string & id_str ) const;
157 
158  /**\internal\brief Match an n-word id by strings.
159  \param id_str n-word id substring
160  \param nwords number of words in id_str - 1
161  \return true if id_str is found in id set, false otherwise
162  */
163  bool find( const string & id_str, Uint4 nwords ) const;
164 
165  /**\internal\brief Set of ids grouped by the number of words.
166  */
167  vector< TNwordSet > nword_sets_;
168  };
169 
170  /** Iterator over bioseqs in input. Handles input given as a list of seq-ids
171  * to be queried from the object manager, in FASTA format, or in BlastDB format.
172  */
174  {
175  public:
176  CInputBioseq_CI(const string & input_file, const string & input_format);
177 
178  /// Move to the next object in iterated sequence
179  CInputBioseq_CI& operator++ (void);
180 
181  /// Check if iterator points to an object
182  DECLARE_OPERATOR_BOOL(m_CurrentBioseq);
183 
184  const objects::CBioseq_Handle& operator* (void) const { return m_CurrentBioseq; }
185  const objects::CBioseq_Handle* operator-> (void) const { return &m_CurrentBioseq; }
186 
187  private:
188  unique_ptr< CNcbiIstream > m_InputFile; // input file
189  unique_ptr< CMaskReader > m_Reader; // reader used for fasta and bdb formats
191  objects::CBioseq_Handle m_CurrentBioseq; // current found Bioseq
192 
193  // disallow copying of object
195  CInputBioseq_CI& operator= (const CInputBioseq_CI&);
196  };
197 
198  /**
199  \brief Check if the given bioseq should be considered for
200  processing.
201 
202  ids and exclude_ids should not be simultaneously non-empty.
203 
204  \param bsh bioseq handle in question
205  \param ids set of seq ids to consider
206  \param exclude_ids set of seq ids excluded from consideration
207  \return true if ids is not empty and bsh is among ids, or else
208  if exclude_ids is not empty and bsh is not among
209  exclude_ids;
210  false otherwise
211  */
212  static bool consider(
213  const objects::CBioseq_Handle & bsh,
214  const CIdSet * ids,
215  const CIdSet * exclude_ids );
216 
217 };
218 
220 
221 #endif
Implementation of CIdSet that compares CSeq_id handles.
virtual bool empty() const
See documentation for CIdSet::empty().
set< objects::CSeq_id_Handle > idset
virtual ~CIdSet_SeqId()
Object destructor.
Implementation of CIdSet that does substring matching.
virtual bool empty() const
See documentation for CIdSet::empty().
vector< TNwordSet > nword_sets_
\brief Set of ids grouped by the number of words.
set< string > TNwordSet
\brief Set of ids consisting of the same number of words.
virtual ~CIdSet_TextMatch()
Object destructor.
Base class for sets of seq_id representations used with -ids and -exclude-ids options.
virtual bool find(const objects::CBioseq_Handle &bsh) const =0
Check if the id of the given bioseq is in the id set.
virtual bool empty() const =0
Check if the id set is empty.
virtual void insert(const string &id_str)=0
Add a string to the id set.
virtual ~CIdSet()
Object destructor.
Iterator over bioseqs in input.
CRef< objects::CScope > m_Scope
DECLARE_OPERATOR_BOOL(m_CurrentBioseq)
Check if iterator points to an object.
unique_ptr< CNcbiIstream > m_InputFile
objects::CBioseq_Handle m_CurrentBioseq
unique_ptr< CMaskReader > m_Reader
CInputBioseq_CI(const CInputBioseq_CI &)
static FILE * input_file
Definition: common.c:35
CVect2< NCBI_PROMOTE(int,U) > operator*(int v1, const CVect2< U > &v2)
Definition: globals.hpp:371
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define NCBI_XALGOWINMASK_EXPORT
Definition: ncbi_export.h:1033
void split(std::vector< std::string > *strVec, const std::string &str_, const std::string &split_)
The Object manager core.
Modified on Fri Sep 20 14:57:28 2024 by modify_doxy.py rev. 669887