NCBI C++ ToolKit
query_parse.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef UTIL__QUERY_PARSER_HPP__
2 #define UTIL__QUERY_PARSER_HPP__
3 
4 /* $Id: query_parse.hpp 84663 2018-11-27 18:22:00Z ucko $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Anatoliy Kuznetsov, Mike DiCuccio, Maxim Didenko
30  *
31  * File Description: Query parser implementation
32  *
33  */
34 
35 /// @file query_parse.hpp
36 /// Query string parsing components
37 
38 #include <corelib/ncbi_tree.hpp>
39 #include <corelib/ncbiobj.hpp>
40 
42 
43 class CQueryParseTree;
44 
45 
46 
47 /** @addtogroup QParser
48  *
49  * @{
50  */
51 
52 /// Base class for query node user defined object
53 ///
54 /// User object used to carry field dependent data, metainformation,
55 /// execution time data, etc. It can be a bridge between parser and a query
56 /// execution engine.
57 ///
58 
60 {
61 public:
62 
63  /// Reset user object (for reuse without reallocation)
64  virtual void Reset() = 0;
65 
66  /// String value for debugging
67  virtual string GetVisibleValue() const { return ""; };
68 };
69 
70 
71 /// Query node class
72 ///
73 /// Query node describes an element of the recursive parsing tree
74 /// for the query language.
75 /// (The tree is then interpreted by the execution machine)
76 ///
77 
79 {
80 public:
81 
82  /// Query node type
83  ///
84  enum EType {
85  eNotSet = 0, ///< Produced by the (private) default constructor
86  eIdentifier, ///< Identifier like db.field (Org, Fld12, etc.)
87  eIntConst, ///< Integer const
88  eFloatConst, ///< Floating point const
89  eBoolConst, ///< Boolean (TRUE or FALSE)
90  eString, ///< String ("free text")
91  eFunction, ///< Function
92 
93  // Operation codes:
98  eIn,
109 
110  // SQL specific components
114 
116 
117  eMaxType
118  };
119 
120  /// Source location (points to the position in the original src)
121  /// All positions are 0 based
122  ///
123  struct SSrcLoc
124  {
125  unsigned line; ///< Src line number
126  unsigned pos; ///< Position in the src line
127  unsigned length; ///< Token length (optional)
128 
129  SSrcLoc(unsigned src_line = 0, unsigned src_pos = 0, unsigned len = 0)
130  : line(src_line), pos(src_pos), length(len)
131  {}
132  };
133 
134 public:
135  /// Construct the query node
136  /// @param value Node value
137  /// @param orig_text Value as it appears in the original program
138  /// @param isIdent true when the string is an identifier (no quoting)
139  ///
140  CQueryParseNode(const string& value, const string& orig_text, bool isIdent);
141 
142  explicit CQueryParseNode(Int8 val, const string& orig_text);
143  explicit CQueryParseNode(bool val, const string& orig_text);
144  explicit CQueryParseNode(double val, const string& orig_text);
145  explicit CQueryParseNode(EType op_type, const string& orig_text);
146 
147  /// @name Source reference accessors
148  /// @{
149 
150  /// Set node location in the query text (for error diagnostics)
151  void SetLoc(const SSrcLoc& loc) { m_Location = loc; }
152  void SetLoc(unsigned line, unsigned pos)
153  {  // Only line and pos are assigned here; m_Location.length keeps its previous value
154  m_Location.line = line;
155  m_Location.pos = pos;
156  }
157  const SSrcLoc& GetLoc() const { return m_Location; }
158 
159  /// @}
160 
161 
162  /// @name Value accessors
163  /// @{
164 
165  EType GetType() const { return m_Type; }
166  const string& GetStrValue() const;
167  const string& GetIdent() const;
168  const string& GetOriginalText() const { return m_OrigText; }
169  Int8 GetInt() const;
170  bool GetBool() const;
171  double GetDouble() const;
172 
173  int GetIdentIdx() const;
174  const string& GetOrig() const { return m_OrigText; }
175 
176  /// @}
177 
178  /// TRUE if the node was created explicitly;
179  /// FALSE if the node was created as a result of a default, in which case
180  /// the interpreter has a degree of freedom in execution
181  bool IsExplicit() const { return m_Explicit; }
182  void SetExplicit(bool expl=true) { m_Explicit = expl; }
183 
184  /// Check if node is marked with NOT flag (like != )
185  bool IsNot() const { return m_Not; }
186  void SetNot(bool n=true) { m_Not = n; }
187 
188  /// Returns TRUE if the node type is a logical operation: eNot, eAnd, eOr, eSub or eXor
189  bool IsLogic() const
190  {
191  return m_Type == eNot || m_Type == eAnd || m_Type == eOr ||
192  m_Type == eSub || m_Type == eXor;
193  }
194 
195  /// Returns TRUE if the node type is a value: eIdentifier, eIntConst, eString, eFloatConst or eBoolConst (eFunction is not counted as a value)
196  bool IsValue() const
197  {
198  return m_Type == eIdentifier || m_Type == eIntConst ||
199  m_Type == eString || m_Type == eFloatConst ||
200  m_Type == eBoolConst;
201  }
202 
203  /// Elapsed time in seconds
204  double Elapsed() const { return m_Elapsed; }
205  /// Elapsed time in seconds
206  double GetElapsed() const { return Elapsed(); }
207 
208  /// Set node timing
209  void SetElapsed(double e) { m_Elapsed = e; }
210 
211 
212  /// @name User object operations
213  ///
214  /// Methods to associate application specific data with
215  /// parsing tree node.
216  /// Data should be encapsulated into a user object derived
217  /// from CQueryParseBaseUserObject.
218  ///
219  /// @{
220 
221  /// Get user object
223  { return m_UsrObj.GetPointer(); }
225  { return m_UsrObj.GetPointer(); }
226 
227  /// Set user object. Query node takes ownership.
228  void AttachUserObject(IQueryParseUserObject* obj);
230  { AttachUserObject(obj); }
231 
232  /// Reset the associated user object
233  /// (see IQueryParseUserObject::Reset())
234  ///
235  void ResetUserObject();
236 
237  /// @}
238 
239  /// Return query node type as a string (for debugging output)
240  string GetNodeTypeAsString() const;
241  static string GetNodeTypeAsString(EType node_type);
242 
243 private:
244  // required for use with CTreeNode<>
246  friend class CTreeNode<CQueryParseNode>;
247 
248 private:
250  union {
254  };
255  string m_Value;
256  string m_OrigText;
258  bool m_Not;
259  SSrcLoc m_Location; ///< Reference to original location in query
260  double m_Elapsed; ///< Execution timing
261 
263 };
264 
265 
266 /// Query tree and associated utility methods
267 ///
269 {
270 public:
272 public:
273  /// Construct the query. Takes ownership of the clause.
274  explicit CQueryParseTree(TNode *clause=0);
275  virtual ~CQueryParseTree();
276 
277 
278  /// Case sensitive parsing
279  ///
280  enum ECase {
281  eCaseSensitiveUpper, ///< Operators must come in upper case (AND)
282  eCaseInsensitive ///< Case insensitive parsing (AnD)
283  };
284 
285  /// Level of tolerance to syntax errors and problems
286  ///
288  eSyntaxCheck, ///< Best possible check for errors
289  eSyntaxRelax ///< Relaxed parsing rules
290  };
291 
292  /// List of keywords recognised as functions
293  typedef vector<string> TFunctionNames;
294 
295  /// Query parser front-end function
296  ///
297  /// @param query_str
298  /// Query string subject of parsing
299  /// @param case_sense
300  /// Case sensitivity (AND, AnD, etc.)
301  /// @param syntax_check
302  /// Sensitivity to syntax errors
303  /// @param verbose
304  /// Debug print switch
305  /// @param functions
306  /// List of names recognised as functions
307  ///
308  void Parse(const char* query_str,
309  ECase case_sense = eCaseInsensitive,
310  ESyntaxCheck syntax_check = eSyntaxCheck,
311  bool verbose = false,
312  const TFunctionNames& functions = TFunctionNames(0),
313  unsigned line = 0,
314  unsigned linePos = 0);
315 
316 
317  /// Replace current query tree with the new one.
318  /// CQueryParseTree takes ownership of the passed argument.
319  ///
320  void SetQueryTree(TNode* qtree);
321  const TNode* GetQueryTree() const { return m_Tree.get(); }
322  TNode* GetQueryTree() { return m_Tree.get(); }
323 
324  /// Reset all user objects attached to the parsing tree
325  void ResetUserObjects();
326 
327 
328  /// @name Static node creation functions -
329  /// class factories working as virtual constructors
330  /// @{
331 
332  /// Create Identifier node or string node
333  virtual
334  TNode* CreateNode(const string& value,
335  const string& orig_text,
336  bool isIdent);
337  virtual TNode* CreateNode(Int8 value, const string& orig_text);
338  virtual TNode* CreateNode(bool value, const string& orig_text);
339  virtual TNode* CreateNode(double value, const string& orig_text);
340  virtual
341  TNode* CreateNode(CQueryParseNode::EType op,
342  TNode* arg1,
343  TNode* arg2,
344  const string& orig_text="");
345  /// Create function node
346  virtual
347  TNode* CreateFuncNode(const string& func_name);
348 
349  /// @}
350 
351  /// Print the query tree (debugging)
352  void Print(CNcbiOstream& os) const;
353 
354 private:
357 private:
358  unique_ptr<TNode> m_Tree;
359 };
360 
361 /// Query parser exceptions
362 ///
364 {
365 public:
366  enum EErrCode {
370  eUnknownFunction
371  };
372 
373  virtual const char* GetErrCodeString(void) const override
374  {
375  switch (GetErrCode())  // translate each parser-specific error code into its enumerator name
376  {
377  case eIncorrectNodeType: return "eIncorrectNodeType";
378  case eParserError: return "eParserError";
379  case eCompileError: return "eCompileError";
380  case eUnknownFunction: return "eUnknownFunction";
381 
382  default: return CException::GetErrCodeString();  // any other code is delegated to CException
383  }
384  }
385 
387 };
388 
389 
390 /* @} */
391 
393 
394 
395 #endif // UTIL__QUERY_PARSER_HPP__
396 
397 
CObject –.
Definition: ncbiobj.hpp:180
Query parser exceptions.
Query node class.
Definition: query_parse.hpp:79
Query tree and associated utility methods.
definition of a Culling tree
Definition: ncbi_tree.hpp:100
Base class for query node user defined object.
Definition: query_parse.hpp:60
void Print(const CCompactSAMApplication::AlignInfo &ai)
EErrCode
Error types that an application can generate.
Definition: ncbiexpt.hpp:884
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
unsigned length
Token length (optional)
void SetExplicit(bool expl=true)
void SetElapsed(double e)
Set node timing.
void SetLoc(unsigned line, unsigned pos)
const string & GetOrig() const
const IQueryParseUserObject * GetUserObject() const
Get user object.
SSrcLoc m_Location
Reference to original location in query.
void SetLoc(const SSrcLoc &loc)
Set node location in the query text (for error diagnostics)
double GetElapsed() const
Elapsed time in seconds.
const string & GetOriginalText() const
CQueryParseTree & operator=(const CQueryParseTree &)
void SetNot(bool n=true)
ECase
Case sensitive parsing.
bool IsNot() const
Check if node is marked with NOT flag (like != )
double m_Elapsed
Execution timing.
unsigned pos
Position in the src line.
TNode * GetQueryTree()
NCBI_EXCEPTION_DEFAULT(CQueryParseException, CException)
const TNode * GetQueryTree() const
EType
Query node type.
Definition: query_parse.hpp:84
vector< string > TFunctionNames
List of keywords recognised as functions.
SSrcLoc(unsigned src_line=0, unsigned src_pos=0, unsigned len=0)
bool IsValue() const
Returns TRUE if node is value (INT, String, etc.)
ESyntaxCheck
Level of tolerance to syntax errors and problems.
void SetUserObject(IQueryParseUserObject *obj)
double Elapsed() const
Elapsed time in seconds.
virtual void Reset()=0
Reset user object (for reuse without reallocation)
unsigned line
Src line number.
bool IsExplicit() const
TRUE if the node was created explicitly; FALSE if the node was created as a result of a default and the interpreter has a degree of freedom in execution.
virtual const char * GetErrCodeString(void) const override
Get error code interpreted as text.
CTreeNode< CQueryParseNode > TNode
CQueryParseTree(const CQueryParseTree &)
int GetIdentIdx() const
CRef< IQueryParseUserObject > m_UsrObj
IQueryParseUserObject * GetUserObject()
EType GetType() const
unique_ptr< TNode > m_Tree
const SSrcLoc & GetLoc() const
virtual string GetVisibleValue() const
String value for debugging.
Definition: query_parse.hpp:67
bool IsLogic() const
Returns TRUE if node describes logical operation (AND, OR, etc.)
@ eCaseSensitiveUpper
Operators must come in upper case (AND)
@ eFunction
Function.
Definition: query_parse.hpp:91
@ eIdentifier
Identifier like db.field (Org, Fld12, etc.)
Definition: query_parse.hpp:86
@ eFloatConst
Floating point const.
Definition: query_parse.hpp:88
@ eIntConst
Integer const.
Definition: query_parse.hpp:87
@ eBoolConst
Boolean (TRUE or FALSE)
Definition: query_parse.hpp:89
@ eString
String ("free text")
Definition: query_parse.hpp:90
@ eSyntaxCheck
Best possible check for errors.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
yy_size_t n
int len
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
true_type verbose
Definition: processing.cpp:890
NCBI_XUTIL_EXPORT
Parameter to control printing diagnostic message about conversion of static array data from a differe...
Definition: static_set.hpp:72
Source location (points to the position in the original src) All positions are 0 based.
Modified on Sun Jul 14 04:54:47 2024 by modify_doxy.py rev. 669887