NCBI C++ ToolKit
macro_parse.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef GUI_OBJUTILS___MACRO_PARSER__HPP
2 #define GUI_OBJUTILS___MACRO_PARSER__HPP
3 
4 /* $Id: macro_parse.hpp 45170 2020-06-11 18:16:13Z asztalos $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Anatoly Osipov
30  *
31  * File Description: Macro parser
32  *
33  */
34 
35 /// @file macro_parse.hpp
36 /// Macro parser components
37 
38 #include <corelib/ncbistl.hpp>
39 #include <corelib/ncbistd.hpp>
40 
42 
43 #include <gui/gui_export.h>
45 
46 #include <set>
47 
48 /** @addtogroup GUI_MACRO_SCRIPTS_UTIL
49  *
50  * @{
51  */
52 
54 BEGIN_SCOPE(macro)
55 
56 /// Class for lexical tokens extraction from macro string
57 ///
58 /// sequentially gets macro lexems from the input string
59 class CScanner
60 {
61 private:
62  /// Max buffer size for lexem string info
63  static const int MAX_STRING_LEXEM_LENGTH = 4094;
64 public:
65  enum EToken {
66  eTokenILLEGAL = -1, ///< Error
67  eTokenEOS = 0, ///< Next token is EOS (end of the scanned string)
68 
69  eTokenKWMACRO , ///< Keyword "macro"
70  eTokenKWVARS , ///< Keyword "var" or "vars"
71  eTokenKWFOR , ///< Keyword "for" as a part of "for each"
72  eTokenKWEACH , ///< Keyword "each" as a part of "for each"
73  eTokenKWFROM , ///< Keyword "from" for specifying a named annotation
74  eTokenKWCHOICE , ///< Keyword "choice"
75  eTokenKWRANGE , ///< Keyword "range"
76  eTokenKWWHERE , ///< Keyword "where"
77  eTokenKWDO , ///< Keyword "do"
78  eTokenKWDONE , ///< Keyword "done"
79  eTokenKWDOPARL , ///< Keyword "do_p" (do in parallel)
80  eTokenKWPARLAUTO , ///< Keyword "auto" used with the "do_p" keyword
81 
82  eTokenFUNCTION , ///< Built-in function
83  eTokenIDENT , ///< Identifier
84  eTokenASNSELECTOR , ///< ASN selector
85  eTokenNAMEDANNOT , ///< Named annotation
86  eTokenINTNUMBER , ///< Integer number
87  eTokenFLOATNUMBER , ///< Floating point number
88  eTokenSTRING , ///< String in double quotes
89  eTokenASK , ///< String enclosed in % symbols
90  eTokenBOOLCONST , ///< Boolean const value
91 
92  eTokenASSIGNMENT , ///< '=' token
93  eTokenLEFTPAREN , ///< '(' token
94  eTokenRIGHTPAREN , ///< ')' token
95  eTokenLEFTBRACKET , ///< '[' token
96  eTokenRIGHTBRACKET , ///< ']' token
97  eTokenLEFTBRACE , ///< '{' token
98  eTokenRIGHTBRACE , ///< '}' token
99  eTokenCOMMA , ///< ',' token
100  eTokenSEMICOLON , ///< ';' token
101 
102  eTokenMINUS , ///< '-' token
103  eTokenPLUS , ///< '+' token
104 
105  eTokenDOT , ///< '.' token
106  eTokenMULT , ///< '*' token
107  eTokenDIV , ///< '/' token
108  eTokenGREATER , ///< '>' token
109  eTokenLESS , ///< '<' token
110  eTokenNOTLESS , ///< '>=' token
111  eTokenNOTGREATER , ///< '<=' token
112  eTokenNOTEQUAL , ///< '<>' token
113 
114  eTokenCOMMENT ///< Comment, read only on request (presumably via Next(parseComment = true) - TODO confirm)
115  };
116 
117  //Types
118  struct SLocation { // Location inside the scanned macro text; used for lexem positions and error positions
119  SLocation() : m_Pos(-1), m_Line(0), m_Column(0) {} // set invalid: m_Pos == -1, line and column 0
120  SLocation(Int4 pos, int line, int col) : m_Pos(pos), m_Line(line), m_Column(col) {} // line and col are stored as unsigned
121 
122  void Reset() { m_Pos = -1; m_Line = 0; m_Column = 0; } // set invalid (same state as the default constructor)
123  void IncLine() { m_Pos++; m_Line++; m_Column = 1; } // advance one position, onto column 1 of the next line
124  void IncColumn() { m_Pos++; m_Column++; } // advance one position within the current line
125 
126  operator CQueryParseNode::SSrcLoc() const // conversion from 1-based line/column to SSrcLoc
127  {
128  return CQueryParseNode::SSrcLoc(m_Line - 1, m_Column - 1); // SSrcLoc positions are 0 based. NOTE(review): for an invalid location (line/column 0) the unsigned subtraction wraps around
129  }
130 
131  Int4 m_Pos; // Position in string. Zero based; -1 when invalid.
132  unsigned m_Line; // Line number. The first line is 1; 0 when invalid.
133  unsigned m_Column; // Column number. The first column is 1; 0 when invalid.
134  };
135 
136  struct SData {
140  char m_Str[MAX_STRING_LEXEM_LENGTH + 1];
141 
142  void Init()
143  {
144  m_IntNumber = 0;
145  m_FloatNumber = 0.0;
146  *m_Str = '\0';
147  }
148  };
149 
150  struct SLexem {
152  SLocation m_Position; // actual position of the token (it can be also white space)
153 
154  SLocation m_Start; // starting position of the token
155 
157 
158  SLocation m_ErrorLoc; // the position of the error
159  string m_ErrorMsg;
160 
161  string GetStringRepr() const // Returns the lexem value as text; which field of m_Data is used depends on m_Token
162  {
163  CNcbiOstrstream oss;
164  switch (m_Token) {
165  case eTokenBOOLCONST: // boolean constant: stream the bool field
166  oss << m_Data.m_BoolValue;
167  break;
168  case eTokenINTNUMBER: // integer literal: stream the integer field
169  oss << m_Data.m_IntNumber;
170  break;
171  case eTokenFLOATNUMBER: // floating point literal: stream the float field
172  oss << m_Data.m_FloatNumber;
173  break;
174  default: // every other token: stream the string buffer
175  oss << m_Data.m_Str;
176  }
177  return string(CNcbiOstrstreamToString(oss));
178  }
179  };
180 
181 public:
182  // Constructor: stores the pointer to sMacroText (the text itself is not copied) and initializes the scanner state.
183  CScanner(const char* sMacroText) : m_StrMacro(sMacroText)
184  { x_InitScanner(); }
185  // Destructor
186  virtual ~CScanner(){}
187 
188  void Reset(const char* sMacroText) { m_StrMacro = sMacroText; x_InitScanner(); } // switch to another text and re-initialize the scanner state
189 
190  // Read the next lexem (any lexem except an ASN selector).
191  // parseComment presumably makes comments come back as eTokenCOMMENT - TODO confirm.
192  int Next(bool parseComment = false);
193  // Read an ASN selector lexem.
194  //
195  int NextASNSelector();
196  // Read a named annotation lexem.
197  //
198  int NextNamedAnnot();
199  // Get the code of the previously read lexem.
200  //
201  CScanner::EToken GetToken();
202  // Get meta info about the current lexem (returns a reference to m_CurrentLexem).
203  //
204  const SLexem& GetLexemInfo() const { return m_CurrentLexem; }
205  // Get the error location in the input string (a copy of m_CurrentLexem.m_ErrorLoc).
206  //
207  SLocation GetErrorLocation() const { return m_CurrentLexem.m_ErrorLoc; }
208 
209 private:
210  // Reset scanner to the initial state. Scanned string is untouched.
211  //
212  void x_InitScanner();
213 
214  // Get the content of the string limited by the first symbol.
215  //
216  int x_GetQuotedString();
217  // Skip whitespaces and reset lexem meta info.
218  //
219  void x_InitForNextLexem();
220 
221  int x_SetCurrentLexem(Int4& pos, CScanner::EToken token);
222  // Pointer to the string being scanned.
223  //
224  const char* m_StrMacro;
225  // Pointer to the lexem meta info.
226  //
228 
229 private:
230  /// Prohibit copy constructor and assignment operator
233 };
234 
235 
236 
237 /// Class provides macro language interface for bulk bio-editing
238 ///
239 /// Parses text containing macro(s) into binary macro representation
240 /// (instance of CMacroRep).
241 /// While parsing it uses CScanner for extracting lexems from a string and
242 /// it uses QParse library (util/qparse) for parsing Where-clause of a macro
244 {
245 public:
246  /// Type for the list of functions in Where/Do clauses
247  ///
249 
251  eErrorLineAndColumn, ///< Report the line and column for the error (default)
252  eErrorAbsolutePos ///< Report the position of the error from the beginning of the script
253  };
254 
255  static const char* sm_Automatic;
256 public:
257  /// Constructor
258  /// Starts with no macro text, no macro representation and line/column error reporting, then calls x_InitParser().
259  CMacroParser() : m_StrMacro(nullptr), m_Scanner(nullptr), m_MacroRep(nullptr), m_ErrorReportType(eErrorLineAndColumn)
260  { x_InitParser(); }
261  /// Destructor
262  /// Frees the macro representation the parser still owns. After DetachMacroRep() the pointer is null and delete is a no-op.
263  virtual ~CMacroParser() { delete m_MacroRep; }
264 
265  /// initializes lists of known functions that should be recognized by the parser when
266  /// going through WHERE and DO clauses
267  ///
268  /// @param wh_funcs
269  /// functions that should be recognized in WHERE
270  /// @param do_funcs
271  /// functions that should be recognized in DO
272  void SetFunctionNames(const TFunctionNamesList& wh_funcs, const TFunctionNamesList& do_funcs);
273  /// get list of where functions
274  ///
275  const TFunctionNamesList& GetWhereFunctions() const { return m_WhereFunctionsList; }
276  /// get list of do functions
277  ///
278  const TFunctionNamesList& GetDoFunctions() const { return m_DoFunctionsList; }
279 
280  /// Resets the parser and sets the macro text to be parsed
281  /// Only the pointer is stored (the text is not copied); the scanner is then advanced once via Next().
282  void SetSource(const char* sMacroText)
283  { m_StrMacro = sMacroText; x_InitParser(); m_Scanner.Next(); }
284 
285  /// Parse the macro and build its representation
286  /// Function throws CMacroException if parsing is unsuccessful
287  ///
288  /// @param bSingleMacroMode
289  /// if it's true then function will throw an exception if anything but EOF
290  /// exists after one parsed macro.
291  /// @return
292  /// false if already at EOF and nothing was parsed
293  bool Parse(bool bSingleMacroMode = true, CQueryExec* exec=NULL);
294 
295  /// Detach macro representation for further processing
296  /// Caller is responsible for deletion of returned object
297  //!! maybe use CRef instead [?]
299  CMacroRep* temp = m_MacroRep;
300  m_MacroRep = 0;
301  return temp;
302  }
303 
304  /// Get the error location from the most recent parsing
305  ///
306  //CScanner::SLocation GetErrorLocation();
307  /// Set the report type of error location
308  ///
309  void SetErrLocReportType(EParsingErrReportType type) { m_ErrorReportType = type; }
310 
311 private:
312  /// string to be parsed
313  ///
314  const char* m_StrMacro;
315  /// scanner for extracting tokens and meta info
316  ///
318  /// list of functions available for Where clause
319  ///
321  /// list of functions available for Do clause
322  ///
324  /// list of run-time variables met in the left side of assignment in DO clause
325  ///
327  /// binary representation of macro while parsing
328  ///
330 
331  // for error reporting:
332  /// error location
333  ///
335  /// Specifies the type of error reporting
336  ///
338 
339 private:
340  /// Resets parser to initial state. String to be parsed is not reset.
341  ///
342  void x_InitParser();
343  /// Entry point to parse whole script.
344  ///
345  void x_ParseScript();
346  /// Entry point to parse vars section.
347  ///
348  void x_ParseVariables();
349  /// Entry point to parse choice section.
350  ///
351  void x_ParseChoice();
352  /// Entry point to parse range
353  ///
354  void x_ParseRange();
355  /// Entry point to parse number of threads to be used
356  ///
357  void x_ParseThreadCount();
358  /// Entry point to parse body section.
359  ///
360  void x_ParseBody();
361  /// Entry point to parse where clause.
362  ///
363  void x_ParseWhere();
364  /// Entry point to parse where clause in function call.
365  ///
366  void x_ParseFunctionWhere();
367  /// Entry point to parse DO-DONE section.
368  ///
369  void x_ParseDo();
370  /// Entry point to parse function call.
371  ///
372  void x_ParseFunctionCall(CQueryParseTree::TNode *assignmentNode);
373 
374  void x_ParseMetaData();
375 
376  int x_ParseSign();
377  /// Entry point to parse numeric var value.
378  ///
379  void x_ParseNumericVarValue(const string& var_name, int sign);
380  /// Parses numeric value of a choice variable
381  ///
382  void x_ParseNumericChoiceValue(int sign);
383  /// Parses numeric value of a function parameter
384  ///
385  void x_ParseNumericParameterValue(int sign, CScanner::SLocation start);
386  /// Checks whether its parameter is a name of the registered functions
387  ///
388  bool x_IsFunction(const string& name) const;
389  /// Extracts where clause as a string
390  ///
391  void x_GetWhereClause(Int4 istart, Int4 iend, string& result) const;
392  /// Translates Where clause into a tree.
393  /// User is responsible for deletion of its result.
394  ///
395  CQueryParseTree* x_QParseWhere(const string& str, unsigned line, unsigned linePos) const;
396  /// Sorts the nodes according to a weight function for faster tree evaluation
397  ///
398  void x_SortParseTree(CQueryParseTree::TNode& node) const;
399  /// Extracts Where clause string and builds Where clause tree;
400  /// it uses terminator token to determine the end of Where clause.
401  /// User is responsible for deletion of a resulting tree.
402  ///
403  void x_InternalParseWhere(const CScanner::EToken terminator, string& str, CQueryParseTree*& tree);
404 
405  void x_UpdateErrorLocation();
406 
407  string x_GetErrorLocation() const;
408 
409  string x_GetError();
410 private:
411  /// Prohibit copy constructor and assignment operator
414 };
415 
416 END_SCOPE(macro)
418 
419 /* @} */
420 
421 #endif // GUI_OBJUTILS___MACRO_PARSER__HPP
Class provides macro language interface for bulk bio-editing.
Class for parsed macro representation.
Definition: macro_rep.hpp:254
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
Query execution environment holds the function registry and the execution context.
Definition: query_exec.hpp:144
Query tree and associated utility methods.
Class for lexical tokens extraction from macro string.
Definition: macro_parse.hpp:60
definition of a Culling tree
Definition: ncbi_tree.hpp:100
Include a standard set of the NCBI C++ Toolkit most basic headers.
static int type
Definition: getdata.c:31
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
CMacroParser(const CMacroParser &)
Prohibit copy constructor and assignment operator.
static const char * sm_Automatic
const SLexem & GetLexemInfo() const
void SetSource(const char *sMacroText)
Reset the parser and sets macro text to be parsed.
SLocation GetErrorLocation() const
CMacroRep * m_MacroRep
binary representation of macro while parsing
SLexem m_CurrentLexem
virtual ~CScanner()
CMacroRep * DetachMacroRep()
Detach macro representation for further processing Caller is responsible for deletion of returned obje...
TFunctionNamesList m_DoFunctionsList
list of functions available for Do clause
void SetErrLocReportType(EParsingErrReportType type)
Get the error location from the most recent parsing.
CScanner(const char *sMacroText)
const TFunctionNamesList & GetWhereFunctions() const
get list of where functions
CQueryParseTree::TFunctionNames TFunctionNamesList
Type for the list of functions in Where/Do clauses.
CScanner & operator=(const CScanner &)
CScanner::SLocation m_ErrorLocation
error location
const TFunctionNamesList & GetDoFunctions() const
get list of do functions
string GetStringRepr() const
set< string > m_RTVars
list of run-time variables met in the left side of assignment in DO clause
CScanner(const CScanner &)
Prohibit copy constructor and assignment operator.
CMacroParser & operator=(const CMacroParser &)
CScanner m_Scanner
scanner for extracting tokens and meta info
SLocation m_Position
SLocation(Int4 pos, int line, int col)
CMacroParser()
Constructor.
EParsingErrReportType m_ErrorReportType
Specifies the type of error reporting.
virtual ~CMacroParser()
Destructor.
TFunctionNamesList m_WhereFunctionsList
list of functions available for Where clause
void Reset(const char *sMacroText)
const char * m_StrMacro
const char * m_StrMacro
string to be parsed
char m_Str[MAX_STRING_LEXEM_LENGTH+1]
SLocation m_ErrorLoc
@ eErrorLineAndColumn
Report the line and column for the error (default)
@ eTokenKWDONE
Keyword done.
Definition: macro_parse.hpp:78
@ eTokenKWVARS
Keyword var or vars.
Definition: macro_parse.hpp:70
@ eTokenKWDOPARL
Keyword do_p(arallel)
Definition: macro_parse.hpp:79
@ eTokenKWPARLAUTO
Keyword auto used with do_p keyword.
Definition: macro_parse.hpp:80
@ eTokenNOTEQUAL
'<>' token
@ eTokenRIGHTPAREN
')' token
Definition: macro_parse.hpp:94
@ eTokenKWDO
Keyword do.
Definition: macro_parse.hpp:77
@ eTokenCOMMA
',' token
Definition: macro_parse.hpp:99
@ eTokenKWEACH
Keyword each as a part of for each.
Definition: macro_parse.hpp:72
@ eTokenKWCHOICE
Keyword choice.
Definition: macro_parse.hpp:74
@ eTokenNOTLESS
'>=' token
@ eTokenASNSELECTOR
ASN selector.
Definition: macro_parse.hpp:84
@ eTokenLEFTPAREN
'(' token
Definition: macro_parse.hpp:93
@ eTokenMULT
'*' token
@ eTokenKWMACRO
Keyword macro.
Definition: macro_parse.hpp:69
@ eTokenBOOLCONST
Boolean const value.
Definition: macro_parse.hpp:90
@ eTokenMINUS
'-' token
@ eTokenDIV
'/' token
@ eTokenLEFTBRACE
'{' token
Definition: macro_parse.hpp:97
@ eTokenLEFTBRACKET
'[' token
Definition: macro_parse.hpp:95
@ eTokenFLOATNUMBER
Floating point number.
Definition: macro_parse.hpp:87
@ eTokenKWWHERE
Keyword where.
Definition: macro_parse.hpp:76
@ eTokenASSIGNMENT
'=' token
Definition: macro_parse.hpp:92
@ eTokenNOTGREATER
'<=' token
@ eTokenINTNUMBER
Integer number.
Definition: macro_parse.hpp:86
@ eTokenKWFROM
Keyword from for specifying named annotation.
Definition: macro_parse.hpp:73
@ eTokenSTRING
String in double quotes.
Definition: macro_parse.hpp:88
@ eTokenFUNCTION
Built-in function.
Definition: macro_parse.hpp:82
@ eTokenLESS
'<' token
@ eTokenKWRANGE
Keyword range.
Definition: macro_parse.hpp:75
@ eTokenASK
String enclosed in % symbols.
Definition: macro_parse.hpp:89
@ eTokenGREATER
'>' token
@ eTokenRIGHTBRACE
'}' token
Definition: macro_parse.hpp:98
@ eTokenRIGHTBRACKET
']' token
Definition: macro_parse.hpp:96
@ eTokenSEMICOLON
';' token
@ eTokenIDENT
Identifier.
Definition: macro_parse.hpp:83
@ eTokenKWFOR
Keyword for as a part of for each.
Definition: macro_parse.hpp:71
@ eTokenDOT
'.' token
@ eTokenNAMEDANNOT
Named annotation.
Definition: macro_parse.hpp:85
@ eTokenPLUS
'+' token
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
vector< string > TFunctionNames
List of keywords recognised as functions.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_GUIOBJUTILS_EXPORT
Definition: gui_export.h:512
Defines to provide correct exporting from DLLs in Windows.
#define nullptr
Definition: ncbimisc.hpp:45
The NCBI C++/STL use hints.
static char terminator
Definition: njn_ioutil.cpp:56
Query parser execution implementations.
static const char * str(char *buf, int n)
Definition: stats.c:84
Source location (points to the position in the original src) All positions are 0 based.
Definition: type.c:6
else result
Definition: token2.c:20
Modified on Tue Feb 27 05:54:06 2024 by modify_doxy.py rev. 669887