NCBI C++ ToolKit
multipattern.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: multipattern.cpp 98741 2022-12-29 18:00:10Z gotvyans $
2  * =========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * =========================================================================
25  *
26  * Author: Sema
27  *
28  * File Description:
29  * Main() of Multipattern Search Code Generator
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiapp.hpp>
36 
38 
39 
41 {
42 public:
43  CMultipatternApp(void);
44  virtual void Init(void);
45  virtual int Run (void);
46 };
47 
48 
50 
51 
53 {
54  // Prepare command line descriptions
55  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
56  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(), "Multipattern Search Code Generator");
57  arg_desc->AddFlag("A", "Generate an array/map data");
58  arg_desc->AddFlag("D", "Generate DOT graph");
59  arg_desc->AddOptionalKey("i", "InFile", "Input File", CArgDescriptions::eInputFile);
60  arg_desc->AddExtra(0, kMax_UInt, "Search Patterns as /regex/ or \"plain string\"", CArgDescriptions::eString);
61  SetupArgDescriptions(arg_desc.release()); // call CreateArgs
62 }
63 
64 
65 static const pair<string, CMultipatternSearch::TFlags> FlagNames[] = {
66  { "#NO_CASE", CMultipatternSearch::fNoCase },
67  { "#BEGIN_STRING", CMultipatternSearch::fBeginString },
68  { "#END_STRING", CMultipatternSearch::fEndString },
69  { "#WHOLE_STRING", CMultipatternSearch::fWholeString },
70  { "#BEGIN_WORD", CMultipatternSearch::fBeginWord },
71  { "#END_WORD", CMultipatternSearch::fEndWord },
72  { "#WHOLE_WORD", CMultipatternSearch::fWholeWord }
73 };
74 
75 
77 {
78  vector<pair<string, CMultipatternSearch::TFlags>> input;
79  const CArgs& args = GetArgs();
80  string fname;
81  string params;
82  if (args["i"]) {
83  fname = args["i"].AsString();
84  ifstream file(fname);
85  if (!file) {
86  cerr << "Cannot open file \'" << fname << "\'\n";
87  return 1;
88  }
89  std::string line;
90  size_t m;
91  while (std::getline(file, line)) {
92  // input line: <query> [<//comment>]
93  // /regex/ // comment ignored
94  // any text // #NO_CASE #WHOLE_WORD etc...
95  string comment;
96  if ((m = line.find("//")) != string::npos) {
97  comment = line.substr(m);
98  line = line.substr(0, m);
99  }
100  if ((m = line.find_first_not_of(" \t")) != string::npos) {
101  line = line.substr(m);
102  }
103  if ((m = line.find_last_not_of(" \t")) != string::npos) {
104  line = line.substr(0, m + 1);
105  }
106  unsigned int flags = 0;
107  for (auto f: FlagNames) {
108  if (comment.find(f.first) != string::npos) {
109  flags |= f.second;
110  }
111  }
112  input.push_back(pair<string, unsigned int>(line, flags));
113  }
114  if ((m = fname.find_last_of("\\/")) != string::npos) {
115  fname = fname.substr(m + 1);
116  }
117  }
118 
119  for (size_t i = 1; i <= args.GetNExtra(); i++) {
120  string param = args["#" + to_string(i)].AsString();
121  params += " " + param;
122  input.push_back(pair<string, unsigned int>(param, 0));
123  }
125  try {
126  FSM.AddPatterns(input);
127  }
128  catch (string s) {
129  cerr << s << "\n";
130  return 1;
131  }
132  if (args["D"]) {
133  FSM.GenerateDotGraph(cout);
134  }
135  else if (args["A"]) {
136  cout << "//\n// This code was generated by the multipattern application.\n//\n// Command line:\n// multipattern -A";
137  if (!fname.empty()) {
138  cout << " -i " << fname;
139  }
140  cout << "\n//\n";
141  FSM.GenerateArrayMapData(cout);
142  }
143  else {
144  cout << "//\n// This code was generated by the multipattern application.\n//\n// Command line:\n// multipattern";
145  if (!fname.empty()) {
146  cout << " -i " << fname;
147  }
148  cout << "\n//\n";
149  FSM.GenerateSourceCode(cout);
150  }
151 
152  return 0;
153 }
154 
155 
156 int main(int argc, const char* argv[])
157 {
158  return CMultipatternApp().AppMain(argc, argv);
159 }
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
virtual int Run(void)
Run the application.
virtual void Init(void)
Initialize the application.
CMultipatternSearch.
static uch flags
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:285
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:799
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1175
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
size_t GetNExtra(void) const
Get the number of unnamed positional (a.k.a. extra) args.
Definition: ncbiargs.hpp:422
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
string
Definition: cgiapp.hpp:687
#define kMax_UInt
Definition: ncbi_limits.h:185
FILE * file
static int input()
int i
int main(int argc, const char *argv[])
USING_NCBI_SCOPE
static const pair< string, CMultipatternSearch::TFlags > FlagNames[]
Simultaneous search of multiple RegEx patterns in the input string.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
Modified on Sat Dec 02 09:20:48 2023 by modify_doxy.py rev. 669887