NCBI C++ ToolKit
pcre2grep.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*************************************************
2 * pcre2grep program *
3 *************************************************/
4 
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9 
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15 
16  Copyright (c) 1997-2023 University of Cambridge
17 
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21 
22  * Redistributions of source code must retain the above copyright notice,
23  this list of conditions and the following disclaimer.
24 
25  * Redistributions in binary form must reproduce the above copyright
26  notice, this list of conditions and the following disclaimer in the
27  documentation and/or other materials provided with the distribution.
28 
29  * Neither the name of the University of Cambridge nor the names of its
30  contributors may be used to endorse or promote products derived from
31  this software without specific prior written permission.
32 
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46 
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50 
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57 
58 #include <sys/types.h>
59 #include <sys/stat.h>
60 
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62  && !defined WIN32 && !defined(__CYGWIN__)
63 #define WIN32
64 #endif
65 
66 /* Some CMake configurations still define it */
67 #if defined(__CYGWIN__) && defined(WIN32)
68 #undef WIN32
69 #endif
70 
71 #ifdef __VMS
72 #include clidef
73 #include descrip
74 #include lib$routines
75 #endif
76 
77 #ifdef WIN32
78 #include <io.h> /* For _setmode() */
79 #include <fcntl.h> /* For _O_BINARY */
80 #endif
81 
82 #if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83 #ifdef WIN32
84 #include <process.h>
85 #else
86 #include <sys/wait.h>
87 #endif
88 #endif
89 
90 #ifdef HAVE_UNISTD_H
91 #include <unistd.h>
92 #endif
93 
94 #ifdef SUPPORT_LIBZ
95 #include <zlib.h>
96 #endif
97 
98 #ifdef SUPPORT_LIBBZ2
99 #include <bzlib.h>
100 #endif
101 
102 #define PCRE2_CODE_UNIT_WIDTH 8
103 #include "pcre2.h"
104 
105 /* Older versions of MSVC lack snprintf(). This define allows for
106 warning/error-free compilation and testing with MSVC compilers back to at least
107 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
108 
109 #if defined(_MSC_VER) && (_MSC_VER < 1900)
110 #define snprintf _snprintf
111 #endif
112 
113 /* old VC and older compilers don't support %td or %zu, and even some that claim to
114 be C99 don't support it (hence DISABLE_PERCENT_ZT). */
115 
116 #if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
117  (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L))
118 #ifdef _WIN64
119 #define SIZ_FORM "llu"
120 #else
121 #define SIZ_FORM "lu"
122 #endif
123 #else
124 #define SIZ_FORM "zu"
125 #endif
126 
127 #define FALSE 0
128 #define TRUE 1
129 
130 typedef int BOOL;
131 
132 #define DEFAULT_CAPTURE_MAX 50
133 
134 #if BUFSIZ > 8192
135 #define MAXPATLEN BUFSIZ
136 #else
137 #define MAXPATLEN 8192
138 #endif
139 
140 #define FNBUFSIZ 2048
141 #define ERRBUFSIZ 256
142 
143 /* Values for the "filenames" variable, which specifies options for file name
144 output. The order is important; it is assumed that a file name is wanted for
145 all values greater than FN_DEFAULT. */
146 
148 
149 /* File reading styles */
150 
152 
153 /* Actions for the -d and -D options */
154 
156 enum { DEE_READ, DEE_SKIP };
157 
158 /* Actions for special processing options (flag bits) */
159 
160 #define PO_WORD_MATCH 0x0001
161 #define PO_LINE_MATCH 0x0002
162 #define PO_FIXED_STRINGS 0x0004
163 
164 /* Binary file options */
165 
167 
168 /* Return values from decode_dollar_escape() */
169 
171 
172 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
173 environments), a warning is issued if the value of fwrite() is ignored.
174 Unfortunately, casting to (void) does not suppress the warning. To get round
175 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
176 apply to fprintf(). */
177 
178 #define FWRITE_IGNORE(a,b,c,d) if (fwrite(a,b,c,d)) {}
179 
180 /* Under Windows, we have to set stdout to be binary, so that it does not
181 convert \r\n at the ends of output lines to \r\r\n. However, that means that
182 any messages written to stdout must have \r\n as their line terminator. This is
183 handled by using STDOUT_NL as the newline string. We also use a normal double
184 quote for the example, as single quotes aren't usually available. */
185 
186 #ifdef WIN32
187 #define STDOUT_NL "\r\n"
188 #define STDOUT_NL_LEN 2
189 #define QUOT "\""
190 #else
191 #define STDOUT_NL "\n"
192 #define STDOUT_NL_LEN 1
193 #define QUOT "'"
194 #endif
195 
196 /* This code is returned from decode_dollar_escape() when $n is encountered,
197 and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
198 point. */
199 
200 #define STDOUT_NL_CODE 0x7fffffffu
201 
202 
203 
204 /*************************************************
205 * Global variables *
206 *************************************************/
207 
208 static const char *colour_string = "1;31";
209 static const char *colour_option = NULL;
210 static const char *dee_option = NULL;
211 static const char *DEE_option = NULL;
212 static const char *locale = NULL;
213 static const char *newline_arg = NULL;
214 static const char *group_separator = "--";
215 static const char *om_separator = NULL;
216 static const char *stdin_name = "(standard input)";
217 static const char *output_text = NULL;
218 
219 static char *main_buffer = NULL;
220 
221 static const char *printname_nl = STDOUT_NL; /* Changed to NULL for -Z */
222 static int printname_colon = ':'; /* Changed to 0 for -Z */
223 static int printname_hyphen = '-'; /* Changed to 0 for -Z */
224 
225 static int after_context = 0;
226 static int before_context = 0;
228 static int both_context = 0;
229 static int endlinetype;
230 
231 static int count_limit = -1; /* Not long, so that it works with OP_NUMBER */
232 static unsigned long int counts_printed = 0;
233 static unsigned long int total_count = 0;
234 
238 
239 #ifdef WIN32
240 static int dee_action = dee_SKIP;
241 #else
242 static int dee_action = dee_READ;
243 #endif
244 
245 static int DEE_action = DEE_READ;
246 static int error_count = 0;
247 static int filenames = FN_DEFAULT;
248 
249 #ifdef SUPPORT_PCRE2GREP_JIT
250 static BOOL use_jit = TRUE;
251 #else
252 static BOOL use_jit = FALSE;
253 #endif
254 
255 static const uint8_t *character_tables = NULL;
256 
262 
267 static int match_data_toggle;
270 
275 #ifdef WIN32
276 static BOOL do_ansi = FALSE;
277 #endif
280 static BOOL invert = FALSE;
284 static BOOL no_ucp = FALSE;
285 static BOOL number = FALSE;
288 static BOOL quiet = FALSE;
290 static BOOL silent = FALSE;
291 static BOOL utf = FALSE;
293 
295 
296 
297 /* Structure for list of --only-matching capturing numbers. */
298 
299 typedef struct omstr {
300  struct omstr *next;
301  int groupnum;
303 
307 
308 /* Structure for holding the two variables that describe a number chain. */
309 
310 typedef struct omdatastr {
314 
316 
317 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
318 
319 typedef struct fnstr {
320  struct fnstr *next;
321  char *name;
323 
328 
333 
334 /* Structure for holding the two variables that describe a file name chain. */
335 
336 typedef struct fndatastr {
340 
345 
346 /* Structure for pattern and its compiled form; used for matching patterns and
347 also for include/exclude patterns. */
348 
349 typedef struct patstr {
350  struct patstr *next;
351  char *string;
355 
356 static patstr *patterns = NULL;
366 
367 /* Structure holding the two variables that describe a pattern chain. A pointer
368 to such structures is used for each appropriate option. */
369 
370 typedef struct patdatastr {
374 
380 
383 
384 static const char *incexname[4] = { "--include", "--exclude",
385  "--include-dir", "--exclude-dir" };
386 
387 /* Structure for options and list of them */
388 
391 
392 typedef struct option_item {
393  int type;
394  int one_char;
395  void *dataptr;
396  const char *long_name;
397  const char *help_text;
399 
400 /* Options without a single-letter equivalent get a negative value. This can be
401 used to identify them. */
402 
403 #define N_COLOUR (-1)
404 #define N_EXCLUDE (-2)
405 #define N_EXCLUDE_DIR (-3)
406 #define N_HELP (-4)
407 #define N_INCLUDE (-5)
408 #define N_INCLUDE_DIR (-6)
409 #define N_LABEL (-7)
410 #define N_LOCALE (-8)
411 #define N_NULL (-9)
412 #define N_LOFFSETS (-10)
413 #define N_FOFFSETS (-11)
414 #define N_LBUFFER (-12)
415 #define N_H_LIMIT (-13)
416 #define N_M_LIMIT (-14)
417 #define N_M_LIMIT_DEP (-15)
418 #define N_BUFSIZE (-16)
419 #define N_NOJIT (-17)
420 #define N_FILE_LIST (-18)
421 #define N_BINARY_FILES (-19)
422 #define N_EXCLUDE_FROM (-20)
423 #define N_INCLUDE_FROM (-21)
424 #define N_OM_SEPARATOR (-22)
425 #define N_MAX_BUFSIZE (-23)
426 #define N_OM_CAPTURE (-24)
427 #define N_ALLABSK (-25)
428 #define N_POSIX_DIGIT (-26)
429 #define N_GROUP_SEPARATOR (-27)
430 #define N_NO_GROUP_SEPARATOR (-28)
431 
433  { OP_NODATA, N_NULL, NULL, "", "terminate options" },
434  { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
435  { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
436  { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
437  { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
438  { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
439  { OP_SIZE, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" },
440  { OP_SIZE, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" },
441  { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
442  { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
443  { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
444  { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
445  { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
446  { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
447  { OP_NODATA, N_POSIX_DIGIT, NULL, "posix-digit", "\\d always matches [0-9], even in UTF/UCP mode" },
448  { OP_NODATA, 'E', NULL, "case-restrict", "restrict case matching (no mix ASCII/non-ASCII)" },
449  { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
450  { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
451  { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
452  { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
453  { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
454  { OP_STRING, N_GROUP_SEPARATOR, &group_separator, "group-separator=text", "set separator between groups of lines" },
455  { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
456  { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
457  { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
458  { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
459  { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
460  { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
461  { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
462  { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
463  { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
464  { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
465  { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" },
466  { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
467  { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
468  { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
469  { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
470  { OP_NUMBER, 'm', &count_limit, "max-count=number", "stop after <number> matched lines" },
471  { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
472  { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
473 #ifdef SUPPORT_PCRE2GREP_JIT
474  { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
475 #else
476  { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
477 #endif
478  { OP_NODATA, N_NO_GROUP_SEPARATOR, NULL, "no-group-separator", "suppress separators between groups of lines" },
479  { OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
480  { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
481  { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
482  { OP_U32NUMBER, N_OM_CAPTURE, &capture_max, "om-capture=n", "set capture count for --only-matching" },
483  { OP_NODATA, 'P', NULL, "no-ucp", "do not enable UCP mode with Unicode" },
484  { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
485  { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
486  { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
487  { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
488  { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
489  { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
490  { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
491  { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
492  { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
493  { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" },
494  { OP_NODATA, 'u', NULL, "utf", "use UTF/Unicode" },
495  { OP_NODATA, 'U', NULL, "utf-allow-invalid", "use UTF/Unicode, allow for invalid code units" },
496  { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
497  { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
498  { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
499  { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
500  { OP_NODATA, N_ALLABSK, NULL, "allow-lookaround-bsk", "allow \\K in lookarounds" },
501  { OP_NODATA, 'Z', NULL, "null", "output 0 byte after file names" },
502  { OP_NODATA, 0, NULL, NULL, NULL }
503 };
504 
505 /* Table of names for newline types. Must be kept in step with the definitions
506 of PCRE2_NEWLINE_xx in pcre2.h. */
507 
508 static const char *newlines[] = {
509  "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
510 
511 /* UTF-8 tables */
512 
513 const int utf8_table1[] =
514  { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
515 const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int);
516 
517 const int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
518 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
519 
520 const char utf8_table4[] = {
521  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
522  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
523  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
524  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
525 
526 
527 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
528 /*************************************************
529 * Emulated memmove() for systems without it *
530 *************************************************/
531 
532 /* This function can make use of bcopy() if it is available. Otherwise do it by
533 steam, as there are some non-Unix environments that lack both memmove() and
534 bcopy(). */
535 
/* Stand-in for memmove() on systems that lack it. Copies n bytes from s to d,
coping with overlapping regions, and returns d.

Arguments:
  d   destination
  s   source
  n   number of bytes to copy

Returns:      the destination pointer d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t remaining = n;

if (to > from)
  {
  /* Destination lies above the source: copy from the top downwards so that
  bytes are not overwritten before they have been read. */
  to += n;
  from += n;
  while (remaining-- > 0)
    {
    --to;
    --from;
    *to = *from;
    }
  return (void *)to;        /* Back at the start of the destination */
  }

/* Destination is at or below the source: a forward copy is safe. */
while (remaining-- > 0)
  {
  *to = *from;
  to++;
  from++;
  }
return (void *)(to - n);
#endif   /* not HAVE_BCOPY */
}
560 #undef memmove
561 #define memmove(d,s,n) emulated_memmove(d,s,n)
562 #endif /* not VPCOMPAT && not HAVE_MEMMOVE */
563 
564 
565 
566 /*************************************************
567 * Convert code point to UTF-8 *
568 *************************************************/
569 
570 /* A static buffer is used. Returns the number of bytes. */
571 
572 static int
574 {
575 int i, j;
576 uint8_t *utf8bytes = utf8_buffer;
577 for (i = 0; i < utf8_table1_size; i++)
578  if (value <= (uint32_t)utf8_table1[i]) break;
579 utf8bytes += i;
580 for (j = i; j > 0; j--)
581  {
582  *utf8bytes-- = 0x80 | (value & 0x3f);
583  value >>= 6;
584  }
585 *utf8bytes = utf8_table2[i] | value;
586 return i + 1;
587 }
588 
589 
590 
591 /*************************************************
592 * Case-independent string compare *
593 *************************************************/
594 
/* Compare two zero-terminated strings, ignoring letter case as determined by
tolower() in the current locale.

Arguments:
  str1    first string
  str2    second string

Returns:  0 if the strings are equal when case is ignored
         -1 if str1 sorts before str2
         +1 if str1 sorts after str2
*/

static int
strcmpic(const char *str1, const char *str2)
{
unsigned int c1, c2;
while (*str1 != '\0' || *str2 != '\0')
  {
  /* The <ctype.h> functions are only defined for values representable as
  unsigned char (or EOF). Plain char may be signed, so bytes >= 0x80 must be
  converted before the call to avoid undefined behaviour. */
  c1 = (unsigned int)tolower((unsigned char)*str1++);
  c2 = (unsigned int)tolower((unsigned char)*str2++);
  if (c1 != c2) return (c1 > c2)? 1 : -1;
  }
return 0;
}
607 
608 
609 /*************************************************
610 * Parse GREP_COLORS *
611 *************************************************/
612 
613 /* Extract ms or mt from GREP_COLORS.
614 
615 Argument: the string, possibly NULL
616 Returns: the value of ms or mt, or NULL if neither present
617 */
618 
/* Pick out the value of the "ms=" setting (or, failing that, the "mt="
setting) from a GREP_COLORS-style string. The value runs up to the next colon
or the end of the string, and is truncated to fit a 16-byte static buffer,
which is overwritten by each call. */

static char *
parse_grep_colors(const char *gc)
{
static char seq[16];
char *const limit = seq + sizeof(seq) - 1;   /* Leave room for the zero */
char *dst = seq;
const char *src;

if (gc == NULL) return NULL;

src = strstr(gc, "ms=");
if (src == NULL)
  {
  src = strstr(gc, "mt=");
  if (src == NULL) return NULL;
  }

/* Step over the three-character key and copy the value. */

for (src += 3; *src != ':' && *src != 0 && dst < limit; src++)
  *dst++ = *src;
*dst = 0;
return seq;
}
636 
637 
638 /*************************************************
639 * Exit from the program *
640 *************************************************/
641 
642 /* If there has been a resource error, give a suitable message.
643 
644 Argument: the return code
645 Returns: does not return
646 */
647 
648 static void
650 {
651 /* VMS does exit codes differently: both exit(1) and exit(0) return with a
652 status of 1, which is not helpful. To help with this problem, define a symbol
653 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
654 therein. */
655 
656 #ifdef __VMS
657  char val_buf[4];
658  $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
659  $DESCRIPTOR(sym_val, val_buf);
660  sprintf(val_buf, "%d", rc);
661  sym_val.dsc$w_length = strlen(val_buf);
662  lib$set_symbol(&sym_nam, &sym_val);
663 #endif
664 
665 if (resource_error)
666  {
667  fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
668  "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
670  fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
671  }
672 exit(rc);
673 }
674 
675 
676 /*************************************************
677 * Add item to chain of patterns *
678 *************************************************/
679 
680 /* Used to add an item onto a chain, or just return an unconnected item if the
681 "after" argument is NULL.
682 
683 Arguments:
684  s pattern string to add
685  patlen length of pattern
686  after if not NULL points to item to insert after
687 
688 Returns: new pattern block or NULL on error
689 */
690 
691 static patstr *
692 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
693 {
694 patstr *p = (patstr *)malloc(sizeof(patstr));
695 
696 /* LCOV_EXCL_START - These won't be hit in normal testing. */
697 
698 if (p == NULL)
699  {
700  fprintf(stderr, "pcre2grep: malloc failed\n");
701  pcre2grep_exit(2);
702  }
703 if (patlen > MAXPATLEN)
704  {
705  fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
706  MAXPATLEN);
707  free(p);
708  return NULL;
709  }
710 
711 /* LCOV_EXCL_STOP */
712 
713 p->next = NULL;
714 p->string = s;
715 p->length = patlen;
716 p->compiled = NULL;
717 
718 if (after != NULL)
719  {
720  p->next = after->next;
721  after->next = p;
722  }
723 return p;
724 }
725 
726 
727 /*************************************************
728 * Free chain of patterns *
729 *************************************************/
730 
731 /* Used for several chains of patterns.
732 
733 Argument: pointer to start of chain
734 Returns: nothing
735 */
736 
737 static void
739 {
740 while (pc != NULL)
741  {
742  patstr *p = pc;
743  pc = p->next;
744  if (p->compiled != NULL) pcre2_code_free(p->compiled);
745  free(p);
746  }
747 }
748 
749 
750 /*************************************************
751 * Free chain of file names *
752 *************************************************/
753 
754 /*
755 Argument: pointer to start of chain
756 Returns: nothing
757 */
758 
759 static void
761 {
762 while (fn != NULL)
763  {
764  fnstr *f = fn;
765  fn = f->next;
766  free(f);
767  }
768 }
769 
770 
771 /*************************************************
772 * OS-specific functions *
773 *************************************************/
774 
775 /* These definitions are needed in all Windows environments, even those where
776 Unix-style directory scanning can be used (see below). */
777 
778 #ifdef WIN32
779 
780 #ifndef STRICT
781 # define STRICT
782 #endif
783 #ifndef WIN32_LEAN_AND_MEAN
784 # define WIN32_LEAN_AND_MEAN
785 #endif
786 
787 #include <windows.h>
788 
789 #define iswild(name) (strpbrk(name, "*?") != NULL)
790 
791 /* Convert ANSI BGR format to RGB used by Windows */
792 #define BGR_RGB(x) (((x) & 1 ? 4 : 0) | ((x) & 2) | ((x) & 4 ? 1 : 0))
793 
794 static HANDLE hstdout;
795 static CONSOLE_SCREEN_BUFFER_INFO csbi;
796 static WORD match_colour;
797 
798 static WORD
799 decode_ANSI_colour(const char *cs)
800 {
801 WORD result = csbi.wAttributes;
802 while (*cs)
803  {
804  if (isdigit((unsigned char)(*cs)))
805  {
806  int code = atoi(cs);
807  if (code == 1) result |= 0x08;
808  else if (code == 4) result |= 0x8000;
809  else if (code == 5) result |= 0x80;
810  else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
811  else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
812  else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
813  else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
814  /* aixterm high intensity colour codes */
815  else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
816  else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
817 
818  while (isdigit((unsigned char)(*cs))) cs++;
819  }
820  if (*cs) cs++;
821  }
822 return result;
823 }
824 
825 
826 static void
827 init_colour_output()
828 {
829 if (do_colour)
830  {
831  hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
832  /* This fails when redirected to con; try again if so. */
833  if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
834  {
835  HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
836  FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
837  GetConsoleScreenBufferInfo(hcon, &csbi);
838  CloseHandle(hcon);
839  }
840  match_colour = decode_ANSI_colour(colour_string);
841  /* No valid colour found - turn off colouring */
842  if (!match_colour) do_colour = FALSE;
843  }
844 }
845 
846 #endif /* WIN32 */
847 
848 
849 /* The following sets of functions are defined so that they can be made system
850 specific. At present there are versions for Unix-style environments, Windows,
851 native z/OS, and "no support". */
852 
853 
854 /************* Directory scanning Unix-style and z/OS ***********/
855 
856 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
857 #include <sys/types.h>
858 #include <sys/stat.h>
859 #include <dirent.h>
860 
861 #if defined NATIVE_ZOS
862 /************* Directory and PDS/E scanning for z/OS ***********/
863 /************* z/OS looks mostly like Unix with USS ************/
864 /* However, z/OS needs the #include statements in this header */
865 #include "pcrzosfs.h"
866 /* That header is not included in the main PCRE distribution because
867  other apparatus is needed to compile pcre2grep for z/OS. The header
868  can be found in the special z/OS distribution, which is available
869  from www.zaconsultants.net or from www.cbttape.org. */
870 #endif
871 
872 typedef DIR directory_type;
873 #define FILESEP '/'
874 
/* Test whether a path names a directory. If stat() fails the answer is "no",
in the expectation that the subsequent attempt to open the path as a file will
fail and report the problem. */

static int
isdirectory(char *filename)
{
struct stat info;
int is_dir = 0;

if (stat(filename, &info) >= 0) is_dir = S_ISDIR(info.st_mode);
return is_dir;
}
883 
884 static directory_type *
885 opendirectory(char *filename)
886 {
887 return opendir(filename);
888 }
889 
890 static char *
892 {
893 for (;;)
894  {
895  struct dirent *dent = readdir(dir);
896  if (dent == NULL) return NULL;
897  if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
898  return dent->d_name;
899  }
900 /* Control never reaches here */
901 }
902 
903 static void
905 {
906 closedir(dir);
907 }
908 
909 
910 /************* Test for regular file, Unix-style **********/
911 
/* Test whether a path names a regular file. If stat() fails the answer is
"yes", in the expectation that the subsequent attempt to open the path as a
file will fail and report the problem. */

static int
isregfile(char *filename)
{
struct stat info;
int is_reg = 1;

if (stat(filename, &info) >= 0) is_reg = S_ISREG(info.st_mode);
return is_reg;
}
920 
921 
922 #if defined NATIVE_ZOS
923 /************* Test for a terminal in z/OS **********/
924 /* isatty() does not work in a TSO environment, so always give FALSE.*/
925 
926 static BOOL
927 is_stdout_tty(void)
928 {
929 return FALSE;
930 }
931 
932 static BOOL
933 is_file_tty(FILE *f)
934 {
935 return FALSE;
936 }
937 
938 
939 /************* Test for a terminal, Unix-style **********/
940 
941 #else
942 static BOOL
943 is_stdout_tty(void)
944 {
945 return isatty(fileno(stdout));
946 }
947 
948 static BOOL
949 is_file_tty(FILE *f)
950 {
951 return isatty(fileno(f));
952 }
953 #endif
954 
955 
956 /************* Print optionally coloured match Unix-style and z/OS **********/
957 
958 static void
959 print_match(const void *buf, int length)
960 {
961 if (length == 0) return;
962 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
963 FWRITE_IGNORE(buf, 1, length, stdout);
964 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
965 }
966 
967 /* End of Unix-style or native z/OS environment functions. */
968 
969 
970 /************* Directory scanning in Windows ***********/
971 
972 /* I (Philip Hazel) have no means of testing this code. It was contributed by
973 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
974 when it did not exist. David Byron added a patch that moved the #include of
975 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
976 */
977 
978 #elif defined WIN32
979 
980 #ifndef INVALID_FILE_ATTRIBUTES
981 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
982 #endif
983 
984 typedef struct directory_type
985 {
986 HANDLE handle;
987 BOOL first;
988 WIN32_FIND_DATA data;
990 
991 #define FILESEP '/'
992 
993 int
994 isdirectory(char *filename)
995 {
996 DWORD attr = GetFileAttributes(filename);
997 if (attr == INVALID_FILE_ATTRIBUTES)
998  return 0;
999 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
1000 }
1001 
1003 opendirectory(char *filename)
1004 {
1005 size_t len;
1006 char *pattern;
1007 directory_type *dir;
1008 DWORD err;
1009 len = strlen(filename);
1010 pattern = (char *)malloc(len + 3);
1011 dir = (directory_type *)malloc(sizeof(*dir));
1012 if ((pattern == NULL) || (dir == NULL))
1013  {
1014  fprintf(stderr, "pcre2grep: malloc failed\n");
1015  pcre2grep_exit(2);
1016  }
1017 memcpy(pattern, filename, len);
1018 if (iswild(filename))
1019  pattern[len] = 0;
1020 else
1021  memcpy(&(pattern[len]), "\\*", 3);
1022 dir->handle = FindFirstFile(pattern, &(dir->data));
1023 if (dir->handle != INVALID_HANDLE_VALUE)
1024  {
1025  free(pattern);
1026  dir->first = TRUE;
1027  return dir;
1028  }
1029 err = GetLastError();
1030 free(pattern);
1031 free(dir);
1032 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
1033 return NULL;
1034 }
1035 
1036 char *
1038 {
1039 for (;;)
1040  {
1041  if (!dir->first)
1042  {
1043  if (!FindNextFile(dir->handle, &(dir->data)))
1044  return NULL;
1045  }
1046  else
1047  {
1048  dir->first = FALSE;
1049  }
1050  if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
1051  return dir->data.cFileName;
1052  }
1053 #ifndef _MSC_VER
1054 return NULL; /* Keep compiler happy; never executed */
1055 #endif
1056 }
1057 
1058 void
1060 {
1061 FindClose(dir->handle);
1062 free(dir);
1063 }
1064 
1065 
1066 /************* Test for regular file in Windows **********/
1067 
1068 /* I don't know how to do this, or if it can be done; assume all paths are
1069 regular if they are not directories. */
1070 
/* Windows has no separate test here: any path that isdirectory() does not
report as a directory is treated as a regular file.

Argument:  filename   the path to test
Returns:   1 if the path is not a directory, 0 if it is
*/

int isregfile(char *filename)
{
  if (isdirectory(filename)) return 0;
  return 1;
}
1075 
1076 
1077 /************* Test for a terminal in Windows **********/
1078 
1079 static BOOL
1080 is_stdout_tty(void)
1081 {
1082 return _isatty(_fileno(stdout));
1083 }
1084 
1085 static BOOL
1086 is_file_tty(FILE *f)
1087 {
1088 return _isatty(_fileno(f));
1089 }
1090 
1091 
1092 /************* Print optionally coloured match in Windows **********/
1093 
1094 static void
1095 print_match(const void *buf, int length)
1096 {
1097 if (length == 0) return;
1098 if (do_colour)
1099  {
1100  if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1101  else SetConsoleTextAttribute(hstdout, match_colour);
1102  }
1103 FWRITE_IGNORE(buf, 1, length, stdout);
1104 if (do_colour)
1105  {
1106  if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1107  else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1108  }
1109 }
1110 
1111 /* End of Windows functions */
1112 
1113 
1114 /************* Directory scanning when we can't do it ***********/
1115 
1116 /* The type is void, and apart from isdirectory(), the functions do nothing. */
1117 
1118 #else
1119 
#define FILESEP 0
typedef void directory_type;

/* Nothing is ever reported to be a directory. */

int isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* A directory scan can never be started. */

directory_type * opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* There is never a next entry. */

char *readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* There is nothing to close. */

void closedirectory(directory_type *dir)
{
  (void)dir;
}
1127 
1128 
1129 /************* Test for regular file when we can't do it **********/
1130 
1131 /* Assume all files are regular. */
1132 
/* No test is available, so every path is reported to be a regular file. */

int isregfile(char *filename)
{
  (void)filename;
  return 1;
}
1134 
1135 
1136 /************* Test for a terminal when we can't do it **********/
1137 
1138 static BOOL
1140 {
1141 return FALSE;
1142 }
1143 
1144 static BOOL
1146 {
1147 return FALSE;
1148 }
1149 
1150 
1151 /************* Print optionally coloured match when we can't do it **********/
1152 
/* Write matched data to stdout. No colouring is available in this
configuration.

Arguments:
  buf      the data to write
  length   the number of bytes; nothing is written when this is zero

Returns:   nothing
*/

static void
print_match(const void *buf, int length)
{
  if (length != 0) {
    FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1159 
1160 #endif /* End of system-specific functions */
1161 
1162 
1163 
1164 #ifndef HAVE_STRERROR
1165 /*************************************************
1166 * Provide strerror() for non-ANSI libraries *
1167 *************************************************/
1168 
1169 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1170 in their libraries, but can provide the same facility by this simple
1171 alternative function. */
1172 
extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement for strerror(), built on the system error table.

Argument:  n   the error number
Returns:   the entry for n in sys_errlist[], or a fixed "unknown" text when
           n lies outside the range 0 .. sys_nerr-1
*/

char *
strerror(int n)
{
  if (n < 0 || n >= sys_nerr) return "unknown error number";
  return sys_errlist[n];
}
1182 #endif /* HAVE_STRERROR */
1183 
1184 
1185 
1186 /*************************************************
1187 * Usage function *
1188 *************************************************/
1189 
1190 static int
1191 usage(int rc)
1192 {
1193 option_item *op;
1194 fprintf(stderr, "Usage: pcre2grep [-");
1195 for (op = optionlist; op->one_char != 0; op++)
1196  {
1197  if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1198  }
1199 fprintf(stderr, "] [long options] [pattern] [files]\n");
1200 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1201  "options.\n");
1202 return rc;
1203 }
1204 
1205 
1206 
1207 /*************************************************
1208 * Help function *
1209 *************************************************/
1210 
1211 static void
1212 help(void)
1213 {
1214 option_item *op;
1215 
1216 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1217 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1218 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1219 
1220 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1221 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1222 printf("All callout scripts in patterns are supported." STDOUT_NL);
1223 #else
1224 printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1225 #endif
1226 #else
1227 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1228 #endif
1229 
1230 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1231 
1232 #ifdef SUPPORT_LIBZ
1233 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1234 #endif
1235 
1236 #ifdef SUPPORT_LIBBZ2
1237 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1238 #endif
1239 
1240 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1241 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1242 #else
1243 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1244 #endif
1245 
1246 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1247 printf("Options:" STDOUT_NL);
1248 
1249 for (op = optionlist; op->one_char != 0; op++)
1250  {
1251  int n;
1252  char s[4];
1253 
1254  if (op->one_char > 0 && (op->long_name)[0] == 0)
1255  n = 31 - printf(" -%c", op->one_char);
1256  else
1257  {
1258  if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1259  else strcpy(s, " ");
1260  n = 31 - printf(" %s --%s", s, op->long_name);
1261  }
1262 
1263  if (n < 1) n = 1;
1264  printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
1265  }
1266 
1267 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1268 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1269 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1270 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1271 printf("space is removed and blank lines are ignored." STDOUT_NL);
1272 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1273 
1274 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1275 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1276 }
1277 
1278 
1279 
1280 /*************************************************
1281 * Test exclude/includes *
1282 *************************************************/
1283 
1284 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1285 there are no includes, the path must match an include pattern.
1286 
1287 Arguments:
1288  path the path to be matched
1289  ip the chain of include patterns
1290  ep the chain of exclude patterns
1291 
1292 Returns: TRUE if the path is not excluded
1293 */
1294 
1295 static BOOL
1296 test_incexc(char *path, patstr *ip, patstr *ep)
1297 {
1298 int plen = strlen((const char *)path);
1299 
1300 for (; ep != NULL; ep = ep->next)
1301  {
1302  if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1303  return FALSE;
1304  }
1305 
1306 if (ip == NULL) return TRUE;
1307 
1308 for (; ip != NULL; ip = ip->next)
1309  {
1310  if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1311  return TRUE;
1312  }
1313 
1314 return FALSE;
1315 }
1316 
1317 
1318 
1319 /*************************************************
1320 * Decode integer argument value *
1321 *************************************************/
1322 
1323 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1324 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1325 just keep it simple.
1326 
1327 Arguments:
1328  option_data the option data string
1329  op the option item (for error messages)
1330  longop TRUE if option given in long form
1331 
1332 Returns: a long integer
1333 */
1334 
1335 static long int
1336 decode_number(char *option_data, option_item *op, BOOL longop)
1337 {
1338 unsigned long int n = 0;
1339 char *endptr = option_data;
1340 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1341 while (isdigit((unsigned char)(*endptr)))
1342  n = n * 10 + (int)(*endptr++ - '0');
1343 if (toupper(*endptr) == 'K')
1344  {
1345  n *= 1024;
1346  endptr++;
1347  }
1348 else if (toupper(*endptr) == 'M')
1349  {
1350  n *= 1024*1024;
1351  endptr++;
1352  }
1353 
1354 if (*endptr != 0) /* Error */
1355  {
1356  if (longop)
1357  {
1358  char *equals = strchr(op->long_name, '=');
1359  int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1360  (int)(equals - op->long_name);
1361  fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1362  option_data, nlen, op->long_name);
1363  }
1364  else
1365  fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1366  option_data, op->one_char);
1367  pcre2grep_exit(usage(2));
1368  }
1369 
1370 return n;
1371 }
1372 
1373 
1374 
1375 /*************************************************
1376 * Add item to a chain of numbers *
1377 *************************************************/
1378 
1379 /* Used to add an item onto a chain, or just return an unconnected item if the
1380 "after" argument is NULL.
1381 
1382 Arguments:
1383  n the number to add
1384  after if not NULL points to item to insert after
1385 
1386 Returns: new number block
1387 */
1388 
1389 static omstr *
1390 add_number(int n, omstr *after)
1391 {
1392 omstr *om = (omstr *)malloc(sizeof(omstr));
1393 
1394 /* LCOV_EXCL_START - These lines won't be hit in normal testing. */
1395 
1396 if (om == NULL)
1397  {
1398  fprintf(stderr, "pcre2grep: malloc failed\n");
1399  pcre2grep_exit(2);
1400  }
1401 
1402 /* LCOV_EXCL_STOP */
1403 
1404 om->next = NULL;
1405 om->groupnum = n;
1406 
1407 if (after != NULL)
1408  {
1409  om->next = after->next;
1410  after->next = om;
1411  }
1412 return om;
1413 }
1414 
1415 
1416 
1417 /*************************************************
1418 * Read one line of input *
1419 *************************************************/
1420 
1421 /* Normally, input that is to be scanned is read using fread() (or gzread, or
1422 BZ2_read) into a large buffer, so many lines may be read at once. However,
1423 doing this for tty input means that no output appears until a lot of input has
1424 been typed. Instead, tty input is handled line by line. We cannot use fgets()
1425 for this, because it does not stop at a binary zero, and therefore there is no
1426 way of telling how many characters it has read, because there may be binary
1427 zeros embedded in the data. This function is also used for reading patterns
1428 from files (the -f option).
1429 
1430 Arguments:
1431  buffer the buffer to read into
1432  length the maximum number of characters to read
1433  f the file
1434 
1435 Returns: the number of characters read, zero at end of file
1436 */
1437 
1438 static PCRE2_SIZE
1439 read_one_line(char *buffer, PCRE2_SIZE length, FILE *f)
1440 {
1441 int c;
1442 PCRE2_SIZE yield = 0;
1443 while ((c = fgetc(f)) != EOF)
1444  {
1445  buffer[yield++] = c;
1446  if (c == '\n' || yield >= length) break;
1447  }
1448 return yield;
1449 }
1450 
1451 
1452 
1453 /*************************************************
1454 * Find end of line *
1455 *************************************************/
1456 
1457 /* The length of the endline sequence that is found is set via lenptr. This may
1458 be zero at the very end of the file if there is no line-ending sequence there.
1459 
1460 Arguments:
1461  p current position in line
1462  endptr end of available data
1463  lenptr where to put the length of the eol sequence
1464 
1465 Returns: pointer after the last byte of the line,
1466  including the newline byte(s)
1467 */
1468 
1469 static char *
1470 end_of_line(char *p, char *endptr, int *lenptr)
1471 {
1472 switch(endlinetype)
1473  {
1474  default: /* Just in case */
1475  case PCRE2_NEWLINE_LF:
1476  while (p < endptr && *p != '\n') p++;
1477  if (p < endptr)
1478  {
1479  *lenptr = 1;
1480  return p + 1;
1481  }
1482  *lenptr = 0;
1483  return endptr;
1484 
1485  case PCRE2_NEWLINE_CR:
1486  while (p < endptr && *p != '\r') p++;
1487  if (p < endptr)
1488  {
1489  *lenptr = 1;
1490  return p + 1;
1491  }
1492  *lenptr = 0;
1493  return endptr;
1494 
1495  case PCRE2_NEWLINE_NUL:
1496  while (p < endptr && *p != '\0') p++;
1497  if (p < endptr)
1498  {
1499  *lenptr = 1;
1500  return p + 1;
1501  }
1502  *lenptr = 0;
1503  return endptr;
1504 
1505  case PCRE2_NEWLINE_CRLF:
1506  for (;;)
1507  {
1508  while (p < endptr && *p != '\r') p++;
1509  if (++p >= endptr)
1510  {
1511  *lenptr = 0;
1512  return endptr;
1513  }
1514  if (*p == '\n')
1515  {
1516  *lenptr = 2;
1517  return p + 1;
1518  }
1519  }
1520  break;
1521 
1522  case PCRE2_NEWLINE_ANYCRLF:
1523  while (p < endptr)
1524  {
1525  int extra = 0;
1526  int c = *((unsigned char *)p);
1527 
1528  if (utf && c >= 0xc0)
1529  {
1530  int gcii, gcss;
1531  extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1532  gcss = 6*extra;
1533  c = (c & utf8_table3[extra]) << gcss;
1534  for (gcii = 1; gcii <= extra; gcii++)
1535  {
1536  gcss -= 6;
1537  c |= (p[gcii] & 0x3f) << gcss;
1538  }
1539  }
1540 
1541  p += 1 + extra;
1542 
1543  switch (c)
1544  {
1545  case '\n':
1546  *lenptr = 1;
1547  return p;
1548 
1549  case '\r':
1550  if (p < endptr && *p == '\n')
1551  {
1552  *lenptr = 2;
1553  p++;
1554  }
1555  else *lenptr = 1;
1556  return p;
1557 
1558  default:
1559  break;
1560  }
1561  } /* End of loop for ANYCRLF case */
1562 
1563  *lenptr = 0; /* Must have hit the end */
1564  return endptr;
1565 
1566  case PCRE2_NEWLINE_ANY:
1567  while (p < endptr)
1568  {
1569  int extra = 0;
1570  int c = *((unsigned char *)p);
1571 
1572  if (utf && c >= 0xc0)
1573  {
1574  int gcii, gcss;
1575  extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1576  gcss = 6*extra;
1577  c = (c & utf8_table3[extra]) << gcss;
1578  for (gcii = 1; gcii <= extra; gcii++)
1579  {
1580  gcss -= 6;
1581  c |= (p[gcii] & 0x3f) << gcss;
1582  }
1583  }
1584 
1585  p += 1 + extra;
1586 
1587  switch (c)
1588  {
1589  case '\n': /* LF */
1590  case '\v': /* VT */
1591  case '\f': /* FF */
1592  *lenptr = 1;
1593  return p;
1594 
1595  case '\r': /* CR */
1596  if (p < endptr && *p == '\n')
1597  {
1598  *lenptr = 2;
1599  p++;
1600  }
1601  else *lenptr = 1;
1602  return p;
1603 
1604 #ifndef EBCDIC
1605  case 0x85: /* Unicode NEL */
1606  *lenptr = utf? 2 : 1;
1607  return p;
1608 
1609  case 0x2028: /* Unicode LS */
1610  case 0x2029: /* Unicode PS */
1611  *lenptr = 3;
1612  return p;
1613 #endif /* Not EBCDIC */
1614 
1615  default:
1616  break;
1617  }
1618  } /* End of loop for ANY case */
1619 
1620  *lenptr = 0; /* Must have hit the end */
1621  return endptr;
1622  } /* End of overall switch */
1623 }
1624 
1625 
1626 
1627 /*************************************************
1628 * Find start of previous line *
1629 *************************************************/
1630 
1631 /* This is called when looking back for before lines to print.
1632 
1633 Arguments:
1634  p start of the subsequent line
1635  startptr start of available data
1636 
1637 Returns: pointer to the start of the previous line
1638 */
1639 
1640 static char *
1641 previous_line(char *p, char *startptr)
1642 {
1643 switch(endlinetype)
1644  {
1645  default: /* Just in case */
1646  case PCRE2_NEWLINE_LF:
1647  p--;
1648  while (p > startptr && p[-1] != '\n') p--;
1649  return p;
1650 
1651  case PCRE2_NEWLINE_CR:
1652  p--;
1653  while (p > startptr && p[-1] != '\n') p--;
1654  return p;
1655 
1656  case PCRE2_NEWLINE_NUL:
1657  p--;
1658  while (p > startptr && p[-1] != '\0') p--;
1659  return p;
1660 
1661  case PCRE2_NEWLINE_CRLF:
1662  for (;;)
1663  {
1664  p -= 2;
1665  while (p > startptr && p[-1] != '\n') p--;
1666  if (p <= startptr + 1 || p[-2] == '\r') return p;
1667  }
1668  /* Control can never get here */
1669 
1670  case PCRE2_NEWLINE_ANY:
1671  case PCRE2_NEWLINE_ANYCRLF:
1672  if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1673  if (utf) while ((*p & 0xc0) == 0x80) p--;
1674 
1675  while (p > startptr)
1676  {
1677  unsigned int c;
1678  char *pp = p - 1;
1679 
1680  if (utf)
1681  {
1682  int extra = 0;
1683  while ((*pp & 0xc0) == 0x80) pp--;
1684  c = *((unsigned char *)pp);
1685  if (c >= 0xc0)
1686  {
1687  int gcii, gcss;
1688  extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1689  gcss = 6*extra;
1690  c = (c & utf8_table3[extra]) << gcss;
1691  for (gcii = 1; gcii <= extra; gcii++)
1692  {
1693  gcss -= 6;
1694  c |= (pp[gcii] & 0x3f) << gcss;
1695  }
1696  }
1697  }
1698  else c = *((unsigned char *)pp);
1699 
1700  if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1701  {
1702  case '\n': /* LF */
1703  case '\r': /* CR */
1704  return p;
1705 
1706  default:
1707  break;
1708  }
1709 
1710  else switch (c)
1711  {
1712  case '\n': /* LF */
1713  case '\v': /* VT */
1714  case '\f': /* FF */
1715  case '\r': /* CR */
1716 #ifndef EBCDIC
1717  case 0x85: /* Unicode NEL */
1718  case 0x2028: /* Unicode LS */
1719  case 0x2029: /* Unicode PS */
1720 #endif /* Not EBCDIC */
1721  return p;
1722 
1723  default:
1724  break;
1725  }
1726 
1727  p = pp; /* Back one character */
1728  } /* End of loop for ANY case */
1729 
1730  return startptr; /* Hit start of data */
1731  } /* End of overall switch */
1732 }
1733 
1734 
1735 
1736 /*************************************************
1737 * Output newline at end *
1738 *************************************************/
1739 
1740 /* This function is called if the final line of a file has been written to
1741 stdout, but it does not have a terminating newline.
1742 
1743 Arguments: none
1744 Returns: nothing
1745 */
1746 
1747 static void
1749 {
1750 switch(endlinetype)
1751  {
1752  default: /* Just in case */
1753  case PCRE2_NEWLINE_LF:
1754  case PCRE2_NEWLINE_ANY:
1755  case PCRE2_NEWLINE_ANYCRLF:
1756  fprintf(stdout, "\n");
1757  break;
1758 
1759  case PCRE2_NEWLINE_CR:
1760  fprintf(stdout, "\r");
1761  break;
1762 
1763  case PCRE2_NEWLINE_CRLF:
1764  fprintf(stdout, "\r\n");
1765  break;
1766 
1767  case PCRE2_NEWLINE_NUL:
1768  fprintf(stdout, "%c", 0);
1769  break;
1770  }
1771 }
1772 
1773 
1774 /*************************************************
1775 * Print the previous "after" lines *
1776 *************************************************/
1777 
1778 /* This is called if we are about to lose said lines because of buffer filling,
1779 and at the end of the file. The data in the line is written using fwrite() so
1780 that a binary zero does not terminate it.
1781 
1782 Arguments:
1783  lastmatchnumber the number of the last matching line, plus one
1784  lastmatchrestart where we restarted after the last match
1785  endptr end of available data
1786  printname filename for printing
1787 
1788 Returns: nothing
1789 */
1790 
1791 static void
1792 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1793  char *endptr, const char *printname)
1794 {
1795 if (after_context > 0 && lastmatchnumber > 0)
1796  {
1797  int count = 0;
1798  int ellength = 0;
1799  while (lastmatchrestart < endptr && count < after_context)
1800  {
1801  char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1802  if (ellength == 0 && pp == main_buffer + bufsize) break;
1803  if (printname != NULL) fprintf(stdout, "%s%c", printname, printname_hyphen);
1804  if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1805  FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1806  lastmatchrestart = pp;
1807  count++;
1808  }
1809 
1810  /* If we have printed any lines, arrange for a hyphen separator if anything
1811  else follows. Also, if the last line is the final line in the file and it had
1812  no newline, add one. */
1813 
1814  if (count > 0)
1815  {
1816  hyphenpending = TRUE;
1817  if (ellength == 0 && lastmatchrestart >= endptr)
1819  }
1820  }
1821 }
1822 
1823 
1824 
1825 /*************************************************
1826 * Apply patterns to subject till one matches *
1827 *************************************************/
1828 
1829 /* This function is called to run through all the patterns, looking for a
1830 match. When all possible matches are required, for example, for colouring, it
1831 checks all patterns for matching, and returns the earliest match. Otherwise, it
1832 returns the first pattern that has matched.
1833 
1834 Arguments:
1835  matchptr the start of the subject
1836  length the length of the subject to match
1837  options options for pcre2_match
1838  startoffset where to start matching
1839  mrc address of where to put the result of pcre2_match()
1840 
1841 Returns: TRUE if there was a match, match_data and offsets are set
1842  FALSE if there was no match (but no errors)
1843  invert if there was a non-fatal error
1844 */
1845 
1846 static BOOL
1847 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1848  PCRE2_SIZE startoffset, int *mrc)
1849 {
1850 PCRE2_SIZE slen = length;
1851 int first = -1;
1852 int firstrc = 0;
1853 patstr *p = patterns;
1854 const char *msg = "this text:\n\n";
1855 
1856 if (slen > 200)
1857  {
1858  slen = 200;
1859  msg = "text that starts:\n\n";
1860  }
1861 
1862 for (int i = 1; p != NULL; p = p->next, i++)
1863  {
1864  int rc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, length,
1865  startoffset, options, match_data, match_context);
1866  if (rc == PCRE2_ERROR_NOMATCH) continue;
1867 
1868  /* Handle a successful match. When all_matches is false, we are done.
1869  Otherwise we must save the earliest match. */
1870 
1871  if (rc >= 0)
1872  {
1873  if (!all_matches)
1874  {
1875  *mrc = rc;
1876  return TRUE;
1877  }
1878 
1879  if (first < 0 || offsets[0] < offsets_pair[first][0] ||
1880  (offsets[0] == offsets_pair[first][0] &&
1881  offsets[1] > offsets_pair[first][1]))
1882  {
1884  firstrc = rc;
1885  match_data_toggle ^= 1;
1888  }
1889  continue;
1890  }
1891 
1892  /* Deal with PCRE2 error. */
1893 
1894  fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", rc);
1895  if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1896  fprintf(stderr, "%s", msg);
1897  FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */
1898  fprintf(stderr, "\n\n");
1899  if (rc <= PCRE2_ERROR_UTF8_ERR1 &&
1900  rc >= PCRE2_ERROR_UTF8_ERR21)
1901  {
1902  unsigned char mbuffer[256];
1904  (void)pcre2_get_error_message(rc, mbuffer, sizeof(mbuffer));
1905  fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer, startchar);
1906  }
1907  if (rc == PCRE2_ERROR_MATCHLIMIT || rc == PCRE2_ERROR_DEPTHLIMIT ||
1909  resource_error = TRUE;
1910  if (error_count++ > 20)
1911  {
1912  fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1913  pcre2grep_exit(2);
1914  }
1915  return invert; /* No more matching; don't show the line again */
1916  }
1917 
1918 /* We get here when all patterns have been tried. If all_matches is false,
1919 this means that none of them matched. If all_matches is true, matched_first
1920 will be non-NULL if there was at least one match, and it will point to the
1921 appropriate match_data block. */
1922 
1923 if (!all_matches || first < 0) return FALSE;
1924 
1928 *mrc = firstrc;
1929 return TRUE;
1930 }
1931 
1932 
1933 
1934 /*************************************************
1935 * Decode dollar escape sequence *
1936 *************************************************/
1937 
1938 /* Called from various places to decode $ escapes in output strings. The escape
1939 sequences are as follows:
1940 
1941 $<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
1942 zero is never returned; '0' is substituted.
1943 
1944 $a returns bell.
1945 $b returns backspace.
1946 $e returns escape.
1947 $f returns form feed.
1948 $n returns newline.
1949 $r returns carriage return.
1950 $t returns tab.
1951 $v returns vertical tab.
1952 $o<digits> returns the character represented by the given octal
1953  number; up to three digits are processed.
1954 $o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
1955  code points.
1956 $x<digits> returns the character represented by the given hexadecimal
1957  number; up to two digits are processed.
1958 $x{<digits>} does the same, up to 6 digits, but gives an error for mode-invalid
1959  code points.
1960 Any other character is substituted by itself. E.g: $$ is replaced by a single
1961 dollar.
1962 
1963 Arguments:
1964  begin the start of the whole string
1965  string points to the $
1966  callout TRUE if in a callout (inhibits error messages)
1967  value where to return a value
1968  last where to return pointer to the last used character
1969 
1970 Returns: DDE_ERROR after a syntax error
1971  DDE_CAPTURE if *value is a capture number
1972  DDE_CHAR if *value is a character code
1973 */
1974 
1975 static int
1978 {
1979 uint32_t c = 0;
1980 int base = 10;
1981 int dcount;
1982 int rc = DDE_CHAR;
1983 BOOL brace = FALSE;
1984 
1985 switch (*(++string))
1986  {
1987  case 0: /* Syntax error: a character must be present after $. */
1988  if (!callout)
1989  fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1990  (int)(string - begin), "no character after $");
1991  *last = string;
1992  return DDE_ERROR;
1993 
1994  case '{':
1995  brace = TRUE;
1996  string++;
1997  if (!isdigit((unsigned char)(*string))) /* Syntax error: a decimal number required. */
1998  {
1999  if (!callout)
2000  fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2001  (int)(string - begin), "decimal number expected");
2002  rc = DDE_ERROR;
2003  break;
2004  }
2005 
2006  /* Fall through */
2007 
2008  /* The maximum capture number is 65535, so any number greater than that will
2009  always be an unknown capture number. We just stop incrementing, in order to
2010  avoid overflow. */
2011 
2012  case '0': case '1': case '2': case '3': case '4':
2013  case '5': case '6': case '7': case '8': case '9':
2014  do
2015  {
2016  if (c <= 65535) c = c * 10 + (*string - '0');
2017  string++;
2018  }
2019  while (*string >= '0' && *string <= '9');
2020  string--; /* Point to last digit */
2021 
2022  /* In a callout, capture number 0 is not available. No error can be given,
2023  so just return the character '0'. */
2024 
2025  if (callout && c == 0)
2026  {
2027  *value = '0';
2028  }
2029  else
2030  {
2031  *value = c;
2032  rc = DDE_CAPTURE;
2033  }
2034  break;
2035 
2036  /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
2037  for valid Unicode code points. */
2038 
2039  case 'o':
2040  base = 8;
2041  string++;
2042  if (*string == '{')
2043  {
2044  brace = TRUE;
2045  string++;
2046  dcount = 7;
2047  }
2048  else dcount = 3;
2049  for (; dcount > 0; dcount--)
2050  {
2051  if (*string < '0' || *string > '7') break;
2052  c = c * 8 + (*string++ - '0');
2053  }
2054  *value = c;
2055  string--; /* Point to last digit */
2056  break;
2057 
2058  /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
2059  for valid Unicode code points. */
2060 
2061  case 'x':
2062  base = 16;
2063  string++;
2064  if (*string == '{')
2065  {
2066  brace = TRUE;
2067  string++;
2068  dcount = 6;
2069  }
2070  else dcount = 2;
2071  for (; dcount > 0; dcount--)
2072  {
2073  if (!isxdigit(*string)) break;
2074  if (*string >= '0' && *string <= '9')
2075  c = c *16 + *string++ - '0';
2076  else
2077  c = c * 16 + (*string++ | 0x20) - 'a' + 10;
2078  }
2079  *value = c;
2080  string--; /* Point to last digit */
2081  break;
2082 
2083  case 'a': *value = '\a'; break;
2084  case 'b': *value = '\b'; break;
2085 #ifndef EBCDIC
2086  case 'e': *value = '\033'; break;
2087 #else
2088  case 'e': *value = '\047'; break;
2089 #endif
2090  case 'f': *value = '\f'; break;
2091  case 'n': *value = STDOUT_NL_CODE; break;
2092  case 'r': *value = '\r'; break;
2093  case 't': *value = '\t'; break;
2094  case 'v': *value = '\v'; break;
2095 
2096  default: *value = *string; break;
2097  }
2098 
2099 if (brace)
2100  {
2101  c = string[1];
2102  if (c != '}')
2103  {
2104  rc = DDE_ERROR;
2105  if (!callout)
2106  {
2107  if ((base == 8 && c >= '0' && c <= '7') ||
2108  (base == 16 && isxdigit(c)))
2109  {
2110  fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2111  "too many %s digits\n", (int)(string - begin),
2112  (base == 8)? "octal" : "hex");
2113  }
2114  else
2115  {
2116  fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2117  (int)(string - begin), "missing closing brace");
2118  }
2119  }
2120  }
2121  else string++;
2122  }
2123 
2124 /* Check maximum code point values, but take note of STDOUT_NL_CODE. */
2125 
2126 if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
2127  {
2128  uint32_t max = utf? 0x0010ffffu : 0xffu;
2129  if (*value > max)
2130  {
2131  if (!callout)
2132  fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2133  "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
2134  rc = DDE_ERROR;
2135  }
2136  }
2137 
2138 *last = string;
2139 return rc;
2140 }
2141 
2142 
2143 
2144 /*************************************************
2145 * Check output text for errors *
2146 *************************************************/
2147 
2148 /* Called early, to get errors before doing anything for -O text; also called
2149 from callouts to check before outputting.
2150 
2151 Arguments:
2152  string an --output text string
2153  callout TRUE if in a callout (stops printing errors)
2154 
2155 Returns: TRUE if OK, FALSE on error
2156 */
2157 
2158 static BOOL
2160 {
2161 uint32_t value;
2162 PCRE2_SPTR begin = string;
2163 
2164 for (; *string != 0; string++)
2165  {
2166  if (*string == '$' &&
2167  decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
2168  return FALSE;
2169  }
2170 
2171 return TRUE;
2172 }
2173 
2174 
2175 /*************************************************
2176 * Display output text *
2177 *************************************************/
2178 
2179 /* Display the output text, which is assumed to have already been syntax
2180 checked. Output may contain escape sequences started by the dollar sign.
2181 
2182 Arguments:
2183  string: the output text
2184  callout: TRUE for the builtin callout, FALSE for --output
2185  subject the start of the subject
2186  ovector: capture offsets
2187  capture_top: number of captures
2188 
2189 Returns: TRUE if something was output, other than newline
2190  FALSE if nothing was output, or newline was last output
2191 */
2192 
2193 static BOOL
2195  PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
2196 {
2197 uint32_t value;
2198 BOOL printed = FALSE;
2199 PCRE2_SPTR begin = string;
2200 
2201 for (; *string != 0; string++)
2202  {
2203  if (*string == '$')
2204  {
2205  switch(decode_dollar_escape(begin, string, callout, &value, &string))
2206  {
2207  case DDE_CHAR:
2208  if (value == STDOUT_NL_CODE)
2209  {
2210  fprintf(stdout, STDOUT_NL);
2211  printed = FALSE;
2212  continue;
2213  }
2214  break; /* Will print value */
2215 
2216  case DDE_CAPTURE:
2217  if (value < capture_top)
2218  {
2219  PCRE2_SIZE capturesize;
2220  value *= 2;
2221  capturesize = ovector[value + 1] - ovector[value];
2222  if (capturesize > 0)
2223  {
2224  print_match(subject + ovector[value], capturesize);
2225  printed = TRUE;
2226  }
2227  }
2228  continue;
2229 
2230  /* LCOV_EXCL_START */
2231  default: /* Should not occur */
2232  break;
2233  /* LCOV_EXCL_STOP */
2234  }
2235  }
2236 
2237  else value = *string; /* Not a $ escape */
2238 
2239  if (!utf || value <= 127) fprintf(stdout, "%c", value); else
2240  {
2241  int n = ord2utf8(value);
2242  for (int i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
2243  }
2244 
2245  printed = TRUE;
2246  }
2247 
2248 return printed;
2249 }
2250 
2251 
2252 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2253 
2254 /*************************************************
2255 * Parse and execute callout scripts *
2256 *************************************************/
2257 
2258 /* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
2259 string block and executes the program specified by the string. The string is a
2260 list of substrings separated by pipe characters. The first substring represents
2261 the executable name, and the following substrings specify the arguments:
2262 
2263  program_name|param1|param2|...
2264 
2265 Any substring (including the program name) can contain escape sequences
2266 started by the dollar character. The escape sequences are substituted as
2267 follows:
2268 
2269  $<digits> or ${<digits>} is replaced by the captured substring of the given
2270  decimal number, which must be greater than zero. If the number is greater
2271  than the number of capturing substrings, or if the capture is unset, the
2272  replacement is empty.
2273 
2274  Any other character is substituted by itself. E.g: $$ is replaced by a single
2275  dollar or $| replaced by a pipe character.
2276 
2277 Alternatively, if string starts with pipe, the remainder is taken as an output
2278 string, same as --output. This is the only form that is supported if
2279 SUPPORT_PCRE2GREP_FORK is not defined. In this case, --om-separator is used to
2280 separate each callout, defaulting to newline.
2281 
2282 Example:
2283 
2284  echo -e "abcde\n12345" | pcre2grep \
2285  '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2286 
2287  Output:
2288 
2289  Arg1: [a] [bcd] [d] Arg2: |a| ()
2290  abcde
2291  Arg1: [1] [234] [4] Arg2: |1| ()
2292  12345
2293 
2294 Arguments:
2295  blockptr the callout block
2296 
2297 Returns: currently it always returns with 0
2298 */
2299 
2300 static int
2301 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2302 {
2303 PCRE2_SIZE length = calloutptr->callout_string_length;
2304 PCRE2_SPTR string = calloutptr->callout_string;
2305 PCRE2_SPTR subject = calloutptr->subject;
2306 PCRE2_SIZE *ovector = calloutptr->offset_vector;
2307 PCRE2_SIZE capture_top = calloutptr->capture_top;
2308 
2309 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
2310 PCRE2_SIZE argsvectorlen = 2;
2311 PCRE2_SIZE argslen = 1;
2312 char *args;
2313 char *argsptr;
2314 char **argsvector;
2315 char **argsvectorptr;
2316 #ifndef WIN32
2317 pid_t pid;
2318 #endif
2319 int result = 0;
2320 #endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2321 
2322 (void)unused; /* Avoid compiler warning */
2323 
2324 /* Only callouts with strings are supported. */
2325 
2326 if (string == NULL || length == 0) return 0;
2327 
2328 /* If there's no command, output the remainder directly. */
2329 
2330 if (*string == '|')
2331  {
2332  string++;
2333  if (!syntax_check_output_text(string, TRUE)) return 0;
2334  (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2335  return 0;
2336  }
2337 
2338 #ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
2339 return 0;
2340 #else
2341 
2342 /* Checking syntax and compute the number of string fragments. Callout strings
2343 are silently ignored in the event of a syntax error. */
2344 
2345 while (length > 0)
2346  {
2347  if (*string == '|')
2348  {
2349  argsvectorlen++;
2350  if (argsvectorlen > 10000) return 0; /* Too many args */
2351  }
2352 
2353  else if (*string == '$')
2354  {
2355  uint32_t value;
2356  PCRE2_SPTR begin = string;
2357 
2358  switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2359  {
2360  case DDE_CAPTURE:
2361  if (value < capture_top)
2362  {
2363  value *= 2;
2364  argslen += ovector[value + 1] - ovector[value];
2365  }
2366  argslen--; /* Negate the effect of argslen++ below. */
2367  break;
2368 
2369  case DDE_CHAR:
2370  if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
2371  else if (utf && value > 127) argslen += ord2utf8(value) - 1;
2372  break;
2373 
2374  /* LCOV_EXCL_START */
2375  default: /* Should not occur */
2376  case DDE_ERROR:
2377  return 0;
2378  /* LCOV_EXCL_STOP */
2379  }
2380 
2381  length -= (string - begin);
2382  }
2383 
2384  string++;
2385  length--;
2386  argslen++;
2387  }
2388 
2389 /* Get memory for the argument vector and its strings. */
2390 
2391 args = (char*)malloc(argslen);
2392 if (args == NULL) return 0;
2393 
2394 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2395 if (argsvector == NULL)
2396  {
2397  /* LCOV_EXCL_START */
2398  free(args);
2399  return 0;
2400  /* LCOV_EXCL_STOP */
2401  }
2402 
2403 /* Now reprocess the string and set up the arguments. */
2404 
2405 argsptr = args;
2406 argsvectorptr = argsvector;
2407 *argsvectorptr++ = argsptr;
2408 
2409 length = calloutptr->callout_string_length;
2410 string = calloutptr->callout_string;
2411 
2412 while (length > 0)
2413  {
2414  if (*string == '|')
2415  {
2416  *argsptr++ = '\0';
2417  *argsvectorptr++ = argsptr;
2418  }
2419 
2420  else if (*string == '$')
2421  {
2422  uint32_t value;
2423  PCRE2_SPTR begin = string;
2424 
2425  switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2426  {
2427  case DDE_CAPTURE:
2428  if (value < capture_top)
2429  {
2430  PCRE2_SIZE capturesize;
2431  value *= 2;
2432  capturesize = ovector[value + 1] - ovector[value];
2433  memcpy(argsptr, subject + ovector[value], capturesize);
2434  argsptr += capturesize;
2435  }
2436  break;
2437 
2438  case DDE_CHAR:
2439  if (value == STDOUT_NL_CODE)
2440  {
2441  memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
2442  argsptr += STDOUT_NL_LEN;
2443  }
2444  else if (utf && value > 127)
2445  {
2446  int n = ord2utf8(value);
2447  memcpy(argsptr, utf8_buffer, n);
2448  argsptr += n;
2449  }
2450  else
2451  {
2452  *argsptr++ = value;
2453  }
2454  break;
2455 
2456  /* LCOV_EXCL_START */
2457  default: /* Even though this should not occur, the string having */
2458  case DDE_ERROR: /* been checked above, we need to include the free() */
2459  free(args); /* calls so that source checkers do not complain. */
2460  free(argsvector);
2461  return 0;
2462  /* LCOV_EXCL_STOP */
2463  }
2464 
2465  length -= (string - begin);
2466  }
2467 
2468  else *argsptr++ = *string;
2469 
2470  /* Advance along the string */
2471 
2472  string++;
2473  length--;
2474  }
2475 
2476 *argsptr++ = '\0';
2477 *argsvectorptr = NULL;
2478 
2479 /* Running an external command is system-dependent. Handle Windows and VMS as
2480 necessary, otherwise assume fork(). */
2481 
2482 #ifdef WIN32
2483 result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2484 
2485 #elif defined __VMS
2486  {
2487  char cmdbuf[500];
2488  short i = 0;
2489  int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
2490  $DESCRIPTOR(cmd, cmdbuf);
2491 
2492  cmdbuf[0] = 0;
2493  while (argsvector[i])
2494  {
2495  strcat(cmdbuf, argsvector[i]);
2496  strcat(cmdbuf, " ");
2497  i++;
2498  }
2499  cmd.dsc$w_length = strlen(cmdbuf) - 1;
2500  status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
2501  if (!(status & 1)) result = 0;
2502  else result = retstat & 1 ? 0 : 1;
2503  }
2504 
2505 #else /* Neither Windows nor VMS */
2506 pid = fork();
2507 if (pid == 0)
2508  {
2509  (void)execv(argsvector[0], argsvector);
2510  /* Control gets here if there is an error, e.g. a non-existent program */
2511  exit(1);
2512  }
2513 else if (pid > 0)
2514  {
2515  (void)fflush(stdout);
2516  (void)waitpid(pid, &result, 0);
2517  (void)fflush(stdout);
2518  }
2519 #endif /* End Windows/VMS/other handling */
2520 
2521 free(args);
2522 free(argsvector);
2523 
2524 /* Currently negative return values are not supported, only zero (match
2525 continues) or non-zero (match fails). */
2526 
2527 return result != 0;
2528 #endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2529 }
2530 #endif /* SUPPORT_PCRE2GREP_CALLOUT */
2531 
2532 
2533 
2534 /*************************************************
2535 * Read a portion of the file into buffer *
2536 *************************************************/
2537 
2538 static PCRE2_SIZE
2539 fill_buffer(void *handle, int frtype, char *buffer, PCRE2_SIZE length,
2540  BOOL input_line_buffered)
2541 {
2542 (void)frtype; /* Avoid warning when not used */
2543 
2544 #ifdef SUPPORT_LIBZ
2545 if (frtype == FR_LIBZ)
2546  return gzread((gzFile)handle, buffer, length);
2547 else
2548 #endif
2549 
2550 #ifdef SUPPORT_LIBBZ2
2551 if (frtype == FR_LIBBZ2)
2552  return (PCRE2_SIZE)BZ2_bzread((BZFILE *)handle, buffer, length);
2553 else
2554 #endif
2555 
2556 return (input_line_buffered ?
2557  read_one_line(buffer, length, (FILE *)handle) :
2558  fread(buffer, 1, length, (FILE *)handle));
2559 }
2560 
2561 
2562 
2563 /*************************************************
2564 * Grep an individual file *
2565 *************************************************/
2566 
2567 /* This is called from grep_or_recurse() below. It uses a buffer that is three
2568 times the value of bufthird. The matching point is never allowed to stray into
2569 the top third of the buffer, thus keeping more of the file available for
2570 context printing or for multiline scanning. For large files, the pointer will
2571 be in the middle third most of the time, so the bottom third is available for
2572 "before" context printing.
2573 
2574 Arguments:
2575  handle the fopened FILE stream for a normal file
2576  the gzFile pointer when reading is via libz
2577  the BZFILE pointer when reading is via libbz2
2578  frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2579  filename the file name or NULL (for errors)
2580  printname the file name if it is to be printed for each match
2581  or NULL if the file name is not to be printed
2582  it cannot be NULL if filenames[_nomatch]_only is set
2583 
2584 Returns: 0 if there was at least one match
2585  1 otherwise (no matches)
2586  2 if an overlong line is encountered
2587  3 if there is a read error on a .bz2 file
2588 */
2589 
/* pcre2grep(): search one already-opened input and act on each selected line.

The input is read into main_buffer with fill_buffer() and is walked one line at
a time, using end_of_line() to find each line's end. Every line (or, in
multiline mode, the rest of the buffered data) is passed to match_patterns().
A line is selected when the match result differs from the invert flag, and what
happens then depends on the options, tested in this order: FN_NOMATCH_ONLY
(return 1), quiet (return 0), counting, binary file (report and return 0),
FN_MATCH_ONLY (print the name and return 0), the only-matching/offset/output
family, and finally ordinary line printing with optional context.

While scanning, a line that reaches the end of a full buffer without a
terminator causes the buffer to be reallocated at double the size, up to
max_bufthird; when ptr passes two thirds of a full buffer the last two thirds
are moved down and the top third is refilled.

NOTE(review): in this copy every line begins with a listing line number, and
those numbers show that listing lines 2643, 2848, 2949, 2993, 3125, 3186 and
3202 are absent. Each gap is flagged below; the missing text must be restored
from the upstream source before this function can be built. */
2590 static int
2591 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2592 {
2593 int rc = 1;
/* filepos is increased below by the same amount each time ptr moves on to
another line, but it is not changed when the buffer contents are shifted. */
2594 int filepos = 0;
2595 unsigned long int linenumber = 1;
2596 unsigned long int lastmatchnumber = 0;
2597 unsigned long int count = 0;
2598 long int count_matched_lines = 0;
2599 char *lastmatchrestart = main_buffer;
2600 char *ptr = main_buffer;
2601 char *endptr;
2602 PCRE2_SIZE bufflength;
2603 BOOL binary = FALSE;
2604 BOOL endhyphenpending = FALSE;
2605 BOOL lines_printed = FALSE;
2606 BOOL input_line_buffered = line_buffered;
2607 FILE *in = NULL; /* Ensure initialized */
2608 long stream_start = -1; /* Only non-negative if relevant */
2609 
2610 /* Do the first read into the start of the buffer and set up the pointer to end
2611 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2612 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2613 fail. */
2614 
2615 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2616  {
2617  in = (FILE *)handle;
2618  if (feof(in)) return 1;
2619  if (is_file_tty(in)) input_line_buffered = TRUE;
2620  else
2621  {
2622  if (count_limit >= 0 && filename == stdin_name)
2623  stream_start = ftell(in);
2624  }
2625  }
2626 else input_line_buffered = FALSE;
2627 
2628 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2629  input_line_buffered);
2630 
2631 #ifdef SUPPORT_LIBBZ2
2632 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 3; /* Gotcha: bufflength is PCRE2_SIZE */
2633 #endif
2634 
2635 endptr = main_buffer + bufflength;
2636 
2637 /* Unless binary-files=text, see if we have a binary file. This uses the same
2638 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2639 file. However, when the newline convention is binary zero, we can't do this. */
2640 
2641 if (binary_files != BIN_TEXT)
2642  {
 /* NOTE(review): listing line 2643 is absent here. Going by the comment
 above, it presumably makes the memchr() test below conditional on the newline
 convention not being binary zero - restore it from the upstream source. */
2644  binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2645  != NULL;
2646  if (binary && binary_files == BIN_NOMATCH) return 1;
2647  }
2648 
2649 /* Loop while the current pointer is not at the end of the file. For large
2650 files, endptr will be at the end of the buffer when we are in the middle of the
2651 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2652 way, the buffer is shifted left and re-filled. */
2653 
2654 while (ptr < endptr)
2655  {
2656  int endlinelength;
2657  int mrc = 0;
2658  unsigned int options = 0;
2659  BOOL match;
2660  BOOL line_matched = FALSE;
2661  char *t = ptr;
2662  PCRE2_SIZE length, linelength;
2663  PCRE2_SIZE startoffset = 0;
2664 
2665  /* If the -m option set a limit for the number of matched or non-matched
2666  lines, check it here. A limit of zero means that no matching is ever done.
2667  For stdin from a file, set the file position. */
2668 
2669  if (count_limit >= 0 && count_matched_lines >= count_limit)
2670  {
2671  if (stream_start >= 0)
2672  (void)fseek(handle, stream_start + (long int)filepos, SEEK_SET);
2673  rc = (count_limit == 0)? 1 : 0;
2674  break;
2675  }
2676 
2677  /* At this point, ptr is at the start of a line. We need to find the length
2678  of the subject string to pass to pcre2_match(). In multiline mode, it is the
2679  length remainder of the data in the buffer. Otherwise, it is the length of
2680  the next line, excluding the terminating newline. After matching, we always
2681  advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2682  option is used for compiling, so that any match is constrained to be in the
2683  first line. */
2684 
2685  t = end_of_line(t, endptr, &endlinelength);
2686  linelength = t - ptr - endlinelength;
2687  length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2688 
2689  /* Check to see if the line we are looking at extends right to the very end
2690  of the buffer without a line terminator. This means the line is too long to
2691  handle at the current buffer size. Until the buffer reaches its maximum size,
2692  try doubling it and reading more data. */
2693 
2694  if (endlinelength == 0 && t == main_buffer + bufsize)
2695  {
2696  if (bufthird < max_bufthird)
2697  {
2698  char *new_buffer;
2699  PCRE2_SIZE new_bufthird = 2*bufthird;
2700 
2701  if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2702  new_buffer = (char *)malloc(3*new_bufthird);
2703 
2704  if (new_buffer == NULL)
2705  {
2706  /* LCOV_EXCL_START */
2707  fprintf(stderr,
2708  "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2709  "pcre2grep: not enough memory to increase the buffer size to %"
2710  SIZ_FORM "\n",
2711  linenumber,
2712  (filename == NULL)? "" : " of file ",
2713  (filename == NULL)? "" : filename,
2714  new_bufthird);
2715  return 2;
2716  /* LCOV_EXCL_STOP */
2717  }
2718 
2719  /* Copy the data and adjust pointers to the new buffer location. */
2720 
2721  memcpy(new_buffer, main_buffer, bufsize);
2722  bufthird = new_bufthird;
2723  bufsize = 3*bufthird;
2724  ptr = new_buffer + (ptr - main_buffer);
2725  lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2726  free(main_buffer);
2727  main_buffer = new_buffer;
2728 
2729  /* Read more data into the buffer and then try to find the line ending
2730  again. */
2731 
2732  bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2733  bufsize - bufflength, input_line_buffered);
2734  endptr = main_buffer + bufflength;
2735  continue;
2736  }
2737  else
2738  {
2739  fprintf(stderr,
2740  "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2741  "pcre2grep: the maximum buffer size is %" SIZ_FORM "\n"
2742  "pcre2grep: use the --max-buffer-size option to change it\n",
2743  linenumber,
2744  (filename == NULL)? "" : " of file ",
2745  (filename == NULL)? "" : filename,
2746  bufthird);
2747  return 2;
2748  }
2749  }
2750 
2751  /* We come back here after a match when only_matching_count is non-zero, in
2752  order to find any further matches in the same line. This applies to
2753  --only-matching, --file-offsets, and --line-offsets. */
2754 
2755  ONLY_MATCHING_RESTART:
2756 
2757  /* Run through all the patterns until one matches or there is an error other
2758  than NOMATCH. This code is in a subroutine so that it can be re-used for
2759  finding subsequent matches when colouring matched lines. After finding one
2760  match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2761  this line. */
2762 
2763  match = match_patterns(ptr, length, options, startoffset, &mrc);
2764  options = PCRE2_NOTEMPTY;
2765 
2766  /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2767  only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2768  return code - to output data lines, so that binary zeroes are treated as just
2769  another data character. */
2770 
2771  if (match != invert)
2772  {
2773  BOOL hyphenprinted = FALSE;
2774 
2775  /* We've failed if we want a file that doesn't have any matches. */
2776 
2777  if (filenames == FN_NOMATCH_ONLY) return 1;
2778 
2779  /* Remember that this line matched (for counting matched lines) */
2780 
2781  line_matched = TRUE;
2782 
2783  /* If all we want is a yes/no answer, we can return immediately. */
2784 
2785  if (quiet) return 0;
2786 
2787  /* Just count if just counting is wanted. */
2788 
2789  else if (count_only || show_total_count) count++;
2790 
2791  /* When handling a binary file and binary-files==binary, the "binary"
2792  variable will be set true (it's false in all other cases). In this
2793  situation we just want to output the file name. No need to scan further. */
2794 
2795  else if (binary)
2796  {
2797  fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2798  return 0;
2799  }
2800 
2801  /* Likewise, if all we want is a file name, there is no need to scan any
2802  more lines in the file. */
2803 
2804  else if (filenames == FN_MATCH_ONLY)
2805  {
2806  fprintf(stdout, "%s", printname);
2807  if (printname_nl == NULL) fprintf(stdout, "%c", 0);
2808  else fprintf(stdout, "%s", printname_nl);
2809  return 0;
2810  }
2811 
2812  /* The --only-matching option prints just the substring that matched,
2813  and/or one or more captured portions of it, as long as these strings are
2814  not empty. The --file-offsets and --line-offsets options output offsets for
2815  the matching substring (all three set only_matching_count non-zero). None
2816  of these mutually exclusive options prints any context. Afterwards, adjust
2817  the start and then jump back to look for further matches in the same line.
2818  If we are in invert mode, however, nothing is printed and we do not restart
2819  - this could still be useful because the return code is set. */
2820 
2821  else if (only_matching_count != 0)
2822  {
2823  if (!invert)
2824  {
2825  PCRE2_SIZE oldstartoffset;
2826 
2827  if (printname != NULL) fprintf(stdout, "%s%c", printname,
2828  printname_colon);
2829  if (number) fprintf(stdout, "%lu:", linenumber);
2830 
2831  /* Handle --line-offsets */
2832 
2833  if (line_offsets)
2834  fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2835  (int)(offsets[1] - offsets[0]));
2836 
2837  /* Handle --file-offsets */
2838 
2839  else if (file_offsets)
2840  fprintf(stdout, "%d,%d" STDOUT_NL,
2841  (int)(filepos + ptr + offsets[0] - ptr),
2842  (int)(offsets[1] - offsets[0]));
2843 
2844  /* Handle --output (which has already been syntax checked) */
2845 
2846  else if (output_text != NULL)
2847  {
 /* NOTE(review): listing line 2848 is absent here. The line that follows
 is only the tail of an argument list; presumably the missing line opens a
 call that displays output_text for this match - restore it from the
 upstream source. */
2849  (PCRE2_SPTR)ptr, offsets, mrc);
2850  fprintf(stdout, STDOUT_NL);
2851  }
2852 
2853  /* Handle --only-matching, which may occur many times */
2854 
2855  else
2856  {
2857  BOOL printed = FALSE;
2858  omstr *om;
2859 
2860  for (om = only_matching; om != NULL; om = om->next)
2861  {
2862  int n = om->groupnum;
2863  if (n == 0 || n < mrc)
2864  {
2865  int plen = offsets[2*n + 1] - offsets[2*n];
2866  if (plen > 0)
2867  {
2868  if (printed && om_separator != NULL)
2869  fprintf(stdout, "%s", om_separator);
2870  print_match(ptr + offsets[n*2], plen);
2871  printed = TRUE;
2872  }
2873  }
2874  }
2875  if (printed || printname != NULL || number)
2876  fprintf(stdout, STDOUT_NL);
2877  }
2878 
2879  /* Prepare to repeat to find the next match in the line. */
2880 
2881  //match = FALSE;
2882  if (line_buffered) fflush(stdout);
2883  rc = 0; /* Had some success */
2884 
2885  /* If the pattern contained a lookbehind that included \K, it is
2886  possible that the end of the match might be at or before the actual
2887  starting offset we have just used. In this case, start one character
2888  further on. */
2889 
2890  startoffset = offsets[1]; /* Restart after the match */
2891  oldstartoffset = pcre2_get_startchar(match_data);
2892  if (startoffset <= oldstartoffset)
2893  {
2894  if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
2895  startoffset = oldstartoffset + 1;
2896  if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2897  }
2898 
2899  /* If the current match ended past the end of the line (only possible
2900  in multiline mode), we must move on to the line in which it did end
2901  before searching for more matches. */
2902 
2903  while (startoffset > linelength)
2904  {
2905  ptr += linelength + endlinelength;
2906  filepos += (int)(linelength + endlinelength);
2907  linenumber++;
2908  startoffset -= (int)(linelength + endlinelength);
2909  t = end_of_line(ptr, endptr, &endlinelength);
2910  linelength = t - ptr - endlinelength;
2911  length = (PCRE2_SIZE)(endptr - ptr);
2912  }
2913 
2914  goto ONLY_MATCHING_RESTART;
2915  }
2916  }
2917 
2918  /* This is the default case when none of the above options is set. We print
2919  the matching lines(s), possibly preceded and/or followed by other lines of
2920  context. */
2921 
2922  else
2923  {
2924  lines_printed = TRUE;
2925 
2926  /* See if there is a requirement to print some "after" lines from a
2927  previous match. We never print any overlaps. */
2928 
2929  if (after_context > 0 && lastmatchnumber > 0)
2930  {
2931  int ellength;
2932  int linecount = 0;
2933  char *p = lastmatchrestart;
2934 
2935  while (p < ptr && linecount < after_context)
2936  {
2937  p = end_of_line(p, ptr, &ellength);
2938  linecount++;
2939  }
2940 
2941  /* It is important to advance lastmatchrestart during this printing so
2942  that it interacts correctly with any "before" printing below. Print
2943  each line's data using fwrite() in case there are binary zeroes. */
2944 
2945  while (lastmatchrestart < p)
2946  {
2947  char *pp = lastmatchrestart;
2948  if (printname != NULL) fprintf(stdout, "%s%c", printname,
 /* NOTE(review): listing line 2949 is absent here, so the fprintf() call
 above is left unfinished; presumably the missing line supplies the
 separator character for a context line and closes the call - restore it
 from the upstream source. */
2950  if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2951  pp = end_of_line(pp, endptr, &ellength);
2952  FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2953  lastmatchrestart = pp;
2954  }
2955  if (lastmatchrestart != ptr) hyphenpending = TRUE;
2956  }
2957 
2958  /* If there were non-contiguous lines printed above, insert hyphens. */
2959 
2960  if (hyphenpending)
2961  {
2962  if (group_separator != NULL)
2963  fprintf(stdout, "%s%s", group_separator, STDOUT_NL);
2964  hyphenpending = FALSE;
2965  hyphenprinted = TRUE;
2966  }
2967 
2968  /* See if there is a requirement to print some "before" lines for this
2969  match. Again, don't print overlaps. */
2970 
2971  if (before_context > 0)
2972  {
2973  int linecount = 0;
2974  char *p = ptr;
2975 
2976  while (p > main_buffer &&
2977  (lastmatchnumber == 0 || p > lastmatchrestart) &&
2978  linecount < before_context)
2979  {
2980  linecount++;
2981  p = previous_line(p, main_buffer);
2982  }
2983 
2984  if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted &&
2985  group_separator != NULL)
2986  fprintf(stdout, "%s%s", group_separator, STDOUT_NL);
2987 
2988  while (p < ptr)
2989  {
2990  int ellength;
2991  char *pp = p;
2992  if (printname != NULL) fprintf(stdout, "%s%c", printname,
 /* NOTE(review): listing line 2993 is absent here, leaving the fprintf()
 call above unfinished in the same way as in the "after" lines loop -
 restore it from the upstream source. */
2994  if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2995  pp = end_of_line(pp, endptr, &ellength);
2996  FWRITE_IGNORE(p, 1, pp - p, stdout);
2997  p = pp;
2998  }
2999  }
3000 
3001  /* Now print the matching line(s); ensure we set hyphenpending at the end
3002  of the file if any context lines are being output. */
3003 
3004  if (after_context > 0 || before_context > 0)
3005  endhyphenpending = TRUE;
3006 
3007  if (printname != NULL) fprintf(stdout, "%s%c", printname,
3008  printname_colon);
3009  if (number) fprintf(stdout, "%lu:", linenumber);
3010 
3011  /* In multiline mode, or if colouring, we have to split the line(s) up
3012  and search for further matches, but not of course if the line is a
3013  non-match. In multiline mode this is necessary in case there is another
3014  match that spans the end of the current line. When colouring we want to
3015  colour all matches. */
3016 
3017  if ((multiline || do_colour) && !invert)
3018  {
3019  int plength;
3020  PCRE2_SIZE endprevious;
3021 
3022  /* The use of \K may make the end offset earlier than the start. In
3023  this situation, swap them round. */
3024 
3025  if (offsets[0] > offsets[1])
3026  {
3027  PCRE2_SIZE temp = offsets[0];
3028  offsets[0] = offsets[1];
3029  offsets[1] = temp;
3030  }
3031 
3032  FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
3033  print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3034 
3035  for (;;)
3036  {
3037  PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
3038 
3039  endprevious = offsets[1];
3040  startoffset = endprevious; /* Advance after previous match. */
3041 
3042  /* If the pattern contained a lookbehind that included \K, it is
3043  possible that the end of the match might be at or before the actual
3044  starting offset we have just used. In this case, start one character
3045  further on. */
3046 
3047  if (startoffset <= oldstartoffset)
3048  {
3049  startoffset = oldstartoffset + 1;
3050  if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
3051  }
3052 
3053  /* If the current match ended past the end of the line (only possible
3054  in multiline mode), we must move on to the line in which it did end
3055  before searching for more matches. Because the PCRE2_FIRSTLINE option
3056  is set, the start of the match will always be before the first
3057  newline sequence. */
3058 
3059  while (startoffset > linelength + endlinelength)
3060  {
3061  ptr += linelength + endlinelength;
3062  filepos += (int)(linelength + endlinelength);
3063  linenumber++;
3064  startoffset -= (int)(linelength + endlinelength);
3065  endprevious -= (int)(linelength + endlinelength);
3066  t = end_of_line(ptr, endptr, &endlinelength);
3067  linelength = t - ptr - endlinelength;
3068  length = (PCRE2_SIZE)(endptr - ptr);
3069  }
3070 
3071  /* If startoffset is at the exact end of the line it means this
3072  complete line was the final part of the match, so there is nothing
3073  more to do. */
3074 
3075  if (startoffset == linelength + endlinelength) break;
3076 
3077  /* Otherwise, run a match from within the final line, and if found,
3078  loop for any that may follow. */
3079 
3080  if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
3081 
3082  /* The use of \K may make the end offset earlier than the start. In
3083  this situation, swap them round. */
3084 
3085  if (offsets[0] > offsets[1])
3086  {
3087  PCRE2_SIZE temp = offsets[0];
3088  offsets[0] = offsets[1];
3089  offsets[1] = temp;
3090  }
3091 
3092  FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
3093  print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3094  }
3095 
3096  /* In multiline mode, we may have already printed the complete line
3097  and its line-ending characters (if they matched the pattern), so there
3098  may be no more to print. */
3099 
3100  plength = (int)((linelength + endlinelength) - endprevious);
3101  if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
3102  }
3103 
3104  /* Not colouring or multiline; no need to search for further matches. */
3105 
3106  else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
3107  }
3108 
3109  /* End of doing what has to be done for a match. If --line-buffered was
3110  given, flush the output. */
3111 
3112  if (line_buffered) fflush(stdout);
3113  rc = 0; /* Had some success */
3114 
3115  /* Remember where the last match happened for after_context. We remember
3116  where we are about to restart, and that line's number. */
3117 
3118  lastmatchrestart = ptr + linelength + endlinelength;
3119  lastmatchnumber = linenumber + 1;
3120 
3121  /* If a line was printed and we are now at the end of the file and the last
3122  line had no newline, output one. */
3123 
3124  if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
 /* NOTE(review): listing line 3125 is absent here: the statement controlled
 by the "if" above (which, per the comment, outputs the final newline) is
 missing, so as written the "if" has no body before the closing brace -
 restore it from the upstream source. */
3126  }
3127 
3128  /* For a match in multiline inverted mode (which of course did not cause
3129  anything to be printed), we have to move on to the end of the match before
3130  proceeding. */
3131 
3132  if (multiline && invert && match)
3133  {
3134  int ellength;
3135  char *endmatch = ptr + offsets[1];
3136  t = ptr;
3137  while (t < endmatch)
3138  {
3139  t = end_of_line(t, endptr, &ellength);
3140  if (t <= endmatch) linenumber++; else break;
3141  }
3142  endmatch = end_of_line(endmatch, endptr, &ellength);
3143  linelength = endmatch - ptr - ellength;
3144  }
3145 
3146  /* Advance to after the newline and increment the line number. The file
3147  offset to the current line is maintained in filepos. */
3148 
3149  END_ONE_MATCH:
3150  ptr += linelength + endlinelength;
3151  filepos += (int)(linelength + endlinelength);
3152  linenumber++;
3153 
3154  /* If there was at least one match (or a non-match, as required) in the line,
3155  increment the count for the -m option. */
3156 
3157  if (line_matched) count_matched_lines++;
3158 
3159  /* If input is line buffered, and the buffer is not yet full, read another
3160  line and add it into the buffer. */
3161 
3162  if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
3163  {
3164  PCRE2_SIZE add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
3165  bufflength += add;
3166  endptr += add;
3167  }
3168 
3169  /* If we haven't yet reached the end of the file (the buffer is full), and
3170  the current point is in the top 1/3 of the buffer, slide the buffer down by
3171  1/3 and refill it. Before we do this, if some unprinted "after" lines are
3172  about to be lost, print them. */
3173 
3174  if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3175  {
3176  if (after_context > 0 &&
3177  lastmatchnumber > 0 &&
3178  lastmatchrestart < main_buffer + bufthird)
3179  {
3180  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3181  lastmatchnumber = 0; /* Indicates no after lines pending */
3182  }
3183 
3184  /* Now do the shuffle */
3185 
 /* NOTE(review): listing line 3186 is absent here. The pointer adjustments
 and the refill of the top third are present below, but the statement that
 actually moves the buffered data down by bufthird is not; presumably that
 is what the missing line does - restore it from the upstream source. */
3187  ptr -= bufthird;
3188 
3189  bufflength = 2*bufthird + fill_buffer(handle, frtype,
3190  main_buffer + 2*bufthird, bufthird, input_line_buffered);
3191  endptr = main_buffer + bufflength;
3192 
3193  /* Adjust any last match point */
3194 
3195  if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3196  }
3197  } /* Loop through the whole file */
3198 
3199 /* End of file; print final "after" lines if wanted; do_after_lines sets
3200 hyphenpending if it prints something. */
3201 
/* NOTE(review): listing line 3202 is absent here: the condition that controls
the block below (when the final "after" lines are wanted) is missing - restore
it from the upstream source. */
3203  {
3204  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3205  hyphenpending |= endhyphenpending;
3206  }
3207 
3208 /* Print the file name if we are looking for those without matches and there
3209 were none. If we found a match, we won't have got this far. */
3210 
3211 if (filenames == FN_NOMATCH_ONLY)
3212  {
3213  fprintf(stdout, "%s", printname);
3214  if (printname_nl == NULL) fprintf(stdout, "%c", 0);
3215  else fprintf(stdout, "%s", printname_nl);
3216  return 0;
3217  }
3218 
3219 /* Print the match count if wanted */
3220 
3221 if (count_only && !quiet)
3222  {
3223  if (count > 0 || !omit_zero_count)
3224  {
3225  if (printname != NULL && filenames != FN_NONE)
3226  fprintf(stdout, "%s%c", printname, printname_colon);
3227  fprintf(stdout, "%lu" STDOUT_NL, count);
3228  counts_printed++;
3229  }
3230  }
3231 
3232 total_count += count; /* Can be set without count_only */
3233 return rc;
3234 }
3235 
3236 
3237 
3238 /*************************************************
3239 * Grep a file or recurse into a directory *
3240 *************************************************/
3241 
3242 /* Given a path name, if it's a directory, scan all the files if we are
3243 recursing; if it's a file, grep it.
3244 
3245 Arguments:
3246  pathname the path to investigate
3247  dir_recurse TRUE if recursing is wanted (-r or -drecurse)
3248  only_one_at_top TRUE if the path is the only one at toplevel
3249 
3250 Returns: -1 the file/directory was skipped
3251  0 if there was at least one match
3252  1 if there were no matches
3253  2 there was some kind of error
3254 
3255 However, file opening failures are suppressed if "silent" is set.

Outline of the code below:
  . "-" is passed straight to pcre2grep() as stdin.
  . A directory is skipped, recursed into, or falls through to be opened like
    a file, depending on the global dee_action.
  . Anything else is opened with gzopen(), BZ2_bzopen() or fopen(), according
    to compile-time support and the file name suffix, scanned by pcre2grep(),
    and closed with the matching close function.

Notes:
  . In the visible code dir_recurse is only handed on to the recursive calls;
    the decision to recurse is taken from dee_action.
  . "silent" suppresses only the message for an open failure; the function
    still returns 2 in that case.
  . On Windows a wildcard pathname is truncated in place (see below), so the
    caller's string is modified.
3256 */
3257 
3258 static int
3259 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3260 {
3261 int rc = 1; /* Default yield: no matches */
3262 int frtype;
3263 void *handle;
3264 char *lastcomp;
3265 FILE *in = NULL; /* Ensure initialized */
3266 
3267 #ifdef SUPPORT_LIBZ
3268 gzFile ingz = NULL;
3269 #endif
3270 
3271 #ifdef SUPPORT_LIBBZ2
3272 BZFILE *inbz2 = NULL;
3273 #endif
3274 
3275 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3276 int pathlen;
3277 #endif
3278 
3279 #if defined NATIVE_ZOS
3280 int zos_type;
3281 FILE *zos_test_file;
3282 #endif
3283 
3284 /* If the file name is "-" we scan stdin */
3285 
3286 if (strcmp(pathname, "-") == 0)
3287  {
/* setbuf() with a NULL buffer makes stdin unbuffered when count_limit is
non-negative. NOTE(review): presumably this is so that input beyond the last
counted line is left unread in the stream - confirm. The final argument is the
name to print: it is NULL (no name shown) unless filenames is above FN_DEFAULT,
or is FN_DEFAULT and this is not the only top-level path. */
3288  if (count_limit >= 0) setbuf(stdin, NULL);
3289  return pcre2grep(stdin, FR_PLAIN, stdin_name,
3290  (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3291  stdin_name : NULL);
3292  }
3293 
3294 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3295 directories, whereas --include and --exclude apply to everything else. The test
3296 is against the final component of the path. */
3297 
/* lastcomp points just after the last FILESEP, or at the whole path if there
is none. NOTE(review): no use of lastcomp is visible in this listing; the two
conditions flagged below look truncated and presumably consume it - confirm. */
3298 lastcomp = strrchr(pathname, FILESEP);
3299 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3300 
3301 /* If the file is a directory, skip if not recursing or if explicitly excluded.
3302 Otherwise, scan the directory and recurse for each path within it. The scanning
3303 code is localized so it can be made system-specific. */
3304 
3305 
3306 /* For z/OS, determine the file type. */
3307 
3308 #if defined NATIVE_ZOS
3309 zos_test_file = fopen(pathname,"rb");
3310 
3311 if (zos_test_file == NULL)
3312  {
/* NOTE(review): the format string contains a single %s but two arguments
follow it, so the strerror() text is never printed. */
3313  if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3314  pathname, strerror(errno));
3315  return -1;
3316  }
3317 zos_type = identifyzosfiletype (zos_test_file);
3318 fclose (zos_test_file);
3319 
3320 /* Handle a PDS in separate code */
3321 
3322 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3323  {
3324  return travelonpdsdir (pathname, only_one_at_top);
3325  }
3326 
3327 /* Deal with regular files in the normal way below. These types are:
3328  zos_type == __ZOS_PDS_MEMBER
3329  zos_type == __ZOS_PS
3330  zos_type == __ZOS_VSAM_KSDS
3331  zos_type == __ZOS_VSAM_ESDS
3332  zos_type == __ZOS_VSAM_RRDS
3333 */
3334 
3335 /* Handle a z/OS directory using common code. */
3336 
/* The brace opened here is closed by the matching NATIVE_ZOS conditional that
follows the directory and wildcard handling. */
3337 else if (zos_type == __ZOS_HFS)
3338  {
3339 #endif /* NATIVE_ZOS */
3340 
3341 
3342 /* Handle directories: common code for all OS */
3343 
3344 if (isdirectory(pathname))
3345  {
/* NOTE(review): the right-hand operand of "||" is absent from this listing
(the embedded line numbering skips 3347); the condition is incomplete as shown
and must be restored from the upstream source. */
3346  if (dee_action == dee_SKIP ||
3348  return -1;
3349 
/* For any other dee_action value the directory falls through and is opened
like an ordinary file further down. */
3350  if (dee_action == dee_RECURSE)
3351  {
3352  char childpath[FNBUFSIZ];
3353  char *nextfile;
3354  directory_type *dir = opendirectory(pathname);
3355 
3356  if (dir == NULL)
3357  {
3358  /* LCOV_EXCL_START - this is a "never" event */
3359  if (!silent)
3360  fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3361  strerror(errno));
3362  return 2;
3363  /* LCOV_EXCL_STOP */
3364  }
3365 
3366  while ((nextfile = readdirectory(dir)) != NULL)
3367  {
3368  int frc;
/* The +2 allows for the separator and the terminating zero. */
3369  int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3370  if (fnlength > FNBUFSIZ)
3371  {
3372  /* LCOV_EXCL_START - this is a "never" event */
3373  fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3374  rc = 2;
3375  break;
3376  /* LCOV_EXCL_STOP */
3377  }
3378  sprintf(childpath, "%s%c%s", pathname, FILESEP, nextfile);
3379 
3380  /* If the realpath() function is available, we can try to prevent endless
3381  recursion caused by a symlink pointing to a parent directory (GitHub
3382  issue #2 (old Bugzilla #2794)). Original patch from Thomas Tempelmann.
3383  Modified to avoid using strlcat() because that isn't a standard C
3384  function, and also modified not to copy back the fully resolved path,
3385  because that affects the output from pcre2grep.

 Two textual checks are made against pathname as given (it is not itself
 resolved): the child resolves to exactly pathname, or pathname begins with
 the resolved child followed by "/", that is, the child resolves to a
 directory above pathname. The size test leaves room in resolvedpath for the
 appended "/" and the terminating zero; rlen is incremented after the test so
 that the comparison length includes the "/". */
3386 
3387 #ifdef HAVE_REALPATH
3388  {
3389  char resolvedpath[PATH_MAX];
3390  BOOL isSame;
3391  size_t rlen;
3392  if (realpath(childpath, resolvedpath) == NULL)
3393  /* LCOV_EXCL_START - this is a "never" event */
3394  continue; /* This path is invalid - we can skip processing this */
3395  /* LCOV_EXCL_STOP */
3396  isSame = strcmp(pathname, resolvedpath) == 0;
3397  if (isSame) continue; /* We have a recursion */
3398  rlen = strlen(resolvedpath);
3399  if (rlen++ < sizeof(resolvedpath) - 3)
3400  {
3401  BOOL contained;
3402  strcat(resolvedpath, "/");
3403  contained = strncmp(pathname, resolvedpath, rlen) == 0;
3404  if (contained) continue; /* We have a recursion */
3405  }
3406  }
3407 #endif /* HAVE_REALPATH */
3408 
/* Merge the child's yield: any value above 1 (an error) replaces rc; a match
(0) changes rc only if it is still 1; a skipped child (-1) or a child with no
matches leaves rc alone. */
3409  frc = grep_or_recurse(childpath, dir_recurse, FALSE);
3410  if (frc > 1) rc = frc;
3411  else if (frc == 0 && rc == 1) rc = 0;
3412  }
3413 
3414  closedirectory(dir);
3415  return rc;
3416  }
3417  }
3418 
/* Windows only: a pathname for which iswild() is true is expanded here by
scanning the directory. NOTE(review): when the directory cannot be opened this
returns 0, which the header comment defines as "at least one match" - confirm
that this is intended. */
3419 #ifdef WIN32
3420 if (iswild(pathname))
3421  {
3422  char buffer[1024];
3423  char *nextfile;
3424  char *name;
3425  directory_type *dir = opendirectory(pathname);
3426 
3427  if (dir == NULL)
3428  return 0;
3429 
/* Find the start of the final component and cut the string there, so that
pathname holds only the directory prefix (including its trailing separator).
This modifies the caller's string. */
3430  for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3431  if (*nextfile == '/' || *nextfile == '\\')
3432  name = nextfile + 1;
3433  *name = 0;
3434 
3435  while ((nextfile = readdirectory(dir)) != NULL)
3436  {
3437  int frc;
/* The precisions bound the output to at most 512+128 characters, which fits
in the 1024-byte buffer; longer components are silently truncated. */
3438  sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3439  frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3440  if (frc > 1) rc = frc;
3441  else if (frc == 0 && rc == 1) rc = 0;
3442  }
3443 
3444  closedirectory(dir);
3445  return rc;
3446  }
3447 #endif
3448 
3449 #if defined NATIVE_ZOS
3450  }
3451 #endif
3452 
3453 /* If the file is not a directory, check for a regular file, and if it is not,
3454 skip it if that's been requested. Otherwise, check for an explicit inclusion or
3455 exclusion. */
3456 
/* Which "if" this "else" belongs to depends on the build: the z/OS
"else if (zos_type == __ZOS_HFS)" block under NATIVE_ZOS, the iswild() test
under WIN32, and otherwise the isdirectory() test.
NOTE(review): the final operand and the closing parenthesis of the condition
are absent from this listing (the embedded line numbering skips 3463); restore
them from the upstream source. */
3457 else if (
3458 #if defined NATIVE_ZOS
3459  (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3460 #else /* all other OS */
3461  (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3462 #endif
3464  return -1; /* File skipped */
3465 
3466 /* Control reaches here if we have a regular file, or if we have a directory
3467 and recursion or skipping was not requested, or if we have anything else and
3468 skipping was not requested. The scan proceeds. If this is the first and only
3469 argument at top level, we don't show the file name, unless we are only showing
3470 the file name, or the filename was forced (-H). */
3471 
3472 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3473 pathlen = (int)(strlen(pathname));
3474 #endif
3475 
3476 /* Open using zlib if it is supported and the file name ends with .gz. */
3477 
3478 #ifdef SUPPORT_LIBZ
3479 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3480  {
3481  ingz = gzopen(pathname, "rb");
3482  if (ingz == NULL)
3483  {
3484  /* LCOV_EXCL_START */
3485  if (!silent)
3486  fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3487  strerror(errno));
3488  return 2;
3489  /* LCOV_EXCL_STOP */
3490  }
3491  handle = (void *)ingz;
3492  frtype = FR_LIBZ;
3493  }
3494 else
3495 #endif
3496 
3497 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3498 
/* The result of BZ2_bzopen() is not tested here; a NULL result is caught by
the common "handle == NULL" test after the plain-file block. */
3499 #ifdef SUPPORT_LIBBZ2
3500 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3501  {
3502  inbz2 = BZ2_bzopen(pathname, "rb");
3503  handle = (void *)inbz2;
3504  frtype = FR_LIBBZ2;
3505  }
3506 else
3507 #endif
3508 
3509 /* Otherwise use plain fopen(). The label is so that we can come back here if
3510 an attempt to read a .bz2 file indicates that it really is a plain file. */
3511 
3512 #ifdef SUPPORT_LIBBZ2
3513 PLAIN_FILE:
3514 #endif
3515  {
3516  in = fopen(pathname, "rb");
3517  handle = (void *)in;
3518  frtype = FR_PLAIN;
3519  }
3520 
3521 /* All the opening methods return errno when they fail. */
3522 
3523 if (handle == NULL)
3524  {
3525  if (!silent)
3526  fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3527  strerror(errno));
3528  return 2;
3529  }
3530 
3531 /* Now grep the file */
3532 
/* The last argument is the name to print, or NULL to suppress it; the rule is
the same as for stdin above. */
3533 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3534  (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3535 
3536 /* Close in an appropriate manner. */
3537 
3538 #ifdef SUPPORT_LIBZ
3539 if (frtype == FR_LIBZ)
3540  gzclose(ingz);
3541 else
3542 #endif
3543 
3544 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3545 read failed. If the error indicates that the file isn't in fact bzipped, try
3546 again as a normal file. */
3547 
3548 #ifdef SUPPORT_LIBBZ2
3549 if (frtype == FR_LIBBZ2)
3550  {
3551  if (rc == 3)
3552  {
3553  int errnum;
3554  const char *err = BZ2_bzerror(inbz2, &errnum);
3555  if (errnum == BZ_DATA_ERROR_MAGIC)
3556  {
3557  BZ2_bzclose(inbz2);
3558  goto PLAIN_FILE;
3559  }
/* Only the fprintf() is governed by "else if (!silent)"; the assignment of 2
to rc is a separate statement and happens for every other bzlib error. */
3560  /* LCOV_EXCL_START */
3561  else if (!silent)
3562  fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3563  pathname, err);
3564  rc = 2; /* The normal "something went wrong" code */
3565  /* LCOV_EXCL_STOP */
3566  }
3567  BZ2_bzclose(inbz2);
3568  }
3569 else
3570 #endif
3571 
3572 /* Normal file close */
3573 
3574 fclose(in);
3575 
3576 /* Pass back the yield from pcre2grep(). */
3577 
3578 return rc;
3579 }
3580 
3581 
3582 
3583 /*************************************************
3584 * Handle a no-data option *
3585 *************************************************/
3586 
3587 /* This is called when a known option has been identified.

Arguments:
  letter   the code that identifies the option: either an option character
           such as 'c', or one of the N_xxx values
  options  the PCRE2 compile options accumulated so far

Returns:   the compile options, with any bits added by this option

Most options are recorded by assigning to variables that are defined outside
this function; only 'F', 'i', 'M', 'u' and 'U' change the returned options
('w' and 'x' change extra_options instead). N_HELP, 'V' and the default case
call pcre2grep_exit(); the "break" statements that follow those calls are
there only to keep compilers quiet, as the comments on them say.

NOTE(review): the embedded line numbering of this listing skips 3599,
3616-3617 and 3633, so some statements appear to be missing; see the notes on
'o' and 'V' below and restore them from the upstream source. */
3588 
3589 static int
3590 handle_option(int letter, int options)
3591 {
3592 switch(letter)
3593  {
3594  case N_FOFFSETS: file_offsets = TRUE; break;
3595  case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3596  case N_LBUFFER: line_buffered = TRUE; break;
3597  case N_LOFFSETS: line_offsets = number = TRUE; break;
3598  case N_NOJIT: use_jit = FALSE; break;
3600  case N_NO_GROUP_SEPARATOR: group_separator = NULL; break;
3601  case 'a': binary_files = BIN_TEXT; break;
3602  case 'c': count_only = TRUE; break;
3603  case N_POSIX_DIGIT: posix_digit = TRUE; break;
3604  case 'E': case_restrict = TRUE; break;
3605  case 'F': options |= PCRE2_LITERAL; break;
3606  case 'H': filenames = FN_FORCE; break;
3607  case 'I': binary_files = BIN_NOMATCH; break;
3608  case 'h': filenames = FN_NONE; break;
3609  case 'i': options |= PCRE2_CASELESS; break;
3610  case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3611  case 'L': filenames = FN_NOMATCH_ONLY; break;
3612  case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3613  case 'n': number = TRUE; break;
3614 
/* NOTE(review): as shown, 'o' does nothing; the statements that record the
option appear to be among the lines missing from this listing. */
3615  case 'o':
3618  break;
3619 
3620  case 'P': no_ucp = TRUE; break;
3621  case 'q': quiet = TRUE; break;
3622  case 'r': dee_action = dee_RECURSE; break;
3623  case 's': silent = TRUE; break;
3624  case 't': show_total_count = TRUE; break;
3625  case 'u': options |= PCRE2_UTF | PCRE2_UCP; utf = TRUE; break;
3626  case 'U': options |= PCRE2_UTF | PCRE2_MATCH_INVALID_UTF | PCRE2_UCP;
3627  utf = TRUE; break;
3628  case 'v': invert = TRUE; break;
3629 
/* NOTE(review): nothing visible here writes to buffer before it is printed
with %s; the call that fills in the version string appears to be on the line
missing from this listing (3633). As shown, the fprintf() would read
uninitialized memory. */
3630  case 'V':
3631  {
3632  unsigned char buffer[128];
3634  fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3635  }
3636  pcre2grep_exit(0);
3637  break; /* LCOV_EXCL_LINE - statement kept to avoid compiler warning */
3638 
3639  case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3640  case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
/* Zero the name separators and clear printname_nl; code elsewhere in the file
outputs a zero byte after a name when printname_nl is NULL. */
3641  case 'Z': printname_colon = printname_hyphen = 0; printname_nl = NULL; break;
3642 
3643  /* LCOV_EXCL_START - this is a "never event" */
3644  default:
3645  fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3646  pcre2grep_exit(usage(2));
3647  /* LCOV_EXCL_STOP */
3648  }
3649 
3650 return options;
3651 }
3652 
3653 
3654 
3655 /*************************************************
3656 * Construct printed ordinal *
3657 *************************************************/
3658 
/* Format a number as an English ordinal: "1st", "2nd", "3rd", "4th", "11th",
"21st", and so on. The yield points to a static buffer that is overwritten by
the next call. The suffix is chosen from the last two digits: 11, 12 and 13
always take "th"; otherwise a final digit of 1, 2 or 3 selects "st", "nd" or
"rd", and everything else (including negative numbers) takes "th". */

static char *
ordin(int n)
{
static char buffer[14];
const char *suffix = "th";
int digits = sprintf(buffer, "%d", n);   /* Length of the numeric part */
int last_two = n % 100;

if (last_two < 11 || last_two > 13)
  {
  switch (last_two % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

strcpy(buffer + digits, suffix);
return buffer;
}
3679 
3680 
3681 
3682 /*************************************************
3683 * Compile a single pattern *
3684 *************************************************/
3685 
3686 /* Do nothing if the pattern has already been compiled. This is the case for
3687 include/exclude patterns read from a file.
3688 
3689 When the -F option has been used, each "pattern" may be a list of strings,
3690 separated by line breaks. They will be matched literally. We split such a
3691 string and compile the first substring, inserting an additional block into the
3692 pattern chain.
3693 
3694 Arguments:
3695  p points to the pattern block
3696  options the PCRE options
3697  fromfile TRUE if the pattern was read from a file
3698  fromtext file name or identifying text (e.g. "include")
3699  count 0 if this is the only command line pattern, or
3700  number of the command line pattern, or
3701  linenumber for a pattern from a file
3702 
3703 Returns: TRUE on success, FALSE after an error
3704 */
3705 
3706 static BOOL
3707 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3708  int count)
3709 {
3710 char *ps;
3711 int errcode;
3712 PCRE2_SIZE patlen, erroffset;
3713 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3714 
/* Nothing to do for a block that already holds a compiled pattern. */
3715 if (p->compiled != NULL) return TRUE;
3716 ps = p->string;
3717 patlen = p->length;
3718 
/* For literal (-F) patterns, look for a line ending inside the string. The
arithmetic below treats pe as pointing just past the line ending and ellength
as the length of that ending (zero when none was found) - NOTE(review):
confirm against end_of_line(). When an ending is found, only the text before
it is compiled now; the remainder, starting at pe, is queued as a new block
by add_pattern(), whose third argument here is p. */
3719 if ((options & PCRE2_LITERAL) != 0)
3720  {
3721  int ellength;
3722  char *eop = ps + patlen;
3723  char *pe = end_of_line(ps, eop, &ellength);
3724 
3725  if (ellength != 0)
3726  {
3727  patlen = pe - ps - ellength;
3728  if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3729  }
3730  }
3731 
3732 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3733  &erroffset, compile_context);
3734 
3735 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3736 ignore any JIT compiler errors, relying on falling back to interpreting if
3737 anything goes wrong with JIT. */
3738 
/* NOTE(review): the body of the SUPPORT_PCRE2GREP_JIT conditional is empty in
this listing (the embedded line numbering skips 3742), so no JIT compilation
is visible here; restore the statement from the upstream source. */
3739 if (p->compiled != NULL)
3740  {
3741 #ifdef SUPPORT_PCRE2GREP_JIT
3743 #endif
3744  return TRUE;
3745  }
3746 
3747 /* Handle compile errors */
3748 
/* Keep the reported offset within the pattern text that was compiled. */
3749 if (erroffset > patlen) erroffset = patlen;
3750 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3751 
/* For a pattern from a file, count is printed as the line number; for a
command line pattern, a zero count means the message carries no ordinal, and
any other value is turned into one ("1st", "2nd", ...) by ordin(). */
3752 if (fromfile)
3753  {
3754  fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3755  "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3756  }
3757 else
3758  {
3759  if (count == 0)
3760  fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3761  fromtext, (int)erroffset, errmessbuffer);
3762  else
3763  fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3764  ordin(count), fromtext, (int)erroffset, errmessbuffer);
3765  }
3766 
3767 return FALSE;
3768 }
3769 
3770 
3771 
3772 /*************************************************
3773 * Read and compile a file of patterns *
3774 *************************************************/
3775 
3776 /* This is used for --filelist, --include-from, and --exclude-from.
3777 
3778 Arguments:
3779  name the name of the file; "-" is stdin
3780  patptr pointer to the pattern chain anchor
3781  patlastptr pointer to the last pattern pointer
3782 
3783 Returns: TRUE if all went well
3784 */
3785 
3786 static BOOL
3787 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3788 {
3789 int linenumber = 0;
3790 PCRE2_SIZE patlen;
3791 FILE *f;
3792 const char *filename;
3793 char buffer[MAXPATLEN+20];
3794 
3795 if (strcmp(name, "-") == 0)
3796  {
3797  f = stdin;
3798  filename = stdin_name;
3799  }
3800 else
3801  {
3802  f = fopen(name, "r");
3803  if (f == NULL)
3804  {
3805  fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3806  return FALSE;
3807  }
3808  filename = name;
3809  }
3810 
3811 while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3812  {
3813  while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3814  linenumber++;
3815  if (patlen == 0) continue; /* Skip blank lines */
3816 
3817  /* Note: this call to add_pattern() puts a pointer to the local variable
3818  "buffer" into the pattern chain. However, that pointer is used only when
3819  compiling the pattern, which happens immediately below, so we flatten it
3820  afterwards, as a precaution against any later code trying to use it. */
3821 
3822  *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3823  if (*patlastptr == NULL)
3824  {
3825  /* LCOV_EXCL_START - won't happen in testing */
3826  if (f != stdin) fclose(f);
3827  return FALSE;
3828  /* LCOV_EXCL_STOP */
3829  }
3830  if (*patptr == NULL) *patptr = *patlastptr;
3831 
3832  /* This loop is needed because compiling a "pattern" when -F is set may add
3833  on additional literal patterns if the original contains a newline. In the
3834  common case, it never will, because read_one_line() stops at a newline.
3835  However, the -N option can be used to give pcre2grep a different newline
3836  setting. */
3837 
3838  for(;;)
3839  {
3840  if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3841  linenumber))
3842  {
3843  if (f != stdin) fclose(f);
3844  return FALSE;
3845  }
3846  (*patlastptr)->string = NULL; /* Insurance */
3847  if ((*patlastptr)->next == NULL) break;
3848  *patlastptr = (*patlastptr)->next;
3849  }
3850  }
3851 
3852 if (f != stdin) fclose(f);
3853 return TRUE;
3854 }
3855 
3856 
3857 
3858 /*************************************************
3859 * Main program *
3860 *************************************************/
3861 
3862 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3863 
3864 int
3865 main(int argc, char **argv)
3866 {
3867 int i, j;
3868 int rc = 1;
3869 BOOL only_one_at_top;
3870 patstr *cp;
3871 fnstr *fn;
3872 omstr *om;
3873 const char *locale_from = "--locale";
3874 
3875 #ifdef SUPPORT_PCRE2GREP_JIT
3877 #endif
3878 
3879 /* In Windows, stdout is set up as a text stream, which means that \n is
3880 converted to \r\n. This causes output lines that are copied from the input to
3881 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3882 that stdout is a binary stream. Note that this means all other output to stdout
3883 must use STDOUT_NL to terminate lines. */
3884 
3885 #ifdef WIN32
3886 _setmode(_fileno(stdout), _O_BINARY);
3887 #endif
3888 
3889 /* Process the options */
3890 
3891 for (i = 1; i < argc; i++)
3892  {
3893  option_item *op = NULL;
3894  char *option_data = (char *)""; /* default to keep compiler happy */
3895  BOOL longop;
3896  BOOL longopwasequals = FALSE;
3897 
3898  if (argv[i][0] != '-') break;
3899 
3900  /* If we hit an argument that is just "-", it may be a reference to STDIN,
3901  but only if we have previously had -e or -f to define the patterns. */
3902 
3903  if (argv[i][1] == 0)
3904  {
3905  if (pattern_files != NULL || patterns != NULL) break;
3906  else pcre2grep_exit(usage(2));
3907  }
3908 
3909  /* Handle a long name option, or -- to terminate the options */
3910 
3911  if (argv[i][1] == '-')
3912  {
3913  char *arg = argv[i] + 2;
3914  char *argequals = strchr(arg, '=');
3915 
3916  if (*arg == 0) /* -- terminates options */
3917  {
3918  i++;
3919  break; /* out of the options-handling loop */
3920  }
3921 
3922  longop = TRUE;
3923 
3924  /* Some long options have data that follows after =, for example file=name.
3925  Some options have variations in the long name spelling: specifically, we
3926  allow "regexp" because GNU grep allows it, though I personally go along
3927  with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3928  These options are entered in the table as "regex(p)". Options can be in
3929  both these categories. */
3930 
3931  for (op = optionlist; op->one_char != 0; op++)
3932  {
3933  char *opbra = strchr(op->long_name, '(');
3934  char *equals = strchr(op->long_name, '=');
3935 
3936  /* Handle options with only one spelling of the name */
3937 
3938  if (opbra == NULL) /* Does not contain '(' */
3939  {
3940  if (equals == NULL) /* Not thing=data case */
3941  {
3942  if (strcmp(arg, op->long_name) == 0) break;
3943  }
3944  else /* Special case xxx=data */
3945  {
3946  int oplen = (int)(equals - op->long_name);
3947  int arglen = (argequals == NULL)?
3948  (int)strlen(arg) : (int)(argequals - arg);
3949  if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3950  {
3951  option_data = arg + arglen;
3952  if (*option_data == '=')
3953  {
3954  option_data++;
3955  longopwasequals = TRUE;
3956  }
3957  break;
3958  }
3959  }
3960  }
3961 
3962  /* Handle options with an alternate spelling of the name */
3963 
3964  else
3965  {
3966  char buff1[24];
3967  char buff2[24];
3968  int ret;
3969 
3970  int baselen = (int)(opbra - op->long_name);
3971  int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3972  int arglen = (argequals == NULL || equals == NULL)?
3973  (int)strlen(arg) : (int)(argequals - arg);
3974 
3975  if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3976  ret < 0 || ret > (int)sizeof(buff1)) ||
3977  (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3978  fulllen - baselen - 2, opbra + 1),
3979  ret < 0 || ret > (int)sizeof(buff2)))
3980  {
3981  /* LCOV_EXCL_START - this is a "never" event */
3982  fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3983  op->long_name);
3984  pcre2grep_exit(2);
3985  /* LCOV_EXCL_STOP */
3986  }
3987 
3988  if (strncmp(arg, buff1, arglen) == 0 ||
3989  strncmp(arg, buff2, arglen) == 0)
3990  {
3991  if (equals != NULL && argequals != NULL)
3992  {
3993  option_data = argequals;
3994  if (*option_data == '=')
3995  {
3996  option_data++;
3997  longopwasequals = TRUE;
3998  }
3999  }
4000  break;
4001  }
4002  }
4003  }
4004 
4005  if (op->one_char == 0)
4006  {
4007  fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
4008  pcre2grep_exit(usage(2));
4009  }
4010  }
4011 
4012  /* One-char options; many that have no data may be in a single argument; we
4013  continue till we hit the last one or one that needs data. */
4014 
4015  else
4016  {
4017  char *s = argv[i] + 1;
4018  longop = FALSE;
4019 
4020  while (*s != 0)
4021  {
4022  for (op = optionlist; op->one_char != 0; op++)
4023  {
4024  if (*s == op->one_char) break;
4025  }
4026  if (op->one_char == 0)
4027  {
4028  fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
4029  *s, argv[i]);
4030  pcre2grep_exit(usage(2));
4031  }
4032 
4033  option_data = s+1;
4034 
4035  /* Break out if this is the last character in the string; it's handled
4036  below like a single multi-char option. */
4037 
4038  if (*option_data == 0) break;
4039 
4040  /* Check for a single-character option that has data: OP_OP_NUMBER(S)
4041  are used for ones that either have a numerical number or defaults, i.e.
4042  the data is optional. If a digit follows, there is data; if not, carry on
4043  with other single-character options in the same string. */
4044 
4045  if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
4046  {
4047  if (isdigit((unsigned char)(s[1]))) break;
4048  }
4049  else /* Check for an option with data */
4050  {
4051  if (op->type != OP_NODATA) break;
4052  }
4053 
4054  /* Handle a single-character option with no data, then loop for the
4055  next character in the string. */
4056 
4058  }
4059  }
4060 
4061  /* At this point we should have op pointing to a matched option. If the type
4062  is NO_DATA, it means that there is no data, and the option might set
4063  something in the PCRE options. */
4064 
4065  if (op->type == OP_NODATA)
4066  {
4068  continue;
4069  }
4070 
4071  /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
4072  either has a value or defaults to something. It cannot have data in a
4073  separate item. At the moment, the only such options are "colo(u)r",
4074  and "only-matching". */
4075 
4076  if (*option_data == 0 &&
4077  (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
4078  op->type == OP_OP_NUMBERS))
4079  {
4080  switch (op->one_char)
4081  {
4082  case N_COLOUR:
4083  colour_option = "auto";
4084  break;
4085 
4086  case 'o':
4089  break;
4090  }
4091  continue;
4092  }
4093 
4094  /* Otherwise, find the data string for the option. */
4095 
4096  if (*option_data == 0)
4097  {
4098  if (i >= argc - 1 || longopwasequals)
4099  {
4100  fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
4101  pcre2grep_exit(usage(2));
4102  }
4103  option_data = argv[++i];
4104  }
4105 
4106  /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
4107  added to a chain of numbers. */
4108 
4109  if (op->type == OP_OP_NUMBERS)
4110  {
4111  unsigned long int n = decode_number(option_data, op, longop);
4112  omdatastr *omd = (omdatastr *)op->dataptr;
4113  *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
4114  if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
4115  }
4116 
4117  /* If the option type is OP_PATLIST, it's the -e option, or one of the
4118  include/exclude options, which can be called multiple times to create lists
4119  of patterns. */
4120 
4121  else if (op->type == OP_PATLIST)
4122  {
4123  patdatastr *pd = (patdatastr *)op->dataptr;
4124  *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
4125  *(pd->lastptr));
4126  if (*(pd->lastptr) == NULL) goto EXIT2;
4127  if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
4128  }
4129 
4130  /* If the option type is OP_FILELIST, it's one of the options that names a
4131  file. */
4132 
4133  else if (op->type == OP_FILELIST)
4134  {
4135  fndatastr *fd = (fndatastr *)op->dataptr;
4136  fn = (fnstr *)malloc(sizeof(fnstr));
4137  if (fn == NULL)
4138  {
4139  /* LCOV_EXCL_START */
4140  fprintf(stderr, "pcre2grep: malloc failed\n");
4141  goto EXIT2;
4142  /* LCOV_EXCL_STOP */
4143  }
4144  fn->next = NULL;
4145  fn->name = option_data;
4146  if (*(fd->anchor) == NULL)
4147  *(fd->anchor) = fn;
4148  else
4149  (*(fd->lastptr))->next = fn;
4150  *(fd->lastptr) = fn;
4151  }
4152 
4153  /* Handle OP_BINARY_FILES */
4154 
4155  else if (op->type == OP_BINFILES)
4156  {
4157  if (strcmp(option_data, "binary") == 0)
4159  else if (strcmp(option_data, "without-match") == 0)
4161  else if (strcmp(option_data, "text") == 0)
4163  else
4164  {
4165  fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
4166  option_data);
4167  pcre2grep_exit(usage(2));
4168  }
4169  }
4170 
4171  /* Otherwise, deal with a single string or numeric data value. */
4172 
4173  else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
4174  op->type != OP_OP_NUMBER && op->type != OP_SIZE)
4175  {
4176  *((char **)op->dataptr) = option_data;
4177  }
4178  else
4179  {
4180  unsigned long int n = decode_number(option_data, op, longop);
4181  if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
4182  else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
4183  else *((int *)op->dataptr) = n;
4184  }
4185  }
4186 
4187 /* Options have been decoded. If -C was used, its value is used as a default
4188 for -A and -B. */
4189 
4190 if (both_context > 0)
4191  {
4194  }
4195 
4196 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4197 permitted. They display, each in their own way, only the data that has matched.
4198 */
4199 
4202 
4203 if (only_matching_count > 1)
4204  {
4205  fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4206  "--file-offsets and/or --line-offsets\n");
4207  pcre2grep_exit(usage(2));
4208  }
4209 
4210 /* Check that there is a big enough ovector for all -o settings. */
4211 
4212 for (om = only_matching; om != NULL; om = om->next)
4213  {
4214  int n = om->groupnum;
4215  if (n > (int)capture_max)
4216  {
4217  fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
4218  fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
4219  goto EXIT2;
4220  }
4221  }
4222 
4223 /* Check the text supplied to --output for errors. */
4224 
4225 if (output_text != NULL &&
4227  goto EXIT2;
4228 
4229 /* Set up default compile and match contexts and match data blocks. */
4230 
4231 offset_size = capture_max + 1;
4239 offsets = offsets_pair[0];
4240 match_data_toggle = 0;
4241 
4242 /* If string (script) callouts are supported, set up the callout processing
4243 function in the match context. */
4244 
4245 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4246 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
4247 #endif
4248 
4249 /* Put limits into the match context. */
4250 
4254 
4255 /* If a locale has not been provided as an option, see if the LC_CTYPE or
4256 LC_ALL environment variable is set, and if so, use it. */
4257 
4258 if (locale == NULL)
4259  {
4260  locale = getenv("LC_ALL");
4261  locale_from = "LC_ALL";
4262  }
4263 
4264 if (locale == NULL)
4265  {
4266  locale = getenv("LC_CTYPE");
4267  locale_from = "LC_CTYPE";
4268  }
4269 
4270 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
4271 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4272 
4273 if (locale != NULL)
4274  {
4275  if (setlocale(LC_CTYPE, locale) == NULL)
4276  {
4277  fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4278  locale, locale_from);
4279  goto EXIT2;
4280  }
4283  }
4284 
4285 /* Sort out colouring */
4286 
4287 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4288  {
4289  if (strcmp(colour_option, "always") == 0)
4290 #ifdef WIN32
4291  do_ansi = !is_stdout_tty(),
4292 #endif
4293  do_colour = TRUE;
4294  else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4295  else
4296  {
4297  fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4298  colour_option);
4299  goto EXIT2;
4300  }
4301  if (do_colour)
4302  {
4303  char *cs = getenv("PCRE2GREP_COLOUR");
4304  if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4305  if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4306  if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4307  if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4308  if (cs == NULL) cs = getenv("GREP_COLOR");
4309  if (cs != NULL)
4310  {
4311  if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4312  }
4313 #ifdef WIN32
4314  init_colour_output();
4315 #endif
4316  }
4317  }
4318 
4319 /* When colouring or otherwise identifying matching substrings, we need to find
4320 all possible matches when there are multiple patterns. */
4321 
4323 
4324 /* Sort out a newline setting. */
4325 
4326 if (newline_arg != NULL)
4327  {
4328  for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4329  endlinetype++)
4330  {
4331  if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4332  }
4333  if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4335  else
4336  {
4337  fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4338  newline_arg);
4339  goto EXIT2;
4340  }
4341  }
4342 
4343 /* Find default newline convention */
4344 
4345 else
4346  {
4348  }
4349 
4350 /* Interpret the text values for -d and -D */
4351 
4352 if (dee_option != NULL)
4353  {
4354  if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4355  else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4356  else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4357  else
4358  {
4359  fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4360  goto EXIT2;
4361  }
4362  }
4363 
4364 if (DEE_option != NULL)
4365  {
4366  if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4367  else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4368  else
4369  {
4370  fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4371  goto EXIT2;
4372  }
4373  }
4374 
4375 /* If no_ucp is set, remove PCRE2_UCP from the compile options. */
4376 
4377 if (no_ucp) pcre2_options &= ~PCRE2_UCP;
4378 
4379 /* Adjust the extra options. */
4380 
4382 if (posix_digit)
4384 
4385 /* Set the extra options in the compile context. */
4386 
4388 
4389 /* If use_jit is set, check whether JIT is available. If not, do not try
4390 to use JIT. */
4391 
4392 if (use_jit)
4393  {
4394  uint32_t answer;
4395  (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4396  if (!answer) use_jit = FALSE;
4397  }
4398 
4399 /* Get memory for the main buffer. */
4400 
4401 if (bufthird <= 0)
4402  {
4403  fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4404  goto EXIT2;
4405  }
4406 
4407 bufsize = 3*bufthird;
4408 main_buffer = (char *)malloc(bufsize);
4409 
4410 if (main_buffer == NULL)
4411  {
4412  /* LCOV_EXCL_START */
4413  fprintf(stderr, "pcre2grep: malloc failed\n");
4414  goto EXIT2;
4415  /* LCOV_EXCL_STOP */
4416  }
4417 
4418 /* If no patterns were provided by -e, and there are no files provided by -f,
4419 the first argument is the one and only pattern, and it must exist. */
4420 
4421 if (patterns == NULL && pattern_files == NULL)
4422  {
4423  if (i >= argc) return usage(2);
4424  patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4425  NULL);
4426  i++;
4427  if (patterns == NULL) goto EXIT2;
4428  }
4429 
4430 /* Compile the patterns that were provided on the command line, either by
4431 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4432 after all the command-line options are read so that we know which PCRE options
4433 to use. When -F is used, compile_pattern() may add another block into the
4434 chain, so we must not access the next pointer till after the compile. */
4435 
4436 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4437  {
4438  if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4439  (j == 1 && patterns->next == NULL)? 0 : j))
4440  goto EXIT2;
4441  }
4442 
4443 /* Read and compile the regular expressions that are provided in files. */
4444 
4445 for (fn = pattern_files; fn != NULL; fn = fn->next)
4446  {
4447  if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4448  }
4449 
4450 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4451 
4452 #ifdef SUPPORT_PCRE2GREP_JIT
4453 if (use_jit)
4454  {
4455  jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4456  if (jit_stack != NULL )
4458  }
4459 #endif
4460 
4461 /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4462 adjust the options. */
4463 
4466 
4467 /* If there are include or exclude patterns read from the command line, compile
4468 them. */
4469 
4470 for (j = 0; j < 4; j++)
4471  {
4472  int k;
4473  for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4474  {
4476  (k == 1 && cp->next == NULL)? 0 : k))
4477  goto EXIT2;
4478  }
4479  }
4480 
4481 /* Read and compile include/exclude patterns from files. */
4482 
4483 for (fn = include_from; fn != NULL; fn = fn->next)
4484  {
4486  goto EXIT2;
4487  }
4488 
4489 for (fn = exclude_from; fn != NULL; fn = fn->next)
4490  {
4492  goto EXIT2;
4493  }
4494 
4495 /* If there are no files that contain lists of files to search, and there are
4496 no file arguments, search stdin, and then exit. */
4497 
4498 if (file_lists == NULL && i >= argc)
4499  {
4500  /* Seeking on a buffered stdin is not portable, so attempt to remove
4501  the buffer to work around reported issues affecting several BSDs
4502  and AIX. */
4503  if (count_limit >= 0)
4504  setbuf(stdin, NULL);
4505  rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4507  goto EXIT;
4508  }
4509 
4510 /* If any files that contain lists of files to search have been specified,
4511 read them line by line and search the given files. */
4512 
4513 for (fn = file_lists; fn != NULL; fn = fn->next)
4514  {
4515  char buffer[FNBUFSIZ];
4516  FILE *fl;
4517  if (strcmp(fn->name, "-") == 0) fl = stdin; else
4518  {
4519  fl = fopen(fn->name, "rb");
4520  if (fl == NULL)
4521  {
4522  fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4523  strerror(errno));
4524  goto EXIT2;
4525  }
4526  }
4527  while (fgets(buffer, sizeof(buffer), fl) != NULL)
4528  {
4529  int frc;
4530  char *end = buffer + (int)strlen(buffer);
4531  while (end > buffer && isspace((unsigned char)(end[-1]))) end--;
4532  *end = 0;
4533  if (*buffer != 0)
4534  {
4536  if (frc > 1) rc = frc;
4537  else if (frc == 0 && rc == 1) rc = 0;
4538  }
4539  }
4540  if (fl != stdin) fclose(fl);
4541  }
4542 
4543 /* After handling file-list, work through remaining arguments. Pass in the fact
4544 that there is only one argument at top level - this suppresses the file name if
4545 the argument is not a directory and filenames are not otherwise forced. */
4546 
4547 only_one_at_top = i == argc - 1 && file_lists == NULL;
4548 
4549 for (; i < argc; i++)
4550  {
4551  int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4552  only_one_at_top);
4553  if (frc > 1) rc = frc;
4554  else if (frc == 0 && rc == 1) rc = 0;
4555  }
4556 
4557 /* Show the total number of matches if requested, but not if only one file's
4558 count was printed. */
4559 
4561  {
4562  if (counts_printed != 0 && filenames >= FN_DEFAULT)
4563  fprintf(stdout, "TOTAL:");
4564  fprintf(stdout, "%lu" STDOUT_NL, total_count);
4565  }
4566 
4567 EXIT:
4568 #ifdef SUPPORT_PCRE2GREP_JIT
4571 #endif
4572 
4573 free(main_buffer);
4575 
4580 
4586 
4591 
4592 while (only_matching != NULL)
4593  {
4594  omstr *this = only_matching;
4595  only_matching = this->next;
4596  free(this);
4597  }
4598 
4599 pcre2grep_exit(rc);
4600 
4601 EXIT2:
4602 rc = 2;
4603 goto EXIT;
4604 }
4605 
4606 /* End of pcre2grep */
BZFILE * BZ2_bzopen(const char *path, const char *mode)
Definition: bzlib.c:1461
int BZ2_bzread(BZFILE *b, void *buf, int len)
Definition: bzlib.c:1478
#define BZ_DATA_ERROR_MAGIC
Definition: bzlib.h:42
const char * BZ2_bzerror(BZFILE *b, int *errnum)
Definition: bzlib.c:1559
void BZ2_bzclose(BZFILE *b)
Definition: bzlib.c:1514
void BZFILE
Definition: bzlib.h:137
static uch flags
static const char ip[]
Definition: des.c:75
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
#define strcat(s, k)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
int BOOL
Definition: sybdb.h:150
static FILE * f
Definition: readconf.c:23
char data[12]
Definition: iconv.c:80
unsigned char uint8_t
Uint4 uint32_t
string
Definition: cgiapp.hpp:690
#define NULL
Definition: ncbistd.hpp:225
#define PATH_MAX
Definition: ncbifile.hpp:106
#define INVALID_HANDLE_VALUE
A value for an invalid file handle.
Definition: mdb.c:389
#define HANDLE
An abstraction for a file handle.
Definition: mdb.c:383
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
exit(2)
char * buf
int i
yy_size_t n
int len
static const CS_INT unused
Definition: long_binary.c:20
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
int strncmp(const char *str1, const char *str2, size_t count)
Definition: odbc_utils.hpp:133
int strcmp(const char *str1, const char *str2)
Definition: odbc_utils.hpp:160
#define fseek
EIPRangeType t
Definition: ncbi_localip.c:101
int isspace(Uchar c)
Definition: ncbictype.hpp:69
int tolower(Uchar c)
Definition: ncbictype.hpp:72
int isxdigit(Uchar c)
Definition: ncbictype.hpp:71
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
int toupper(Uchar c)
Definition: ncbictype.hpp:73
T max(T x_, T y_)
std::istream & in(std::istream &in_, double &x_)
#define PCRE2_LITERAL
Definition: pcre2.h:150
#define PCRE2_CONFIG_NEWLINE
Definition: pcre2.h:456
#define PCRE2_ERROR_HEAPLIMIT
Definition: pcre2.h:411
#define PCRE2_UNSET
Definition: pcre2.h:488
#define PCRE2_ERROR_NOMATCH
Definition: pcre2.h:333
#define PCRE2_UCP
Definition: pcre2.h:142
#define PCRE2_UCHAR
Definition: pcre2.h:825
#define PCRE2_ERROR_UTF8_ERR21
Definition: pcre2.h:358
#define pcre2_jit_stack
Definition: pcre2.h:830
#define pcre2_code
Definition: pcre2.h:828
#define PCRE2_MATCH_INVALID_UTF
Definition: pcre2.h:151
#define PCRE2_NEWLINE_ANYCRLF
Definition: pcre2.h:220
#define PCRE2_ERROR_UTF8_ERR1
Definition: pcre2.h:338
#define PCRE2_NEWLINE_CR
Definition: pcre2.h:216
#define PCRE2_EXTRA_ASCII_BSD
Definition: pcre2.h:163
#define PCRE2_SIZE
Definition: pcre2.h:485
#define PCRE2_SPTR
Definition: pcre2.h:826
#define PCRE2_CASELESS
Definition: pcre2.h:128
#define pcre2_match_data
Definition: pcre2.h:850
#define pcre2_match_context
Definition: pcre2.h:849
#define PCRE2_ERROR_DEPTHLIMIT
Definition: pcre2.h:400
#define PCRE2_EXTRA_MATCH_WORD
Definition: pcre2.h:157
#define PCRE2_ERROR_JIT_STACKLIMIT
Definition: pcre2.h:393
#define PCRE2_EXTRA_CASELESS_RESTRICT
Definition: pcre2.h:162
#define PCRE2_ERROR_MATCHLIMIT
Definition: pcre2.h:394
#define PCRE2_EXTRA_MATCH_LINE
Definition: pcre2.h:158
#define PCRE2_JIT_COMPLETE
Definition: pcre2.h:171
#define PCRE2_NEWLINE_CRLF
Definition: pcre2.h:218
#define PCRE2_MULTILINE
Definition: pcre2.h:135
#define PCRE2_EXTRA_ASCII_DIGIT
Definition: pcre2.h:167
#define PCRE2_UTF
Definition: pcre2.h:144
#define PCRE2_CONFIG_JIT
Definition: pcre2.h:452
#define PCRE2_NEWLINE_NUL
Definition: pcre2.h:221
#define pcre2_callout_block
Definition: pcre2.h:843
#define PCRE2_FIRSTLINE
Definition: pcre2.h:133
#define PCRE2_NOTEMPTY
Definition: pcre2.h:184
#define PCRE2_NEWLINE_ANY
Definition: pcre2.h:219
#define PCRE2_NEWLINE_LF
Definition: pcre2.h:217
#define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
Definition: pcre2.h:161
#define pcre2_compile_context
Definition: pcre2.h:847
#define PCRE2_CONFIG_VERSION
Definition: pcre2.h:463
PCRE2_EXPORT pcre2_code *PCRE2_CALL_CONVENTION pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options, int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
PCRE2_EXPORT void PCRE2_CALL_CONVENTION pcre2_code_free(pcre2_code *code)
PCRE2_EXPORT int PCRE2_CALL_CONVENTION pcre2_config(uint32_t what, void *where)
Definition: pcre2_config.c:78
PCRE2_EXPORT int PCRE2_CALL_CONVENTION pcre2_set_depth_limit(pcre2_match_context *mcontext, uint32_t limit)
PCRE2_EXPORT void PCRE2_CALL_CONVENTION pcre2_match_context_free(pcre2_match_context *mcontext)
PCRE2_EXPORT pcre2_match_context *PCRE2_CALL_CONVENTION pcre2_match_context_create(pcre2_general_context *gcontext)
PCRE2_EXPORT int PCRE2_CALL_CONVENTION pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
PCRE2_EXPORT pcre2_compile_context *PCRE2_CALL_CONVENTION pcre2_compile_context_create(pcre2_general_context *gcontext)
PCRE2_EXPORT int PCRE2_CALL_CONVENTION pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
PCRE2_EXPORT int PCRE2_CALL_CONVENTION pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
PCRE2_EXPORT void PCRE2_CALL_CONVENTION pcre2_compile_context_free(pcre2_compile_context *ccontext)
PCRE2_EXPORT int PCRE2_CALL_CONVENTION pcre2_set_callout(pcre2_match_context *mcontext, int(*callout)(pcre2_callout_block *, void *), void *callout_data)
PCRE2_EXPORT int PCRE2_CALL_CONVENTION pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, uint32_t options)
PCRE2_EXPORT int PCRE2_CALL_CONVENTION pcre2_set_character_tables(pcre2_compile_context *ccontext, const uint8_t *tables)
PCRE2_EXPORT int PCRE2_CALL_CONVENTION pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size)
Definition: pcre2_error.c:301
PCRE2_EXPORT int PCRE2_CALL_CONVENTION pcre2_jit_compile(pcre2_code *code, uint32_t options)
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION pcre2_jit_stack_free(pcre2_jit_stack *jit_stack)
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION pcre2_jit_stack_assign(pcre2_match_context *mcontext, pcre2_jit_callback callback, void *callback_data)
PCRE2_EXP_DEFN pcre2_jit_stack *PCRE2_CALL_CONVENTION pcre2_jit_stack_create(size_t startsize, size_t maxsize, pcre2_general_context *gcontext)
PCRE2_EXPORT void PCRE2_CALL_CONVENTION pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
PCRE2_EXPORT const uint8_t *PCRE2_CALL_CONVENTION pcre2_maketables(pcre2_general_context *gcontext)
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
Definition: pcre2_match.c:594
PCRE2_EXPORT int PCRE2_CALL_CONVENTION pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, pcre2_match_context *mcontext)
Definition: pcre2_match.c:6530
PCRE2_EXPORT pcre2_match_data *PCRE2_CALL_CONVENTION pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
PCRE2_EXPORT PCRE2_SIZE *PCRE2_CALL_CONVENTION pcre2_get_ovector_pointer(pcre2_match_data *match_data)
PCRE2_EXPORT void PCRE2_CALL_CONVENTION pcre2_match_data_free(pcre2_match_data *match_data)
PCRE2_EXPORT PCRE2_SIZE PCRE2_CALL_CONVENTION pcre2_get_startchar(pcre2_match_data *match_data)
struct omstr omstr
static int before_context
Definition: pcre2grep.c:226
static patstr * exclude_dir_patterns
Definition: pcre2grep.c:364
#define STDOUT_NL_CODE
Definition: pcre2grep.c:200
int BOOL
Definition: pcre2grep.c:130
#define N_GROUP_SEPARATOR
Definition: pcre2grep.c:429
static BOOL syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
Definition: pcre2grep.c:2159
static BOOL number
Definition: pcre2grep.c:285
static int after_context
Definition: pcre2grep.c:225
static patstr * include_dir_patterns_last
Definition: pcre2grep.c:363
#define N_H_LIMIT
Definition: pcre2grep.c:415
#define N_FILE_LIST
Definition: pcre2grep.c:420
#define SIZ_FORM
Definition: pcre2grep.c:121
static BOOL is_file_tty(FILE *f)
Definition: pcre2grep.c:1145
const int utf8_table1_size
Definition: pcre2grep.c:515
static PCRE2_SIZE read_one_line(char *buffer, PCRE2_SIZE length, FILE *f)
Definition: pcre2grep.c:1439
static uint32_t depth_limit
Definition: pcre2grep.c:261
const char utf8_table4[]
Definition: pcre2grep.c:520
static BOOL is_stdout_tty(void)
Definition: pcre2grep.c:1139
static const char * stdin_name
Definition: pcre2grep.c:216
static int ord2utf8(uint32_t value)
Definition: pcre2grep.c:573
#define STDOUT_NL
Definition: pcre2grep.c:191
static patstr * exclude_patterns_last
Definition: pcre2grep.c:361
static BOOL invert
Definition: pcre2grep.c:280
directory_type * opendirectory(char *filename)
Definition: pcre2grep.c:1124
struct omdatastr omdatastr
struct fndatastr fndatastr
static BOOL file_offsets
Definition: pcre2grep.c:278
struct fnstr fnstr
const int utf8_table3[]
Definition: pcre2grep.c:518
static char * main_buffer
Definition: pcre2grep.c:219
static fnstr * include_from_last
Definition: pcre2grep.c:327
#define MAXPATLEN
Definition: pcre2grep.c:137
static patstr * include_patterns
Definition: pcre2grep.c:358
static int printname_colon
Definition: pcre2grep.c:222
static int filenames
Definition: pcre2grep.c:247
static omdatastr only_matching_data
Definition: pcre2grep.c:315
static BOOL case_restrict
Definition: pcre2grep.c:272
static BOOL omit_zero_count
Definition: pcre2grep.c:286
static fnstr * file_lists
Definition: pcre2grep.c:329
static BOOL quiet
Definition: pcre2grep.c:288
static int printname_hyphen
Definition: pcre2grep.c:223
static fndatastr exclude_from_data
Definition: pcre2grep.c:341
static const char * locale
Definition: pcre2grep.c:212
static patstr * add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
Definition: pcre2grep.c:692
int main(int argc, char **argv)
Definition: pcre2grep.c:3865
static BOOL use_jit
Definition: pcre2grep.c:252
#define N_OM_CAPTURE
Definition: pcre2grep.c:426
struct patstr patstr
static void pcre2grep_exit(int rc)
Definition: pcre2grep.c:649
static const char * dee_option
Definition: pcre2grep.c:210
static void free_pattern_chain(patstr *pc)
Definition: pcre2grep.c:738
static patstr * exclude_dir_patterns_last
Definition: pcre2grep.c:365
static fnstr * pattern_files
Definition: pcre2grep.c:331
#define N_LBUFFER
Definition: pcre2grep.c:414
static BOOL compile_pattern(patstr *p, int options, int fromfile, const char *fromtext, int count)
Definition: pcre2grep.c:3707
char * sys_errlist[]
static omstr * only_matching_last
Definition: pcre2grep.c:305
static const char * group_separator
Definition: pcre2grep.c:214
static fndatastr file_lists_data
Definition: pcre2grep.c:343
static int DEE_action
Definition: pcre2grep.c:245
static char * ordin(int n)
Definition: pcre2grep.c:3662
#define N_COLOUR
Definition: pcre2grep.c:403
#define QUOT
Definition: pcre2grep.c:193
static int handle_option(int letter, int options)
Definition: pcre2grep.c:3590
static int only_matching_count
Definition: pcre2grep.c:306
@ BIN_BINARY
Definition: pcre2grep.c:166
@ BIN_TEXT
Definition: pcre2grep.c:166
@ BIN_NOMATCH
Definition: pcre2grep.c:166
static uint32_t match_limit
Definition: pcre2grep.c:260
static uint32_t offset_size
Definition: pcre2grep.c:268
int sys_nerr
static PCRE2_SIZE max_bufthird
Definition: pcre2grep.c:236
#define N_EXCLUDE_FROM
Definition: pcre2grep.c:422
#define N_INCLUDE_DIR
Definition: pcre2grep.c:408
static unsigned long int counts_printed
Definition: pcre2grep.c:232
char * strerror(int n)
Definition: pcre2grep.c:1177
static uint32_t capture_max
Definition: pcre2grep.c:269
static int match_data_toggle
Definition: pcre2grep.c:267
#define N_INCLUDE_FROM
Definition: pcre2grep.c:423
void closedirectory(directory_type *dir)
Definition: pcre2grep.c:1126
#define N_MAX_BUFSIZE
Definition: pcre2grep.c:425
static patstr * include_dir_patterns
Definition: pcre2grep.c:362
static PCRE2_SIZE heap_limit
Definition: pcre2grep.c:259
static BOOL multiline
Definition: pcre2grep.c:283
#define N_LOCALE
Definition: pcre2grep.c:410
static patdatastr match_patdata
Definition: pcre2grep.c:375
#define N_M_LIMIT
Definition: pcre2grep.c:416
static BOOL do_colour
Definition: pcre2grep.c:274
#define N_INCLUDE
Definition: pcre2grep.c:407
static unsigned long int total_count
Definition: pcre2grep.c:233
#define STDOUT_NL_LEN
Definition: pcre2grep.c:192
static void do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart, char *endptr, const char *printname)
Definition: pcre2grep.c:1792
#define FILESEP
Definition: pcre2grep.c:1120
static PCRE2_SIZE bufsize
Definition: pcre2grep.c:237
const int utf8_table2[]
Definition: pcre2grep.c:517
@ DDE_CAPTURE
Definition: pcre2grep.c:170
@ DDE_ERROR
Definition: pcre2grep.c:170
@ DDE_CHAR
Definition: pcre2grep.c:170
static BOOL hyphenpending
Definition: pcre2grep.c:279
static PCRE2_SIZE * offsets_pair[2]
Definition: pcre2grep.c:266
static fndatastr pattern_files_data
Definition: pcre2grep.c:344
static uint32_t extra_options
Definition: pcre2grep.c:258
static fnstr * file_lists_last
Definition: pcre2grep.c:330
static BOOL silent
Definition: pcre2grep.c:290
static patstr * patterns
Definition: pcre2grep.c:356
static BOOL display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject, PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
Definition: pcre2grep.c:2194
static void * emulated_memmove(void *d, const void *s, size_t n)
Definition: pcre2grep.c:537
int isregfile(char *filename)
Definition: pcre2grep.c:1133
#define N_M_LIMIT_DEP
Definition: pcre2grep.c:417
static int count_limit
Definition: pcre2grep.c:231
static BOOL line_buffered
Definition: pcre2grep.c:281
struct patdatastr patdatastr
#define N_ALLABSK
Definition: pcre2grep.c:427
static BOOL posix_digit
Definition: pcre2grep.c:292
void directory_type
Definition: pcre2grep.c:1121
#define N_EXCLUDE
Definition: pcre2grep.c:404
static pcre2_match_context * match_context
Definition: pcre2grep.c:264
static BOOL line_offsets
Definition: pcre2grep.c:282
static omstr * only_matching
Definition: pcre2grep.c:304
static BOOL resource_error
Definition: pcre2grep.c:287
static const char * printname_nl
Definition: pcre2grep.c:221
#define N_FOFFSETS
Definition: pcre2grep.c:413
static int strcmpic(const char *str1, const char *str2)
Definition: pcre2grep.c:596
static uint8_t utf8_buffer[8]
Definition: pcre2grep.c:294
#define DEFAULT_CAPTURE_MAX
Definition: pcre2grep.c:132
static BOOL test_incexc(char *path, patstr *ip, patstr *ep)
Definition: pcre2grep.c:1296
static int pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
Definition: pcre2grep.c:2591
static patdatastr include_patdata
Definition: pcre2grep.c:376
static const uint8_t * character_tables
Definition: pcre2grep.c:255
static fnstr * include_from
Definition: pcre2grep.c:326
#define TRUE
Definition: pcre2grep.c:128
#define FALSE
Definition: pcre2grep.c:127
static patdatastr exclude_patdata
Definition: pcre2grep.c:377
static void print_match(const void *buf, int length)
Definition: pcre2grep.c:1154
static void write_final_newline(void)
Definition: pcre2grep.c:1748
#define N_EXCLUDE_DIR
Definition: pcre2grep.c:405
struct option_item option_item
@ FR_PLAIN
Definition: pcre2grep.c:151
@ FR_LIBBZ2
Definition: pcre2grep.c:151
@ FR_LIBZ
Definition: pcre2grep.c:151
#define FNBUFSIZ
Definition: pcre2grep.c:140
static BOOL read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
Definition: pcre2grep.c:3787
static PCRE2_SIZE * offsets
Definition: pcre2grep.c:266
static int grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
Definition: pcre2grep.c:3259
static const char * output_text
Definition: pcre2grep.c:217
static patstr ** incexlist[4]
Definition: pcre2grep.c:381
static BOOL utf
Definition: pcre2grep.c:291
static const char * incexname[4]
Definition: pcre2grep.c:384
#define N_POSIX_DIGIT
Definition: pcre2grep.c:428
#define N_HELP
Definition: pcre2grep.c:406
static int both_context
Definition: pcre2grep.c:228
static int error_count
Definition: pcre2grep.c:246
static const char * DEE_option
Definition: pcre2grep.c:211
static const char * newlines[]
Definition: pcre2grep.c:508
static void help(void)
Definition: pcre2grep.c:1212
#define memmove(d, s, n)
Definition: pcre2grep.c:561
@ OP_OP_NUMBER
Definition: pcre2grep.c:390
@ OP_OP_STRING
Definition: pcre2grep.c:389
@ OP_SIZE
Definition: pcre2grep.c:389
@ OP_U32NUMBER
Definition: pcre2grep.c:389
@ OP_NODATA
Definition: pcre2grep.c:389
@ OP_BINFILES
Definition: pcre2grep.c:390
@ OP_PATLIST
Definition: pcre2grep.c:390
@ OP_OP_NUMBERS
Definition: pcre2grep.c:390
@ OP_NUMBER
Definition: pcre2grep.c:389
@ OP_STRING
Definition: pcre2grep.c:389
@ OP_FILELIST
Definition: pcre2grep.c:390
#define N_LOFFSETS
Definition: pcre2grep.c:412
#define N_NULL
Definition: pcre2grep.c:411
static char * previous_line(char *p, char *startptr)
Definition: pcre2grep.c:1641
static PCRE2_SIZE fill_buffer(void *handle, int frtype, char *buffer, PCRE2_SIZE length, BOOL input_line_buffered)
Definition: pcre2grep.c:2539
static pcre2_match_data * match_data
Definition: pcre2grep.c:265
static int usage(int rc)
Definition: pcre2grep.c:1191
static fnstr * pattern_files_last
Definition: pcre2grep.c:332
static BOOL count_only
Definition: pcre2grep.c:273
static patstr * exclude_patterns
Definition: pcre2grep.c:360
#define N_NO_GROUP_SEPARATOR
Definition: pcre2grep.c:430
static fnstr * exclude_from_last
Definition: pcre2grep.c:325
static omstr * add_number(int n, omstr *after)
Definition: pcre2grep.c:1390
static char * end_of_line(char *p, char *endptr, int *lenptr)
Definition: pcre2grep.c:1470
static const char * om_separator
Definition: pcre2grep.c:215
static int endlinetype
Definition: pcre2grep.c:229
static void free_file_chain(fnstr *fn)
Definition: pcre2grep.c:760
static patstr * patterns_last
Definition: pcre2grep.c:357
static const char * newline_arg
Definition: pcre2grep.c:213
#define N_BUFSIZE
Definition: pcre2grep.c:418
static patstr * include_patterns_last
Definition: pcre2grep.c:359
#define N_LABEL
Definition: pcre2grep.c:409
@ DEE_READ
Definition: pcre2grep.c:156
@ DEE_SKIP
Definition: pcre2grep.c:156
static fnstr * exclude_from
Definition: pcre2grep.c:324
#define N_BINARY_FILES
Definition: pcre2grep.c:421
static uint32_t pcre2_options
Definition: pcre2grep.c:257
static fndatastr include_from_data
Definition: pcre2grep.c:342
static int binary_files
Definition: pcre2grep.c:227
@ FN_FORCE
Definition: pcre2grep.c:147
@ FN_DEFAULT
Definition: pcre2grep.c:147
@ FN_NOMATCH_ONLY
Definition: pcre2grep.c:147
@ FN_NONE
Definition: pcre2grep.c:147
@ FN_MATCH_ONLY
Definition: pcre2grep.c:147
int isdirectory(char *filename)
Definition: pcre2grep.c:1123
static PCRE2_SIZE bufthird
Definition: pcre2grep.c:235
static patdatastr exclude_dir_patdata
Definition: pcre2grep.c:379
static const char * colour_option
Definition: pcre2grep.c:209
char * readdirectory(directory_type *dir)
Definition: pcre2grep.c:1125
const int utf8_table1[]
Definition: pcre2grep.c:513
static char * parse_grep_colors(const char *gc)
Definition: pcre2grep.c:620
#define FWRITE_IGNORE(a, b, c, d)
Definition: pcre2grep.c:178
#define ERRBUFSIZ
Definition: pcre2grep.c:141
static int dee_action
Definition: pcre2grep.c:242
static BOOL no_ucp
Definition: pcre2grep.c:284
@ dee_SKIP
Definition: pcre2grep.c:155
@ dee_READ
Definition: pcre2grep.c:155
@ dee_RECURSE
Definition: pcre2grep.c:155
static BOOL match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options, PCRE2_SIZE startoffset, int *mrc)
Definition: pcre2grep.c:1847
static long int decode_number(char *option_data, option_item *op, BOOL longop)
Definition: pcre2grep.c:1336
static pcre2_match_data * match_data_pair[2]
Definition: pcre2grep.c:265
static patdatastr include_dir_patdata
Definition: pcre2grep.c:378
static pcre2_compile_context * compile_context
Definition: pcre2grep.c:263
static int decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout, uint32_t *value, PCRE2_SPTR *last)
Definition: pcre2grep.c:1976
#define N_OM_SEPARATOR
Definition: pcre2grep.c:424
#define N_NOJIT
Definition: pcre2grep.c:419
static BOOL show_total_count
Definition: pcre2grep.c:289
static const char * colour_string
Definition: pcre2grep.c:208
static BOOL all_matches
Definition: pcre2grep.c:271
static option_item optionlist[]
Definition: pcre2grep.c:432
#define count
static PCRE2_JIT_STACK * jit_stack
Definition: pcre2test.c:954
static uint8_t * buffer
Definition: pcre2test.c:1016
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
unsigned short WORD
Definition: sqltypes.h:94
unsigned int DWORD
Definition: sqltypes.h:98
#define PCRE2GREP_BUFSIZE
Definition: config.h:197
#define PCRE2GREP_MAX_BUFSIZE
Definition: config.h:205
CRef< objects::CObjectManager > om
static string subject
Definition: inftrees.h:24
fnstr ** anchor
Definition: pcre2grep.c:337
fnstr ** lastptr
Definition: pcre2grep.c:338
struct fnstr * next
Definition: pcre2grep.c:320
char * name
Definition: pcre2grep.c:321
omstr ** anchor
Definition: pcre2grep.c:311
omstr ** lastptr
Definition: pcre2grep.c:312
int groupnum
Definition: pcre2grep.c:301
struct omstr * next
Definition: pcre2grep.c:300
int one_char
Definition: pcre2grep.c:394
const char * long_name
Definition: pcre2grep.c:396
const char * help_text
Definition: pcre2grep.c:397
void * dataptr
Definition: pcre2grep.c:395
patstr ** anchor
Definition: pcre2grep.c:371
patstr ** lastptr
Definition: pcre2grep.c:372
char * string
Definition: pcre2grep.c:351
pcre2_code * compiled
Definition: pcre2grep.c:353
PCRE2_SIZE length
Definition: pcre2grep.c:352
struct patstr * next
Definition: pcre2grep.c:350
else result
Definition: token2.c:20
static Uint4 letter(char c)
#define SEEK_SET
Definition: zconf.h:500
#define gzclose
Definition: zconf_cf.h:68
#define gzopen
Definition: zconf_cf.h:81
#define gzread
Definition: zconf_cf.h:90
void free(voidpf ptr)
voidp malloc(uInt size)
Modified on Fri Sep 20 14:57:27 2024 by modify_doxy.py rev. 669887